"""
Implementation of JSONEncoder
"""
import re
try:
    from simplejson import _speedups
except ImportError:
    _speedups = None

# Characters that must be escaped inside a JSON string when non-ASCII output
# is allowed: every control character U+0000..U+001F, the backslash and the
# double quote.  (The range previously stopped at \x19, which let the control
# characters 0x1a-0x1f through unescaped.)
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters escaped in ASCII-only output: backslash, double quote, forward
# slash, and anything outside the printable ASCII range (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
ESCAPE_DCT = {
    # escape all forward slashes to prevent </script> attack
    '/': '\\/',
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character gets a generic four-hex-digit escape;
# setdefault keeps the short forms registered above.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))

# assume this produces an infinity on all machines (probably not guaranteed)
INFINITY = float('1e66666')


def floatstr(o, allow_nan=True):
    """
    Return the text used to represent the float ``o`` in JSON output.

    Finite values are rendered with ``repr()``.  NaN and the two infinities
    are rendered as ``NaN``, ``Infinity`` and ``-Infinity`` when allow_nan
    is true; otherwise a ValueError is raised for them.
    """
    # Check for specials. Note that this type of test is processor- and/or
    # platform-specific, so do tests which don't depend on the internals.

    if o != o:
        # NaN is the only float that compares unequal to itself.
        text = 'NaN'
    elif o == INFINITY:
        text = 'Infinity'
    elif o == -INFINITY:
        text = '-Infinity'
    else:
        return repr(o)

    if not allow_nan:
        raise ValueError("Out of range float values are not JSON compliant: %r"
            % (o,))

    return text


def encode_basestring(s):
    """
    Return a JSON representation of a Python string

    Only the characters matched by ESCAPE are replaced; everything else,
    including non-ASCII characters, is passed through unchanged.
    """
    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


def encode_basestring_ascii(s):
    """
    Return an ASCII-only JSON representation of a Python string

    Characters matched by ESCAPE_ASCII are replaced by their short escape
    from ESCAPE_DCT when one exists, and by a hexadecimal unicode escape
    otherwise.
    """
    def replace(match):
        s = match.group(0)
        try:
            return ESCAPE_DCT[s]
        except KeyError:
            n = ord(s)
            if n < 0x10000:
                return '\\u%04x' % (n,)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u%04x\\u%04x' % (s1, s2)
    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'


# Prefer the C implementation when the speedups extension was imported.
# When _speedups is None the attribute lookup raises AttributeError and the
# pure Python function above stays in place.
# NOTE(review): _need_utf8 makes the encoder skip decoding of UTF-8 byte
# strings; presumably the C function decodes them itself -- confirm against
# the _speedups source.
try:
    encode_basestring_ascii = _speedups.encode_basestring_ascii
    _need_utf8 = True
except AttributeError:
    _need_utf8 = False


class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation
    (to raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8'):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, unicode, int, long, float,
        bool or None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # NOTE: the nesting depth lives on the instance and is adjusted while
        # the generators below run, so it is only restored when an encode
        # runs to completion.
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        self.encoding = encoding

    def _newline_indent(self):
        """
        Return a newline followed by the indentation for the current depth.
        """
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the JSON array representation of ``lst`` chunk by chunk.

        ``markers`` is the dict of container ids currently being encoded,
        or None when circular reference checking is disabled.  A ValueError
        is raised if ``lst`` is already present in it.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            # Done with this container; the same object may legitimately
            # appear again elsewhere in the structure.
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the JSON object representation of ``dct`` chunk by chunk.

        Keys that are not strings are converted: floats through floatstr(),
        True/False/None to 'true'/'false'/'null', and int/long through
        str().  Any other key raises TypeError unless skipkeys is set, in
        which case the item is skipped.  ``markers`` has the same meaning as
        in _iterencode_list.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            items = [(k, dct[k]) for k in sorted(dct)]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        _do_decode = (_encoding is not None
            and not (_need_utf8 and _encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            # The singletons must be tested before int/long: bool is a
            # subclass of int, so the isinstance test would otherwise turn
            # True and False into 'True' and 'False'.
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, (int, long)):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the JSON representation of ``o`` chunk by chunk.

        Dispatches on the type of ``o``; objects of any other type are
        passed through ``default()`` and the result is encoded instead.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_need_utf8 and _encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            # Custom objects take part in circular reference detection too,
            # since default() may hand back something that contains ``o``.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """
        Convert ``o`` with ``default()`` and return an iterator encoding it.
        """
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # This is for extremely simple cases and benchmarks...
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8' and _need_utf8)):
                    o = o.decode(_encoding)
            # Honour ensure_ascii here as well, so that a bare string is
            # encoded exactly as iterencode() would encode it.
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because it
        # sucks at reporting exceptions.  It's going to do this internally
        # anyway because it uses PySequence_Fast or similar.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)

__all__ = ['JSONEncoder']