• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Implementation of JSONEncoder
2"""
3import re
4
5try:
6    from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8    c_encode_basestring_ascii = None
9try:
10    from _json import make_encoder as c_make_encoder
11except ImportError:
12    c_make_encoder = None
13
# Characters that must be escaped in a JSON string: control characters,
# the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same, but additionally matches everything outside printable ASCII.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects bytes with the high bit set in a byte string.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Replacement text for each escaped character.  The short two-character
# escapes are listed explicitly; the loop below fills in a \uXXXX escape
# for every remaining control character.
ESCAPE_DCT = {
    '"': '\\"',
    '\\': '\\\\',
    '\t': '\\t',
    '\n': '\\n',
    '\r': '\\r',
    '\b': '\\b',
    '\f': '\\f',
}
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u{0:04x}'.format(i)

FLOAT_REPR = float.__repr__
INFINITY = float('inf')
32
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    Each character matched by ``ESCAPE`` is replaced with its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.

    """
    escaped = ESCAPE.sub(lambda m: ESCAPE_DCT[m.group(0)], s)
    return '"' + escaped + '"'
40
41
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    A ``str`` containing bytes matched by ``HAS_UTF8`` is first decoded
    as UTF-8.  Characters with an entry in ``ESCAPE_DCT`` use that entry;
    any other matched character becomes a ``\\uXXXX`` escape, or a
    surrogate pair of two such escapes when its code point is 0x10000 or
    above.

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        code = ord(char)
        if code >= 0x10000:
            # Outside the BMP: split into a UTF-16 surrogate pair.
            offset = code - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)
        return '\\u{0:04x}'.format(code)

    body = ESCAPE_ASCII.sub(replace, s)
    return '"' + str(body) + '"'
65
66
# Use the C implementation when it was importable, otherwise fall back to
# the pure-Python one.
encode_basestring_ascii = (
    c_encode_basestring_ascii
    if c_encode_basestring_ascii
    else py_encode_basestring_ascii)
69
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when the
    # ``separators`` argument is given.
    item_separator = ', '
    key_separator = ': '
    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If *ensure_ascii* is true (the default), all non-ASCII
        characters in the output are escaped with \\uXXXX sequences,
        and the results are str instances consisting of ASCII
        characters only.  If ensure_ascii is False, a result may be a
        unicode instance.  This usually happens if the input contains
        unicode strings or the *encoding* parameter is used.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.  Since the default
        item separator is ', ',  the output might include trailing
        whitespace when indent is specified.  You can use
        separators=(',', ': ') to avoid this.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method with the supplied callable on
            # this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by ``encode()``; when it is
        true, the C encoder is available, ``indent`` is None and
        ``sort_keys`` is false, the C encoder is used instead of the
        pure-Python generator.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the encoder with a decode step when an explicit,
        # non-UTF-8 encoding was requested.  ``encoding=None`` means "do not
        # decode" (see __init__ and encode()); without the None guard the
        # wrapper would call ``o.decode(None)`` and raise TypeError for any
        # byte string nested inside a container.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                # NaN is the only float that compares unequal to itself.
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text


        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
271
272def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
273        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
274        ## HACK: hand-optimized bytecode; turn globals into locals
275        ValueError=ValueError,
276        basestring=basestring,
277        dict=dict,
278        float=float,
279        id=id,
280        int=int,
281        isinstance=isinstance,
282        list=list,
283        long=long,
284        str=str,
285        tuple=tuple,
286    ):
287
288    def _iterencode_list(lst, _current_indent_level):
289        if not lst:
290            yield '[]'
291            return
292        if markers is not None:
293            markerid = id(lst)
294            if markerid in markers:
295                raise ValueError("Circular reference detected")
296            markers[markerid] = lst
297        buf = '['
298        if _indent is not None:
299            _current_indent_level += 1
300            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
301            separator = _item_separator + newline_indent
302            buf += newline_indent
303        else:
304            newline_indent = None
305            separator = _item_separator
306        first = True
307        for value in lst:
308            if first:
309                first = False
310            else:
311                buf = separator
312            if isinstance(value, basestring):
313                yield buf + _encoder(value)
314            elif value is None:
315                yield buf + 'null'
316            elif value is True:
317                yield buf + 'true'
318            elif value is False:
319                yield buf + 'false'
320            elif isinstance(value, (int, long)):
321                yield buf + str(value)
322            elif isinstance(value, float):
323                yield buf + _floatstr(value)
324            else:
325                yield buf
326                if isinstance(value, (list, tuple)):
327                    chunks = _iterencode_list(value, _current_indent_level)
328                elif isinstance(value, dict):
329                    chunks = _iterencode_dict(value, _current_indent_level)
330                else:
331                    chunks = _iterencode(value, _current_indent_level)
332                for chunk in chunks:
333                    yield chunk
334        if newline_indent is not None:
335            _current_indent_level -= 1
336            yield '\n' + (' ' * (_indent * _current_indent_level))
337        yield ']'
338        if markers is not None:
339            del markers[markerid]
340
341    def _iterencode_dict(dct, _current_indent_level):
342        if not dct:
343            yield '{}'
344            return
345        if markers is not None:
346            markerid = id(dct)
347            if markerid in markers:
348                raise ValueError("Circular reference detected")
349            markers[markerid] = dct
350        yield '{'
351        if _indent is not None:
352            _current_indent_level += 1
353            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
354            item_separator = _item_separator + newline_indent
355            yield newline_indent
356        else:
357            newline_indent = None
358            item_separator = _item_separator
359        first = True
360        if _sort_keys:
361            items = sorted(dct.items(), key=lambda kv: kv[0])
362        else:
363            items = dct.iteritems()
364        for key, value in items:
365            if isinstance(key, basestring):
366                pass
367            # JavaScript is weakly typed for these, so it makes sense to
368            # also allow them.  Many encoders seem to do something like this.
369            elif isinstance(key, float):
370                key = _floatstr(key)
371            elif key is True:
372                key = 'true'
373            elif key is False:
374                key = 'false'
375            elif key is None:
376                key = 'null'
377            elif isinstance(key, (int, long)):
378                key = str(key)
379            elif _skipkeys:
380                continue
381            else:
382                raise TypeError("key " + repr(key) + " is not a string")
383            if first:
384                first = False
385            else:
386                yield item_separator
387            yield _encoder(key)
388            yield _key_separator
389            if isinstance(value, basestring):
390                yield _encoder(value)
391            elif value is None:
392                yield 'null'
393            elif value is True:
394                yield 'true'
395            elif value is False:
396                yield 'false'
397            elif isinstance(value, (int, long)):
398                yield str(value)
399            elif isinstance(value, float):
400                yield _floatstr(value)
401            else:
402                if isinstance(value, (list, tuple)):
403                    chunks = _iterencode_list(value, _current_indent_level)
404                elif isinstance(value, dict):
405                    chunks = _iterencode_dict(value, _current_indent_level)
406                else:
407                    chunks = _iterencode(value, _current_indent_level)
408                for chunk in chunks:
409                    yield chunk
410        if newline_indent is not None:
411            _current_indent_level -= 1
412            yield '\n' + (' ' * (_indent * _current_indent_level))
413        yield '}'
414        if markers is not None:
415            del markers[markerid]
416
417    def _iterencode(o, _current_indent_level):
418        if isinstance(o, basestring):
419            yield _encoder(o)
420        elif o is None:
421            yield 'null'
422        elif o is True:
423            yield 'true'
424        elif o is False:
425            yield 'false'
426        elif isinstance(o, (int, long)):
427            yield str(o)
428        elif isinstance(o, float):
429            yield _floatstr(o)
430        elif isinstance(o, (list, tuple)):
431            for chunk in _iterencode_list(o, _current_indent_level):
432                yield chunk
433        elif isinstance(o, dict):
434            for chunk in _iterencode_dict(o, _current_indent_level):
435                yield chunk
436        else:
437            if markers is not None:
438                markerid = id(o)
439                if markerid in markers:
440                    raise ValueError("Circular reference detected")
441                markers[markerid] = o
442            o = _default(o)
443            for chunk in _iterencode(o, _current_indent_level):
444                yield chunk
445            if markers is not None:
446                del markers[markerid]
447
448    return _iterencode
449