• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Implementation of JSONEncoder
2"""
3import re
4
5try:
6    from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8    c_encode_basestring_ascii = None
9try:
10    from _json import make_encoder as c_make_encoder
11except ImportError:
12    c_make_encoder = None
13
# Characters that must be escaped inside a JSON string when non-ASCII
# output is acceptable: control characters, backslash and double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped when the output has to be ASCII-only:
# backslash, double quote, and anything outside the range space..tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the range 0x80-0xff.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to its JSON escape sequence.  The short escapes are
# listed explicitly; the loop below fills in \uXXXX for the remaining
# control characters without overwriting the short forms.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# Ask for infinity by name instead of relying on an out-of-range literal
# overflowing to infinity, which the previous spelling only assumed.
INFINITY = float('inf')
# Function used to render finite floats.
FLOAT_REPR = repr
33
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.  Characters
    that ``ESCAPE`` does not match are passed through unchanged.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
41
42
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    A ``str`` containing characters matched by ``HAS_UTF8`` is first
    decoded as UTF-8.  Every character matched by ``ESCAPE_ASCII`` is then
    replaced by its short escape from ``ESCAPE_DCT`` when one exists, or by
    a ``\\uXXXX`` escape (a surrogate pair for code points >= 0x10000).

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def _escape(found):
        char = found.group(0)
        short_form = ESCAPE_DCT.get(char)
        if short_form is not None:
            return short_form
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u{0:04x}'.format(codepoint)
        # Outside the BMP: emit a UTF-16 surrogate pair.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    body = ESCAPE_ASCII.sub(_escape, s)
    return '"' + str(body) + '"'
66
67
# Use the C implementation when it was importable, otherwise fall back to
# the pure-Python one.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
70
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # ``separators`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion.  Otherwise, no such check takes
        place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.  If encoding is None, no explicit decoding
        step is added by this class.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the default() method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by encode(); when true, the C
        encoder is used if it is available and neither ``indent`` nor
        ``sort_keys`` is in effect.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only add a decoding step when an explicit, non-default codec was
        # requested.  encoding=None must not reach str.decode(), which
        # rejects None; this mirrors the guard in encode()'s fast path.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                # NaN is the only float that compares unequal to itself.
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text


        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
265
266def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
267        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
268        ## HACK: hand-optimized bytecode; turn globals into locals
269        ValueError=ValueError,
270        basestring=basestring,
271        dict=dict,
272        float=float,
273        id=id,
274        int=int,
275        isinstance=isinstance,
276        list=list,
277        long=long,
278        str=str,
279        tuple=tuple,
280    ):
281
282    def _iterencode_list(lst, _current_indent_level):
283        if not lst:
284            yield '[]'
285            return
286        if markers is not None:
287            markerid = id(lst)
288            if markerid in markers:
289                raise ValueError("Circular reference detected")
290            markers[markerid] = lst
291        buf = '['
292        if _indent is not None:
293            _current_indent_level += 1
294            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
295            separator = _item_separator + newline_indent
296            buf += newline_indent
297        else:
298            newline_indent = None
299            separator = _item_separator
300        first = True
301        for value in lst:
302            if first:
303                first = False
304            else:
305                buf = separator
306            if isinstance(value, basestring):
307                yield buf + _encoder(value)
308            elif value is None:
309                yield buf + 'null'
310            elif value is True:
311                yield buf + 'true'
312            elif value is False:
313                yield buf + 'false'
314            elif isinstance(value, (int, long)):
315                yield buf + str(value)
316            elif isinstance(value, float):
317                yield buf + _floatstr(value)
318            else:
319                yield buf
320                if isinstance(value, (list, tuple)):
321                    chunks = _iterencode_list(value, _current_indent_level)
322                elif isinstance(value, dict):
323                    chunks = _iterencode_dict(value, _current_indent_level)
324                else:
325                    chunks = _iterencode(value, _current_indent_level)
326                for chunk in chunks:
327                    yield chunk
328        if newline_indent is not None:
329            _current_indent_level -= 1
330            yield '\n' + (' ' * (_indent * _current_indent_level))
331        yield ']'
332        if markers is not None:
333            del markers[markerid]
334
335    def _iterencode_dict(dct, _current_indent_level):
336        if not dct:
337            yield '{}'
338            return
339        if markers is not None:
340            markerid = id(dct)
341            if markerid in markers:
342                raise ValueError("Circular reference detected")
343            markers[markerid] = dct
344        yield '{'
345        if _indent is not None:
346            _current_indent_level += 1
347            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
348            item_separator = _item_separator + newline_indent
349            yield newline_indent
350        else:
351            newline_indent = None
352            item_separator = _item_separator
353        first = True
354        if _sort_keys:
355            items = sorted(dct.items(), key=lambda kv: kv[0])
356        else:
357            items = dct.iteritems()
358        for key, value in items:
359            if isinstance(key, basestring):
360                pass
361            # JavaScript is weakly typed for these, so it makes sense to
362            # also allow them.  Many encoders seem to do something like this.
363            elif isinstance(key, float):
364                key = _floatstr(key)
365            elif key is True:
366                key = 'true'
367            elif key is False:
368                key = 'false'
369            elif key is None:
370                key = 'null'
371            elif isinstance(key, (int, long)):
372                key = str(key)
373            elif _skipkeys:
374                continue
375            else:
376                raise TypeError("key " + repr(key) + " is not a string")
377            if first:
378                first = False
379            else:
380                yield item_separator
381            yield _encoder(key)
382            yield _key_separator
383            if isinstance(value, basestring):
384                yield _encoder(value)
385            elif value is None:
386                yield 'null'
387            elif value is True:
388                yield 'true'
389            elif value is False:
390                yield 'false'
391            elif isinstance(value, (int, long)):
392                yield str(value)
393            elif isinstance(value, float):
394                yield _floatstr(value)
395            else:
396                if isinstance(value, (list, tuple)):
397                    chunks = _iterencode_list(value, _current_indent_level)
398                elif isinstance(value, dict):
399                    chunks = _iterencode_dict(value, _current_indent_level)
400                else:
401                    chunks = _iterencode(value, _current_indent_level)
402                for chunk in chunks:
403                    yield chunk
404        if newline_indent is not None:
405            _current_indent_level -= 1
406            yield '\n' + (' ' * (_indent * _current_indent_level))
407        yield '}'
408        if markers is not None:
409            del markers[markerid]
410
411    def _iterencode(o, _current_indent_level):
412        if isinstance(o, basestring):
413            yield _encoder(o)
414        elif o is None:
415            yield 'null'
416        elif o is True:
417            yield 'true'
418        elif o is False:
419            yield 'false'
420        elif isinstance(o, (int, long)):
421            yield str(o)
422        elif isinstance(o, float):
423            yield _floatstr(o)
424        elif isinstance(o, (list, tuple)):
425            for chunk in _iterencode_list(o, _current_indent_level):
426                yield chunk
427        elif isinstance(o, dict):
428            for chunk in _iterencode_dict(o, _current_indent_level):
429                yield chunk
430        else:
431            if markers is not None:
432                markerid = id(o)
433                if markerid in markers:
434                    raise ValueError("Circular reference detected")
435                markers[markerid] = o
436            o = _default(o)
437            for chunk in _iterencode(o, _current_indent_level):
438                yield chunk
439            if markers is not None:
440                del markers[markerid]
441
442    return _iterencode
443