• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Implementation of JSONEncoder
2"""
3import re
4
5try:
6    from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8    c_encode_basestring_ascii = None
9try:
10    from _json import encode_basestring as c_encode_basestring
11except ImportError:
12    c_encode_basestring = None
13try:
14    from _json import make_encoder as c_make_encoder
15except ImportError:
16    c_make_encoder = None
17
# Characters that always need escaping inside a JSON string: the C0
# control range, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same idea for ASCII-only output: backslash, double quote, or anything
# outside the printable range from space to tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Bytes pattern matching any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')

# Escape table, seeded with the characters that have a short two-character
# JSON escape.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character falls back to the generic \uXXXX form;
# entries seeded above are left alone.
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u{0:04x}'.format(i)

# Positive infinity, used by the encoder to recognise out-of-range floats.
INFINITY = float('inf')
35
def py_encode_basestring(s):
    """Return a JSON representation of a Python string

    The result is ``s`` wrapped in double quotes, with every character
    matched by ``ESCAPE`` replaced by its entry in ``ESCAPE_DCT``.  All
    other characters, including non-ASCII ones, are passed through as is.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
43
44
# Use the C accelerator when it could be imported, else the pure-Python
# implementation.
encode_basestring = (
    c_encode_basestring if c_encode_basestring else py_encode_basestring)
46
47
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    The result is ``s`` wrapped in double quotes.  Characters matched by
    ``ESCAPE_ASCII`` are replaced by their ``ESCAPE_DCT`` entry when one
    exists, and by ``\\uXXXX`` escapes otherwise.

    """
    def escape_char(found):
        char = found.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u{0:04x}'.format(codepoint)
        # Code points beyond the BMP are written as a UTF-16 surrogate
        # pair: the high ten bits go into the lead surrogate and the low
        # ten bits into the trail surrogate.
        offset = codepoint - 0x10000
        lead = 0xd800 | ((offset >> 10) & 0x3ff)
        trail = 0xdc00 | (offset & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(lead, trail)

    return '"' + ESCAPE_ASCII.sub(escape_char, s) + '"'
68
69
# Use the C accelerator when it could be imported, else the pure-Python
# implementation.
encode_basestring_ascii = (
    c_encode_basestring_ascii if c_encode_basestring_ascii
    else py_encode_basestring_ascii)
72
class JSONEncoder:
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Out of the box the following conversions are performed:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To handle additional types, subclass ``JSONEncoder`` and override
    ``.default()`` so that it returns a serializable object for ``o`` when
    it can, and otherwise defers to the superclass implementation (which
    raises ``TypeError``).

    """
    # Class-level defaults; ``__init__`` shadows them on the instance when
    # ``separators`` or ``indent`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, *, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        All arguments are keyword-only.

        skipkeys: when false, a dict key that is not a str, int, float,
        bool or None makes encoding raise TypeError; when true, such
        items are silently left out.

        ensure_ascii: when true, every non-ASCII character in the output
        is escaped, so the result is pure ASCII.  When false, non-ASCII
        characters are emitted unchanged.

        check_circular: when true, lists, dicts and objects handed to
        ``default()`` are tracked while they are being encoded and a
        ValueError is raised if one of them contains itself.  When false
        no tracking is done, and a circular structure recurses until the
        interpreter gives up (a RecursionError).

        allow_nan: when true, NaN, Infinity and -Infinity are written
        using those names.  This is not valid per the JSON specification,
        but matches most JavaScript based encoders and decoders.  When
        false, encoding such a float raises ValueError.

        sort_keys: when true, dictionary items are emitted sorted by key,
        which makes output reproducible (handy for regression tests).

        indent: None gives the most compact layout.  A non-negative
        integer pretty-prints array elements and object members with that
        many spaces per level (0 inserts only newlines); a string is used
        verbatim as the per-level indentation.

        separators: an ``(item_separator, key_separator)`` tuple.  When
        omitted, the default is ``(', ', ': ')`` if *indent* is ``None``
        and ``(',', ': ')`` otherwise.  Pass ``(',', ':')`` for the most
        compact output.

        default: a function called for objects that cannot otherwise be
        serialized.  It should return a JSON encodable version of the
        object or raise a ``TypeError``.  When given, it replaces the
        ``default()`` method on this instance.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent

        if separators is None:
            # Pretty-printed output ends each line right after the comma,
            # so drop the trailing space from the item separator.
            if indent is not None:
                self.item_separator = ','
        else:
            item_sep, key_sep = separators
            self.item_separator = item_sep
            self.key_separator = key_sep

        if default is not None:
            self.default = default

    def default(self, o):
        """Return a serializable stand-in for ``o``, or raise TypeError.

        The base implementation always raises ``TypeError``.  Subclasses
        override it to support more types and fall back to the base
        implementation for anything they do not handle.

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        type_name = o.__class__.__name__
        raise TypeError(f'Object of type {type_name} '
                        f'is not JSON serializable')

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from json.encoder import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # Shortcut for bare strings (extremely simple cases, benchmarks).
        if isinstance(o, str):
            quote = (encode_basestring_ascii if self.ensure_ascii
                     else encode_basestring)
            return quote(o)

        # The iterator is materialised before joining rather than handed
        # to ''.join() directly, because exceptions raised that way carry
        # less detail.
        pieces = self.iterencode(o, _one_shot=True)
        if not isinstance(pieces, (list, tuple)):
            pieces = list(pieces)
        return ''.join(pieces)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by ``encode()``; it allows
        the C encoder to be used when one is available and no indent was
        requested.

        """
        # id() -> object map of containers currently being encoded, or
        # None when circular-reference checking is switched off.
        markers = {} if self.check_circular else None
        _encoder = (encode_basestring_ascii if self.ensure_ascii
                    else encode_basestring)

        allow_nan = self.allow_nan
        float_repr = float.__repr__
        pos_inf = INFINITY
        neg_inf = -INFINITY

        def floatstr(number):
            # Spot the special values with plain comparisons so the test
            # does not depend on processor or platform internals.
            if number != number:
                literal = 'NaN'
            elif number == pos_inf:
                literal = 'Infinity'
            elif number == neg_inf:
                literal = '-Infinity'
            else:
                return float_repr(number)

            if allow_nan:
                return literal
            raise ValueError(
                "Out of range float values are not JSON compliant: " +
                repr(number))

        use_c_encoder = (_one_shot and c_make_encoder is not None
                         and self.indent is None)
        if not use_c_encoder:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        else:
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        return _iterencode(o, 0)
258
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__repr__,
    ):
    """Build the pure-Python encoder and return its ``_iterencode`` generator.

    The returned callable takes ``(o, _current_indent_level)`` and yields
    the JSON text for ``o`` as a sequence of string chunks.

    markers: dict used to detect circular references (keyed by ``id()`` of
        each container or custom object currently being encoded), or None
        to disable the check.
    _default: callable invoked for objects of unsupported type; its result
        is encoded in place of the object.
    _encoder: callable turning a str into a quoted JSON string.
    _indent: None for compact output, a string used once per nesting
        level, or an integer that is converted to that many spaces.
    _floatstr: callable turning a float into its JSON text.
    _key_separator, _item_separator: strings placed between a key and its
        value, and between consecutive items.
    _sort_keys: when true, dict items are sorted before being emitted.
    _skipkeys: when true, items whose key has an unsupported type are
        dropped instead of raising TypeError.
    _one_shot: accepted for signature compatibility with the caller; it is
        not referenced in this function's body.

    The remaining keyword parameters only rebind builtins as locals (see
    the HACK note in the signature) and are not meant to be passed.

    Raises (from the returned generator): ValueError on a circular
    reference, TypeError on an unsupported dict key when ``_skipkeys`` is
    false, plus whatever ``_default`` raises.
    """

    # Normalise an integer indent to a string of spaces so the helpers
    # below can simply repeat it per nesting level.
    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        """Yield the JSON array text for a list or tuple, chunk by chunk."""
        if not lst:
            yield '[]'
            return
        if markers is not None:
            # Register this container; meeting it again while it is still
            # registered means it (indirectly) contains itself.
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` is the text that must precede the next item: the opening
        # bracket (plus indentation) for the first item, the separator for
        # every later one.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            # Scalars are emitted in a single chunk together with ``buf``.
            # The True/False identity checks come before the int check
            # because bool is a subclass of int.
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __repr__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(value)
            else:
                # Nested values: flush ``buf`` on its own, then delegate to
                # the matching generator.
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            # Put the closing bracket on its own line at the outer level.
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            # Fully encoded: the same object may legitimately appear again
            # elsewhere in the structure.
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        """Yield the JSON object text for a dict, chunk by chunk."""
        if not dct:
            yield '{}'
            return
        if markers is not None:
            # Same circular-reference bookkeeping as in _iterencode_list.
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            # Sorting happens on the original (key, value) pairs, i.e.
            # before keys are converted to strings below.
            items = sorted(dct.items())
        else:
            items = dct.items()
        for key, value in items:
            # Convert every supported non-str key to its string form; the
            # result is quoted by ``_encoder`` further down.
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _make_iterencode
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _make_iterencode
                key = _intstr(key)
            elif _skipkeys:
                # Dropped before any separator is emitted, so skipped
                # items leave no trace in the output.
                continue
            else:
                raise TypeError(f'keys must be str, int, float, bool or None, '
                                f'not {key.__class__.__name__}')
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            # Value handling mirrors _iterencode_list, except that each
            # piece is yielded as its own chunk.
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _make_iterencode
                yield _intstr(value)
            elif isinstance(value, float):
                # see comment for int/float in _make_iterencode
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            # Put the closing brace on its own line at the outer level.
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        """Yield the JSON text for any object, dispatching on its type."""
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _make_iterencode
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _make_iterencode
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            # Unsupported type: ask ``_default`` for a replacement and
            # encode that instead.  The original object is registered in
            # ``markers`` first, so meeting it again while its replacement
            # is being encoded is reported as a circular reference.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]
    return _iterencode
443