# Source-viewer navigation residue: Home | Line# | Scopes# | Navigate# | Raw | Download
import codecs
from collections import OrderedDict
# PyTest and CTest are the bases mixed with TestUnicode further down.
from test.test_json import PyTest, CTest
4
5
class TestUnicode:
    """Unicode-related encode/decode checks for a JSON implementation.

    This class is a mixin.  Its methods call ``self.dumps``, ``self.loads``
    and the ``assert*``/``subTest`` helpers, none of which are defined
    here; they must be supplied by the class it is combined with (PyTest
    and CTest in this file).
    """

    # test_encoding1 and test_encoding2 from 2.x are irrelevant (only str
    # is supported as input, not bytes).

    def test_encoding3(self):
        # With default arguments, non-ASCII characters in a top-level
        # string are written as \uXXXX escapes.
        u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
        j = self.dumps(u)
        self.assertEqual(j, '"\\u03b1\\u03a9"')

    def test_encoding4(self):
        # Same as test_encoding3, but with the string nested in a list.
        u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
        j = self.dumps([u])
        self.assertEqual(j, '["\\u03b1\\u03a9"]')

    def test_encoding5(self):
        # ensure_ascii=False passes the characters through unescaped.
        u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
        j = self.dumps(u, ensure_ascii=False)
        self.assertEqual(j, '"{0}"'.format(u))

    def test_encoding6(self):
        # Same as test_encoding5, but with the string nested in a list.
        u = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
        j = self.dumps([u], ensure_ascii=False)
        self.assertEqual(j, '["{0}"]'.format(u))

    def test_big_unicode_encode(self):
        # A character outside the BMP is escaped as a surrogate pair by
        # default and emitted verbatim with ensure_ascii=False.
        u = '\U0001d120'
        self.assertEqual(self.dumps(u), '"\\ud834\\udd20"')
        self.assertEqual(self.dumps(u, ensure_ascii=False), '"\U0001d120"')

    def test_big_unicode_decode(self):
        # Both the literal character and its escaped surrogate pair must
        # decode to the same single non-BMP character.
        u = 'z\U0001d120x'
        self.assertEqual(self.loads('"' + u + '"'), u)
        self.assertEqual(self.loads('"z\\ud834\\udd20x"'), u)

    def test_unicode_decode(self):
        # Every code point below the surrogate block (U+0000..U+D7FF) must
        # decode from its \uXXXX escape.  The stop value is 0xd800 so that
        # U+D7FF itself is exercised; range(0, 0xd7ff) stopped one short.
        for i in range(0xd800):
            u = chr(i)
            s = '"\\u{0:04x}"'.format(i)
            self.assertEqual(self.loads(s), u)

    def test_unicode_preservation(self):
        # Decoded strings are exactly str, at top level and when nested.
        self.assertIs(type(self.loads('""')), str)
        self.assertIs(type(self.loads('"a"')), str)
        self.assertIs(type(self.loads('["a"]')[0]), str)

    def test_bytes_encode(self):
        # bytes values are rejected by dumps, at top level and when nested.
        self.assertRaises(TypeError, self.dumps, b"hi")
        self.assertRaises(TypeError, self.dumps, [b"hi"])

    def test_bytes_decode(self):
        # The document is identical for every encoding, so serialize once.
        data = ["a\xb5\u20ac\U0001d120"]
        text = self.dumps(data)
        for encoding, bom in [
                ('utf-8', codecs.BOM_UTF8),
                ('utf-16be', codecs.BOM_UTF16_BE),
                ('utf-16le', codecs.BOM_UTF16_LE),
                ('utf-32be', codecs.BOM_UTF32_BE),
                ('utf-32le', codecs.BOM_UTF32_LE),
            ]:
            # subTest reports which encoding failed and lets the remaining
            # encodings still run.
            with self.subTest(encoding=encoding):
                encoded = text.encode(encoding)
                self.assertEqual(self.loads(bom + encoded), data)
                self.assertEqual(self.loads(encoded), data)
        self.assertRaises(UnicodeDecodeError, self.loads, b'["\x80"]')
        # RFC-7159 and ECMA-404 extend JSON to allow documents that
        # consist of only a string, which can present a special case
        # not covered by the encoding detection patterns specified in
        # RFC-4627 for utf-16-le (XX 00 XX 00).
        self.assertEqual(self.loads('"\u2600"'.encode('utf-16-le')),
                         '\u2600')
        # Encoding detection for small (<4) bytes objects
        # is implemented as a special case. RFC-7159 and ECMA-404
        # allow single codepoint JSON documents which are only two
        # bytes in utf-16 encodings w/o BOM.
        self.assertEqual(self.loads(b'5\x00'), 5)
        self.assertEqual(self.loads(b'\x007'), 7)
        self.assertEqual(self.loads(b'57'), 57)

    def test_object_pairs_hook_with_unicode(self):
        s = '{"xkd":1, "kcw":2, "art":3, "hxm":4, "qrt":5, "pad":6, "hoy":7}'
        p = [("xkd", 1), ("kcw", 2), ("art", 3), ("hxm", 4),
             ("qrt", 5), ("pad", 6), ("hoy", 7)]
        # dict(p) is the mapping the literal in s denotes; building it
        # from p avoids calling eval() on the document text.
        self.assertEqual(self.loads(s), dict(p))
        # The hook receives the key/value pairs in document order.
        self.assertEqual(self.loads(s, object_pairs_hook=lambda x: x), p)
        od = self.loads(s, object_pairs_hook=OrderedDict)
        self.assertEqual(od, OrderedDict(p))
        self.assertIs(type(od), OrderedDict)
        # the object_pairs_hook takes priority over the object_hook
        self.assertEqual(self.loads(s, object_pairs_hook=OrderedDict,
                                    object_hook=lambda x: None),
                         OrderedDict(p))
95
96
class TestPyUnicode(TestUnicode, PyTest):
    """Run the TestUnicode cases with the helpers inherited from PyTest."""
class TestCUnicode(TestUnicode, CTest):
    """Run the TestUnicode cases with the helpers inherited from CTest."""
99