#
# test_codecencodings_jp.py
#   Codec encoding tests for Japanese encodings.
#

from test import multibytecodec_support
import unittest

9class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase):
10    encoding = 'cp932'
11    tstring = multibytecodec_support.load_teststring('shift_jis')
12    codectests = (
13        # invalid bytes
14        (b"abc\x81\x00\x81\x00\x82\x84", "strict",  None),
15        (b"abc\xf8", "strict",  None),
16        (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"),
17        (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"),
18        (b"abc\x81\x00\x82\x84", "ignore",  "abc\x00\uff44"),
19        (b"ab\xEBxy", "replace", "ab\uFFFDxy"),
20        (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"),
21        (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'),
22        # sjis vs cp932
23        (b"\\\x7e", "replace", "\\\x7e"),
24        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
25    )
26
# Decode cases shared by the EUC-based test classes in this file.
# Each entry is (input bytes, error-handler name, expected text).
# NOTE(review): an expected value of None presumably marks input that must
# be rejected (it only appears with "strict") -- confirm in TestBase.
euc_commontests = (
    # invalid bytes
    (b"abc\x80\x80\xc1\xc4", "strict",  None),
    (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
    (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
    (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
    (b"abc\xc8", "strict",  None),
    (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
    (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
    (b"\xc1\x64", "strict", None),
    (b"\xa1\xc0", "strict", "\uff3c"),
    (b"\xa1\xc0\\", "strict", "\uff3c\\"),
    (b"\x8eXY", "replace", "\ufffdXY"),
)

class Test_EUC_JIS_2004(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test data for the ``euc_jis_2004`` encoding.

    The class defines no test methods of its own; it only supplies the
    codec name and data tables used by the ``TestBase`` mix-in.
    """

    encoding = 'euc_jis_2004'
    # Sample text comes from the 'euc_jisx0213' test-string set.
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests
    # (input text, expected bytes).  The expected bytes must be pure ASCII
    # plus escapes: U+211C, U+2329, U+1234 and U+232A from the input are
    # spelled as the character references &real;, &lang;, &#4660; and &rang;.
    # Literal non-ASCII characters are not allowed in a bytes literal.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )

class Test_EUC_JISX0213(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test data for the ``euc_jisx0213`` encoding.

    The class defines no test methods of its own; it only supplies the
    codec name and data tables used by the ``TestBase`` mix-in.
    """

    encoding = 'euc_jisx0213'
    # Sample text comes from the 'euc_jisx0213' test-string set.
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests
    # (input text, expected bytes).  The expected bytes must be pure ASCII
    # plus escapes: U+211C, U+2329, U+1234 and U+232A from the input are
    # spelled as the character references &real;, &lang;, &#4660; and &rang;.
    # Literal non-ASCII characters are not allowed in a bytes literal.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )

class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Codec test data for the ``euc_jp`` encoding.

    The class defines no test methods of its own; it only supplies the
    codec name and data tables used by the ``TestBase`` mix-in.
    """

    encoding = 'euc_jp'
    # Sample text comes from the 'euc_jp' test-string set.
    tstring = multibytecodec_support.load_teststring('euc_jp')
    # The shared EUC decode cases plus two entries whose input is text and
    # whose expected result is bytes (U+00A5 -> 0x5C, U+203E -> 0x7E).
    codectests = euc_commontests + (
        ("\xa5", "strict", b"\x5c"),
        ("\u203e", "strict", b"\x7e"),
    )

# Decode cases shared by the Shift_JIS-based test classes in this file.
# Each entry is (input bytes, error-handler name, expected text).
# NOTE(review): an expected value of None presumably marks input that must
# be rejected (it only appears with "strict") -- confirm in TestBase.
shiftjis_commonenctests = (
    (b"abc\x80\x80\x82\x84", "strict",  None),
    (b"abc\xf8", "strict",  None),
    (b"abc\x80\x80\x82\x84def", "ignore",  "abc\uff44def"),
)

class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``shift_jis`` encoding.

    The class defines no test methods of its own; it only supplies the
    codec name and data tables used by the ``TestBase`` mix-in.
    """

    encoding = 'shift_jis'
    # Sample text comes from the 'shift_jis' test-string set.
    tstring = multibytecodec_support.load_teststring('shift_jis')
    # The shared Shift_JIS cases plus codec-specific decode expectations.
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        (b"\\\x7e", "strict", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
        (b"abc\x81\x39", "replace",  "abc\ufffd9"),
        (b"abc\xEA\xFC", "replace",  "abc\ufffd\ufffd"),
        (b"abc\xFF\x58", "replace",  "abc\ufffdX"),
    )

class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``shift_jis_2004`` encoding.

    The class defines no test methods of its own; it only supplies the
    codec name and data tables used by the ``TestBase`` mix-in.
    """

    encoding = 'shift_jis_2004'
    # Sample text comes from the 'shift_jis' test-string set.
    tstring = multibytecodec_support.load_teststring('shift_jis')
    # The shared Shift_JIS cases plus codec-specific decode expectations.
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
        (b"abc\xEA\xFC", "strict",  "abc\u64bf"),
        (b"\x81\x39xy", "replace",  "\ufffd9xy"),
        (b"\xFF\x58xy", "replace",  "\ufffdXxy"),
        (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
        (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
        (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
    )
    # (input text, expected bytes).  The expected bytes must be pure ASCII
    # plus escapes: U+211C, U+2329, U+1234 and U+232A from the input are
    # spelled as the character references &real;, &lang;, &#4660; and &rang;.
    # Literal non-ASCII characters are not allowed in a bytes literal.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )

class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the ``shift_jisx0213`` encoding.

    The class defines no test methods of its own; it only supplies the
    codec name and data tables used by the ``TestBase`` mix-in.
    """

    encoding = 'shift_jisx0213'
    # Sample text comes from the 'shift_jisx0213' test-string set.
    tstring = multibytecodec_support.load_teststring('shift_jisx0213')
    # The shared Shift_JIS cases plus codec-specific decode expectations.
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # sjis vs cp932
        (b"\\\x7e", "replace", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
    )
    # (input text, expected bytes).  The expected bytes must be pure ASCII
    # plus escapes: U+211C, U+2329, U+1234 and U+232A from the input are
    # spelled as the character references &real;, &lang;, &#4660; and &rang;.
    # Literal non-ASCII characters are not allowed in a bytes literal.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )

# Run every test case in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()