• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#
5
6from test import multibytecodec_support
7import unittest
8
9class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
10    encoding = 'gb2312'
11    tstring = multibytecodec_support.load_teststring('gb2312')
12    codectests = (
13        # invalid bytes
14        (b"abc\x81\x81\xc1\xc4", "strict",  None),
15        (b"abc\xc8", "strict",  None),
16        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
17        (b"abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
18        (b"abc\x81\x81\xc1\xc4", "ignore",  "abc\u804a"),
19        (b"\xc1\x64", "strict", None),
20    )
21
22class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
23    encoding = 'gbk'
24    tstring = multibytecodec_support.load_teststring('gbk')
25    codectests = (
26        # invalid bytes
27        (b"abc\x80\x80\xc1\xc4", "strict",  None),
28        (b"abc\xc8", "strict",  None),
29        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
30        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
31        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
32        (b"\x83\x34\x83\x31", "strict", None),
33        ("\u30fb", "strict", None),
34    )
35
36class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
37    encoding = 'gb18030'
38    tstring = multibytecodec_support.load_teststring('gb18030')
39    codectests = (
40        # invalid bytes
41        (b"abc\x80\x80\xc1\xc4", "strict",  None),
42        (b"abc\xc8", "strict",  None),
43        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
44        (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u804a\ufffd"),
45        (b"abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
46        (b"abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd9\ufffd9\u804a"),
47        ("\u30fb", "strict", b"\x819\xa79"),
48        (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
49        (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
50        (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
51        # issue29990
52        (b"\xff\x30\x81\x30", "strict", None),
53        (b"\x81\x30\xff\x30", "strict", None),
54        (b"abc\x81\x39\xff\x39\xc1\xc4", "replace", "abc\ufffd\x39\ufffd\x39\u804a"),
55        (b"abc\xab\x36\xff\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'),
56        (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore",  "abc\x38\x32\u804a"),
57    )
58    has_iso10646 = True
59
60class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
61    encoding = 'hz'
62    tstring = multibytecodec_support.load_teststring('hz')
63    codectests = (
64        # test '~\n' (3 lines)
65        (b'This sentence is in ASCII.\n'
66         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
67         b'~{NpJ)l6HK!#~}Bye.\n',
68         'strict',
69         'This sentence is in ASCII.\n'
70         'The next sentence is in GB.'
71         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
72         'Bye.\n'),
73        # test '~\n' (4 lines)
74        (b'This sentence is in ASCII.\n'
75         b'The next sentence is in GB.~\n'
76         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
77         b'Bye.\n',
78         'strict',
79         'This sentence is in ASCII.\n'
80         'The next sentence is in GB.'
81         '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
82         'Bye.\n'),
83        # invalid bytes
84        (b'ab~cd', 'replace', 'ab\uFFFDcd'),
85        (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
86        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
87        (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
88        (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
89        # issue 30003
90        ('ab~cd', 'strict',  b'ab~~cd'),  # escape ~
91        (b'~{Dc~~:C~}', 'strict', None),  # ~~ only in ASCII mode
92        (b'~{Dc~\n:C~}', 'strict', None), # ~\n only in ASCII mode
93    )
94
if __name__ == '__main__':
    # Executed as a script: hand control to unittest's command-line runner.
    unittest.main()
97