#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#
5
6from test import test_support
7from test import multibytecodec_support
8import unittest
9
class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gb2312' encoding.

    Every ``codectests`` row is a 3-tuple; the middle element is one of the
    codec error-handler names ("strict", "replace", "ignore").
    NOTE(review): the first/last elements look like (input, expected output),
    with ``None`` presumably meaning "must fail" -- confirm against
    multibytecodec_support.TestBase, which consumes this table.
    """

    codectests = (
        # invalid bytes
        # "\x81\x81" in the middle of otherwise valid data.
        ("abc\x81\x81\xc1\xc4", "strict", None),
        # A single trailing "\xc8" byte.
        ("abc\xc8", "strict", None),
        # With "replace", "\x81\x81" maps to one U+FFFD; "\xc1\xc4" -> U+804A.
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # Same, plus the trailing "\xc8" becoming a second U+FFFD.
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        # With "ignore", the "\x81\x81" pair leaves nothing in the output.
        ("abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        # "\xc1" followed by "\x64" (ASCII 'd').
        ("\xc1\x64", "strict", None),
    )
    tstring = multibytecodec_support.load_teststring('gb2312')
    encoding = 'gb2312'
22
class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gbk' encoding.

    Every ``codectests`` row is a 3-tuple; the middle element is one of the
    codec error-handler names ("strict", "replace", "ignore").
    NOTE(review): the first/last elements look like (input, expected output),
    with ``None`` presumably meaning "must fail" -- confirm against
    multibytecodec_support.TestBase, which consumes this table.
    """

    codectests = (
        # invalid bytes
        # "\x80\x80" in the middle of otherwise valid data.
        ("abc\x80\x80\xc1\xc4", "strict", None),
        # A single trailing "\xc8" byte.
        ("abc\xc8", "strict", None),
        # With "replace", "\x80\x80" maps to one U+FFFD; "\xc1\xc4" -> U+804A.
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # Same, plus the trailing "\xc8" becoming a second U+FFFD.
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        # With "ignore", the "\x80\x80" pair leaves nothing in the output.
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        # Four-byte sequence "\x83\x34\x83\x31" under "strict".
        ("\x83\x34\x83\x31", "strict", None),
        # Unicode input (U+30FB) rather than a byte string.
        (u"\u30fb", "strict", None),
    )
    tstring = multibytecodec_support.load_teststring('gbk')
    encoding = 'gbk'
36
class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gb18030' encoding.

    Every ``codectests`` row is a 3-tuple; the middle element is one of the
    codec error-handler names ("strict", "replace", "ignore").
    NOTE(review): the first/last elements look like (input, expected output),
    with ``None`` presumably meaning "must fail" -- confirm against
    multibytecodec_support.TestBase, which consumes this table.
    """

    # Flag read by the shared test base; its exact effect is not visible here.
    has_iso10646 = True
    codectests = (
        # invalid bytes
        # "\x80\x80" in the middle of otherwise valid data.
        ("abc\x80\x80\xc1\xc4", "strict", None),
        # A single trailing "\xc8" byte.
        ("abc\xc8", "strict", None),
        # With "replace", "\x80\x80" maps to one U+FFFD; "\xc1\xc4" -> U+804A.
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # Same, plus the trailing "\xc8" becoming a second U+FFFD.
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        # With "ignore", the "\x80\x80" pair leaves nothing in the output.
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        # Four bytes "\x84\x39\x84\x39" collapse to a single U+FFFD.
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # Unicode input; the expected literal is the four bytes
        # 0x81 0x39 0xA7 0x39 (each "9" is a plain ASCII character that
        # follows a two-digit \x escape).
        (u"\u30fb", "strict", "\x819\xa79"),
        # issue29990
        # Four-byte shaped sequences containing a "\xff" byte.
        ("\xff\x30\x81\x30", "strict", None),
        ("\x81\x30\xff\x30", "strict", None),
        ("abc\x81\x39\xff\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\xab\x36\xff\x30def", "replace", u'abc\ufffddef'),
        ("abc\xbf\x38\xff\x32\xc1\xc4", "ignore", u"abc\u804a"),
    )
    tstring = multibytecodec_support.load_teststring('gb18030')
    encoding = 'gb18030'
57
class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'hz' encoding.

    Every ``codectests`` row is a 3-tuple; the middle element is one of the
    codec error-handler names ("strict", "replace").
    NOTE(review): the first/last elements look like (input, expected output),
    with ``None`` presumably meaning "must fail" -- confirm against
    multibytecodec_support.TestBase, which consumes this table.
    """

    codectests = (
        # test '~\n' (3 lines)
        # The "~\n" after "~}" on the second input line does not appear in
        # the expected text.
        (
            b'This sentence is in ASCII.\n'
            b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
            b'~{NpJ)l6HK!#~}Bye.\n',
            'strict',
            u'This sentence is in ASCII.\n'
            u'The next sentence is in GB.'
            u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
            u'Bye.\n',
        ),
        # test '~\n' (4 lines)
        # Same expected text, with the input broken across four lines by
        # two "~\n" sequences.
        (
            b'This sentence is in ASCII.\n'
            b'The next sentence is in GB.~\n'
            b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
            b'Bye.\n',
            'strict',
            u'This sentence is in ASCII.\n'
            u'The next sentence is in GB.'
            u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
            u'Bye.\n',
        ),
        # invalid bytes
        # "~c" is replaced as a unit: the 'c' is absent from the result.
        (b'ab~cd', 'replace', u'ab\uFFFDd'),
        # A lone "\xff" becomes one U+FFFD; the following "cd" survives.
        (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
        # Inside "~{ ... ~}", "\x81\x81" yields two U+FFFD characters.
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
        # issue 30003
        (u'ab~cd', 'strict', b'ab~~cd'),   # escape ~
        (b'~{Dc~~:C~}', 'strict', None),   # ~~ only in ASCII mode
        (b'~{Dc~\n:C~}', 'strict', None),  # ~\n only in ASCII mode
    )
    tstring = multibytecodec_support.load_teststring('hz')
    encoding = 'hz'
90
def test_main():
    """Pass this module's name to ``test_support.run_unittest``."""
    this_module = __name__
    test_support.run_unittest(this_module)
93
# Script entry point: only run the tests when this file is executed directly,
# not when it is imported.
if __name__ == "__main__":
    test_main()
96