#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#
# Every class below is a data table: it names one codec, loads that codec's
# sample text and lists (input, error-handler, expected) triples.  The test
# methods themselves come from test_multibytecodec_support.TestBase, which
# is not defined in this file.
#
# NOTE(review): an expected value of None presumably marks a case that must
# fail, and byte-string inputs presumably exercise decoding while unicode
# inputs exercise encoding -- confirm against TestBase.
#

from test import test_support
from test import test_multibytecodec_support
import unittest


class Test_GB2312(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'gb2312' codec."""

    encoding = 'gb2312'
    tstring = test_multibytecodec_support.load_teststring('gb2312')
    codectests = (
        # malformed byte sequences under each error handler
        ("abc\x81\x81\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        ("\xc1\x64", "strict", None),
    )


class Test_GBK(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'gbk' codec."""

    encoding = 'gbk'
    tstring = test_multibytecodec_support.load_teststring('gbk')
    codectests = (
        # malformed byte sequences under each error handler
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("\x83\x34\x83\x31", "strict", None),
        # unicode input; Test_GB18030 lists the same code point with a
        # concrete byte result, whereas here the expected value is None
        (u"\u30fb", "strict", None),
    )


class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'gb18030' codec."""

    encoding = 'gb18030'
    tstring = test_multibytecodec_support.load_teststring('gb18030')
    codectests = (
        # malformed byte sequences under each error handler
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        # unicode input with a four-byte expected result
        (u"\u30fb", "strict", "\x819\xa79"),
    )
    # Set only for this codec; presumably switches on extra checks in
    # TestBase -- TODO confirm what it enables.
    has_iso10646 = True


class Test_HZ(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Test data for the 'hz' codec."""

    encoding = 'hz'
    tstring = test_multibytecodec_support.load_teststring('hz')
    codectests = (
        # '~\n' inside the input, three physical lines; the '~\n' pair has
        # no counterpart in the expected text
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # '~\n' inside the input, four physical lines; same expected text
        # as the three-line case
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # malformed byte sequences with the 'replace' handler
        (b'ab~cd', 'replace', u'ab\uFFFDd'),
        (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
    )


def test_main():
    """Run every test case defined in this module via test_support."""
    test_support.run_unittest(__name__)


if __name__ == "__main__":
    test_main()