#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#
# Every class below supplies three class attributes that are consumed by
# multibytecodec_support.TestBase (defined outside this file):
#
#   encoding    -- name of the codec under test
#   tstring     -- sample data returned by load_teststring() for that codec
#   codectests  -- 3-tuples of (input, error handler name, expected result)
#
# NOTE(review): judging by the data, byte-string inputs are decoded and
# unicode inputs are encoded, and an expected result of None marks a case
# that should fail -- confirm against multibytecodec_support.TestBase.
#

from test import test_support
from test import multibytecodec_support
import unittest


class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test vectors for the 'gb2312' encoding."""

    encoding = 'gb2312'
    tstring = multibytecodec_support.load_teststring('gb2312')
    codectests = (
        # invalid bytes
        ("abc\x81\x81\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x81\x81\xc1\xc4", "ignore", u"abc\u804a"),
        ("\xc1\x64", "strict", None),
    )


class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test vectors for the 'gbk' encoding."""

    encoding = 'gbk'
    tstring = multibytecodec_support.load_teststring('gbk')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("\x83\x34\x83\x31", "strict", None),
        (u"\u30fb", "strict", None),
    )


class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test vectors for the 'gb18030' encoding."""

    encoding = 'gb18030'
    # Flag read by the shared test base.
    # NOTE(review): presumably enables the ISO 10646 specific checks there --
    # confirm against multibytecodec_support.TestBase.
    has_iso10646 = True
    tstring = multibytecodec_support.load_teststring('gb18030')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u804a"),
        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        (u"\u30fb", "strict", "\x819\xa79"),
        # issue29990
        ("\xff\x30\x81\x30", "strict", None),
        ("\x81\x30\xff\x30", "strict", None),
        ("abc\x81\x39\xff\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
        ("abc\xab\x36\xff\x30def", "replace", u'abc\ufffddef'),
        ("abc\xbf\x38\xff\x32\xc1\xc4", "ignore", u"abc\u804a"),
    )


class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test vectors for the 'hz' encoding."""

    encoding = 'hz'
    tstring = multibytecodec_support.load_teststring('hz')
    codectests = (
        # test '~\n' (3 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
         b'~{NpJ)l6HK!#~}Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # test '~\n' (4 lines)
        (b'This sentence is in ASCII.\n'
         b'The next sentence is in GB.~\n'
         b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
         b'Bye.\n',
         'strict',
         u'This sentence is in ASCII.\n'
         u'The next sentence is in GB.'
         u'\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
         u'Bye.\n'),
        # invalid bytes
        (b'ab~cd', 'replace', u'ab\uFFFDd'),
        (b'ab\xffcd', 'replace', u'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', u'ab\uFFFD\uFFFD\u804Acd'),
        # issue 30003
        (u'ab~cd', 'strict', b'ab~~cd'),  # escape ~
        (b'~{Dc~~:C~}', 'strict', None),  # ~~ only in ASCII mode
        (b'~{Dc~\n:C~}', 'strict', None),  # ~\n only in ASCII mode
    )


def test_main():
    """Run every test case defined in this module via test_support."""
    test_support.run_unittest(__name__)


if __name__ == "__main__":
    test_main()