#
# test_codecencodings_jp.py
#   Codec encoding tests for Japanese encodings.
#
# Each ``codectests`` entry is a 3-tuple:
#     (input string, error-handling scheme, expected result)
# The entries are consumed by test_multibytecodec_support.TestBase, which is
# not part of this file.  NOTE(review): byte-string inputs appear to be
# decoding cases, unicode inputs encoding cases, and an expected result of
# None appears to mark input that the codec must reject -- confirm against
# TestBase.
#
# Each ``xmlcharnametest`` is a pair (unicode text, expected byte string).
# The expected byte strings must stay pure ASCII plus \x escapes: the
# unencodable characters are written as the character references
# ``&real;``, ``&lang;``, ``&#4660;`` (U+1234) and ``&rang;``, never as the
# literal glyphs, because this file carries no source-encoding declaration.
#

from test import test_support
from test import test_multibytecodec_support
import unittest


class Test_CP932(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test cases for the 'cp932' encoding."""

    encoding = 'cp932'
    # cp932 reuses the 'shift_jis' sample text.
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = (
        # invalid bytes
        ("abc\x81\x00\x81\x00\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x81\x00\x82\x84", "ignore", u"abc\uff44"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
    )


class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec test cases for the 'euc_jisx0213' encoding."""

    encoding = 'euc_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\xc1\xc4", "strict", None),
        ("abc\xc8", "strict", None),
        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
        ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
        ("\xc1\x64", "strict", None),
        ("\xa1\xc0", "strict", u"\uff3c"),
    )
    # Expected bytes use ASCII character references for the characters
    # U+211C, U+2329, U+1234 and U+232A of the unicode text.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )


# Cases shared by the EUC-JP family; extended per class below.
eucjp_commontests = (
    ("abc\x80\x80\xc1\xc4", "strict", None),
    ("abc\xc8", "strict", None),
    ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
    ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
    ("abc\x80\x80\xc1\xc4", "ignore", u"abc\u7956"),
    ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
    ("\xc1\x64", "strict", None),
)


class Test_EUC_JP_COMPAT(test_multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Codec test cases for the 'euc_jp' encoding."""

    encoding = 'euc_jp'
    tstring = test_multibytecodec_support.load_teststring('euc_jp')
    codectests = eucjp_commontests + (
        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
        # Unicode inputs: U+00A5 and U+203E map to the bytes 0x5C and 0x7E.
        (u"\xa5", "strict", "\x5c"),
        (u"\u203e", "strict", "\x7e"),
    )


# Cases shared by the Shift_JIS family; extended per class below.
shiftjis_commonenctests = (
    ("abc\x80\x80\x82\x84", "strict", None),
    ("abc\xf8", "strict", None),
    ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
    ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
    ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
)


class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test cases for the 'shift_jis' encoding."""

    encoding = 'shift_jis'
    tstring = test_multibytecodec_support.load_teststring('shift_jis')
    codectests = shiftjis_commonenctests + (
        ("\\\x7e", "strict", u"\\\x7e"),
        ("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
    )


class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test cases for the 'shift_jisx0213' encoding."""

    encoding = 'shift_jisx0213'
    tstring = test_multibytecodec_support.load_teststring('shift_jisx0213')
    codectests = (
        # invalid bytes
        ("abc\x80\x80\x82\x84", "strict", None),
        ("abc\xf8", "strict", None),
        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
        ("abc\x80\x80\x82\x84def", "ignore", u"abc\uff44def"),
        # sjis vs cp932
        ("\\\x7e", "replace", u"\xa5\u203e"),
        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
    )
    # Expected bytes use ASCII character references for the characters
    # U+211C, U+2329, U+1234 and U+232A of the unicode text.
    xmlcharnametest = (
        u"\xab\u211c\xbb = \u2329\u1234\u232a",
        "\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )


def test_main():
    """Run every test case defined in this module."""
    test_support.run_unittest(__name__)


if __name__ == "__main__":
    test_main()