1# 2# test_codecencodings_jp.py 3# Codec encoding tests for Japanese encodings. 4# 5 6from test import multibytecodec_support 7import unittest 8 9class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase): 10 encoding = 'cp932' 11 tstring = multibytecodec_support.load_teststring('shift_jis') 12 codectests = ( 13 # invalid bytes 14 (b"abc\x81\x00\x81\x00\x82\x84", "strict", None), 15 (b"abc\xf8", "strict", None), 16 (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"), 17 (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"), 18 (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"), 19 (b"ab\xEBxy", "replace", "ab\uFFFDxy"), 20 (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"), 21 (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'), 22 # sjis vs cp932 23 (b"\\\x7e", "replace", "\\\x7e"), 24 (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"), 25 ) 26 27euc_commontests = ( 28 # invalid bytes 29 (b"abc\x80\x80\xc1\xc4", "strict", None), 30 (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"), 31 (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"), 32 (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"), 33 (b"abc\xc8", "strict", None), 34 (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"), 35 (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"), 36 (b"\xc1\x64", "strict", None), 37 (b"\xa1\xc0", "strict", "\uff3c"), 38 (b"\xa1\xc0\\", "strict", "\uff3c\\"), 39 (b"\x8eXY", "replace", "\ufffdXY"), 40) 41 42class Test_EUC_JIS_2004(multibytecodec_support.TestBase, 43 unittest.TestCase): 44 encoding = 'euc_jis_2004' 45 tstring = multibytecodec_support.load_teststring('euc_jisx0213') 46 codectests = euc_commontests 47 xmlcharnametest = ( 48 "\xab\u211c\xbb = \u2329\u1234\u232a", 49 b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩" 50 ) 51 52class Test_EUC_JISX0213(multibytecodec_support.TestBase, 53 unittest.TestCase): 54 encoding = 'euc_jisx0213' 55 tstring = multibytecodec_support.load_teststring('euc_jisx0213') 56 codectests = euc_commontests 57 xmlcharnametest = ( 58 "\xab\u211c\xbb = \u2329\u1234\u232a", 59 b"\xa9\xa8ℜ\xa9\xb2 = ⟨ሴ⟩" 60 ) 61 62class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase, 63 unittest.TestCase): 64 encoding = 'euc_jp' 65 tstring = multibytecodec_support.load_teststring('euc_jp') 66 codectests = euc_commontests + ( 67 ("\xa5", "strict", b"\x5c"), 68 ("\u203e", "strict", b"\x7e"), 69 ) 70 71shiftjis_commonenctests = ( 72 (b"abc\x80\x80\x82\x84", "strict", None), 73 (b"abc\xf8", "strict", None), 74 (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"), 75) 76 77class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase): 78 encoding = 'shift_jis' 79 tstring = multibytecodec_support.load_teststring('shift_jis') 80 codectests = shiftjis_commonenctests + ( 81 (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"), 82 (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"), 83 84 (b"\\\x7e", "strict", "\\\x7e"), 85 (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"), 86 (b"abc\x81\x39", "replace", "abc\ufffd9"), 87 (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"), 88 (b"abc\xFF\x58", "replace", "abc\ufffdX"), 89 ) 90 91class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase): 92 encoding = 'shift_jis_2004' 93 tstring = multibytecodec_support.load_teststring('shift_jis') 94 codectests = shiftjis_commonenctests + ( 95 (b"\\\x7e", "strict", "\xa5\u203e"), 96 (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"), 97 (b"abc\xEA\xFC", "strict", "abc\u64bf"), 98 (b"\x81\x39xy", "replace", "\ufffd9xy"), 99 (b"\xFF\x58xy", "replace", "\ufffdXxy"), 100 (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"), 101 (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"), 102 (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'), 103 ) 104 xmlcharnametest = ( 105 "\xab\u211c\xbb = \u2329\u1234\u232a", 106 b"\x85Gℜ\x85Q = ⟨ሴ⟩" 107 ) 108 109class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase): 110 encoding = 'shift_jisx0213' 111 tstring = multibytecodec_support.load_teststring('shift_jisx0213') 112 codectests = shiftjis_commonenctests + ( 113 (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"), 114 (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"), 115 116 # sjis vs cp932 117 (b"\\\x7e", "replace", "\xa5\u203e"), 118 (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"), 119 ) 120 xmlcharnametest = ( 121 "\xab\u211c\xbb = \u2329\u1234\u232a", 122 b"\x85Gℜ\x85Q = ⟨ሴ⟩" 123 ) 124 125if __name__ == "__main__": 126 unittest.main() 127