# -*- coding: koi8-r -*-

# Tests for the way Python source code is decoded: PEP 263 coding cookies,
# the UTF-8 BOM, and the errors raised for bad or conflicting declarations.
#
# NOTE: this file itself must be stored in the encoding named by the cookie
# on line 1; test_pep263 relies on it.

import unittest
from test.support import TESTFN, unlink, unload, rmtree, script_helper, captured_stdout
import importlib
import os
import sys
import subprocess
import tempfile


class MiscSourceEncodingTest(unittest.TestCase):
    """Assorted regression tests for source-encoding handling."""

    def test_pep263(self):
        # The non-ASCII literals below are decoded with the encoding declared
        # by this file's coding cookie, so the expected UTF-8 bytes only match
        # when the file is stored in that encoding.
        self.assertEqual(
            "Питон".encode("utf-8"),
            b'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
        )
        # A backslash followed by a non-ASCII character: the expected bytes
        # keep the backslash and the UTF-8 encoding of the character.
        self.assertEqual(
            "\П".encode("utf-8"),
            b'\\\xd0\x9f'
        )

    def test_compilestring(self):
        # see #1882
        code = compile(b"\n# coding: utf-8\nu = '\xc3\xb3'\n", "dummy", "exec")
        namespace = {}
        exec(code, namespace)
        self.assertEqual(namespace['u'], '\xf3')

    def test_issue2301(self):
        # The source is invalid syntax; the text attached to the SyntaxError
        # must be the offending line decoded with the declared cp932 encoding.
        with self.assertRaises(SyntaxError) as cm:
            compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
        self.assertEqual(cm.exception.text.rstrip('\n'), "print '\u5e74'")

    def test_issue4626(self):
        # A str source with a latin-1 cookie and a non-ASCII identifier.
        code = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec")
        namespace = {}
        exec(code, namespace)
        self.assertEqual(namespace['\xc6'], '\xc6')

    def test_issue3297(self):
        # A literal non-BMP character and its \U escape must produce the
        # same string.
        code = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
        namespace = {}
        exec(code, namespace)
        self.assertEqual(namespace['a'], namespace['b'])
        self.assertEqual(len(namespace['a']), len(namespace['b']))
        self.assertEqual(ascii(namespace['a']), ascii(namespace['b']))

    def test_issue7820(self):
        # Ensure that check_bom() restores all bytes in the right order if
        # check_bom() fails in pydebug mode: a buffer starts with the first
        # byte of a valid BOM, but next bytes are different

        # one byte in common with the UTF-16-LE BOM
        self.assertRaises(SyntaxError, eval, b'\xff\x20')

        # two bytes in common with the UTF-8 BOM
        self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')

    def test_20731(self):
        # Run the sibling fixture script in a child interpreter; it must exit
        # cleanly and must not report a SyntaxError on stderr.
        script = os.path.join(os.path.dirname(__file__), 'coding20731.py')
        with subprocess.Popen([sys.executable, script],
                              stderr=subprocess.PIPE) as sub:
            err = sub.communicate()[1]
        self.assertEqual(sub.returncode, 0)
        self.assertNotIn(b'SyntaxError', err)

    def test_error_message(self):
        # These declarations are accepted without error.
        compile(b'# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec')
        compile(b'\xef\xbb\xbf\n', 'dummy', 'exec')
        compile(b'\xef\xbb\xbf# -*- coding: utf-8 -*-\n', 'dummy', 'exec')
        # An unknown encoding must be named in the error message.
        with self.assertRaisesRegex(SyntaxError, 'fake'):
            compile(b'# -*- coding: fake -*-\n', 'dummy', 'exec')
        # A UTF-8 BOM followed by a different cookie: the message must
        # mention both the declared encoding and the BOM.
        with self.assertRaisesRegex(SyntaxError, 'iso-8859-15'):
            compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
                    'dummy', 'exec')
        with self.assertRaisesRegex(SyntaxError, 'BOM'):
            compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
                    'dummy', 'exec')
        with self.assertRaisesRegex(SyntaxError, 'fake'):
            compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
        with self.assertRaisesRegex(SyntaxError, 'BOM'):
            compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')

    def test_bad_coding(self):
        module_name = 'bad_coding'
        self.verify_bad_module(module_name)

    def test_bad_coding2(self):
        module_name = 'bad_coding2'
        self.verify_bad_module(module_name)

    def verify_bad_module(self, module_name):
        # Helper: the named fixture module (a sibling of this file inside the
        # ``test`` package) must raise SyntaxError both when imported and
        # when its raw bytes are compiled directly.
        self.assertRaises(SyntaxError, __import__, 'test.' + module_name)

        path = os.path.dirname(__file__)
        filename = os.path.join(path, module_name + '.py')
        with open(filename, "rb") as fp:
            source = fp.read()
        self.assertRaises(SyntaxError, compile, source, filename, 'exec')

    def test_exec_valid_coding(self):
        namespace = {}
        exec(b'# coding: cp949\na = "\xaa\xa7"\n', namespace)
        self.assertEqual(namespace['a'], '\u3047')

    def test_file_parse(self):
        # issue1134: all encodings outside latin-1 and utf-8 fail on
        # multiline strings and long lines (>512 columns)
        unload(TESTFN)
        filename = TESTFN + ".py"
        sys.path.insert(0, os.curdir)
        try:
            with open(filename, "w", encoding="cp1252") as f:
                f.write("# -*- coding: cp1252 -*-\n")
                f.write("'''A short string\n")
                f.write("'''\n")
                f.write("'A very long string %s'\n" % ("X" * 1000))

            importlib.invalidate_caches()
            __import__(TESTFN)
        finally:
            # Undo every side effect: the sys.path entry, the generated
            # module and any compiled artefacts, and the import cache entry.
            del sys.path[0]
            unlink(filename)
            unlink(filename + "c")
            unlink(filename + "o")
            unload(TESTFN)
            rmtree('__pycache__')

    def test_error_from_string(self):
        # See http://bugs.python.org/issue6289
        source = "# coding: ascii\n\N{SNOWMAN}".encode('utf-8')
        with self.assertRaises(SyntaxError) as c:
            compile(source, "<string>", "exec")
        expected = ("'ascii' codec can't decode byte 0xe2 in position 16: "
                    "ordinal not in range(128)")
        self.assertTrue(c.exception.args[0].startswith(expected),
                        msg=c.exception.args[0])


class AbstractSourceEncodingTest:
    """Shared test cases for the two concrete classes below.

    Subclasses must provide ``check_script_output(src, expected)``, which
    runs the bytes *src* as a script and asserts that its stripped standard
    output equals the bytes *expected*.
    """

    def test_default_coding(self):
        src = (b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xe4'")

    def test_first_coding_line(self):
        src = (b'#coding:iso8859-15\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_second_coding_line(self):
        src = (b'#\n'
               b'#coding:iso8859-15\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_third_coding_line(self):
        # Only first two lines are tested for a magic comment.
        src = (b'#\n'
               b'#\n'
               b'#coding:iso8859-15\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xe4'")

    def test_double_coding_line(self):
        # If the first line matches the second line is ignored.
        src = (b'#coding:iso8859-15\n'
               b'#coding:latin1\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_double_coding_same_line(self):
        src = (b'#coding:iso8859-15 coding:latin1\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_first_non_utf8_coding_line(self):
        src = (b'#coding:iso-8859-15 \xa4\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_second_non_utf8_coding_line(self):
        src = (b'\n'
               b'#coding:iso-8859-15 \xa4\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_utf8_bom(self):
        src = (b'\xef\xbb\xbfprint(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xe4'")

    def test_utf8_bom_and_utf8_coding_line(self):
        src = (b'\xef\xbb\xbf#coding:utf-8\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xe4'")


class BytesSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
    """Run the shared cases by passing the source bytes straight to exec()."""

    def check_script_output(self, src, expected):
        with captured_stdout() as stdout:
            exec(src)
        out = stdout.getvalue().encode('latin1')
        self.assertEqual(out.rstrip(), expected)


class FileSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
    """Run the shared cases by writing the source to a file and executing it
    in a child interpreter."""

    def check_script_output(self, src, expected):
        with tempfile.TemporaryDirectory() as tmpd:
            fn = os.path.join(tmpd, 'test.py')
            with open(fn, 'wb') as fp:
                fp.write(src)
            res = script_helper.assert_python_ok(fn)
        self.assertEqual(res.out.rstrip(), expected)


if __name__ == "__main__":
    unittest.main()