1# -*- coding: koi8-r -*- 2 3import unittest 4from test.support import script_helper, captured_stdout 5from test.support.os_helper import TESTFN, unlink, rmtree 6from test.support.import_helper import unload 7import importlib 8import os 9import sys 10import subprocess 11import tempfile 12 13class MiscSourceEncodingTest(unittest.TestCase): 14 15 def test_pep263(self): 16 self.assertEqual( 17 "�����".encode("utf-8"), 18 b'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd' 19 ) 20 self.assertEqual( 21 "\�".encode("utf-8"), 22 b'\\\xd0\x9f' 23 ) 24 25 def test_compilestring(self): 26 # see #1882 27 c = compile(b"\n# coding: utf-8\nu = '\xc3\xb3'\n", "dummy", "exec") 28 d = {} 29 exec(c, d) 30 self.assertEqual(d['u'], '\xf3') 31 32 def test_issue2301(self): 33 try: 34 compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec") 35 except SyntaxError as v: 36 self.assertEqual(v.text.rstrip('\n'), "print '\u5e74'") 37 else: 38 self.fail() 39 40 def test_issue4626(self): 41 c = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec") 42 d = {} 43 exec(c, d) 44 self.assertEqual(d['\xc6'], '\xc6') 45 46 def test_issue3297(self): 47 c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec") 48 d = {} 49 exec(c, d) 50 self.assertEqual(d['a'], d['b']) 51 self.assertEqual(len(d['a']), len(d['b'])) 52 self.assertEqual(ascii(d['a']), ascii(d['b'])) 53 54 def test_issue7820(self): 55 # Ensure that check_bom() restores all bytes in the right order if 56 # check_bom() fails in pydebug mode: a buffer starts with the first 57 # byte of a valid BOM, but next bytes are different 58 59 # one byte in common with the UTF-16-LE BOM 60 self.assertRaises(SyntaxError, eval, b'\xff\x20') 61 62 # one byte in common with the UTF-8 BOM 63 self.assertRaises(SyntaxError, eval, b'\xef\x20') 64 65 # two bytes in common with the UTF-8 BOM 66 self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20') 67 68 def test_20731(self): 69 sub = subprocess.Popen([sys.executable, 70 os.path.join(os.path.dirname(__file__), 71 'coding20731.py')], 72 stderr=subprocess.PIPE) 73 err = sub.communicate()[1] 74 self.assertEqual(sub.returncode, 0) 75 self.assertNotIn(b'SyntaxError', err) 76 77 def test_error_message(self): 78 compile(b'# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec') 79 compile(b'\xef\xbb\xbf\n', 'dummy', 'exec') 80 compile(b'\xef\xbb\xbf# -*- coding: utf-8 -*-\n', 'dummy', 'exec') 81 with self.assertRaisesRegex(SyntaxError, 'fake'): 82 compile(b'# -*- coding: fake -*-\n', 'dummy', 'exec') 83 with self.assertRaisesRegex(SyntaxError, 'iso-8859-15'): 84 compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n', 85 'dummy', 'exec') 86 with self.assertRaisesRegex(SyntaxError, 'BOM'): 87 compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n', 88 'dummy', 'exec') 89 with self.assertRaisesRegex(SyntaxError, 'fake'): 90 compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec') 91 with self.assertRaisesRegex(SyntaxError, 'BOM'): 92 compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec') 93 94 def test_bad_coding(self): 95 module_name = 'bad_coding' 96 self.verify_bad_module(module_name) 97 98 def test_bad_coding2(self): 99 module_name = 'bad_coding2' 100 self.verify_bad_module(module_name) 101 102 def verify_bad_module(self, module_name): 103 self.assertRaises(SyntaxError, __import__, 'test.' + module_name) 104 105 path = os.path.dirname(__file__) 106 filename = os.path.join(path, module_name + '.py') 107 with open(filename, "rb") as fp: 108 bytes = fp.read() 109 self.assertRaises(SyntaxError, compile, bytes, filename, 'exec') 110 111 def test_exec_valid_coding(self): 112 d = {} 113 exec(b'# coding: cp949\na = "\xaa\xa7"\n', d) 114 self.assertEqual(d['a'], '\u3047') 115 116 def test_file_parse(self): 117 # issue1134: all encodings outside latin-1 and utf-8 fail on 118 # multiline strings and long lines (>512 columns) 119 unload(TESTFN) 120 filename = TESTFN + ".py" 121 f = open(filename, "w", encoding="cp1252") 122 sys.path.insert(0, os.curdir) 123 try: 124 with f: 125 f.write("# -*- coding: cp1252 -*-\n") 126 f.write("'''A short string\n") 127 f.write("'''\n") 128 f.write("'A very long string %s'\n" % ("X" * 1000)) 129 130 importlib.invalidate_caches() 131 __import__(TESTFN) 132 finally: 133 del sys.path[0] 134 unlink(filename) 135 unlink(filename + "c") 136 unlink(filename + "o") 137 unload(TESTFN) 138 rmtree('__pycache__') 139 140 def test_error_from_string(self): 141 # See http://bugs.python.org/issue6289 142 input = "# coding: ascii\n\N{SNOWMAN}".encode('utf-8') 143 with self.assertRaises(SyntaxError) as c: 144 compile(input, "<string>", "exec") 145 expected = "'ascii' codec can't decode byte 0xe2 in position 16: " \ 146 "ordinal not in range(128)" 147 self.assertTrue(c.exception.args[0].startswith(expected), 148 msg=c.exception.args[0]) 149 150 151class AbstractSourceEncodingTest: 152 153 def test_default_coding(self): 154 src = (b'print(ascii("\xc3\xa4"))\n') 155 self.check_script_output(src, br"'\xe4'") 156 157 def test_first_coding_line(self): 158 src = (b'#coding:iso8859-15\n' 159 b'print(ascii("\xc3\xa4"))\n') 160 self.check_script_output(src, br"'\xc3\u20ac'") 161 162 def test_second_coding_line(self): 163 src = (b'#\n' 164 b'#coding:iso8859-15\n' 165 b'print(ascii("\xc3\xa4"))\n') 166 self.check_script_output(src, br"'\xc3\u20ac'") 167 168 def test_third_coding_line(self): 169 # Only first two lines are tested for a magic comment. 170 src = (b'#\n' 171 b'#\n' 172 b'#coding:iso8859-15\n' 173 b'print(ascii("\xc3\xa4"))\n') 174 self.check_script_output(src, br"'\xe4'") 175 176 def test_double_coding_line(self): 177 # If the first line matches the second line is ignored. 178 src = (b'#coding:iso8859-15\n' 179 b'#coding:latin1\n' 180 b'print(ascii("\xc3\xa4"))\n') 181 self.check_script_output(src, br"'\xc3\u20ac'") 182 183 def test_double_coding_same_line(self): 184 src = (b'#coding:iso8859-15 coding:latin1\n' 185 b'print(ascii("\xc3\xa4"))\n') 186 self.check_script_output(src, br"'\xc3\u20ac'") 187 188 def test_first_non_utf8_coding_line(self): 189 src = (b'#coding:iso-8859-15 \xa4\n' 190 b'print(ascii("\xc3\xa4"))\n') 191 self.check_script_output(src, br"'\xc3\u20ac'") 192 193 def test_second_non_utf8_coding_line(self): 194 src = (b'\n' 195 b'#coding:iso-8859-15 \xa4\n' 196 b'print(ascii("\xc3\xa4"))\n') 197 self.check_script_output(src, br"'\xc3\u20ac'") 198 199 def test_utf8_bom(self): 200 src = (b'\xef\xbb\xbfprint(ascii("\xc3\xa4"))\n') 201 self.check_script_output(src, br"'\xe4'") 202 203 def test_utf8_bom_and_utf8_coding_line(self): 204 src = (b'\xef\xbb\xbf#coding:utf-8\n' 205 b'print(ascii("\xc3\xa4"))\n') 206 self.check_script_output(src, br"'\xe4'") 207 208 def test_crlf(self): 209 src = (b'print(ascii("""\r\n"""))\n') 210 out = self.check_script_output(src, br"'\n'") 211 212 def test_crcrlf(self): 213 src = (b'print(ascii("""\r\r\n"""))\n') 214 out = self.check_script_output(src, br"'\n\n'") 215 216 def test_crcrcrlf(self): 217 src = (b'print(ascii("""\r\r\r\n"""))\n') 218 out = self.check_script_output(src, br"'\n\n\n'") 219 220 def test_crcrcrlf2(self): 221 src = (b'#coding:iso-8859-1\n' 222 b'print(ascii("""\r\r\r\n"""))\n') 223 out = self.check_script_output(src, br"'\n\n\n'") 224 225 226class BytesSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase): 227 228 def check_script_output(self, src, expected): 229 with captured_stdout() as stdout: 230 exec(src) 231 out = stdout.getvalue().encode('latin1') 232 self.assertEqual(out.rstrip(), expected) 233 234 235class FileSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase): 236 237 def check_script_output(self, src, expected): 238 with tempfile.TemporaryDirectory() as tmpd: 239 fn = os.path.join(tmpd, 'test.py') 240 with open(fn, 'wb') as fp: 241 fp.write(src) 242 res = script_helper.assert_python_ok(fn) 243 self.assertEqual(res.out.rstrip(), expected) 244 245 246if __name__ == "__main__": 247 unittest.main() 248