• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# -*- coding: koi8-r -*-
2
3import unittest
4from test.support import script_helper, captured_stdout
5from test.support.os_helper import TESTFN, unlink, rmtree
6from test.support.import_helper import unload
7import importlib
8import os
9import sys
10import subprocess
11import tempfile
12
13class MiscSourceEncodingTest(unittest.TestCase):
14
15    def test_pep263(self):
16        self.assertEqual(
17            "�����".encode("utf-8"),
18            b'\xd0\x9f\xd0\xb8\xd1\x82\xd0\xbe\xd0\xbd'
19        )
20        self.assertEqual(
21            "\�".encode("utf-8"),
22            b'\\\xd0\x9f'
23        )
24
25    def test_compilestring(self):
26        # see #1882
27        c = compile(b"\n# coding: utf-8\nu = '\xc3\xb3'\n", "dummy", "exec")
28        d = {}
29        exec(c, d)
30        self.assertEqual(d['u'], '\xf3')
31
32    def test_issue2301(self):
33        try:
34            compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
35        except SyntaxError as v:
36            self.assertEqual(v.text.rstrip('\n'), "print '\u5e74'")
37        else:
38            self.fail()
39
40    def test_issue4626(self):
41        c = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec")
42        d = {}
43        exec(c, d)
44        self.assertEqual(d['\xc6'], '\xc6')
45
46    def test_issue3297(self):
47        c = compile("a, b = '\U0001010F', '\\U0001010F'", "dummy", "exec")
48        d = {}
49        exec(c, d)
50        self.assertEqual(d['a'], d['b'])
51        self.assertEqual(len(d['a']), len(d['b']))
52        self.assertEqual(ascii(d['a']), ascii(d['b']))
53
54    def test_issue7820(self):
55        # Ensure that check_bom() restores all bytes in the right order if
56        # check_bom() fails in pydebug mode: a buffer starts with the first
57        # byte of a valid BOM, but next bytes are different
58
59        # one byte in common with the UTF-16-LE BOM
60        self.assertRaises(SyntaxError, eval, b'\xff\x20')
61
62        # one byte in common with the UTF-8 BOM
63        self.assertRaises(SyntaxError, eval, b'\xef\x20')
64
65        # two bytes in common with the UTF-8 BOM
66        self.assertRaises(SyntaxError, eval, b'\xef\xbb\x20')
67
68    def test_20731(self):
69        sub = subprocess.Popen([sys.executable,
70                        os.path.join(os.path.dirname(__file__),
71                                     'coding20731.py')],
72                        stderr=subprocess.PIPE)
73        err = sub.communicate()[1]
74        self.assertEqual(sub.returncode, 0)
75        self.assertNotIn(b'SyntaxError', err)
76
77    def test_error_message(self):
78        compile(b'# -*- coding: iso-8859-15 -*-\n', 'dummy', 'exec')
79        compile(b'\xef\xbb\xbf\n', 'dummy', 'exec')
80        compile(b'\xef\xbb\xbf# -*- coding: utf-8 -*-\n', 'dummy', 'exec')
81        with self.assertRaisesRegex(SyntaxError, 'fake'):
82            compile(b'# -*- coding: fake -*-\n', 'dummy', 'exec')
83        with self.assertRaisesRegex(SyntaxError, 'iso-8859-15'):
84            compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
85                    'dummy', 'exec')
86        with self.assertRaisesRegex(SyntaxError, 'BOM'):
87            compile(b'\xef\xbb\xbf# -*- coding: iso-8859-15 -*-\n',
88                    'dummy', 'exec')
89        with self.assertRaisesRegex(SyntaxError, 'fake'):
90            compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
91        with self.assertRaisesRegex(SyntaxError, 'BOM'):
92            compile(b'\xef\xbb\xbf# -*- coding: fake -*-\n', 'dummy', 'exec')
93
94    def test_bad_coding(self):
95        module_name = 'bad_coding'
96        self.verify_bad_module(module_name)
97
98    def test_bad_coding2(self):
99        module_name = 'bad_coding2'
100        self.verify_bad_module(module_name)
101
102    def verify_bad_module(self, module_name):
103        self.assertRaises(SyntaxError, __import__, 'test.' + module_name)
104
105        path = os.path.dirname(__file__)
106        filename = os.path.join(path, module_name + '.py')
107        with open(filename, "rb") as fp:
108            bytes = fp.read()
109        self.assertRaises(SyntaxError, compile, bytes, filename, 'exec')
110
111    def test_exec_valid_coding(self):
112        d = {}
113        exec(b'# coding: cp949\na = "\xaa\xa7"\n', d)
114        self.assertEqual(d['a'], '\u3047')
115
116    def test_file_parse(self):
117        # issue1134: all encodings outside latin-1 and utf-8 fail on
118        # multiline strings and long lines (>512 columns)
119        unload(TESTFN)
120        filename = TESTFN + ".py"
121        f = open(filename, "w", encoding="cp1252")
122        sys.path.insert(0, os.curdir)
123        try:
124            with f:
125                f.write("# -*- coding: cp1252 -*-\n")
126                f.write("'''A short string\n")
127                f.write("'''\n")
128                f.write("'A very long string %s'\n" % ("X" * 1000))
129
130            importlib.invalidate_caches()
131            __import__(TESTFN)
132        finally:
133            del sys.path[0]
134            unlink(filename)
135            unlink(filename + "c")
136            unlink(filename + "o")
137            unload(TESTFN)
138            rmtree('__pycache__')
139
140    def test_error_from_string(self):
141        # See http://bugs.python.org/issue6289
142        input = "# coding: ascii\n\N{SNOWMAN}".encode('utf-8')
143        with self.assertRaises(SyntaxError) as c:
144            compile(input, "<string>", "exec")
145        expected = "'ascii' codec can't decode byte 0xe2 in position 16: " \
146                   "ordinal not in range(128)"
147        self.assertTrue(c.exception.args[0].startswith(expected),
148                        msg=c.exception.args[0])
149
150
class AbstractSourceEncodingTest:
    """Shared source-encoding test cases.

    This class is a mixin: it defines the test methods only.  A concrete
    subclass must provide ``check_script_output(src, expected)``, which
    receives the script source as bytes and the bytes the script is
    expected to print.

    Most scripts print ``ascii()`` of a literal holding the two bytes
    ``\\xc3\\xa4``, so the expected output shows how those bytes were decoded.
    """

    def test_default_coding(self):
        # No cookie: the two bytes form the single character U+00E4.
        src = b'print(ascii("\xc3\xa4"))\n'
        self.check_script_output(src, br"'\xe4'")

    def test_first_coding_line(self):
        # Cookie on the first line: each byte becomes its own character.
        src = (b'#coding:iso8859-15\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_second_coding_line(self):
        src = (b'#\n'
               b'#coding:iso8859-15\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_third_coding_line(self):
        # Only first two lines are tested for a magic comment.
        src = (b'#\n'
               b'#\n'
               b'#coding:iso8859-15\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xe4'")

    def test_double_coding_line(self):
        # If the first line matches the second line is ignored.
        src = (b'#coding:iso8859-15\n'
               b'#coding:latin1\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_double_coding_same_line(self):
        # Two declarations on one line: the first one is used.
        src = (b'#coding:iso8859-15 coding:latin1\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_first_non_utf8_coding_line(self):
        # The cookie line itself contains a byte that is not valid UTF-8.
        src = (b'#coding:iso-8859-15 \xa4\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_second_non_utf8_coding_line(self):
        src = (b'\n'
               b'#coding:iso-8859-15 \xa4\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xc3\u20ac'")

    def test_utf8_bom(self):
        # A leading UTF-8 BOM must not show up in the script's output.
        src = b'\xef\xbb\xbfprint(ascii("\xc3\xa4"))\n'
        self.check_script_output(src, br"'\xe4'")

    def test_utf8_bom_and_utf8_coding_line(self):
        src = (b'\xef\xbb\xbf#coding:utf-8\n'
               b'print(ascii("\xc3\xa4"))\n')
        self.check_script_output(src, br"'\xe4'")

    # The remaining tests put CR / CRLF sequences inside a triple-quoted
    # literal and check how many "\n" characters the literal ends up with.

    def test_crlf(self):
        src = b'print(ascii("""\r\n"""))\n'
        self.check_script_output(src, br"'\n'")

    def test_crcrlf(self):
        src = b'print(ascii("""\r\r\n"""))\n'
        self.check_script_output(src, br"'\n\n'")

    def test_crcrcrlf(self):
        src = b'print(ascii("""\r\r\r\n"""))\n'
        self.check_script_output(src, br"'\n\n\n'")

    def test_crcrcrlf2(self):
        src = (b'#coding:iso-8859-1\n'
               b'print(ascii("""\r\r\r\n"""))\n')
        self.check_script_output(src, br"'\n\n\n'")
224
225
class BytesSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
    """Run the shared encoding cases by exec()-ing the source bytes directly."""

    def check_script_output(self, src, expected):
        # Execute the bytes in this process and collect what they print.
        with captured_stdout() as stdout:
            exec(src)
        # The captured text is turned back into bytes via latin1 so it can
        # be compared with the expected bytes value.
        printed = stdout.getvalue()
        self.assertEqual(printed.encode('latin1').rstrip(), expected)
233
234
class FileSourceEncodingTest(AbstractSourceEncodingTest, unittest.TestCase):
    """Run the shared encoding cases from a script file written to disk."""

    def check_script_output(self, src, expected):
        # Write the raw source bytes to 'test.py' in a temporary directory,
        # pass that path to script_helper.assert_python_ok(), and compare
        # the right-stripped `out` of its result with the expected bytes.
        with tempfile.TemporaryDirectory() as workdir:
            script_path = os.path.join(workdir, 'test.py')
            with open(script_path, 'wb') as script:
                script.write(src)
            result = script_helper.assert_python_ok(script_path)
        actual = result.out.rstrip()
        self.assertEqual(actual, expected)
244
245
# Run this module's tests when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
248