r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz' | rb'xyz' -- raw bytes

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals. For bytes literals, this is considered illegal. But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings. We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes. It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""

import os
import sys
import shutil
import tempfile
import unittest
import warnings


# Source text of the module that check_encoding() writes to disk and imports.
# The single %s placeholder receives the encoding name for the coding cookie.
# Kept as a raw string so the backslash escapes reach the generated file
# verbatim and are interpreted by the parser under test, not by this module.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""


def byte(i):
    """Return a bytes object of length one holding the integer value *i*."""
    return bytes([i])


class TestLiterals(unittest.TestCase):

    from test.support import check_syntax_warning

    def setUp(self):
        # Generated modules are written to a private temp directory that is
        # put first on sys.path so __import__() can find them.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place and discard the generated files.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # Use a unittest assertion rather than a bare assert statement,
            # which would be stripped when running under "python -O".
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    # In the eval() tests below, lines come in pairs: with a raw outer
    # literal the backslash escape is seen by eval(); with a non-raw outer
    # literal the escape is already resolved and eval() sees the resulting
    # character inside the inner literal.

    def test_eval_str_normal(self):
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes must be rejected.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        for b in range(1, 128):
            # Skip the characters excluded from this check (line breaks,
            # quotes, and the characters that start a recognized escape).
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # With the warning promoted to an error, a SyntaxError is expected
        # and nothing should be recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_str_raw(self):
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        # A literal non-ASCII character inside a bytes literal is illegal.
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        for b in range(1, 128):
            # Same skip list as the str variant minus N, U and u, so \N, \U
            # and \u are expected to warn in bytes literals.
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 1)

        # With the warning promoted to an error, a SyntaxError is expected
        # and nothing should be recorded.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 1)

    def test_eval_bytes_raw(self):
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        # The u prefix cannot be combined with r or b.
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        # Write TEMPLATE (with its coding cookie set to *encoding*) followed
        # by *extra* into a module file encoded with *encoding*, then import
        # it. Failures surface as exceptions raised by the import.
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        __import__(modname)
        # Drop the module so a later test reusing the name imports afresh.
        del sys.modules[modname]

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # The non-raw "\x80" puts a real non-ASCII character inside the
        # bytes literal of the generated file.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")


if __name__ == "__main__":
    unittest.main()