from test import support
from test.support import os_helper
from tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
                      STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
                      open as tokenize_open, Untokenizer, generate_tokens,
                      NEWLINE)
from io import BytesIO, StringIO
import unittest
from unittest import TestCase, mock
from test.test_grammar import (VALID_UNDERSCORE_LITERALS,
                               INVALID_UNDERSCORE_LITERALS)
import os
import token


# Converts a source string into a list of textual representations
# of the tokens such as:
# `    NAME       'if'          (1, 0) (1, 2)`
# to make writing tests easier.
def stringify_tokens_from_source(token_generator, source_string):
    result = []
    num_lines = len(source_string.splitlines())
    missing_trailing_nl = source_string[-1] not in '\r\n'

    for type, token, start, end, line in token_generator:
        if type == ENDMARKER:
            break
        # Ignore the new line on the last line if the input lacks one
        if missing_trailing_nl and type == NEWLINE and end[0] == num_lines:
            continue
        type = tok_name[type]
        result.append(f"    {type:10} {token!r:13} {start} {end}")

    return result

class TokenizeTest(TestCase):
    # Tests for the tokenize module.

    # The tests can be really simple. Given a small fragment of source
    # code, print out a table with tokens. The ENDMARKER, ENCODING and
    # final NEWLINE are omitted for brevity.

    def check_tokenize(self, s, expected):
        # Format the tokens in s in a table format.
        # The ENDMARKER and final NEWLINE are omitted.
        f = BytesIO(s.encode('utf-8'))
        result = stringify_tokens_from_source(tokenize(f.readline), s)

        self.assertEqual(result,
                         ["    ENCODING   'utf-8'       (0, 0) (0, 0)"] +
                         expected.rstrip().splitlines())

    def test_implicit_newline(self):
        # Make sure that the tokenizer puts in an implicit NEWLINE
        # when the input lacks a trailing new line.
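        # With no trailing newline in the source, the last two tokens are
        # still expected to be NEWLINE followed by ENDMARKER.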
56 f = BytesIO("x".encode('utf-8')) 57 tokens = list(tokenize(f.readline)) 58 self.assertEqual(tokens[-2].type, NEWLINE) 59 self.assertEqual(tokens[-1].type, ENDMARKER) 60 61 def test_basic(self): 62 self.check_tokenize("1 + 1", """\ 63 NUMBER '1' (1, 0) (1, 1) 64 OP '+' (1, 2) (1, 3) 65 NUMBER '1' (1, 4) (1, 5) 66 """) 67 self.check_tokenize("if False:\n" 68 " # NL\n" 69 " \n" 70 " True = False # NEWLINE\n", """\ 71 NAME 'if' (1, 0) (1, 2) 72 NAME 'False' (1, 3) (1, 8) 73 OP ':' (1, 8) (1, 9) 74 NEWLINE '\\n' (1, 9) (1, 10) 75 COMMENT '# NL' (2, 4) (2, 8) 76 NL '\\n' (2, 8) (2, 9) 77 NL '\\n' (3, 4) (3, 5) 78 INDENT ' ' (4, 0) (4, 4) 79 NAME 'True' (4, 4) (4, 8) 80 OP '=' (4, 9) (4, 10) 81 NAME 'False' (4, 11) (4, 16) 82 COMMENT '# NEWLINE' (4, 17) (4, 26) 83 NEWLINE '\\n' (4, 26) (4, 27) 84 DEDENT '' (5, 0) (5, 0) 85 """) 86 indent_error_file = b"""\ 87def k(x): 88 x += 2 89 x += 5 90""" 91 readline = BytesIO(indent_error_file).readline 92 with self.assertRaisesRegex(IndentationError, 93 "unindent does not match any " 94 "outer indentation level"): 95 for tok in tokenize(readline): 96 pass 97 98 def test_int(self): 99 # Ordinary integers and binary operators 100 self.check_tokenize("0xff <= 255", """\ 101 NUMBER '0xff' (1, 0) (1, 4) 102 OP '<=' (1, 5) (1, 7) 103 NUMBER '255' (1, 8) (1, 11) 104 """) 105 self.check_tokenize("0b10 <= 255", """\ 106 NUMBER '0b10' (1, 0) (1, 4) 107 OP '<=' (1, 5) (1, 7) 108 NUMBER '255' (1, 8) (1, 11) 109 """) 110 self.check_tokenize("0o123 <= 0O123", """\ 111 NUMBER '0o123' (1, 0) (1, 5) 112 OP '<=' (1, 6) (1, 8) 113 NUMBER '0O123' (1, 9) (1, 14) 114 """) 115 self.check_tokenize("1234567 > ~0x15", """\ 116 NUMBER '1234567' (1, 0) (1, 7) 117 OP '>' (1, 8) (1, 9) 118 OP '~' (1, 10) (1, 11) 119 NUMBER '0x15' (1, 11) (1, 15) 120 """) 121 self.check_tokenize("2134568 != 1231515", """\ 122 NUMBER '2134568' (1, 0) (1, 7) 123 OP '!=' (1, 8) (1, 10) 124 NUMBER '1231515' (1, 11) (1, 18) 125 """) 126 self.check_tokenize("(-124561-1) & 200000000", """\ 127 OP '(' (1, 0) (1, 1) 128 OP '-' (1, 1) (1, 2) 129 NUMBER '124561' (1, 2) (1, 8) 130 OP '-' (1, 8) (1, 9) 131 NUMBER '1' (1, 9) (1, 10) 132 OP ')' (1, 10) (1, 11) 133 OP '&' (1, 12) (1, 13) 134 NUMBER '200000000' (1, 14) (1, 23) 135 """) 136 self.check_tokenize("0xdeadbeef != -1", """\ 137 NUMBER '0xdeadbeef' (1, 0) (1, 10) 138 OP '!=' (1, 11) (1, 13) 139 OP '-' (1, 14) (1, 15) 140 NUMBER '1' (1, 15) (1, 16) 141 """) 142 self.check_tokenize("0xdeadc0de & 12345", """\ 143 NUMBER '0xdeadc0de' (1, 0) (1, 10) 144 OP '&' (1, 11) (1, 12) 145 NUMBER '12345' (1, 13) (1, 18) 146 """) 147 self.check_tokenize("0xFF & 0x15 | 1234", """\ 148 NUMBER '0xFF' (1, 0) (1, 4) 149 OP '&' (1, 5) (1, 6) 150 NUMBER '0x15' (1, 7) (1, 11) 151 OP '|' (1, 12) (1, 13) 152 NUMBER '1234' (1, 14) (1, 18) 153 """) 154 155 def test_long(self): 156 # Long integers 157 self.check_tokenize("x = 0", """\ 158 NAME 'x' (1, 0) (1, 1) 159 OP '=' (1, 2) (1, 3) 160 NUMBER '0' (1, 4) (1, 5) 161 """) 162 self.check_tokenize("x = 0xfffffffffff", """\ 163 NAME 'x' (1, 0) (1, 1) 164 OP '=' (1, 2) (1, 3) 165 NUMBER '0xfffffffffff' (1, 4) (1, 17) 166 """) 167 self.check_tokenize("x = 123141242151251616110", """\ 168 NAME 'x' (1, 0) (1, 1) 169 OP '=' (1, 2) (1, 3) 170 NUMBER '123141242151251616110' (1, 4) (1, 25) 171 """) 172 self.check_tokenize("x = -15921590215012591", """\ 173 NAME 'x' (1, 0) (1, 1) 174 OP '=' (1, 2) (1, 3) 175 OP '-' (1, 4) (1, 5) 176 NUMBER '15921590215012591' (1, 5) (1, 22) 177 """) 178 179 def test_float(self): 180 # Floating point numbers 181 
self.check_tokenize("x = 3.14159", """\ 182 NAME 'x' (1, 0) (1, 1) 183 OP '=' (1, 2) (1, 3) 184 NUMBER '3.14159' (1, 4) (1, 11) 185 """) 186 self.check_tokenize("x = 314159.", """\ 187 NAME 'x' (1, 0) (1, 1) 188 OP '=' (1, 2) (1, 3) 189 NUMBER '314159.' (1, 4) (1, 11) 190 """) 191 self.check_tokenize("x = .314159", """\ 192 NAME 'x' (1, 0) (1, 1) 193 OP '=' (1, 2) (1, 3) 194 NUMBER '.314159' (1, 4) (1, 11) 195 """) 196 self.check_tokenize("x = 3e14159", """\ 197 NAME 'x' (1, 0) (1, 1) 198 OP '=' (1, 2) (1, 3) 199 NUMBER '3e14159' (1, 4) (1, 11) 200 """) 201 self.check_tokenize("x = 3E123", """\ 202 NAME 'x' (1, 0) (1, 1) 203 OP '=' (1, 2) (1, 3) 204 NUMBER '3E123' (1, 4) (1, 9) 205 """) 206 self.check_tokenize("x+y = 3e-1230", """\ 207 NAME 'x' (1, 0) (1, 1) 208 OP '+' (1, 1) (1, 2) 209 NAME 'y' (1, 2) (1, 3) 210 OP '=' (1, 4) (1, 5) 211 NUMBER '3e-1230' (1, 6) (1, 13) 212 """) 213 self.check_tokenize("x = 3.14e159", """\ 214 NAME 'x' (1, 0) (1, 1) 215 OP '=' (1, 2) (1, 3) 216 NUMBER '3.14e159' (1, 4) (1, 12) 217 """) 218 219 def test_underscore_literals(self): 220 def number_token(s): 221 f = BytesIO(s.encode('utf-8')) 222 for toktype, token, start, end, line in tokenize(f.readline): 223 if toktype == NUMBER: 224 return token 225 return 'invalid token' 226 for lit in VALID_UNDERSCORE_LITERALS: 227 if '(' in lit: 228 # this won't work with compound complex inputs 229 continue 230 self.assertEqual(number_token(lit), lit) 231 for lit in INVALID_UNDERSCORE_LITERALS: 232 self.assertNotEqual(number_token(lit), lit) 233 234 def test_string(self): 235 # String literals 236 self.check_tokenize("x = ''; y = \"\"", """\ 237 NAME 'x' (1, 0) (1, 1) 238 OP '=' (1, 2) (1, 3) 239 STRING "''" (1, 4) (1, 6) 240 OP ';' (1, 6) (1, 7) 241 NAME 'y' (1, 8) (1, 9) 242 OP '=' (1, 10) (1, 11) 243 STRING '""' (1, 12) (1, 14) 244 """) 245 self.check_tokenize("x = '\"'; y = \"'\"", """\ 246 NAME 'x' (1, 0) (1, 1) 247 OP '=' (1, 2) (1, 3) 248 STRING '\\'"\\'' (1, 4) (1, 7) 249 OP ';' (1, 7) (1, 8) 250 NAME 'y' (1, 9) (1, 10) 251 OP '=' (1, 11) (1, 12) 252 STRING '"\\'"' (1, 13) (1, 16) 253 """) 254 self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\ 255 NAME 'x' (1, 0) (1, 1) 256 OP '=' (1, 2) (1, 3) 257 STRING '"doesn\\'t "' (1, 4) (1, 14) 258 NAME 'shrink' (1, 14) (1, 20) 259 STRING '", does it"' (1, 20) (1, 31) 260 """) 261 self.check_tokenize("x = 'abc' + 'ABC'", """\ 262 NAME 'x' (1, 0) (1, 1) 263 OP '=' (1, 2) (1, 3) 264 STRING "'abc'" (1, 4) (1, 9) 265 OP '+' (1, 10) (1, 11) 266 STRING "'ABC'" (1, 12) (1, 17) 267 """) 268 self.check_tokenize('y = "ABC" + "ABC"', """\ 269 NAME 'y' (1, 0) (1, 1) 270 OP '=' (1, 2) (1, 3) 271 STRING '"ABC"' (1, 4) (1, 9) 272 OP '+' (1, 10) (1, 11) 273 STRING '"ABC"' (1, 12) (1, 17) 274 """) 275 self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\ 276 NAME 'x' (1, 0) (1, 1) 277 OP '=' (1, 2) (1, 3) 278 STRING "r'abc'" (1, 4) (1, 10) 279 OP '+' (1, 11) (1, 12) 280 STRING "r'ABC'" (1, 13) (1, 19) 281 OP '+' (1, 20) (1, 21) 282 STRING "R'ABC'" (1, 22) (1, 28) 283 OP '+' (1, 29) (1, 30) 284 STRING "R'ABC'" (1, 31) (1, 37) 285 """) 286 self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\ 287 NAME 'y' (1, 0) (1, 1) 288 OP '=' (1, 2) (1, 3) 289 STRING 'r"abc"' (1, 4) (1, 10) 290 OP '+' (1, 11) (1, 12) 291 STRING 'r"ABC"' (1, 13) (1, 19) 292 OP '+' (1, 20) (1, 21) 293 STRING 'R"ABC"' (1, 22) (1, 28) 294 OP '+' (1, 29) (1, 30) 295 STRING 'R"ABC"' (1, 31) (1, 37) 296 """) 297 298 self.check_tokenize("u'abc' + U'abc'", """\ 299 STRING "u'abc'" (1, 0) (1, 
6) 300 OP '+' (1, 7) (1, 8) 301 STRING "U'abc'" (1, 9) (1, 15) 302 """) 303 self.check_tokenize('u"abc" + U"abc"', """\ 304 STRING 'u"abc"' (1, 0) (1, 6) 305 OP '+' (1, 7) (1, 8) 306 STRING 'U"abc"' (1, 9) (1, 15) 307 """) 308 309 self.check_tokenize("b'abc' + B'abc'", """\ 310 STRING "b'abc'" (1, 0) (1, 6) 311 OP '+' (1, 7) (1, 8) 312 STRING "B'abc'" (1, 9) (1, 15) 313 """) 314 self.check_tokenize('b"abc" + B"abc"', """\ 315 STRING 'b"abc"' (1, 0) (1, 6) 316 OP '+' (1, 7) (1, 8) 317 STRING 'B"abc"' (1, 9) (1, 15) 318 """) 319 self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\ 320 STRING "br'abc'" (1, 0) (1, 7) 321 OP '+' (1, 8) (1, 9) 322 STRING "bR'abc'" (1, 10) (1, 17) 323 OP '+' (1, 18) (1, 19) 324 STRING "Br'abc'" (1, 20) (1, 27) 325 OP '+' (1, 28) (1, 29) 326 STRING "BR'abc'" (1, 30) (1, 37) 327 """) 328 self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\ 329 STRING 'br"abc"' (1, 0) (1, 7) 330 OP '+' (1, 8) (1, 9) 331 STRING 'bR"abc"' (1, 10) (1, 17) 332 OP '+' (1, 18) (1, 19) 333 STRING 'Br"abc"' (1, 20) (1, 27) 334 OP '+' (1, 28) (1, 29) 335 STRING 'BR"abc"' (1, 30) (1, 37) 336 """) 337 self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\ 338 STRING "rb'abc'" (1, 0) (1, 7) 339 OP '+' (1, 8) (1, 9) 340 STRING "rB'abc'" (1, 10) (1, 17) 341 OP '+' (1, 18) (1, 19) 342 STRING "Rb'abc'" (1, 20) (1, 27) 343 OP '+' (1, 28) (1, 29) 344 STRING "RB'abc'" (1, 30) (1, 37) 345 """) 346 self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\ 347 STRING 'rb"abc"' (1, 0) (1, 7) 348 OP '+' (1, 8) (1, 9) 349 STRING 'rB"abc"' (1, 10) (1, 17) 350 OP '+' (1, 18) (1, 19) 351 STRING 'Rb"abc"' (1, 20) (1, 27) 352 OP '+' (1, 28) (1, 29) 353 STRING 'RB"abc"' (1, 30) (1, 37) 354 """) 355 # Check 0, 1, and 2 character string prefixes. 
356 self.check_tokenize(r'"a\ 357de\ 358fg"', """\ 359 STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3) 360 """) 361 self.check_tokenize(r'u"a\ 362de"', """\ 363 STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3) 364 """) 365 self.check_tokenize(r'rb"a\ 366d"', """\ 367 STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2) 368 """) 369 self.check_tokenize(r'"""a\ 370b"""', """\ 371 STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) 372 """) 373 self.check_tokenize(r'u"""a\ 374b"""', """\ 375 STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4) 376 """) 377 self.check_tokenize(r'rb"""a\ 378b\ 379c"""', """\ 380 STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4) 381 """) 382 self.check_tokenize('f"abc"', """\ 383 STRING 'f"abc"' (1, 0) (1, 6) 384 """) 385 self.check_tokenize('fR"a{b}c"', """\ 386 STRING 'fR"a{b}c"' (1, 0) (1, 9) 387 """) 388 self.check_tokenize('f"""abc"""', """\ 389 STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10) 390 """) 391 self.check_tokenize(r'f"abc\ 392def"', """\ 393 STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4) 394 """) 395 self.check_tokenize(r'Rf"abc\ 396def"', """\ 397 STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4) 398 """) 399 400 def test_function(self): 401 self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\ 402 NAME 'def' (1, 0) (1, 3) 403 NAME 'd22' (1, 4) (1, 7) 404 OP '(' (1, 7) (1, 8) 405 NAME 'a' (1, 8) (1, 9) 406 OP ',' (1, 9) (1, 10) 407 NAME 'b' (1, 11) (1, 12) 408 OP ',' (1, 12) (1, 13) 409 NAME 'c' (1, 14) (1, 15) 410 OP '=' (1, 15) (1, 16) 411 NUMBER '2' (1, 16) (1, 17) 412 OP ',' (1, 17) (1, 18) 413 NAME 'd' (1, 19) (1, 20) 414 OP '=' (1, 20) (1, 21) 415 NUMBER '2' (1, 21) (1, 22) 416 OP ',' (1, 22) (1, 23) 417 OP '*' (1, 24) (1, 25) 418 NAME 'k' (1, 25) (1, 26) 419 OP ')' (1, 26) (1, 27) 420 OP ':' (1, 27) (1, 28) 421 NAME 'pass' (1, 29) (1, 33) 422 """) 423 self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\ 424 NAME 'def' (1, 0) (1, 3) 425 NAME 'd01v_' (1, 4) (1, 9) 426 OP '(' (1, 9) (1, 10) 427 NAME 'a' (1, 10) (1, 11) 428 OP '=' (1, 11) (1, 12) 429 NUMBER '1' (1, 12) (1, 13) 430 OP ',' (1, 13) (1, 14) 431 OP '*' (1, 15) (1, 16) 432 NAME 'k' (1, 16) (1, 17) 433 OP ',' (1, 17) (1, 18) 434 OP '**' (1, 19) (1, 21) 435 NAME 'w' (1, 21) (1, 22) 436 OP ')' (1, 22) (1, 23) 437 OP ':' (1, 23) (1, 24) 438 NAME 'pass' (1, 25) (1, 29) 439 """) 440 self.check_tokenize("def d23(a: str, b: int=3) -> int: pass", """\ 441 NAME 'def' (1, 0) (1, 3) 442 NAME 'd23' (1, 4) (1, 7) 443 OP '(' (1, 7) (1, 8) 444 NAME 'a' (1, 8) (1, 9) 445 OP ':' (1, 9) (1, 10) 446 NAME 'str' (1, 11) (1, 14) 447 OP ',' (1, 14) (1, 15) 448 NAME 'b' (1, 16) (1, 17) 449 OP ':' (1, 17) (1, 18) 450 NAME 'int' (1, 19) (1, 22) 451 OP '=' (1, 22) (1, 23) 452 NUMBER '3' (1, 23) (1, 24) 453 OP ')' (1, 24) (1, 25) 454 OP '->' (1, 26) (1, 28) 455 NAME 'int' (1, 29) (1, 32) 456 OP ':' (1, 32) (1, 33) 457 NAME 'pass' (1, 34) (1, 38) 458 """) 459 460 def test_comparison(self): 461 # Comparison 462 self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " 463 "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\ 464 NAME 'if' (1, 0) (1, 2) 465 NUMBER '1' (1, 3) (1, 4) 466 OP '<' (1, 5) (1, 6) 467 NUMBER '1' (1, 7) (1, 8) 468 OP '>' (1, 9) (1, 10) 469 NUMBER '1' (1, 11) (1, 12) 470 OP '==' (1, 13) (1, 15) 471 NUMBER '1' (1, 16) (1, 17) 472 OP '>=' (1, 18) (1, 20) 473 NUMBER '5' (1, 21) (1, 22) 474 OP '<=' (1, 23) (1, 25) 475 NUMBER '0x15' (1, 26) (1, 30) 476 OP '<=' (1, 31) (1, 33) 477 NUMBER '0x12' (1, 34) (1, 38) 478 OP '!=' (1, 39) (1, 41) 479 NUMBER '1' (1, 42) (1, 43) 480 NAME 'and' (1, 44) (1, 47) 481 NUMBER '5' (1, 48) (1, 49) 482 NAME 
'in' (1, 50) (1, 52) 483 NUMBER '1' (1, 53) (1, 54) 484 NAME 'not' (1, 55) (1, 58) 485 NAME 'in' (1, 59) (1, 61) 486 NUMBER '1' (1, 62) (1, 63) 487 NAME 'is' (1, 64) (1, 66) 488 NUMBER '1' (1, 67) (1, 68) 489 NAME 'or' (1, 69) (1, 71) 490 NUMBER '5' (1, 72) (1, 73) 491 NAME 'is' (1, 74) (1, 76) 492 NAME 'not' (1, 77) (1, 80) 493 NUMBER '1' (1, 81) (1, 82) 494 OP ':' (1, 82) (1, 83) 495 NAME 'pass' (1, 84) (1, 88) 496 """) 497 498 def test_shift(self): 499 # Shift 500 self.check_tokenize("x = 1 << 1 >> 5", """\ 501 NAME 'x' (1, 0) (1, 1) 502 OP '=' (1, 2) (1, 3) 503 NUMBER '1' (1, 4) (1, 5) 504 OP '<<' (1, 6) (1, 8) 505 NUMBER '1' (1, 9) (1, 10) 506 OP '>>' (1, 11) (1, 13) 507 NUMBER '5' (1, 14) (1, 15) 508 """) 509 510 def test_additive(self): 511 # Additive 512 self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\ 513 NAME 'x' (1, 0) (1, 1) 514 OP '=' (1, 2) (1, 3) 515 NUMBER '1' (1, 4) (1, 5) 516 OP '-' (1, 6) (1, 7) 517 NAME 'y' (1, 8) (1, 9) 518 OP '+' (1, 10) (1, 11) 519 NUMBER '15' (1, 12) (1, 14) 520 OP '-' (1, 15) (1, 16) 521 NUMBER '1' (1, 17) (1, 18) 522 OP '+' (1, 19) (1, 20) 523 NUMBER '0x124' (1, 21) (1, 26) 524 OP '+' (1, 27) (1, 28) 525 NAME 'z' (1, 29) (1, 30) 526 OP '+' (1, 31) (1, 32) 527 NAME 'a' (1, 33) (1, 34) 528 OP '[' (1, 34) (1, 35) 529 NUMBER '5' (1, 35) (1, 36) 530 OP ']' (1, 36) (1, 37) 531 """) 532 533 def test_multiplicative(self): 534 # Multiplicative 535 self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\ 536 NAME 'x' (1, 0) (1, 1) 537 OP '=' (1, 2) (1, 3) 538 NUMBER '1' (1, 4) (1, 5) 539 OP '//' (1, 5) (1, 7) 540 NUMBER '1' (1, 7) (1, 8) 541 OP '*' (1, 8) (1, 9) 542 NUMBER '1' (1, 9) (1, 10) 543 OP '/' (1, 10) (1, 11) 544 NUMBER '5' (1, 11) (1, 12) 545 OP '*' (1, 12) (1, 13) 546 NUMBER '12' (1, 13) (1, 15) 547 OP '%' (1, 15) (1, 16) 548 NUMBER '0x12' (1, 16) (1, 20) 549 OP '@' (1, 20) (1, 21) 550 NUMBER '42' (1, 21) (1, 23) 551 """) 552 553 def test_unary(self): 554 # Unary 555 self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\ 556 OP '~' (1, 0) (1, 1) 557 NUMBER '1' (1, 1) (1, 2) 558 OP '^' (1, 3) (1, 4) 559 NUMBER '1' (1, 5) (1, 6) 560 OP '&' (1, 7) (1, 8) 561 NUMBER '1' (1, 9) (1, 10) 562 OP '|' (1, 11) (1, 12) 563 NUMBER '1' (1, 12) (1, 13) 564 OP '^' (1, 14) (1, 15) 565 OP '-' (1, 16) (1, 17) 566 NUMBER '1' (1, 17) (1, 18) 567 """) 568 self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\ 569 OP '-' (1, 0) (1, 1) 570 NUMBER '1' (1, 1) (1, 2) 571 OP '*' (1, 2) (1, 3) 572 NUMBER '1' (1, 3) (1, 4) 573 OP '/' (1, 4) (1, 5) 574 NUMBER '1' (1, 5) (1, 6) 575 OP '+' (1, 6) (1, 7) 576 NUMBER '1' (1, 7) (1, 8) 577 OP '*' (1, 8) (1, 9) 578 NUMBER '1' (1, 9) (1, 10) 579 OP '//' (1, 10) (1, 12) 580 NUMBER '1' (1, 12) (1, 13) 581 OP '-' (1, 14) (1, 15) 582 OP '-' (1, 16) (1, 17) 583 OP '-' (1, 17) (1, 18) 584 OP '-' (1, 18) (1, 19) 585 NUMBER '1' (1, 19) (1, 20) 586 OP '**' (1, 20) (1, 22) 587 NUMBER '1' (1, 22) (1, 23) 588 """) 589 590 def test_selector(self): 591 # Selector 592 self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\ 593 NAME 'import' (1, 0) (1, 6) 594 NAME 'sys' (1, 7) (1, 10) 595 OP ',' (1, 10) (1, 11) 596 NAME 'time' (1, 12) (1, 16) 597 NEWLINE '\\n' (1, 16) (1, 17) 598 NAME 'x' (2, 0) (2, 1) 599 OP '=' (2, 2) (2, 3) 600 NAME 'sys' (2, 4) (2, 7) 601 OP '.' (2, 7) (2, 8) 602 NAME 'modules' (2, 8) (2, 15) 603 OP '[' (2, 15) (2, 16) 604 STRING "'time'" (2, 16) (2, 22) 605 OP ']' (2, 22) (2, 23) 606 OP '.' 
(2, 23) (2, 24) 607 NAME 'time' (2, 24) (2, 28) 608 OP '(' (2, 28) (2, 29) 609 OP ')' (2, 29) (2, 30) 610 """) 611 612 def test_method(self): 613 # Methods 614 self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\ 615 OP '@' (1, 0) (1, 1) 616 NAME 'staticmethod' (1, 1) (1, 13) 617 NEWLINE '\\n' (1, 13) (1, 14) 618 NAME 'def' (2, 0) (2, 3) 619 NAME 'foo' (2, 4) (2, 7) 620 OP '(' (2, 7) (2, 8) 621 NAME 'x' (2, 8) (2, 9) 622 OP ',' (2, 9) (2, 10) 623 NAME 'y' (2, 10) (2, 11) 624 OP ')' (2, 11) (2, 12) 625 OP ':' (2, 12) (2, 13) 626 NAME 'pass' (2, 14) (2, 18) 627 """) 628 629 def test_tabs(self): 630 # Evil tabs 631 self.check_tokenize("def f():\n" 632 "\tif x\n" 633 " \tpass", """\ 634 NAME 'def' (1, 0) (1, 3) 635 NAME 'f' (1, 4) (1, 5) 636 OP '(' (1, 5) (1, 6) 637 OP ')' (1, 6) (1, 7) 638 OP ':' (1, 7) (1, 8) 639 NEWLINE '\\n' (1, 8) (1, 9) 640 INDENT '\\t' (2, 0) (2, 1) 641 NAME 'if' (2, 1) (2, 3) 642 NAME 'x' (2, 4) (2, 5) 643 NEWLINE '\\n' (2, 5) (2, 6) 644 INDENT ' \\t' (3, 0) (3, 9) 645 NAME 'pass' (3, 9) (3, 13) 646 DEDENT '' (4, 0) (4, 0) 647 DEDENT '' (4, 0) (4, 0) 648 """) 649 650 def test_non_ascii_identifiers(self): 651 # Non-ascii identifiers 652 self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\ 653 NAME 'Örter' (1, 0) (1, 5) 654 OP '=' (1, 6) (1, 7) 655 STRING "'places'" (1, 8) (1, 16) 656 NEWLINE '\\n' (1, 16) (1, 17) 657 NAME 'grün' (2, 0) (2, 4) 658 OP '=' (2, 5) (2, 6) 659 STRING "'green'" (2, 7) (2, 14) 660 """) 661 662 def test_unicode(self): 663 # Legacy unicode literals: 664 self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\ 665 NAME 'Örter' (1, 0) (1, 5) 666 OP '=' (1, 6) (1, 7) 667 STRING "u'places'" (1, 8) (1, 17) 668 NEWLINE '\\n' (1, 17) (1, 18) 669 NAME 'grün' (2, 0) (2, 4) 670 OP '=' (2, 5) (2, 6) 671 STRING "U'green'" (2, 7) (2, 15) 672 """) 673 674 def test_async(self): 675 # Async/await extension: 676 self.check_tokenize("async = 1", """\ 677 NAME 'async' (1, 0) (1, 5) 678 OP '=' (1, 6) (1, 7) 679 NUMBER '1' (1, 8) (1, 9) 680 """) 681 682 self.check_tokenize("a = (async = 1)", """\ 683 NAME 'a' (1, 0) (1, 1) 684 OP '=' (1, 2) (1, 3) 685 OP '(' (1, 4) (1, 5) 686 NAME 'async' (1, 5) (1, 10) 687 OP '=' (1, 11) (1, 12) 688 NUMBER '1' (1, 13) (1, 14) 689 OP ')' (1, 14) (1, 15) 690 """) 691 692 self.check_tokenize("async()", """\ 693 NAME 'async' (1, 0) (1, 5) 694 OP '(' (1, 5) (1, 6) 695 OP ')' (1, 6) (1, 7) 696 """) 697 698 self.check_tokenize("class async(Bar):pass", """\ 699 NAME 'class' (1, 0) (1, 5) 700 NAME 'async' (1, 6) (1, 11) 701 OP '(' (1, 11) (1, 12) 702 NAME 'Bar' (1, 12) (1, 15) 703 OP ')' (1, 15) (1, 16) 704 OP ':' (1, 16) (1, 17) 705 NAME 'pass' (1, 17) (1, 21) 706 """) 707 708 self.check_tokenize("class async:pass", """\ 709 NAME 'class' (1, 0) (1, 5) 710 NAME 'async' (1, 6) (1, 11) 711 OP ':' (1, 11) (1, 12) 712 NAME 'pass' (1, 12) (1, 16) 713 """) 714 715 self.check_tokenize("await = 1", """\ 716 NAME 'await' (1, 0) (1, 5) 717 OP '=' (1, 6) (1, 7) 718 NUMBER '1' (1, 8) (1, 9) 719 """) 720 721 self.check_tokenize("foo.async", """\ 722 NAME 'foo' (1, 0) (1, 3) 723 OP '.' 
(1, 3) (1, 4) 724 NAME 'async' (1, 4) (1, 9) 725 """) 726 727 self.check_tokenize("async for a in b: pass", """\ 728 NAME 'async' (1, 0) (1, 5) 729 NAME 'for' (1, 6) (1, 9) 730 NAME 'a' (1, 10) (1, 11) 731 NAME 'in' (1, 12) (1, 14) 732 NAME 'b' (1, 15) (1, 16) 733 OP ':' (1, 16) (1, 17) 734 NAME 'pass' (1, 18) (1, 22) 735 """) 736 737 self.check_tokenize("async with a as b: pass", """\ 738 NAME 'async' (1, 0) (1, 5) 739 NAME 'with' (1, 6) (1, 10) 740 NAME 'a' (1, 11) (1, 12) 741 NAME 'as' (1, 13) (1, 15) 742 NAME 'b' (1, 16) (1, 17) 743 OP ':' (1, 17) (1, 18) 744 NAME 'pass' (1, 19) (1, 23) 745 """) 746 747 self.check_tokenize("async.foo", """\ 748 NAME 'async' (1, 0) (1, 5) 749 OP '.' (1, 5) (1, 6) 750 NAME 'foo' (1, 6) (1, 9) 751 """) 752 753 self.check_tokenize("async", """\ 754 NAME 'async' (1, 0) (1, 5) 755 """) 756 757 self.check_tokenize("async\n#comment\nawait", """\ 758 NAME 'async' (1, 0) (1, 5) 759 NEWLINE '\\n' (1, 5) (1, 6) 760 COMMENT '#comment' (2, 0) (2, 8) 761 NL '\\n' (2, 8) (2, 9) 762 NAME 'await' (3, 0) (3, 5) 763 """) 764 765 self.check_tokenize("async\n...\nawait", """\ 766 NAME 'async' (1, 0) (1, 5) 767 NEWLINE '\\n' (1, 5) (1, 6) 768 OP '...' (2, 0) (2, 3) 769 NEWLINE '\\n' (2, 3) (2, 4) 770 NAME 'await' (3, 0) (3, 5) 771 """) 772 773 self.check_tokenize("async\nawait", """\ 774 NAME 'async' (1, 0) (1, 5) 775 NEWLINE '\\n' (1, 5) (1, 6) 776 NAME 'await' (2, 0) (2, 5) 777 """) 778 779 self.check_tokenize("foo.async + 1", """\ 780 NAME 'foo' (1, 0) (1, 3) 781 OP '.' (1, 3) (1, 4) 782 NAME 'async' (1, 4) (1, 9) 783 OP '+' (1, 10) (1, 11) 784 NUMBER '1' (1, 12) (1, 13) 785 """) 786 787 self.check_tokenize("async def foo(): pass", """\ 788 NAME 'async' (1, 0) (1, 5) 789 NAME 'def' (1, 6) (1, 9) 790 NAME 'foo' (1, 10) (1, 13) 791 OP '(' (1, 13) (1, 14) 792 OP ')' (1, 14) (1, 15) 793 OP ':' (1, 15) (1, 16) 794 NAME 'pass' (1, 17) (1, 21) 795 """) 796 797 self.check_tokenize('''\ 798async def foo(): 799 def foo(await): 800 await = 1 801 if 1: 802 await 803async += 1 804''', """\ 805 NAME 'async' (1, 0) (1, 5) 806 NAME 'def' (1, 6) (1, 9) 807 NAME 'foo' (1, 10) (1, 13) 808 OP '(' (1, 13) (1, 14) 809 OP ')' (1, 14) (1, 15) 810 OP ':' (1, 15) (1, 16) 811 NEWLINE '\\n' (1, 16) (1, 17) 812 INDENT ' ' (2, 0) (2, 2) 813 NAME 'def' (2, 2) (2, 5) 814 NAME 'foo' (2, 6) (2, 9) 815 OP '(' (2, 9) (2, 10) 816 NAME 'await' (2, 10) (2, 15) 817 OP ')' (2, 15) (2, 16) 818 OP ':' (2, 16) (2, 17) 819 NEWLINE '\\n' (2, 17) (2, 18) 820 INDENT ' ' (3, 0) (3, 4) 821 NAME 'await' (3, 4) (3, 9) 822 OP '=' (3, 10) (3, 11) 823 NUMBER '1' (3, 12) (3, 13) 824 NEWLINE '\\n' (3, 13) (3, 14) 825 DEDENT '' (4, 2) (4, 2) 826 NAME 'if' (4, 2) (4, 4) 827 NUMBER '1' (4, 5) (4, 6) 828 OP ':' (4, 6) (4, 7) 829 NEWLINE '\\n' (4, 7) (4, 8) 830 INDENT ' ' (5, 0) (5, 4) 831 NAME 'await' (5, 4) (5, 9) 832 NEWLINE '\\n' (5, 9) (5, 10) 833 DEDENT '' (6, 0) (6, 0) 834 DEDENT '' (6, 0) (6, 0) 835 NAME 'async' (6, 0) (6, 5) 836 OP '+=' (6, 6) (6, 8) 837 NUMBER '1' (6, 9) (6, 10) 838 NEWLINE '\\n' (6, 10) (6, 11) 839 """) 840 841 self.check_tokenize('''\ 842async def foo(): 843 async for i in 1: pass''', """\ 844 NAME 'async' (1, 0) (1, 5) 845 NAME 'def' (1, 6) (1, 9) 846 NAME 'foo' (1, 10) (1, 13) 847 OP '(' (1, 13) (1, 14) 848 OP ')' (1, 14) (1, 15) 849 OP ':' (1, 15) (1, 16) 850 NEWLINE '\\n' (1, 16) (1, 17) 851 INDENT ' ' (2, 0) (2, 2) 852 NAME 'async' (2, 2) (2, 7) 853 NAME 'for' (2, 8) (2, 11) 854 NAME 'i' (2, 12) (2, 13) 855 NAME 'in' (2, 14) (2, 16) 856 NUMBER '1' (2, 17) (2, 18) 857 OP ':' (2, 18) (2, 19) 858 NAME 
'pass' (2, 20) (2, 24) 859 DEDENT '' (3, 0) (3, 0) 860 """) 861 862 self.check_tokenize('''async def foo(async): await''', """\ 863 NAME 'async' (1, 0) (1, 5) 864 NAME 'def' (1, 6) (1, 9) 865 NAME 'foo' (1, 10) (1, 13) 866 OP '(' (1, 13) (1, 14) 867 NAME 'async' (1, 14) (1, 19) 868 OP ')' (1, 19) (1, 20) 869 OP ':' (1, 20) (1, 21) 870 NAME 'await' (1, 22) (1, 27) 871 """) 872 873 self.check_tokenize('''\ 874def f(): 875 876 def baz(): pass 877 async def bar(): pass 878 879 await = 2''', """\ 880 NAME 'def' (1, 0) (1, 3) 881 NAME 'f' (1, 4) (1, 5) 882 OP '(' (1, 5) (1, 6) 883 OP ')' (1, 6) (1, 7) 884 OP ':' (1, 7) (1, 8) 885 NEWLINE '\\n' (1, 8) (1, 9) 886 NL '\\n' (2, 0) (2, 1) 887 INDENT ' ' (3, 0) (3, 2) 888 NAME 'def' (3, 2) (3, 5) 889 NAME 'baz' (3, 6) (3, 9) 890 OP '(' (3, 9) (3, 10) 891 OP ')' (3, 10) (3, 11) 892 OP ':' (3, 11) (3, 12) 893 NAME 'pass' (3, 13) (3, 17) 894 NEWLINE '\\n' (3, 17) (3, 18) 895 NAME 'async' (4, 2) (4, 7) 896 NAME 'def' (4, 8) (4, 11) 897 NAME 'bar' (4, 12) (4, 15) 898 OP '(' (4, 15) (4, 16) 899 OP ')' (4, 16) (4, 17) 900 OP ':' (4, 17) (4, 18) 901 NAME 'pass' (4, 19) (4, 23) 902 NEWLINE '\\n' (4, 23) (4, 24) 903 NL '\\n' (5, 0) (5, 1) 904 NAME 'await' (6, 2) (6, 7) 905 OP '=' (6, 8) (6, 9) 906 NUMBER '2' (6, 10) (6, 11) 907 DEDENT '' (7, 0) (7, 0) 908 """) 909 910 self.check_tokenize('''\ 911async def f(): 912 913 def baz(): pass 914 async def bar(): pass 915 916 await = 2''', """\ 917 NAME 'async' (1, 0) (1, 5) 918 NAME 'def' (1, 6) (1, 9) 919 NAME 'f' (1, 10) (1, 11) 920 OP '(' (1, 11) (1, 12) 921 OP ')' (1, 12) (1, 13) 922 OP ':' (1, 13) (1, 14) 923 NEWLINE '\\n' (1, 14) (1, 15) 924 NL '\\n' (2, 0) (2, 1) 925 INDENT ' ' (3, 0) (3, 2) 926 NAME 'def' (3, 2) (3, 5) 927 NAME 'baz' (3, 6) (3, 9) 928 OP '(' (3, 9) (3, 10) 929 OP ')' (3, 10) (3, 11) 930 OP ':' (3, 11) (3, 12) 931 NAME 'pass' (3, 13) (3, 17) 932 NEWLINE '\\n' (3, 17) (3, 18) 933 NAME 'async' (4, 2) (4, 7) 934 NAME 'def' (4, 8) (4, 11) 935 NAME 'bar' (4, 12) (4, 15) 936 OP '(' (4, 15) (4, 16) 937 OP ')' (4, 16) (4, 17) 938 OP ':' (4, 17) (4, 18) 939 NAME 'pass' (4, 19) (4, 23) 940 NEWLINE '\\n' (4, 23) (4, 24) 941 NL '\\n' (5, 0) (5, 1) 942 NAME 'await' (6, 2) (6, 7) 943 OP '=' (6, 8) (6, 9) 944 NUMBER '2' (6, 10) (6, 11) 945 DEDENT '' (7, 0) (7, 0) 946 """) 947 948class GenerateTokensTest(TokenizeTest): 949 def check_tokenize(self, s, expected): 950 # Format the tokens in s in a table format. 951 # The ENDMARKER and final NEWLINE are omitted. 952 f = StringIO(s) 953 result = stringify_tokens_from_source(generate_tokens(f.readline), s) 954 self.assertEqual(result, expected.rstrip().splitlines()) 955 956 957def decistmt(s): 958 result = [] 959 g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string 960 for toknum, tokval, _, _, _ in g: 961 if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens 962 result.extend([ 963 (NAME, 'Decimal'), 964 (OP, '('), 965 (STRING, repr(tokval)), 966 (OP, ')') 967 ]) 968 else: 969 result.append((toknum, tokval)) 970 return untokenize(result).decode('utf-8') 971 972class TestMisc(TestCase): 973 974 def test_decistmt(self): 975 # Substitute Decimals for floats in a string of statements. 976 # This is an example from the docs. 977 978 from decimal import Decimal 979 s = '+21.3e-5*-.1234/81.7' 980 self.assertEqual(decistmt(s), 981 "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')") 982 983 # The format of the exponent is inherited from the platform C library. 984 # Known cases are "e-007" (Windows) and "e-07" (not Windows). 
Since 985 # we're only showing 11 digits, and the 12th isn't close to 5, the 986 # rest of the output should be platform-independent. 987 self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7') 988 989 # Output from calculations with Decimal should be identical across all 990 # platforms. 991 self.assertEqual(eval(decistmt(s)), 992 Decimal('-3.217160342717258261933904529E-7')) 993 994 995class TestTokenizerAdheresToPep0263(TestCase): 996 """ 997 Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263. 998 """ 999 1000 def _testFile(self, filename): 1001 path = os.path.join(os.path.dirname(__file__), filename) 1002 TestRoundtrip.check_roundtrip(self, open(path, 'rb')) 1003 1004 def test_utf8_coding_cookie_and_no_utf8_bom(self): 1005 f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt' 1006 self._testFile(f) 1007 1008 def test_latin1_coding_cookie_and_utf8_bom(self): 1009 """ 1010 As per PEP 0263, if a file starts with a utf-8 BOM signature, the only 1011 allowed encoding for the comment is 'utf-8'. The text file used in 1012 this test starts with a BOM signature, but specifies latin1 as the 1013 coding, so verify that a SyntaxError is raised, which matches the 1014 behaviour of the interpreter when it encounters a similar condition. 1015 """ 1016 f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt' 1017 self.assertRaises(SyntaxError, self._testFile, f) 1018 1019 def test_no_coding_cookie_and_utf8_bom(self): 1020 f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt' 1021 self._testFile(f) 1022 1023 def test_utf8_coding_cookie_and_utf8_bom(self): 1024 f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt' 1025 self._testFile(f) 1026 1027 def test_bad_coding_cookie(self): 1028 self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py') 1029 self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py') 1030 1031 1032class Test_Tokenize(TestCase): 1033 1034 def test__tokenize_decodes_with_specified_encoding(self): 1035 literal = '"ЉЊЈЁЂ"' 1036 line = literal.encode('utf-8') 1037 first = False 1038 def readline(): 1039 nonlocal first 1040 if not first: 1041 first = True 1042 return line 1043 else: 1044 return b'' 1045 1046 # skip the initial encoding token and the end tokens 1047 tokens = list(_tokenize(readline, encoding='utf-8'))[1:-2] 1048 expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] 1049 self.assertEqual(tokens, expected_tokens, 1050 "bytes not decoded with encoding") 1051 1052 def test__tokenize_does_not_decode_with_encoding_none(self): 1053 literal = '"ЉЊЈЁЂ"' 1054 first = False 1055 def readline(): 1056 nonlocal first 1057 if not first: 1058 first = True 1059 return literal 1060 else: 1061 return b'' 1062 1063 # skip the end tokens 1064 tokens = list(_tokenize(readline, encoding=None))[:-2] 1065 expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')] 1066 self.assertEqual(tokens, expected_tokens, 1067 "string not tokenized when encoding is None") 1068 1069 1070class TestDetectEncoding(TestCase): 1071 1072 def get_readline(self, lines): 1073 index = 0 1074 def readline(): 1075 nonlocal index 1076 if index == len(lines): 1077 raise StopIteration 1078 line = lines[index] 1079 index += 1 1080 return line 1081 return readline 1082 1083 def test_no_bom_no_encoding_cookie(self): 1084 lines = ( 1085 b'# something\n', 1086 b'print(something)\n', 1087 b'do_something(else)\n' 1088 ) 1089 encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 1090 self.assertEqual(encoding, 'utf-8') 1091 
self.assertEqual(consumed_lines, list(lines[:2])) 1092 1093 def test_bom_no_cookie(self): 1094 lines = ( 1095 b'\xef\xbb\xbf# something\n', 1096 b'print(something)\n', 1097 b'do_something(else)\n' 1098 ) 1099 encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 1100 self.assertEqual(encoding, 'utf-8-sig') 1101 self.assertEqual(consumed_lines, 1102 [b'# something\n', b'print(something)\n']) 1103 1104 def test_cookie_first_line_no_bom(self): 1105 lines = ( 1106 b'# -*- coding: latin-1 -*-\n', 1107 b'print(something)\n', 1108 b'do_something(else)\n' 1109 ) 1110 encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 1111 self.assertEqual(encoding, 'iso-8859-1') 1112 self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n']) 1113 1114 def test_matched_bom_and_cookie_first_line(self): 1115 lines = ( 1116 b'\xef\xbb\xbf# coding=utf-8\n', 1117 b'print(something)\n', 1118 b'do_something(else)\n' 1119 ) 1120 encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 1121 self.assertEqual(encoding, 'utf-8-sig') 1122 self.assertEqual(consumed_lines, [b'# coding=utf-8\n']) 1123 1124 def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self): 1125 lines = ( 1126 b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n', 1127 b'print(something)\n', 1128 b'do_something(else)\n' 1129 ) 1130 readline = self.get_readline(lines) 1131 self.assertRaises(SyntaxError, detect_encoding, readline) 1132 1133 def test_cookie_second_line_no_bom(self): 1134 lines = ( 1135 b'#! something\n', 1136 b'# vim: set fileencoding=ascii :\n', 1137 b'print(something)\n', 1138 b'do_something(else)\n' 1139 ) 1140 encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 1141 self.assertEqual(encoding, 'ascii') 1142 expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n'] 1143 self.assertEqual(consumed_lines, expected) 1144 1145 def test_matched_bom_and_cookie_second_line(self): 1146 lines = ( 1147 b'\xef\xbb\xbf#! something\n', 1148 b'f# coding=utf-8\n', 1149 b'print(something)\n', 1150 b'do_something(else)\n' 1151 ) 1152 encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 1153 self.assertEqual(encoding, 'utf-8-sig') 1154 self.assertEqual(consumed_lines, 1155 [b'#! something\n', b'f# coding=utf-8\n']) 1156 1157 def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self): 1158 lines = ( 1159 b'\xef\xbb\xbf#! 
something\n', 1160 b'# vim: set fileencoding=ascii :\n', 1161 b'print(something)\n', 1162 b'do_something(else)\n' 1163 ) 1164 readline = self.get_readline(lines) 1165 self.assertRaises(SyntaxError, detect_encoding, readline) 1166 1167 def test_cookie_second_line_noncommented_first_line(self): 1168 lines = ( 1169 b"print('\xc2\xa3')\n", 1170 b'# vim: set fileencoding=iso8859-15 :\n', 1171 b"print('\xe2\x82\xac')\n" 1172 ) 1173 encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 1174 self.assertEqual(encoding, 'utf-8') 1175 expected = [b"print('\xc2\xa3')\n"] 1176 self.assertEqual(consumed_lines, expected) 1177 1178 def test_cookie_second_line_commented_first_line(self): 1179 lines = ( 1180 b"#print('\xc2\xa3')\n", 1181 b'# vim: set fileencoding=iso8859-15 :\n', 1182 b"print('\xe2\x82\xac')\n" 1183 ) 1184 encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 1185 self.assertEqual(encoding, 'iso8859-15') 1186 expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n'] 1187 self.assertEqual(consumed_lines, expected) 1188 1189 def test_cookie_second_line_empty_first_line(self): 1190 lines = ( 1191 b'\n', 1192 b'# vim: set fileencoding=iso8859-15 :\n', 1193 b"print('\xe2\x82\xac')\n" 1194 ) 1195 encoding, consumed_lines = detect_encoding(self.get_readline(lines)) 1196 self.assertEqual(encoding, 'iso8859-15') 1197 expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n'] 1198 self.assertEqual(consumed_lines, expected) 1199 1200 def test_latin1_normalization(self): 1201 # See get_normal_name() in tokenizer.c. 1202 encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix", 1203 "iso-8859-1-unix", "iso-latin-1-mac") 1204 for encoding in encodings: 1205 for rep in ("-", "_"): 1206 enc = encoding.replace("-", rep) 1207 lines = (b"#!/usr/bin/python\n", 1208 b"# coding: " + enc.encode("ascii") + b"\n", 1209 b"print(things)\n", 1210 b"do_something += 4\n") 1211 rl = self.get_readline(lines) 1212 found, consumed_lines = detect_encoding(rl) 1213 self.assertEqual(found, "iso-8859-1") 1214 1215 def test_syntaxerror_latin1(self): 1216 # Issue 14629: need to raise SyntaxError if the first 1217 # line(s) have non-UTF-8 characters 1218 lines = ( 1219 b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S 1220 ) 1221 readline = self.get_readline(lines) 1222 self.assertRaises(SyntaxError, detect_encoding, readline) 1223 1224 1225 def test_utf8_normalization(self): 1226 # See get_normal_name() in tokenizer.c. 
1227 encodings = ("utf-8", "utf-8-mac", "utf-8-unix") 1228 for encoding in encodings: 1229 for rep in ("-", "_"): 1230 enc = encoding.replace("-", rep) 1231 lines = (b"#!/usr/bin/python\n", 1232 b"# coding: " + enc.encode("ascii") + b"\n", 1233 b"1 + 3\n") 1234 rl = self.get_readline(lines) 1235 found, consumed_lines = detect_encoding(rl) 1236 self.assertEqual(found, "utf-8") 1237 1238 def test_short_files(self): 1239 readline = self.get_readline((b'print(something)\n',)) 1240 encoding, consumed_lines = detect_encoding(readline) 1241 self.assertEqual(encoding, 'utf-8') 1242 self.assertEqual(consumed_lines, [b'print(something)\n']) 1243 1244 encoding, consumed_lines = detect_encoding(self.get_readline(())) 1245 self.assertEqual(encoding, 'utf-8') 1246 self.assertEqual(consumed_lines, []) 1247 1248 readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',)) 1249 encoding, consumed_lines = detect_encoding(readline) 1250 self.assertEqual(encoding, 'utf-8-sig') 1251 self.assertEqual(consumed_lines, [b'print(something)\n']) 1252 1253 readline = self.get_readline((b'\xef\xbb\xbf',)) 1254 encoding, consumed_lines = detect_encoding(readline) 1255 self.assertEqual(encoding, 'utf-8-sig') 1256 self.assertEqual(consumed_lines, []) 1257 1258 readline = self.get_readline((b'# coding: bad\n',)) 1259 self.assertRaises(SyntaxError, detect_encoding, readline) 1260 1261 def test_false_encoding(self): 1262 # Issue 18873: "Encoding" detected in non-comment lines 1263 readline = self.get_readline((b'print("#coding=fake")',)) 1264 encoding, consumed_lines = detect_encoding(readline) 1265 self.assertEqual(encoding, 'utf-8') 1266 self.assertEqual(consumed_lines, [b'print("#coding=fake")']) 1267 1268 def test_open(self): 1269 filename = os_helper.TESTFN + '.py' 1270 self.addCleanup(os_helper.unlink, filename) 1271 1272 # test coding cookie 1273 for encoding in ('iso-8859-15', 'utf-8'): 1274 with open(filename, 'w', encoding=encoding) as fp: 1275 print("# coding: %s" % encoding, file=fp) 1276 print("print('euro:\u20ac')", file=fp) 1277 with tokenize_open(filename) as fp: 1278 self.assertEqual(fp.encoding, encoding) 1279 self.assertEqual(fp.mode, 'r') 1280 1281 # test BOM (no coding cookie) 1282 with open(filename, 'w', encoding='utf-8-sig') as fp: 1283 print("print('euro:\u20ac')", file=fp) 1284 with tokenize_open(filename) as fp: 1285 self.assertEqual(fp.encoding, 'utf-8-sig') 1286 self.assertEqual(fp.mode, 'r') 1287 1288 def test_filename_in_exception(self): 1289 # When possible, include the file name in the exception. 1290 path = 'some_file_path' 1291 lines = ( 1292 b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S 1293 ) 1294 class Bunk: 1295 def __init__(self, lines, path): 1296 self.name = path 1297 self._lines = lines 1298 self._index = 0 1299 1300 def readline(self): 1301 if self._index == len(lines): 1302 raise StopIteration 1303 line = lines[self._index] 1304 self._index += 1 1305 return line 1306 1307 with self.assertRaises(SyntaxError): 1308 ins = Bunk(lines, path) 1309 # Make sure lacking a name isn't an issue. 
1310 del ins.name 1311 detect_encoding(ins.readline) 1312 with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)): 1313 ins = Bunk(lines, path) 1314 detect_encoding(ins.readline) 1315 1316 def test_open_error(self): 1317 # Issue #23840: open() must close the binary file on error 1318 m = BytesIO(b'#coding:xxx') 1319 with mock.patch('tokenize._builtin_open', return_value=m): 1320 self.assertRaises(SyntaxError, tokenize_open, 'foobar') 1321 self.assertTrue(m.closed) 1322 1323 1324class TestTokenize(TestCase): 1325 1326 def test_tokenize(self): 1327 import tokenize as tokenize_module 1328 encoding = object() 1329 encoding_used = None 1330 def mock_detect_encoding(readline): 1331 return encoding, [b'first', b'second'] 1332 1333 def mock__tokenize(readline, encoding): 1334 nonlocal encoding_used 1335 encoding_used = encoding 1336 out = [] 1337 while True: 1338 next_line = readline() 1339 if next_line: 1340 out.append(next_line) 1341 continue 1342 return out 1343 1344 counter = 0 1345 def mock_readline(): 1346 nonlocal counter 1347 counter += 1 1348 if counter == 5: 1349 return b'' 1350 return str(counter).encode() 1351 1352 orig_detect_encoding = tokenize_module.detect_encoding 1353 orig__tokenize = tokenize_module._tokenize 1354 tokenize_module.detect_encoding = mock_detect_encoding 1355 tokenize_module._tokenize = mock__tokenize 1356 try: 1357 results = tokenize(mock_readline) 1358 self.assertEqual(list(results), 1359 [b'first', b'second', b'1', b'2', b'3', b'4']) 1360 finally: 1361 tokenize_module.detect_encoding = orig_detect_encoding 1362 tokenize_module._tokenize = orig__tokenize 1363 1364 self.assertEqual(encoding_used, encoding) 1365 1366 def test_oneline_defs(self): 1367 buf = [] 1368 for i in range(500): 1369 buf.append('def i{i}(): return {i}'.format(i=i)) 1370 buf.append('OK') 1371 buf = '\n'.join(buf) 1372 1373 # Test that 500 consequent, one-line defs is OK 1374 toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline)) 1375 self.assertEqual(toks[-3].string, 'OK') # [-1] is always ENDMARKER 1376 # [-2] is always NEWLINE 1377 1378 def assertExactTypeEqual(self, opstr, *optypes): 1379 tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline)) 1380 num_optypes = len(optypes) 1381 self.assertEqual(len(tokens), 3 + num_optypes) 1382 self.assertEqual(tok_name[tokens[0].exact_type], 1383 tok_name[ENCODING]) 1384 for i in range(num_optypes): 1385 self.assertEqual(tok_name[tokens[i + 1].exact_type], 1386 tok_name[optypes[i]]) 1387 self.assertEqual(tok_name[tokens[1 + num_optypes].exact_type], 1388 tok_name[token.NEWLINE]) 1389 self.assertEqual(tok_name[tokens[2 + num_optypes].exact_type], 1390 tok_name[token.ENDMARKER]) 1391 1392 def test_exact_type(self): 1393 self.assertExactTypeEqual('()', token.LPAR, token.RPAR) 1394 self.assertExactTypeEqual('[]', token.LSQB, token.RSQB) 1395 self.assertExactTypeEqual(':', token.COLON) 1396 self.assertExactTypeEqual(',', token.COMMA) 1397 self.assertExactTypeEqual(';', token.SEMI) 1398 self.assertExactTypeEqual('+', token.PLUS) 1399 self.assertExactTypeEqual('-', token.MINUS) 1400 self.assertExactTypeEqual('*', token.STAR) 1401 self.assertExactTypeEqual('/', token.SLASH) 1402 self.assertExactTypeEqual('|', token.VBAR) 1403 self.assertExactTypeEqual('&', token.AMPER) 1404 self.assertExactTypeEqual('<', token.LESS) 1405 self.assertExactTypeEqual('>', token.GREATER) 1406 self.assertExactTypeEqual('=', token.EQUAL) 1407 self.assertExactTypeEqual('.', token.DOT) 1408 self.assertExactTypeEqual('%', token.PERCENT) 1409 
        self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
        self.assertExactTypeEqual('==', token.EQEQUAL)
        self.assertExactTypeEqual('!=', token.NOTEQUAL)
        self.assertExactTypeEqual('<=', token.LESSEQUAL)
        self.assertExactTypeEqual('>=', token.GREATEREQUAL)
        self.assertExactTypeEqual('~', token.TILDE)
        self.assertExactTypeEqual('^', token.CIRCUMFLEX)
        self.assertExactTypeEqual('<<', token.LEFTSHIFT)
        self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
        self.assertExactTypeEqual('**', token.DOUBLESTAR)
        self.assertExactTypeEqual('+=', token.PLUSEQUAL)
        self.assertExactTypeEqual('-=', token.MINEQUAL)
        self.assertExactTypeEqual('*=', token.STAREQUAL)
        self.assertExactTypeEqual('/=', token.SLASHEQUAL)
        self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
        self.assertExactTypeEqual('&=', token.AMPEREQUAL)
        self.assertExactTypeEqual('|=', token.VBAREQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
        self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
        self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
        self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
        self.assertExactTypeEqual('//', token.DOUBLESLASH)
        self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
        self.assertExactTypeEqual(':=', token.COLONEQUAL)
        self.assertExactTypeEqual('...', token.ELLIPSIS)
        self.assertExactTypeEqual('->', token.RARROW)
        self.assertExactTypeEqual('@', token.AT)
        self.assertExactTypeEqual('@=', token.ATEQUAL)

        self.assertExactTypeEqual('a**2+b**2==c**2',
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.PLUS,
                                  NAME, token.DOUBLESTAR, NUMBER,
                                  token.EQEQUAL,
                                  NAME, token.DOUBLESTAR, NUMBER)
        self.assertExactTypeEqual('{1, 2, 3}',
                                  token.LBRACE,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER, token.COMMA,
                                  token.NUMBER,
                                  token.RBRACE)
        self.assertExactTypeEqual('^(x & 0x1)',
                                  token.CIRCUMFLEX,
                                  token.LPAR,
                                  token.NAME, token.AMPER, token.NUMBER,
                                  token.RPAR)

    def test_pathological_trailing_whitespace(self):
        # See http://bugs.python.org/issue16152
        self.assertExactTypeEqual('@ ', token.AT)

    def test_comment_at_the_end_of_the_source_without_newline(self):
        # See http://bugs.python.org/issue44667
        source = 'b = 1\n\n#test'
        expected_tokens = [token.NAME, token.EQUAL, token.NUMBER, token.NEWLINE, token.NL, token.COMMENT]

        tokens = list(tokenize(BytesIO(source.encode('utf-8')).readline))
        self.assertEqual(tok_name[tokens[0].exact_type], tok_name[ENCODING])
        for i in range(6):
            self.assertEqual(tok_name[tokens[i + 1].exact_type], tok_name[expected_tokens[i]])
        self.assertEqual(tok_name[tokens[-1].exact_type], tok_name[token.ENDMARKER])

class UntokenizeTest(TestCase):

    def test_bad_input_order(self):
        # raise if previous row
        u = Untokenizer()
        u.prev_row = 2
        u.prev_col = 2
        with self.assertRaises(ValueError) as cm:
            u.add_whitespace((1,3))
        self.assertEqual(cm.exception.args[0],
                         'start (1,3) precedes previous end (2,2)')
        # raise if previous column in row
        self.assertRaises(ValueError, u.add_whitespace, (2,1))

    def test_backslash_continuation(self):
        # The problem is that <whitespace>\<newline> leaves no token
        u = Untokenizer()
        u.prev_row = 1
        u.prev_col = 1
        u.tokens = []
        u.add_whitespace((2, 0))
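        # Jumping from (1, 1) to (2, 0) crosses a row with no token in
        # between, so add_whitespace() is expected to emit a backslash
        # line-continuation rather than plain spaces.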
        self.assertEqual(u.tokens, ['\\\n'])
        u.prev_row = 2
        u.add_whitespace((4, 4))
        self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', '    '])
        TestRoundtrip.check_roundtrip(self, 'a\n  b\n  c\n  \\\n  c\n')

    def test_iter_compat(self):
        u = Untokenizer()
        token = (NAME, 'Hello')
        tokens = [(ENCODING, 'utf-8'), token]
        u.compat(token, iter([]))
        self.assertEqual(u.tokens, ["Hello "])
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter([token])), 'Hello ')
        u = Untokenizer()
        self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
        self.assertEqual(u.encoding, 'utf-8')
        self.assertEqual(untokenize(iter(tokens)), b'Hello ')


class TestRoundtrip(TestCase):

    def check_roundtrip(self, f):
        """
        Test roundtrip for `untokenize`. `f` is an open file or a string.
        The source code in f is tokenized to both 5- and 2-tuples.
        Both sequences are converted back to source code via
        tokenize.untokenize(), and the latter tokenized again to 2-tuples.
        The test fails if the 3 pair tokenizations do not match.

        When untokenize bugs are fixed, untokenize with 5-tuples should
        reproduce code that does not contain a backslash continuation
        following spaces. A proper test should test this.
        """
        # Get source code and original tokenizations
        if isinstance(f, str):
            code = f.encode('utf-8')
        else:
            code = f.read()
            f.close()
        readline = iter(code.splitlines(keepends=True)).__next__
        tokens5 = list(tokenize(readline))
        tokens2 = [tok[:2] for tok in tokens5]
        # Reproduce tokens2 from pairs
        bytes_from2 = untokenize(tokens2)
        readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
        tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
        self.assertEqual(tokens2_from2, tokens2)
        # Reproduce tokens2 from 5-tuples
        bytes_from5 = untokenize(tokens5)
        readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
        tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
        self.assertEqual(tokens2_from5, tokens2)

    def test_roundtrip(self):
        # There are some standard formatting practices that are easy to get right.

        self.check_roundtrip("if x == 1:\n"
                             "    print(x)\n")
        self.check_roundtrip("# This is a comment\n"
                             "# This also\n")

        # Some people use different formatting conventions, which makes
        # untokenize a little trickier. Note that this test involves trailing
        # whitespace after the colon. Note that we use hex escapes to make the
        # two trailing blanks apparent in the expected output.

        self.check_roundtrip("if x == 1 : \n"
                             "  print(x)\n")
        fn = support.findfile("tokenize_tests.txt")
        with open(fn, 'rb') as f:
            self.check_roundtrip(f)
        self.check_roundtrip("if x == 1:\n"
                             "    # A comment by itself.\n"
                             "    print(x) # Comment here, too.\n"
                             "    # Another comment.\n"
                             "after_if = True\n")
        self.check_roundtrip("if (x # The comments need to go in the right place\n"
                             "    == 1):\n"
                             "    print('x==1')\n")
        self.check_roundtrip("class Test: # A comment here\n"
                             "  # A comment with weird indent\n"
                             "  after_com = 5\n"
                             "  def x(m): return m*5 # a one liner\n"
                             "  def y(m): # A whitespace after the colon\n"
                             "     return y*4 # 3-space indent\n")

        # Some error-handling code
        self.check_roundtrip("try: import somemodule\n"
                             "except ImportError: # comment\n"
                             "    print('Can not import' # comment2\n)"
                             "else: print('Loaded')\n")

    def test_continuation(self):
        # Balancing continuation
        self.check_roundtrip("a = (3,4, \n"
                             "5,6)\n"
                             "y = [3, 4,\n"
                             "5]\n"
                             "z = {'a': 5,\n"
                             "'b':15, 'c':True}\n"
                             "x = len(y) + 5 - a[\n"
                             "3] - a[2]\n"
                             "+ len(z) - z[\n"
                             "'b']\n")

    def test_backslash_continuation(self):
        # Backslash means line continuation, except for comments
        self.check_roundtrip("x=1+\\\n"
                             "1\n"
                             "# This is a comment\\\n"
                             "# This also\n")
        self.check_roundtrip("# Comment \\\n"
                             "x = 0")

    def test_string_concatenation(self):
        # Two string literals on the same line
        self.check_roundtrip("'' ''")

    def test_random_files(self):
        # Test roundtrip on random python modules.
        # pass the '-ucpu' option to process the full directory.

        import glob, random
        fn = support.findfile("tokenize_tests.txt")
        tempdir = os.path.dirname(fn) or os.curdir
        testfiles = glob.glob(os.path.join(glob.escape(tempdir), "test*.py"))

        # Tokenize is broken on test_pep3131.py because regular expressions are
        # broken on the obscure unicode identifiers in it. *sigh*
        # With roundtrip extended to test the 5-tuple mode of untokenize,
        # 7 more testfiles fail. Remove them also until the failure is diagnosed.

        testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
        for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
            testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)

        if not support.is_resource_enabled("cpu"):
            testfiles = random.sample(testfiles, 10)

        for testfile in testfiles:
            if support.verbose >= 2:
                print('tokenize', testfile)
            with open(testfile, 'rb') as f:
                with self.subTest(file=testfile):
                    self.check_roundtrip(f)


    def roundtrip(self, code):
        if isinstance(code, str):
            code = code.encode('utf-8')
        return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')

    def test_indentation_semantics_retained(self):
        """
        Ensure that although whitespace might be mutated in a roundtrip,
        the semantic meaning of the indentation remains consistent.
        """
        code = "if False:\n\tx=3\n\tx=3\n"
        codelines = self.roundtrip(code).split('\n')
        self.assertEqual(codelines[1], codelines[2])
        self.check_roundtrip(code)


if __name__ == "__main__":
    unittest.main()
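

# An illustrative sketch, not part of the test suite and never invoked by it:
# the round-trip property exercised by TestRoundtrip.check_roundtrip() above
# reduces to the pattern below -- tokenize the bytes, keep only the
# (type, string) pairs, feed them back through untokenize(), and tokenize the
# regenerated source again.  Whitespace may be mutated, but the pairs must
# survive the trip.  The helper name and the sample source are arbitrary
# choices for this sketch.
def _roundtrip_pairs_sketch(source_bytes=b"if x == 1:\n    print(x)\n"):
    readline = iter(source_bytes.splitlines(keepends=True)).__next__
    pairs = [tok[:2] for tok in tokenize(readline)]          # 2-tuples
    regenerated = untokenize(pairs)                          # bytes again
    readline2 = iter(regenerated.splitlines(keepends=True)).__next__
    assert [tok[:2] for tok in tokenize(readline2)] == pairs
    return regenerated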