import sysconfig
import textwrap
import unittest
import os
import shutil
import tempfile
from pathlib import Path

from test import test_tools
from test import support
from test.support import os_helper
from test.support.script_helper import assert_python_ok

_py_cflags_nodist = sysconfig.get_config_var('PY_CFLAGS_NODIST')
_pgo_flag = sysconfig.get_config_var('PGO_PROF_USE_FLAG')
if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
    raise unittest.SkipTest("peg_generator test disabled under PGO build")

test_tools.skip_if_missing("peg_generator")
with test_tools.imports_under_tool("peg_generator"):
    from pegen.grammar_parser import GeneratedParser as GrammarParser
    from pegen.testutil import (
        parse_string,
        generate_parser_c_extension,
        generate_c_parser_source,
    )
    from pegen.ast_dump import ast_dump


TEST_TEMPLATE = """
tmp_dir = {extension_path!r}

import ast
import traceback
import sys
import unittest

from test import test_tools
with test_tools.imports_under_tool("peg_generator"):
    from pegen.ast_dump import ast_dump

sys.path.insert(0, tmp_dir)
import parse

class Tests(unittest.TestCase):

    def check_input_strings_for_grammar(
        self,
        valid_cases = (),
        invalid_cases = (),
    ):
        if valid_cases:
            for case in valid_cases:
                parse.parse_string(case, mode=0)

        if invalid_cases:
            for case in invalid_cases:
                with self.assertRaises(SyntaxError):
                    parse.parse_string(case, mode=0)

    def verify_ast_generation(self, stmt):
        expected_ast = ast.parse(stmt)
        actual_ast = parse.parse_string(stmt, mode=1)
        self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))

    def test_parse(self):
        {test_source}

unittest.main()
"""


class TestCParser(unittest.TestCase):
    def setUp(self):
        self._backup_config_vars = dict(sysconfig._CONFIG_VARS)
        cmd = support.missing_compiler_executable()
        if cmd is not None:
            self.skipTest("The %r command is not found" % cmd)
        self.old_cwd = os.getcwd()
        self.tmp_path = tempfile.mkdtemp()
        change_cwd = os_helper.change_cwd(self.tmp_path)
        change_cwd.__enter__()
        self.addCleanup(change_cwd.__exit__, None, None, None)

    def tearDown(self):
        os.chdir(self.old_cwd)
        shutil.rmtree(self.tmp_path)
        sysconfig._CONFIG_VARS.clear()
        sysconfig._CONFIG_VARS.update(self._backup_config_vars)

    def build_extension(self, grammar_source):
        grammar = parse_string(grammar_source, GrammarParser)
        generate_parser_c_extension(grammar, Path(self.tmp_path))

    def run_test(self, grammar_source, test_source):
        self.build_extension(grammar_source)
        test_source = textwrap.indent(textwrap.dedent(test_source), 8 * " ")
        assert_python_ok(
            "-c",
            TEST_TEMPLATE.format(extension_path=self.tmp_path, test_source=test_source),
        )

    def test_c_parser(self) -> None:
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=stmt* $ { _PyAST_Module(a, NULL, p->arena) }
        stmt[stmt_ty]: a=expr_stmt { a }
        expr_stmt[stmt_ty]: a=expression NEWLINE { _PyAST_Expr(a, EXTRA) }
        expression[expr_ty]: ( l=expression '+' r=term { _PyAST_BinOp(l, Add, r, EXTRA) }
                             | l=expression '-' r=term { _PyAST_BinOp(l, Sub, r, EXTRA) }
                             | t=term { t }
                             )
        term[expr_ty]: ( l=term '*' r=factor { _PyAST_BinOp(l, Mult, r, EXTRA) }
                       | l=term '/' r=factor { _PyAST_BinOp(l, Div, r, EXTRA) }
                       | f=factor { f }
                       )
        factor[expr_ty]: ('(' e=expression ')' { e }
                         | a=atom { a }
                         )
        atom[expr_ty]: ( n=NAME { n }
                       | n=NUMBER { n }
                       | s=STRING { s }
                       )
        """
        test_source = """
        expressions = [
            "4+5",
            "4-5",
            "4*5",
            "1+4*5",
            "1+4/5",
            "(1+1) + (1+1)",
            "(1+1) - (1+1)",
            "(1+1) * (1+1)",
            "(1+1) / (1+1)",
        ]

        for expr in expressions:
            the_ast = parse.parse_string(expr, mode=1)
            expected_ast = ast.parse(expr)
            self.assertEqual(ast_dump(the_ast), ast_dump(expected_ast))
        """
        self.run_test(grammar_source, test_source)

    def test_lookahead(self) -> None:
        grammar_source = """
        start: NAME &NAME expr NEWLINE? ENDMARKER
        expr: NAME | NUMBER
        """
        test_source = """
        valid_cases = ["foo bar"]
        invalid_cases = ["foo 34"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_negative_lookahead(self) -> None:
        grammar_source = """
        start: NAME !NAME expr NEWLINE? ENDMARKER
        expr: NAME | NUMBER
        """
        test_source = """
        valid_cases = ["foo 34"]
        invalid_cases = ["foo bar"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_cut(self) -> None:
        grammar_source = """
        start: X ~ Y Z | X Q S
        X: 'x'
        Y: 'y'
        Z: 'z'
        Q: 'q'
        S: 's'
        """
        test_source = """
        valid_cases = ["x y z"]
        invalid_cases = ["x q s"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_gather(self) -> None:
        grammar_source = """
        start: ';'.pass_stmt+ NEWLINE
        pass_stmt: 'pass'
        """
        test_source = """
        valid_cases = ["pass", "pass; pass"]
        invalid_cases = ["pass;", "pass; pass;"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_left_recursion(self) -> None:
        grammar_source = """
        start: expr NEWLINE
        expr: ('-' term | expr '+' term | term)
        term: NUMBER
        """
        test_source = """
        valid_cases = ["-34", "34", "34 + 12", "1 + 1 + 2 + 3"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_advanced_left_recursive(self) -> None:
        grammar_source = """
        start: NUMBER | sign start
        sign: ['-']
        """
        test_source = """
        valid_cases = ["23", "-34"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_mutually_left_recursive(self) -> None:
        grammar_source = """
        start: foo 'E'
        foo: bar 'A' | 'B'
        bar: foo 'C' | 'D'
        """
        test_source = """
        valid_cases = ["B E", "D A C A E"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_nasty_mutually_left_recursive(self) -> None:
        grammar_source = """
        start: target '='
        target: maybe '+' | NAME
        maybe: maybe '-' | target
        """
        test_source = """
        valid_cases = ["x ="]
        invalid_cases = ["x - + ="]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_return_stmt_noexpr_action(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
        statement[stmt_ty]: simple_stmt
        simple_stmt[stmt_ty]: small_stmt
        small_stmt[stmt_ty]: return_stmt
        return_stmt[stmt_ty]: a='return' NEWLINE { _PyAST_Return(NULL, EXTRA) }
        """
        test_source = """
        stmt = "return"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_gather_action_ast(self) -> None:
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=';'.pass_stmt+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA)}
        """
        test_source = """
        stmt = "pass; pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_pass_stmt_action(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
        statement[stmt_ty]: simple_stmt
        simple_stmt[stmt_ty]: small_stmt
        small_stmt[stmt_ty]: pass_stmt
        pass_stmt[stmt_ty]: a='pass' NEWLINE { _PyAST_Pass(EXTRA) }
        """
        test_source = """
        stmt = "pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_if_stmt_action(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
        statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | simple_stmt

        simple_stmt[asdl_stmt_seq*]: a=small_stmt b=further_small_stmt* [';'] NEWLINE {
            (asdl_stmt_seq*)_PyPegen_seq_insert_in_front(p, a, b) }
        further_small_stmt[stmt_ty]: ';' a=small_stmt { a }

        block: simple_stmt | NEWLINE INDENT a=statements DEDENT { a }

        compound_stmt: if_stmt

        if_stmt: 'if' a=full_expression ':' b=block { _PyAST_If(a, b, NULL, EXTRA) }

        small_stmt[stmt_ty]: pass_stmt

        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }

        full_expression: NAME
        """
        test_source = """
        stmt = "pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_same_name_different_types(self) -> None:
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=import_from+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena)}
        import_from[stmt_ty]: ( a='from' !'import' c=simple_name 'import' d=import_as_names_from {
                                    _PyAST_ImportFrom(c->v.Name.id, d, 0, EXTRA) }
                              | a='from' '.' 'import' c=import_as_names_from {
                                    _PyAST_ImportFrom(NULL, c, 1, EXTRA) }
                              )
        simple_name[expr_ty]: NAME
        import_as_names_from[asdl_alias_seq*]: a[asdl_alias_seq*]=','.import_as_name_from+ { a }
        import_as_name_from[alias_ty]: a=NAME 'as' b=NAME { _PyAST_alias(((expr_ty) a)->v.Name.id, ((expr_ty) b)->v.Name.id, EXTRA) }
        """
        test_source = """
        for stmt in ("from a import b as c", "from . import a as b"):
            expected_ast = ast.parse(stmt)
            actual_ast = parse.parse_string(stmt, mode=1)
            self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))
        """
        self.run_test(grammar_source, test_source)

    def test_with_stmt_with_paren(self) -> None:
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
        statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
        compound_stmt[stmt_ty]: with_stmt
        with_stmt[stmt_ty]: (
            a='with' '(' b[asdl_withitem_seq*]=','.with_item+ ')' ':' c=block {
                _PyAST_With(b, (asdl_stmt_seq*) _PyPegen_singleton_seq(p, c), NULL, EXTRA) }
        )
        with_item[withitem_ty]: (
            e=NAME o=['as' t=NAME { t }] { _PyAST_withitem(e, _PyPegen_set_expr_context(p, o, Store), p->arena) }
        )
        block[stmt_ty]: a=pass_stmt NEWLINE { a } | NEWLINE INDENT a=pass_stmt DEDENT { a }
        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }
        """
        test_source = """
        stmt = "with (\\n a as b,\\n c as d\\n): pass"
        the_ast = parse.parse_string(stmt, mode=1)
        self.assertTrue(ast_dump(the_ast).startswith(
            "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), "
            "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]"
        ))
        """
        self.run_test(grammar_source, test_source)

    def test_ternary_operator(self) -> None:
        grammar_source = """
        start[mod_ty]: a=expr ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        expr[asdl_stmt_seq*]: a=listcomp NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, _PyAST_Expr(a, EXTRA)) }
        listcomp[expr_ty]: (
            a='[' b=NAME c=for_if_clauses d=']' { _PyAST_ListComp(b, c, EXTRA) }
        )
        for_if_clauses[asdl_comprehension_seq*]: (
            a[asdl_comprehension_seq*]=(y=[ASYNC] 'for' a=NAME 'in' b=NAME c[asdl_expr_seq*]=('if' z=NAME { z })*
                { _PyAST_comprehension(_PyAST_Name(((expr_ty) a)->v.Name.id, Store, EXTRA), b, c, (y == NULL) ? 0 : 1, p->arena) })+ { a }
        )
        """
        test_source = """
        stmt = "[i for i in a if b]"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_syntax_error_for_string(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: NAME
        """
        test_source = r"""
        for text in ("a b 42 b a", "\u540d \u540d 42 \u540d \u540d"):
            try:
                parse.parse_string(text, mode=0)
            except SyntaxError as e:
                tb = traceback.format_exc()
            self.assertTrue('File "<string>", line 1' in tb)
            self.assertTrue(f"SyntaxError: invalid syntax" in tb)
        """
        self.run_test(grammar_source, test_source)

    def test_headers_and_trailer(self) -> None:
        grammar_source = """
        @header 'SOME HEADER'
        @subheader 'SOME SUBHEADER'
        @trailer 'SOME TRAILER'
        start: expr+ NEWLINE? ENDMARKER
        expr: x=NAME
        """
        grammar = parse_string(grammar_source, GrammarParser)
        parser_source = generate_c_parser_source(grammar)

        self.assertTrue("SOME HEADER" in parser_source)
        self.assertTrue("SOME SUBHEADER" in parser_source)
        self.assertTrue("SOME TRAILER" in parser_source)

    def test_error_in_rules(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: NAME {PyTuple_New(-1)}
        """
        # PyTuple_New raises SystemError if an invalid argument was passed.
        test_source = """
        with self.assertRaises(SystemError):
            parse.parse_string("a", mode=0)
        """
        self.run_test(grammar_source, test_source)

    def test_no_soft_keywords(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: 'foo'
        """
        grammar = parse_string(grammar_source, GrammarParser)
        parser_source = generate_c_parser_source(grammar)
        assert "expect_soft_keyword" not in parser_source

    def test_soft_keywords(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: "foo"
        """
        grammar = parse_string(grammar_source, GrammarParser)
        parser_source = generate_c_parser_source(grammar)
        assert "expect_soft_keyword" in parser_source

    def test_soft_keywords_parse(self) -> None:
        grammar_source = """
        start: "if" expr '+' expr NEWLINE
        expr: NAME
        """
        test_source = """
        valid_cases = ["if if + if"]
        invalid_cases = ["if if"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_soft_keywords_lookahead(self) -> None:
        grammar_source = """
        start: &"if" "if" expr '+' expr NEWLINE
        expr: NAME
        """
        test_source = """
        valid_cases = ["if if + if"]
        invalid_cases = ["if if"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)