• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import sysconfig
2import textwrap
3import unittest
4import os
5import shutil
6import tempfile
7from pathlib import Path
8
9from test import test_tools
10from test import support
11from test.support import os_helper
12from test.support.script_helper import assert_python_ok
13
# Skip the entire module when the interpreter's PY_CFLAGS_NODIST contains the
# PGO "profile use" flag: the peg_generator tests are disabled under PGO
# builds.  Either config var may be unset (None/empty), in which case no
# skip happens.
_py_cflags_nodist = sysconfig.get_config_var('PY_CFLAGS_NODIST')
_pgo_flag = sysconfig.get_config_var('PGO_PROF_USE_FLAG')
if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
    raise unittest.SkipTest("peg_generator test disabled under PGO build")

# NOTE(review): presumably skips the module when the peg_generator tool
# directory is not available (e.g. an installed Python without Tools/) --
# confirm against test.test_tools.
test_tools.skip_if_missing("peg_generator")
# The pegen package is only imported inside this context manager;
# presumably it makes the tool directory importable for the duration of the
# block -- confirm against test.test_tools.imports_under_tool.
with test_tools.imports_under_tool("peg_generator"):
    from pegen.grammar_parser import GeneratedParser as GrammarParser
    from pegen.testutil import (
        parse_string,
        generate_parser_c_extension,
        generate_c_parser_source,
    )
    from pegen.ast_dump import ast_dump
28
29
# Source of the script that TestCParser.run_test() executes in a child
# interpreter via ``python -c``.  It is filled in with str.format():
#
#   {extension_path!r}  directory put at the front of sys.path so that the
#                       freshly built ``parse`` module can be imported;
#   {test_source}       body of ``Tests.test_parse``; the caller indents it
#                       by 8 spaces so it lines up with the method body.
#
# The helpers defined on ``Tests`` are available to the injected body:
# ``check_input_strings_for_grammar`` parses each valid case with mode=0 and
# expects SyntaxError for each invalid case; ``verify_ast_generation``
# compares the mode=1 result against ``ast.parse`` using ``ast_dump``.
TEST_TEMPLATE = """
tmp_dir = {extension_path!r}

import ast
import traceback
import sys
import unittest

from test import test_tools
with test_tools.imports_under_tool("peg_generator"):
    from pegen.ast_dump import ast_dump

sys.path.insert(0, tmp_dir)
import parse

class Tests(unittest.TestCase):

    def check_input_strings_for_grammar(
        self,
        valid_cases = (),
        invalid_cases = (),
    ):
        if valid_cases:
            for case in valid_cases:
                parse.parse_string(case, mode=0)

        if invalid_cases:
            for case in invalid_cases:
                with self.assertRaises(SyntaxError):
                    parse.parse_string(case, mode=0)

    def verify_ast_generation(self, stmt):
        expected_ast = ast.parse(stmt)
        actual_ast = parse.parse_string(stmt, mode=1)
        self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))

    def test_parse(self):
        {test_source}

unittest.main()
"""
71
72
class TestCParser(unittest.TestCase):
    """Tests for the C parser generator of the peg_generator tool.

    Most tests build a C extension from a small grammar in a temporary
    directory and then run a generated test script (see TEST_TEMPLATE) in a
    child interpreter; a few only inspect the generated C source text.
    """

    def setUp(self):
        """Prepare a scratch directory and snapshot the sysconfig vars.

        The test is skipped when no compiler executable is available.  All
        teardown is registered with addCleanup() so that every step runs
        even if setUp() fails part-way or another cleanup raises.  Cleanups
        run in reverse registration order: leave the temporary directory,
        remove it, then restore the sysconfig variables.
        """
        cmd = support.missing_compiler_executable()
        if cmd is not None:
            self.skipTest("The %r command is not found" % cmd)

        # Snapshot the config vars so changes made while building the
        # extension cannot leak into other tests.
        self._backup_config_vars = dict(sysconfig._CONFIG_VARS)
        self.addCleanup(self._restore_config_vars)

        self.tmp_path = tempfile.mkdtemp()
        self.addCleanup(shutil.rmtree, self.tmp_path)

        # Registered last, hence undone first: the working directory is
        # restored before the temporary directory is removed.
        change_cwd = os_helper.change_cwd(self.tmp_path)
        change_cwd.__enter__()
        self.addCleanup(change_cwd.__exit__, None, None, None)

    def _restore_config_vars(self):
        """Reset sysconfig._CONFIG_VARS to the snapshot taken in setUp()."""
        sysconfig._CONFIG_VARS.clear()
        sysconfig._CONFIG_VARS.update(self._backup_config_vars)

    def _generate_c_source(self, grammar_source):
        """Return the C parser source generated for *grammar_source*."""
        grammar = parse_string(grammar_source, GrammarParser)
        return generate_c_parser_source(grammar)

    def build_extension(self, grammar_source):
        """Build the parser C extension for *grammar_source* in tmp_path."""
        grammar = parse_string(grammar_source, GrammarParser)
        generate_parser_c_extension(grammar, Path(self.tmp_path))

    def run_test(self, grammar_source, test_source):
        """Build the extension and run *test_source* against it.

        *test_source* is dedented and re-indented by 8 spaces so that it
        becomes the body of ``Tests.test_parse`` in TEST_TEMPLATE; the
        resulting script is executed with assert_python_ok().
        """
        self.build_extension(grammar_source)
        test_source = textwrap.indent(textwrap.dedent(test_source), 8 * " ")
        script = TEST_TEMPLATE.format(
            extension_path=self.tmp_path,
            test_source=test_source,
        )
        assert_python_ok("-c", script)

    def test_c_parser(self) -> None:
        # Arithmetic expression grammar with AST actions; the generated AST
        # must match what ast.parse() produces.
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=stmt* $ { _PyAST_Module(a, NULL, p->arena) }
        stmt[stmt_ty]: a=expr_stmt { a }
        expr_stmt[stmt_ty]: a=expression NEWLINE { _PyAST_Expr(a, EXTRA) }
        expression[expr_ty]: ( l=expression '+' r=term { _PyAST_BinOp(l, Add, r, EXTRA) }
                            | l=expression '-' r=term { _PyAST_BinOp(l, Sub, r, EXTRA) }
                            | t=term { t }
                            )
        term[expr_ty]: ( l=term '*' r=factor { _PyAST_BinOp(l, Mult, r, EXTRA) }
                    | l=term '/' r=factor { _PyAST_BinOp(l, Div, r, EXTRA) }
                    | f=factor { f }
                    )
        factor[expr_ty]: ('(' e=expression ')' { e }
                        | a=atom { a }
                        )
        atom[expr_ty]: ( n=NAME { n }
                    | n=NUMBER { n }
                    | s=STRING { s }
                    )
        """
        test_source = """
        expressions = [
            "4+5",
            "4-5",
            "4*5",
            "1+4*5",
            "1+4/5",
            "(1+1) + (1+1)",
            "(1+1) - (1+1)",
            "(1+1) * (1+1)",
            "(1+1) / (1+1)",
        ]

        for expr in expressions:
            the_ast = parse.parse_string(expr, mode=1)
            expected_ast = ast.parse(expr)
            self.assertEqual(ast_dump(the_ast), ast_dump(expected_ast))
        """
        self.run_test(grammar_source, test_source)

    def test_lookahead(self) -> None:
        # Positive lookahead (&NAME): the token after the first NAME must
        # itself be a NAME.
        grammar_source = """
        start: NAME &NAME expr NEWLINE? ENDMARKER
        expr: NAME | NUMBER
        """
        test_source = """
        valid_cases = ["foo bar"]
        invalid_cases = ["foo 34"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_negative_lookahead(self) -> None:
        # Negative lookahead (!NAME): the token after the first NAME must
        # not be a NAME.
        grammar_source = """
        start: NAME !NAME expr NEWLINE? ENDMARKER
        expr: NAME | NUMBER
        """
        test_source = """
        valid_cases = ["foo 34"]
        invalid_cases = ["foo bar"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_cut(self) -> None:
        # Once the cut (~) after X is passed, the second alternative must
        # not be tried, so "x q s" is expected to be rejected.
        grammar_source = """
        start: X ~ Y Z | X Q S
        X: 'x'
        Y: 'y'
        Z: 'z'
        Q: 'q'
        S: 's'
        """
        test_source = """
        valid_cases = ["x y z"]
        invalid_cases = ["x q s"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_gather(self) -> None:
        # Gather (';'.pass_stmt+): separator-delimited items, with no
        # trailing separator allowed.
        grammar_source = """
        start: ';'.pass_stmt+ NEWLINE
        pass_stmt: 'pass'
        """
        test_source = """
        valid_cases = ["pass", "pass; pass"]
        invalid_cases = ["pass;", "pass; pass;"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_left_recursion(self) -> None:
        # Directly left-recursive rule (expr refers to itself first).
        grammar_source = """
        start: expr NEWLINE
        expr: ('-' term | expr '+' term | term)
        term: NUMBER
        """
        test_source = """
        valid_cases = ["-34", "34", "34 + 12", "1 + 1 + 2 + 3"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_advanced_left_recursive(self) -> None:
        # Left recursion hidden behind a rule that can match nothing
        # (sign is optional).
        grammar_source = """
        start: NUMBER | sign start
        sign: ['-']
        """
        test_source = """
        valid_cases = ["23", "-34"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_mutually_left_recursive(self) -> None:
        # foo and bar are left-recursive through each other.
        grammar_source = """
        start: foo 'E'
        foo: bar 'A' | 'B'
        bar: foo 'C' | 'D'
        """
        test_source = """
        valid_cases = ["B E", "D A C A E"]
        self.check_input_strings_for_grammar(valid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_nasty_mutually_left_recursive(self) -> None:
        # target and maybe are mutually left-recursive and maybe is also
        # directly left-recursive.
        grammar_source = """
        start: target '='
        target: maybe '+' | NAME
        maybe: maybe '-' | target
        """
        test_source = """
        valid_cases = ["x ="]
        invalid_cases = ["x - + ="]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_return_stmt_noexpr_action(self) -> None:
        # A bare "return" must produce the same AST as ast.parse().
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
        statement[stmt_ty]: simple_stmt
        simple_stmt[stmt_ty]: small_stmt
        small_stmt[stmt_ty]: return_stmt
        return_stmt[stmt_ty]: a='return' NEWLINE { _PyAST_Return(NULL, EXTRA) }
        """
        test_source = """
        stmt = "return"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_gather_action_ast(self) -> None:
        # A gather combined with an action must yield a statement sequence
        # matching ast.parse().
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=';'.pass_stmt+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA)}
        """
        test_source = """
        stmt = "pass; pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_pass_stmt_action(self) -> None:
        # A lone "pass" must produce the same AST as ast.parse().
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a[asdl_stmt_seq*]=statement+ { a }
        statement[stmt_ty]: simple_stmt
        simple_stmt[stmt_ty]: small_stmt
        small_stmt[stmt_ty]: pass_stmt
        pass_stmt[stmt_ty]: a='pass' NEWLINE { _PyAST_Pass(EXTRA) }
        """
        test_source = """
        stmt = "pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_if_stmt_action(self) -> None:
        # Grammar containing an if-statement rule; the input actually
        # checked is a plain "pass" statement.
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
        statement[asdl_stmt_seq*]:  a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) } | simple_stmt

        simple_stmt[asdl_stmt_seq*]: a=small_stmt b=further_small_stmt* [';'] NEWLINE {
                                            (asdl_stmt_seq*)_PyPegen_seq_insert_in_front(p, a, b) }
        further_small_stmt[stmt_ty]: ';' a=small_stmt { a }

        block: simple_stmt | NEWLINE INDENT a=statements DEDENT { a }

        compound_stmt: if_stmt

        if_stmt: 'if' a=full_expression ':' b=block { _PyAST_If(a, b, NULL, EXTRA) }

        small_stmt[stmt_ty]: pass_stmt

        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }

        full_expression: NAME
        """
        test_source = """
        stmt = "pass"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_same_name_different_types(self) -> None:
        # The variable name "c" is bound to values of different types in
        # the two alternatives of import_from.
        grammar_source = """
        start[mod_ty]: a[asdl_stmt_seq*]=import_from+ NEWLINE ENDMARKER { _PyAST_Module(a, NULL, p->arena)}
        import_from[stmt_ty]: ( a='from' !'import' c=simple_name 'import' d=import_as_names_from {
                                _PyAST_ImportFrom(c->v.Name.id, d, 0, EXTRA) }
                            | a='from' '.' 'import' c=import_as_names_from {
                                _PyAST_ImportFrom(NULL, c, 1, EXTRA) }
                            )
        simple_name[expr_ty]: NAME
        import_as_names_from[asdl_alias_seq*]: a[asdl_alias_seq*]=','.import_as_name_from+ { a }
        import_as_name_from[alias_ty]: a=NAME 'as' b=NAME { _PyAST_alias(((expr_ty) a)->v.Name.id, ((expr_ty) b)->v.Name.id, EXTRA) }
        """
        test_source = """
        for stmt in ("from a import b as c", "from . import a as b"):
            expected_ast = ast.parse(stmt)
            actual_ast = parse.parse_string(stmt, mode=1)
            self.assertEqual(ast_dump(expected_ast), ast_dump(actual_ast))
        """
        self.run_test(grammar_source, test_source)

    def test_with_stmt_with_paren(self) -> None:
        # Parenthesized, multi-line with-items; only the leading part of
        # the dumped AST is compared.
        grammar_source = """
        start[mod_ty]: a=[statements] ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        statements[asdl_stmt_seq*]: a=statement+ { (asdl_stmt_seq*)_PyPegen_seq_flatten(p, a) }
        statement[asdl_stmt_seq*]: a=compound_stmt { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, a) }
        compound_stmt[stmt_ty]: with_stmt
        with_stmt[stmt_ty]: (
            a='with' '(' b[asdl_withitem_seq*]=','.with_item+ ')' ':' c=block {
                _PyAST_With(b, (asdl_stmt_seq*) _PyPegen_singleton_seq(p, c), NULL, EXTRA) }
        )
        with_item[withitem_ty]: (
            e=NAME o=['as' t=NAME { t }] { _PyAST_withitem(e, _PyPegen_set_expr_context(p, o, Store), p->arena) }
        )
        block[stmt_ty]: a=pass_stmt NEWLINE { a } | NEWLINE INDENT a=pass_stmt DEDENT { a }
        pass_stmt[stmt_ty]: a='pass' { _PyAST_Pass(EXTRA) }
        """
        test_source = """
        stmt = "with (\\n    a as b,\\n    c as d\\n): pass"
        the_ast = parse.parse_string(stmt, mode=1)
        self.assertTrue(ast_dump(the_ast).startswith(
            "Module(body=[With(items=[withitem(context_expr=Name(id='a', ctx=Load()), optional_vars=Name(id='b', ctx=Store())), "
            "withitem(context_expr=Name(id='c', ctx=Load()), optional_vars=Name(id='d', ctx=Store()))]"
        ))
        """
        self.run_test(grammar_source, test_source)

    def test_ternary_operator(self) -> None:
        # Despite the name, the input exercised here is a list
        # comprehension with an "if" clause; the C action uses a ternary.
        grammar_source = """
        start[mod_ty]: a=expr ENDMARKER { _PyAST_Module(a, NULL, p->arena) }
        expr[asdl_stmt_seq*]: a=listcomp NEWLINE { (asdl_stmt_seq*)_PyPegen_singleton_seq(p, _PyAST_Expr(a, EXTRA)) }
        listcomp[expr_ty]: (
            a='[' b=NAME c=for_if_clauses d=']' { _PyAST_ListComp(b, c, EXTRA) }
        )
        for_if_clauses[asdl_comprehension_seq*]: (
            a[asdl_comprehension_seq*]=(y=[ASYNC] 'for' a=NAME 'in' b=NAME c[asdl_expr_seq*]=('if' z=NAME { z })*
                { _PyAST_comprehension(_PyAST_Name(((expr_ty) a)->v.Name.id, Store, EXTRA), b, c, (y == NULL) ? 0 : 1, p->arena) })+ { a }
        )
        """
        test_source = """
        stmt = "[i for i in a if b]"
        self.verify_ast_generation(stmt)
        """
        self.run_test(grammar_source, test_source)

    def test_syntax_error_for_string(self) -> None:
        # Both an ASCII and a non-ASCII input must raise SyntaxError with
        # the expected traceback text.
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: NAME
        """
        # The "else" branch makes a missing SyntaxError an explicit failure;
        # without it ``tb`` would be unbound on the first iteration or stale
        # (left over from the previous input) on later ones.
        test_source = r"""
        for text in ("a b 42 b a", "\u540d \u540d 42 \u540d \u540d"):
            try:
                parse.parse_string(text, mode=0)
            except SyntaxError:
                tb = traceback.format_exc()
            else:
                self.fail("SyntaxError not raised for %r" % (text,))
            self.assertIn('File "<string>", line 1', tb)
            self.assertIn("SyntaxError: invalid syntax", tb)
        """
        self.run_test(grammar_source, test_source)

    def test_headers_and_trailer(self) -> None:
        # @header, @subheader and @trailer text must be copied into the
        # generated C source.
        grammar_source = """
        @header 'SOME HEADER'
        @subheader 'SOME SUBHEADER'
        @trailer 'SOME TRAILER'
        start: expr+ NEWLINE? ENDMARKER
        expr: x=NAME
        """
        parser_source = self._generate_c_source(grammar_source)

        self.assertIn("SOME HEADER", parser_source)
        self.assertIn("SOME SUBHEADER", parser_source)
        self.assertIn("SOME TRAILER", parser_source)

    def test_error_in_rules(self) -> None:
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: NAME {PyTuple_New(-1)}
        """
        # PyTuple_New raises SystemError if an invalid argument was passed.
        test_source = """
        with self.assertRaises(SystemError):
            parse.parse_string("a", mode=0)
        """
        self.run_test(grammar_source, test_source)

    def test_no_soft_keywords(self) -> None:
        # A single-quoted literal must not generate a soft-keyword check.
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: 'foo'
        """
        parser_source = self._generate_c_source(grammar_source)
        # assertNotIn rather than a bare assert, which is stripped under -O.
        self.assertNotIn("expect_soft_keyword", parser_source)

    def test_soft_keywords(self) -> None:
        # A double-quoted literal must generate a soft-keyword check.
        grammar_source = """
        start: expr+ NEWLINE? ENDMARKER
        expr: "foo"
        """
        parser_source = self._generate_c_source(grammar_source)
        # assertIn rather than a bare assert, which is stripped under -O.
        self.assertIn("expect_soft_keyword", parser_source)

    def test_soft_keywords_parse(self) -> None:
        # The soft keyword "if" can still be used as a NAME in expr.
        grammar_source = """
        start: "if" expr '+' expr NEWLINE
        expr: NAME
        """
        test_source = """
        valid_cases = ["if if + if"]
        invalid_cases = ["if if"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_soft_keywords_lookahead(self) -> None:
        # Same as test_soft_keywords_parse, with a positive lookahead on
        # the soft keyword in front.
        grammar_source = """
        start: &"if" "if" expr '+' expr NEWLINE
        expr: NAME
        """
        test_source = """
        valid_cases = ["if if + if"]
        invalid_cases = ["if if"]
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)
461