• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import os.path
2import token
3from typing import Any, Dict, Optional, IO, Text, Tuple
4
5from pegen.grammar import (
6    Cut,
7    GrammarVisitor,
8    NameLeaf,
9    StringLeaf,
10    Rhs,
11    NamedItem,
12    Lookahead,
13    PositiveLookahead,
14    NegativeLookahead,
15    Opt,
16    Repeat0,
17    Repeat1,
18    Gather,
19    Group,
20    Rule,
21    Alt,
22)
23from pegen import grammar
24from pegen.parser_generator import ParserGenerator
25
26MODULE_PREFIX = """\
27#!/usr/bin/env python3.8
28# @generated by pegen from {filename}
29
30import ast
31from typing import Optional, Any
32
33from pegen.parser import memoize, memoize_left_rec, logger, Parser
34
35"""
36MODULE_SUFFIX = """
37
38if __name__ == '__main__':
39    from pegen.parser import simple_parser_main
40    simple_parser_main(GeneratedParser)
41"""
42
43
44class PythonCallMakerVisitor(GrammarVisitor):
45    def __init__(self, parser_generator: ParserGenerator):
46        self.gen = parser_generator
47        self.cache: Dict[Any, Any] = {}
48
49    def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
50        name = node.value
51        if name in ("NAME", "NUMBER", "STRING", "OP"):
52            name = name.lower()
53            return name, f"self.{name}()"
54        if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
55            return name.lower(), f"self.expect({name!r})"
56        return name, f"self.{name}()"
57
58    def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
59        return "literal", f"self.expect({node.value})"
60
61    def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
62        if node in self.cache:
63            return self.cache[node]
64        if len(node.alts) == 1 and len(node.alts[0].items) == 1:
65            self.cache[node] = self.visit(node.alts[0].items[0])
66        else:
67            name = self.gen.name_node(node)
68            self.cache[node] = name, f"self.{name}()"
69        return self.cache[node]
70
71    def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
72        name, call = self.visit(node.item)
73        if node.name:
74            name = node.name
75        return name, call
76
77    def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]:
78        name, call = self.visit(node.node)
79        head, tail = call.split("(", 1)
80        assert tail[-1] == ")"
81        tail = tail[:-1]
82        return head, tail
83
84    def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
85        head, tail = self.lookahead_call_helper(node)
86        return None, f"self.positive_lookahead({head}, {tail})"
87
88    def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
89        head, tail = self.lookahead_call_helper(node)
90        return None, f"self.negative_lookahead({head}, {tail})"
91
92    def visit_Opt(self, node: Opt) -> Tuple[str, str]:
93        name, call = self.visit(node.node)
94        # Note trailing comma (the call may already have one comma
95        # at the end, for example when rules have both repeat0 and optional
96        # markers, e.g: [rule*])
97        if call.endswith(","):
98            return "opt", call
99        else:
100            return "opt", f"{call},"
101
102    def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
103        if node in self.cache:
104            return self.cache[node]
105        name = self.gen.name_loop(node.node, False)
106        self.cache[node] = name, f"self.{name}(),"  # Also a trailing comma!
107        return self.cache[node]
108
109    def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
110        if node in self.cache:
111            return self.cache[node]
112        name = self.gen.name_loop(node.node, True)
113        self.cache[node] = name, f"self.{name}()"  # But no trailing comma here!
114        return self.cache[node]
115
116    def visit_Gather(self, node: Gather) -> Tuple[str, str]:
117        if node in self.cache:
118            return self.cache[node]
119        name = self.gen.name_gather(node)
120        self.cache[node] = name, f"self.{name}()"  # No trailing comma here either!
121        return self.cache[node]
122
123    def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
124        return self.visit(node.rhs)
125
126    def visit_Cut(self, node: Cut) -> Tuple[str, str]:
127        return "cut", "True"
128
129
130class PythonParserGenerator(ParserGenerator, GrammarVisitor):
131    def __init__(
132        self,
133        grammar: grammar.Grammar,
134        file: Optional[IO[Text]],
135        tokens: Dict[int, str] = token.tok_name,
136    ):
137        super().__init__(grammar, tokens, file)
138        self.callmakervisitor = PythonCallMakerVisitor(self)
139
140    def generate(self, filename: str) -> None:
141        header = self.grammar.metas.get("header", MODULE_PREFIX)
142        if header is not None:
143            basename = os.path.basename(filename)
144            self.print(header.rstrip("\n").format(filename=basename))
145        subheader = self.grammar.metas.get("subheader", "")
146        if subheader:
147            self.print(subheader.format(filename=filename))
148        self.print("class GeneratedParser(Parser):")
149        while self.todo:
150            for rulename, rule in list(self.todo.items()):
151                del self.todo[rulename]
152                self.print()
153                with self.indent():
154                    self.visit(rule)
155        trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX)
156        if trailer is not None:
157            self.print(trailer.rstrip("\n"))
158
159    def visit_Rule(self, node: Rule) -> None:
160        is_loop = node.is_loop()
161        is_gather = node.is_gather()
162        rhs = node.flatten()
163        if node.left_recursive:
164            if node.leader:
165                self.print("@memoize_left_rec")
166            else:
167                # Non-leader rules in a cycle are not memoized,
168                # but they must still be logged.
169                self.print("@logger")
170        else:
171            self.print("@memoize")
172        node_type = node.type or "Any"
173        self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
174        with self.indent():
175            self.print(f"# {node.name}: {rhs}")
176            if node.nullable:
177                self.print(f"# nullable={node.nullable}")
178            self.print("mark = self.mark()")
179            if is_loop:
180                self.print("children = []")
181            self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
182            if is_loop:
183                self.print("return children")
184            else:
185                self.print("return None")
186
187    def visit_NamedItem(self, node: NamedItem) -> None:
188        name, call = self.callmakervisitor.visit(node.item)
189        if node.name:
190            name = node.name
191        if not name:
192            self.print(call)
193        else:
194            if name != "cut":
195                name = self.dedupe(name)
196            self.print(f"({name} := {call})")
197
198    def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
199        if is_loop:
200            assert len(node.alts) == 1
201        for alt in node.alts:
202            self.visit(alt, is_loop=is_loop, is_gather=is_gather)
203
204    def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
205        with self.local_variable_context():
206            self.print("cut = False")  # TODO: Only if needed.
207            if is_loop:
208                self.print("while (")
209            else:
210                self.print("if (")
211            with self.indent():
212                first = True
213                for item in node.items:
214                    if first:
215                        first = False
216                    else:
217                        self.print("and")
218                    self.visit(item)
219                    if is_gather:
220                        self.print("is not None")
221
222            self.print("):")
223            with self.indent():
224                action = node.action
225                if not action:
226                    if is_gather:
227                        assert len(self.local_variable_names) == 2
228                        action = (
229                            f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
230                        )
231                    else:
232                        action = f"[{', '.join(self.local_variable_names)}]"
233                if is_loop:
234                    self.print(f"children.append({action})")
235                    self.print(f"mark = self.mark()")
236                else:
237                    self.print(f"return {action}")
238            self.print("self.reset(mark)")
239            # Skip remaining alternatives if a cut was reached.
240            self.print("if cut: return None")  # TODO: Only if needed.
241