• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1#!/usr/bin/env python3.8
2
3import argparse
4import pprint
5import sys
6from typing import Set, Dict
7
8from pegen.build import build_parser
9from pegen.grammar import (
10    Alt,
11    Cut,
12    Gather,
13    Grammar,
14    GrammarVisitor,
15    Group,
16    Leaf,
17    Lookahead,
18    NamedItem,
19    NameLeaf,
20    NegativeLookahead,
21    Opt,
22    Repeat,
23    Repeat0,
24    Repeat1,
25    Rhs,
26    Rule,
27    StringLeaf,
28    PositiveLookahead,
29)
30
31argparser = argparse.ArgumentParser(
32    prog="calculate_first_sets", description="Calculate the first sets of a grammar",
33)
34argparser.add_argument("grammar_file", help="The grammar file")
35
36
37class FirstSetCalculator(GrammarVisitor):
38    def __init__(self, rules: Dict[str, Rule]) -> None:
39        self.rules = rules
40        for rule in rules.values():
41            rule.nullable_visit(rules)
42        self.first_sets: Dict[str, Set[str]] = dict()
43        self.in_process: Set[str] = set()
44
45    def calculate(self) -> Dict[str, Set[str]]:
46        for name, rule in self.rules.items():
47            self.visit(rule)
48        return self.first_sets
49
50    def visit_Alt(self, item: Alt) -> Set[str]:
51        result: Set[str] = set()
52        to_remove: Set[str] = set()
53        for other in item.items:
54            new_terminals = self.visit(other)
55            if isinstance(other.item, NegativeLookahead):
56                to_remove |= new_terminals
57            result |= new_terminals
58            if to_remove:
59                result -= to_remove
60
61            # If the set of new terminals can start with the empty string,
62            # it means that the item is completely nullable and we should
63            # also considering at least the next item in case the current
64            # one fails to parse.
65
66            if "" in new_terminals:
67                continue
68
69            if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):
70                break
71
72        # Do not allow the empty string to propagate.
73        result.discard("")
74
75        return result
76
77    def visit_Cut(self, item: Cut) -> Set[str]:
78        return set()
79
80    def visit_Group(self, item: Group) -> Set[str]:
81        return self.visit(item.rhs)
82
83    def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
84        return self.visit(item.node)
85
86    def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
87        return self.visit(item.node)
88
89    def visit_NamedItem(self, item: NamedItem) -> Set[str]:
90        return self.visit(item.item)
91
92    def visit_Opt(self, item: Opt) -> Set[str]:
93        return self.visit(item.node)
94
95    def visit_Gather(self, item: Gather) -> Set[str]:
96        return self.visit(item.node)
97
98    def visit_Repeat0(self, item: Repeat0) -> Set[str]:
99        return self.visit(item.node)
100
101    def visit_Repeat1(self, item: Repeat1) -> Set[str]:
102        return self.visit(item.node)
103
104    def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
105        if item.value not in self.rules:
106            return {item.value}
107
108        if item.value not in self.first_sets:
109            self.first_sets[item.value] = self.visit(self.rules[item.value])
110            return self.first_sets[item.value]
111        elif item.value in self.in_process:
112            return set()
113
114        return self.first_sets[item.value]
115
116    def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
117        return {item.value}
118
119    def visit_Rhs(self, item: Rhs) -> Set[str]:
120        result: Set[str] = set()
121        for alt in item.alts:
122            result |= self.visit(alt)
123        return result
124
125    def visit_Rule(self, item: Rule) -> Set[str]:
126        if item.name in self.in_process:
127            return set()
128        elif item.name not in self.first_sets:
129            self.in_process.add(item.name)
130            terminals = self.visit(item.rhs)
131            if item.nullable:
132                terminals.add("")
133            self.first_sets[item.name] = terminals
134            self.in_process.remove(item.name)
135        return self.first_sets[item.name]
136
137
138def main() -> None:
139    args = argparser.parse_args()
140
141    try:
142        grammar, parser, tokenizer = build_parser(args.grammar_file)
143    except Exception as err:
144        print("ERROR: Failed to parse grammar file", file=sys.stderr)
145        sys.exit(1)
146
147    firs_sets = FirstSetCalculator(grammar.rules).calculate()
148    pprint.pprint(firs_sets)
149
150
151if __name__ == "__main__":
152    main()
153