1#!/usr/bin/env python3.8 2 3import argparse 4import pprint 5import sys 6from typing import Set, Dict 7 8from pegen.build import build_parser 9from pegen.grammar import ( 10 Alt, 11 Cut, 12 Gather, 13 Grammar, 14 GrammarVisitor, 15 Group, 16 Leaf, 17 Lookahead, 18 NamedItem, 19 NameLeaf, 20 NegativeLookahead, 21 Opt, 22 Repeat, 23 Repeat0, 24 Repeat1, 25 Rhs, 26 Rule, 27 StringLeaf, 28 PositiveLookahead, 29) 30 31argparser = argparse.ArgumentParser( 32 prog="calculate_first_sets", description="Calculate the first sets of a grammar", 33) 34argparser.add_argument("grammar_file", help="The grammar file") 35 36 37class FirstSetCalculator(GrammarVisitor): 38 def __init__(self, rules: Dict[str, Rule]) -> None: 39 self.rules = rules 40 for rule in rules.values(): 41 rule.nullable_visit(rules) 42 self.first_sets: Dict[str, Set[str]] = dict() 43 self.in_process: Set[str] = set() 44 45 def calculate(self) -> Dict[str, Set[str]]: 46 for name, rule in self.rules.items(): 47 self.visit(rule) 48 return self.first_sets 49 50 def visit_Alt(self, item: Alt) -> Set[str]: 51 result: Set[str] = set() 52 to_remove: Set[str] = set() 53 for other in item.items: 54 new_terminals = self.visit(other) 55 if isinstance(other.item, NegativeLookahead): 56 to_remove |= new_terminals 57 result |= new_terminals 58 if to_remove: 59 result -= to_remove 60 61 # If the set of new terminals can start with the empty string, 62 # it means that the item is completely nullable and we should 63 # also considering at least the next item in case the current 64 # one fails to parse. 65 66 if "" in new_terminals: 67 continue 68 69 if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)): 70 break 71 72 # Do not allow the empty string to propagate. 73 result.discard("") 74 75 return result 76 77 def visit_Cut(self, item: Cut) -> Set[str]: 78 return set() 79 80 def visit_Group(self, item: Group) -> Set[str]: 81 return self.visit(item.rhs) 82 83 def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]: 84 return self.visit(item.node) 85 86 def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]: 87 return self.visit(item.node) 88 89 def visit_NamedItem(self, item: NamedItem) -> Set[str]: 90 return self.visit(item.item) 91 92 def visit_Opt(self, item: Opt) -> Set[str]: 93 return self.visit(item.node) 94 95 def visit_Gather(self, item: Gather) -> Set[str]: 96 return self.visit(item.node) 97 98 def visit_Repeat0(self, item: Repeat0) -> Set[str]: 99 return self.visit(item.node) 100 101 def visit_Repeat1(self, item: Repeat1) -> Set[str]: 102 return self.visit(item.node) 103 104 def visit_NameLeaf(self, item: NameLeaf) -> Set[str]: 105 if item.value not in self.rules: 106 return {item.value} 107 108 if item.value not in self.first_sets: 109 self.first_sets[item.value] = self.visit(self.rules[item.value]) 110 return self.first_sets[item.value] 111 elif item.value in self.in_process: 112 return set() 113 114 return self.first_sets[item.value] 115 116 def visit_StringLeaf(self, item: StringLeaf) -> Set[str]: 117 return {item.value} 118 119 def visit_Rhs(self, item: Rhs) -> Set[str]: 120 result: Set[str] = set() 121 for alt in item.alts: 122 result |= self.visit(alt) 123 return result 124 125 def visit_Rule(self, item: Rule) -> Set[str]: 126 if item.name in self.in_process: 127 return set() 128 elif item.name not in self.first_sets: 129 self.in_process.add(item.name) 130 terminals = self.visit(item.rhs) 131 if item.nullable: 132 terminals.add("") 133 self.first_sets[item.name] = terminals 134 self.in_process.remove(item.name) 135 return self.first_sets[item.name] 136 137 138def main() -> None: 139 args = argparser.parse_args() 140 141 try: 142 grammar, parser, tokenizer = build_parser(args.grammar_file) 143 except Exception as err: 144 print("ERROR: Failed to parse grammar file", file=sys.stderr) 145 sys.exit(1) 146 147 firs_sets = FirstSetCalculator(grammar.rules).calculate() 148 pprint.pprint(firs_sets) 149 150 151if __name__ == "__main__": 152 main() 153