• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1import collections
2
3
class Grammar:
    """Container for the parsing tables generated by pgen.

    Instance attributes:

    symbol2number -- dict from symbol name to symbol number.  Symbol
                     numbers start at 256 so they can never be
                     confused with token numbers, which lie in the
                     range 0..255 (inclusive).

    number2symbol -- dict from symbol number back to symbol name,
                     i.e. the inverse of symbol2number.

    states        -- list of DFAs, indexed by DFA number.  A DFA is
                     a list of states, a state is a list of arcs and
                     an arc is an (i, j) pair with i a label and j a
                     state number.  A final state is marked by a
                     special arc (0, j) where j is the state's own
                     number.

    dfas          -- dict from symbol number to a (DFA, first) pair.
                     DFA is one of the entries of the states list;
                     first is the set of tokens that may begin the
                     corresponding grammar rule.

    labels        -- list of (x, y) pairs.  x is a token number or a
                     symbol number; y is None or a keyword string.
                     A label number is an index into this list and
                     is what arcs in the DFAs refer to.

    start         -- number of the grammar's start symbol.

    keywords      -- dict from keyword string to arc label.

    tokens        -- dict from token number to arc label.

    symbol2label  -- ordered dict created empty by __init__; none of
                     the methods of this class consult it.

    """

    def __init__(self):
        """Set up empty tables; only the EMPTY label is pre-registered."""
        new_table = collections.OrderedDict
        self.symbol2number = new_table()
        self.number2symbol = new_table()
        self.states = []
        self.dfas = new_table()
        self.labels = [(0, "EMPTY")]
        self.keywords = new_table()
        self.tokens = new_table()
        self.symbol2label = new_table()
        self.start = 256

    def produce_graminit_h(self, writer):
        """Emit the C header: a banner, then one #define per symbol.

        writer is called with each piece of generated text in turn.
        """
        define_line = "#define {} {}\n"
        writer("/* Generated by Parser/pgen */\n\n")
        for sym_number in self.number2symbol:
            writer(define_line.format(self.number2symbol[sym_number], sym_number))

    def produce_graminit_c(self, writer):
        """Emit the C source defining the _PyParser_Grammar tables.

        The output is, in order: banner and includes, the forward
        declaration, the arc/state/dfa tables, the label table and
        finally the grammar struct initialiser.  writer is called with
        each piece of generated text in turn.
        """
        preamble = (
            "/* Generated by Parser/pgen */\n\n",
            '#include "exports.h"\n',
            '#include "grammar.h"\n',
            "Py_EXPORTED_SYMBOL grammar _PyParser_Grammar;\n",
        )
        for chunk in preamble:
            writer(chunk)

        self.print_dfas(writer)
        self.print_labels(writer)

        dfa_count = len(self.dfas)
        label_count = len(self.labels)
        writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar = {\n")
        writer("    {n_dfas},\n".format(n_dfas=dfa_count))
        writer("    dfas,\n")
        writer("    {{{n_labels}, labels}},\n".format(n_labels=label_count))
        writer("    {start_number}\n".format(start_number=self.start))
        writer("};\n")

    def print_labels(self, writer):
        """Emit the C ``labels`` array, one initialiser per label."""
        opening = "static const label labels[{n_labels}] = {{\n"
        row = "    {{{label}, {label_name}}},\n"

        writer(opening.format(n_labels=len(self.labels)))
        for number, text in self.labels:
            # A label without a name is written as the C literal 0;
            # a named one becomes a double-quoted C string.
            if text is None:
                c_name = 0
            else:
                c_name = '"{}"'.format(text)
            writer(row.format(label=number, label_name=c_name))
        writer("};\n")

    def print_dfas(self, writer):
        """Emit the arc and state tables, then the C ``dfas`` array.

        Each dfa entry ends with its first set encoded as a C string of
        octal escapes, one escape per byte of the bitset.
        """
        self.print_states(writer)
        writer("static const dfa dfas[{}] = {{\n".format(len(self.dfas)))

        for dfa_number, (symbol, (dfa, first_sets)) in enumerate(self.dfas.items()):
            head = '    {{{dfa_symbol}, "{symbol_name}", '.format(
                dfa_symbol=symbol, symbol_name=self.number2symbol[symbol]
            )
            tail = "{n_states}, states_{dfa_index},\n".format(
                n_states=len(dfa), dfa_index=dfa_number
            )
            # The entry header goes out as a single piece of text that
            # ends with the opening quote of the bitset string.
            writer(head + tail + '     "')

            # One bit per label number, eight labels to a byte.
            first_bits = bytearray(len(self.labels) // 8 + 1)
            for token in first_sets:
                byte_index, bit = divmod(token, 8)
                first_bits[byte_index] |= 1 << bit
            for value in first_bits:
                writer("\\%03o" % (value & 0xFF))
            writer('"},\n')

        writer("};\n")

    def print_states(self, write):
        """Emit, per DFA, its arc tables followed by its state table."""
        opening = "static state states_{dfa_index}[{n_states}] = {{\n"
        row = "    {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n"

        for dfa_number, dfa in enumerate(self.states):
            # The arc arrays must precede the state table naming them.
            self.print_arcs(write, dfa_number, dfa)
            write(opening.format(dfa_index=dfa_number, n_states=len(dfa)))
            for state_number, arcs in enumerate(dfa):
                write(
                    row.format(
                        n_arcs=len(arcs),
                        dfa_index=dfa_number,
                        state_index=state_number,
                    )
                )
            write("};\n")

    def print_arcs(self, write, dfaindex, states):
        """Emit one C ``arc`` array for every state of the given DFA.

        dfaindex is used only to build the array names; states is the
        list of states, each a list of (label, state number) pairs.
        """
        opening = "static const arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n"
        row = "    {{{from_label}, {to_state}}},\n"

        for state_number, arcs in enumerate(states):
            write(
                opening.format(
                    dfa_index=dfaindex, state_index=state_number, n_arcs=len(arcs)
                )
            )
            for label, target in arcs:
                write(row.format(from_label=label, to_state=target))
            write("};\n")
148