1import collections 2 3 4class Grammar: 5 """Pgen parsing tables class. 6 7 The instance variables are as follows: 8 9 symbol2number -- a dict mapping symbol names to numbers. Symbol 10 numbers are always 256 or higher, to distinguish 11 them from token numbers, which are between 0 and 12 255 (inclusive). 13 14 number2symbol -- a dict mapping numbers to symbol names; 15 these two are each other's inverse. 16 17 states -- a list of DFAs, where each DFA is a list of 18 states, each state is a list of arcs, and each 19 arc is a (i, j) pair where i is a label and j is 20 a state number. The DFA number is the index into 21 this list. (This name is slightly confusing.) 22 Final states are represented by a special arc of 23 the form (0, j) where j is its own state number. 24 25 dfas -- a dict mapping symbol numbers to (DFA, first) 26 pairs, where DFA is an item from the states list 27 above, and first is a set of tokens that can 28 begin this grammar rule. 29 30 labels -- a list of (x, y) pairs where x is either a token 31 number or a symbol number, and y is either None 32 or a string; the strings are keywords. The label 33 number is the index in this list; label numbers 34 are used to mark state transitions (arcs) in the 35 DFAs. 36 37 start -- the number of the grammar's start symbol. 38 39 keywords -- a dict mapping keyword strings to arc labels. 40 41 tokens -- a dict mapping token numbers to arc labels. 42 43 """ 44 45 def __init__(self): 46 self.symbol2number = collections.OrderedDict() 47 self.number2symbol = collections.OrderedDict() 48 self.states = [] 49 self.dfas = collections.OrderedDict() 50 self.labels = [(0, "EMPTY")] 51 self.keywords = collections.OrderedDict() 52 self.tokens = collections.OrderedDict() 53 self.symbol2label = collections.OrderedDict() 54 self.start = 256 55 56 def produce_graminit_h(self, writer): 57 writer("/* Generated by Parser/pgen */\n\n") 58 for number, symbol in self.number2symbol.items(): 59 writer("#define {} {}\n".format(symbol, number)) 60 61 def produce_graminit_c(self, writer): 62 writer("/* Generated by Parser/pgen */\n\n") 63 64 writer('#include "exports.h"\n') 65 writer('#include "grammar.h"\n') 66 writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar;\n") 67 68 self.print_dfas(writer) 69 self.print_labels(writer) 70 71 writer("Py_EXPORTED_SYMBOL grammar _PyParser_Grammar = {\n") 72 writer(" {n_dfas},\n".format(n_dfas=len(self.dfas))) 73 writer(" dfas,\n") 74 writer(" {{{n_labels}, labels}},\n".format(n_labels=len(self.labels))) 75 writer(" {start_number}\n".format(start_number=self.start)) 76 writer("};\n") 77 78 def print_labels(self, writer): 79 writer( 80 "static const label labels[{n_labels}] = {{\n".format( 81 n_labels=len(self.labels) 82 ) 83 ) 84 for label, name in self.labels: 85 label_name = '"{}"'.format(name) if name is not None else 0 86 writer( 87 " {{{label}, {label_name}}},\n".format( 88 label=label, label_name=label_name 89 ) 90 ) 91 writer("};\n") 92 93 def print_dfas(self, writer): 94 self.print_states(writer) 95 writer("static const dfa dfas[{}] = {{\n".format(len(self.dfas))) 96 for dfaindex, dfa_elem in enumerate(self.dfas.items()): 97 symbol, (dfa, first_sets) = dfa_elem 98 writer( 99 ' {{{dfa_symbol}, "{symbol_name}", '.format( 100 dfa_symbol=symbol, symbol_name=self.number2symbol[symbol] 101 ) 102 + "{n_states}, states_{dfa_index},\n".format( 103 n_states=len(dfa), dfa_index=dfaindex 104 ) 105 + ' "' 106 ) 107 108 bitset = bytearray((len(self.labels) >> 3) + 1) 109 for token in first_sets: 110 bitset[token >> 3] |= 1 << (token & 7) 111 for byte in bitset: 112 writer("\\%03o" % (byte & 0xFF)) 113 writer('"},\n') 114 writer("};\n") 115 116 def print_states(self, write): 117 for dfaindex, dfa in enumerate(self.states): 118 self.print_arcs(write, dfaindex, dfa) 119 write( 120 "static state states_{dfa_index}[{n_states}] = {{\n".format( 121 dfa_index=dfaindex, n_states=len(dfa) 122 ) 123 ) 124 for stateindex, state in enumerate(dfa): 125 narcs = len(state) 126 write( 127 " {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n".format( 128 n_arcs=narcs, dfa_index=dfaindex, state_index=stateindex 129 ) 130 ) 131 write("};\n") 132 133 def print_arcs(self, write, dfaindex, states): 134 for stateindex, state in enumerate(states): 135 narcs = len(state) 136 write( 137 "static const arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n".format( 138 dfa_index=dfaindex, state_index=stateindex, n_arcs=narcs 139 ) 140 ) 141 for a, b in state: 142 write( 143 " {{{from_label}, {to_state}}},\n".format( 144 from_label=a, to_state=b 145 ) 146 ) 147 write("};\n") 148