import collections


class Grammar:
    """Container for the parsing tables built by pgen.

    In addition to holding the tables, the class can serialise them as C
    source text through ``produce_graminit_h`` and ``produce_graminit_c``.

    Instance attributes:

    symbol2number -- dict from symbol name to symbol number.  Symbol
                     numbers are 256 or above so that they cannot be
                     confused with token numbers, which lie in the
                     range 0..255 (inclusive).

    number2symbol -- dict from symbol number to symbol name; the
                     inverse of symbol2number.

    states        -- list of DFAs.  A DFA is a list of states, a state
                     is a list of arcs, and an arc is an (i, j) pair in
                     which i is a label and j is a state number.  The
                     position of a DFA in this list is its DFA number
                     (so the attribute name is a little misleading).
                     A final state carries a special arc (0, j) where j
                     is the state's own number.

    dfas          -- dict from symbol number to a (DFA, first) pair.
                     DFA is one of the items of the states list and
                     first is the set of tokens that may start the
                     grammar rule.

    labels        -- list of (x, y) pairs.  x is a token number or a
                     symbol number; y is None or a string, and the
                     strings are keywords.  A label number is the index
                     into this list, and label numbers are what mark
                     the transitions (arcs) of the DFAs.

    start         -- number of the start symbol of the grammar.

    keywords      -- dict from keyword string to arc label.

    tokens        -- dict from token number to arc label.

    symbol2label  -- ordered dict created empty by the constructor; no
                     method of this class reads it (presumably a map
                     from symbol names to labels -- confirm against
                     the code that fills it in).

    """

    def __init__(self):
        """Create empty tables.

        ``labels`` starts with the single entry ``(0, "EMPTY")`` and
        ``start`` defaults to 256.
        """
        ordered = collections.OrderedDict
        self.symbol2number = ordered()
        self.number2symbol = ordered()
        self.states = []
        self.dfas = ordered()
        self.labels = [(0, "EMPTY")]
        self.keywords = ordered()
        self.tokens = ordered()
        self.symbol2label = ordered()
        self.start = 256

    def produce_graminit_h(self, writer):
        """Emit the header text: one ``#define`` per grammar symbol.

        ``writer`` is a callable that receives each chunk of text.
        Symbols are written in the iteration order of ``number2symbol``.
        """
        define_line = "#define {} {}\n"
        writer("/* Generated by Parser/pgen */\n\n")
        for symbol_number, symbol_name in self.number2symbol.items():
            writer(define_line.format(symbol_name, symbol_number))

    def produce_graminit_c(self, writer):
        """Emit the C source text that defines ``_PyParser_Grammar``.

        ``writer`` is a callable that receives each chunk of text.  The
        output consists of a fixed preamble, the DFA tables, the label
        table and finally the initialiser of the grammar structure.
        """
        preamble = (
            "/* Generated by Parser/pgen */\n\n",
            '#include "grammar.h"\n',
            "grammar _PyParser_Grammar;\n",
        )
        for chunk in preamble:
            writer(chunk)

        self.print_dfas(writer)
        self.print_labels(writer)

        dfa_count = len(self.dfas)
        label_count = len(self.labels)
        start_symbol = self.start

        writer("grammar _PyParser_Grammar = {\n")
        writer(" {n_dfas},\n".format(n_dfas=dfa_count))
        writer(" dfas,\n")
        writer(" {{{n_labels}, labels}},\n".format(n_labels=label_count))
        writer(" {start_number}\n".format(start_number=start_symbol))
        writer("};\n")

    def print_labels(self, writer):
        """Emit the ``labels`` array, one initialiser per label.

        A label whose second element is ``None`` is written with a bare
        ``0`` in place of the double-quoted string.
        """
        array_open = "static const label labels[{n_labels}] = {{\n"
        entry = ' {{{label}, {label_name}}},\n'

        writer(array_open.format(n_labels=len(self.labels)))
        for label_type, label_text in self.labels:
            if label_text is None:
                c_text = 0
            else:
                c_text = '"{}"'.format(label_text)
            writer(entry.format(label=label_type, label_name=c_text))
        writer("};\n")

    def print_dfas(self, writer):
        """Emit the state tables followed by the ``dfas`` array.

        Every entry of ``self.dfas`` becomes one initialiser that names
        ``states_<i>``, where ``<i>`` is the entry's position in
        ``self.dfas``; ``print_states`` numbers its arrays by position
        in ``self.states``, so the two containers are presumably kept in
        the same order by whoever fills them in.  The first set is
        written as a string of octal escapes, one per byte of a bitset
        in which element ``t`` sets bit ``t & 7`` of byte ``t >> 3``.
        """
        # The three literals are concatenated before formatting; the last
        # one contains no replacement fields, so the result is unchanged.
        entry_open = (
            ' {{{dfa_symbol}, "{symbol_name}", '
            "{n_states}, states_{dfa_index},\n"
            ' "'
        )

        self.print_states(writer)
        writer("static const dfa dfas[{}] = {{\n".format(len(self.dfas)))
        for position, item in enumerate(self.dfas.items()):
            symbol_number, (dfa_states, first_labels) = item
            writer(
                entry_open.format(
                    dfa_symbol=symbol_number,
                    symbol_name=self.number2symbol[symbol_number],
                    n_states=len(dfa_states),
                    dfa_index=position,
                )
            )

            # One bit per label number, rounded up to whole bytes.
            first_bits = bytearray((len(self.labels) >> 3) + 1)
            for label_number in first_labels:
                byte_index = label_number >> 3
                bit_mask = 1 << (label_number & 7)
                first_bits[byte_index] |= bit_mask
            for octet in first_bits:
                writer("\\%03o" % (octet & 0xFF))
            writer('"},\n')
        writer("};\n")

    def print_states(self, write):
        """Emit, for each DFA, its arc arrays and then its state array.

        ``write`` is a callable that receives each chunk of text.  The
        arrays are named ``states_<i>`` with ``<i>`` the position of the
        DFA in ``self.states``; each state initialiser holds the number
        of arcs and the name of the matching ``arcs_<i>_<j>`` array.
        """
        array_open = "static state states_{dfa_index}[{n_states}] = {{\n"
        entry = " {{{n_arcs}, arcs_{dfa_index}_{state_index}}},\n"

        for dfa_number, dfa_states in enumerate(self.states):
            # The arc arrays are written first because the state
            # initialisers below refer to them by name.
            self.print_arcs(write, dfa_number, dfa_states)
            write(
                array_open.format(
                    dfa_index=dfa_number, n_states=len(dfa_states)
                )
            )
            for state_number, arcs in enumerate(dfa_states):
                write(
                    entry.format(
                        n_arcs=len(arcs),
                        dfa_index=dfa_number,
                        state_index=state_number,
                    )
                )
            write("};\n")

    def print_arcs(self, write, dfaindex, states):
        """Emit one ``arcs_<dfaindex>_<j>`` array per state in ``states``.

        ``write`` is a callable that receives each chunk of text and
        ``states`` is a sequence of states, each a sequence of
        ``(label, state number)`` pairs.
        """
        array_open = (
            "static const arc arcs_{dfa_index}_{state_index}[{n_arcs}] = {{\n"
        )
        entry = " {{{from_label}, {to_state}}},\n"

        for state_number, arcs in enumerate(states):
            write(
                array_open.format(
                    dfa_index=dfaindex,
                    state_index=state_number,
                    n_arcs=len(arcs),
                )
            )
            for label, target in arcs:
                write(entry.format(from_label=label, to_state=target))
            write("};\n")