1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. 2# Licensed to PSF under a Contributor Agreement. 3 4"""Parser engine for the grammar tables generated by pgen. 5 6The grammar table must be loaded first. 7 8See Parser/parser.c in the Python distribution for additional info on 9how this parsing engine works. 10 11""" 12 13# Local imports 14from . import token 15 16class ParseError(Exception): 17 """Exception to signal the parser is stuck.""" 18 19 def __init__(self, msg, type, value, context): 20 Exception.__init__(self, "%s: type=%r, value=%r, context=%r" % 21 (msg, type, value, context)) 22 self.msg = msg 23 self.type = type 24 self.value = value 25 self.context = context 26 27class Parser(object): 28 """Parser engine. 29 30 The proper usage sequence is: 31 32 p = Parser(grammar, [converter]) # create instance 33 p.setup([start]) # prepare for parsing 34 <for each input token>: 35 if p.addtoken(...): # parse a token; may raise ParseError 36 break 37 root = p.rootnode # root of abstract syntax tree 38 39 A Parser instance may be reused by calling setup() repeatedly. 40 41 A Parser instance contains state pertaining to the current token 42 sequence, and should not be used concurrently by different threads 43 to parse separate token sequences. 44 45 See driver.py for how to get input tokens by tokenizing a file or 46 string. 47 48 Parsing is complete when addtoken() returns True; the root of the 49 abstract syntax tree can then be retrieved from the rootnode 50 instance variable. When a syntax error occurs, addtoken() raises 51 the ParseError exception. There is no error recovery; the parser 52 cannot be used after a syntax error was reported (but it can be 53 reinitialized by calling setup()). 54 55 """ 56 57 def __init__(self, grammar, convert=None): 58 """Constructor. 59 60 The grammar argument is a grammar.Grammar instance; see the 61 grammar module for more information. 62 63 The parser is not ready yet for parsing; you must call the 64 setup() method to get it started. 65 66 The optional convert argument is a function mapping concrete 67 syntax tree nodes to abstract syntax tree nodes. If not 68 given, no conversion is done and the syntax tree produced is 69 the concrete syntax tree. If given, it must be a function of 70 two arguments, the first being the grammar (a grammar.Grammar 71 instance), and the second being the concrete syntax tree node 72 to be converted. The syntax tree is converted from the bottom 73 up. 74 75 A concrete syntax tree node is a (type, value, context, nodes) 76 tuple, where type is the node type (a token or symbol number), 77 value is None for symbols and a string for tokens, context is 78 None or an opaque value used for error reporting (typically a 79 (lineno, offset) pair), and nodes is a list of children for 80 symbols, and None for tokens. 81 82 An abstract syntax tree node may be anything; this is entirely 83 up to the converter function. 84 85 """ 86 self.grammar = grammar 87 self.convert = convert or (lambda grammar, node: node) 88 89 def setup(self, start=None): 90 """Prepare for parsing. 91 92 This *must* be called before starting to parse. 93 94 The optional argument is an alternative start symbol; it 95 defaults to the grammar's start symbol. 96 97 You can use a Parser instance to parse any number of programs; 98 each time you call setup() the parser is reset to an initial 99 state determined by the (implicit or explicit) start symbol. 100 101 """ 102 if start is None: 103 start = self.grammar.start 104 # Each stack entry is a tuple: (dfa, state, node). 105 # A node is a tuple: (type, value, context, children), 106 # where children is a list of nodes or None, and context may be None. 107 newnode = (start, None, None, []) 108 stackentry = (self.grammar.dfas[start], 0, newnode) 109 self.stack = [stackentry] 110 self.rootnode = None 111 self.used_names = set() # Aliased to self.rootnode.used_names in pop() 112 113 def addtoken(self, type, value, context): 114 """Add a token; return True iff this is the end of the program.""" 115 # Map from token to label 116 ilabel = self.classify(type, value, context) 117 # Loop until the token is shifted; may raise exceptions 118 while True: 119 dfa, state, node = self.stack[-1] 120 states, first = dfa 121 arcs = states[state] 122 # Look for a state with this label 123 for i, newstate in arcs: 124 t, v = self.grammar.labels[i] 125 if ilabel == i: 126 # Look it up in the list of labels 127 assert t < 256 128 # Shift a token; we're done with it 129 self.shift(type, value, newstate, context) 130 # Pop while we are in an accept-only state 131 state = newstate 132 while states[state] == [(0, state)]: 133 self.pop() 134 if not self.stack: 135 # Done parsing! 136 return True 137 dfa, state, node = self.stack[-1] 138 states, first = dfa 139 # Done with this token 140 return False 141 elif t >= 256: 142 # See if it's a symbol and if we're in its first set 143 itsdfa = self.grammar.dfas[t] 144 itsstates, itsfirst = itsdfa 145 if ilabel in itsfirst: 146 # Push a symbol 147 self.push(t, self.grammar.dfas[t], newstate, context) 148 break # To continue the outer while loop 149 else: 150 if (0, state) in arcs: 151 # An accepting state, pop it and try something else 152 self.pop() 153 if not self.stack: 154 # Done parsing, but another token is input 155 raise ParseError("too much input", 156 type, value, context) 157 else: 158 # No success finding a transition 159 raise ParseError("bad input", type, value, context) 160 161 def classify(self, type, value, context): 162 """Turn a token into a label. (Internal)""" 163 if type == token.NAME: 164 # Keep a listing of all used names 165 self.used_names.add(value) 166 # Check for reserved words 167 ilabel = self.grammar.keywords.get(value) 168 if ilabel is not None: 169 return ilabel 170 ilabel = self.grammar.tokens.get(type) 171 if ilabel is None: 172 raise ParseError("bad token", type, value, context) 173 return ilabel 174 175 def shift(self, type, value, newstate, context): 176 """Shift a token. (Internal)""" 177 dfa, state, node = self.stack[-1] 178 newnode = (type, value, context, None) 179 newnode = self.convert(self.grammar, newnode) 180 if newnode is not None: 181 node[-1].append(newnode) 182 self.stack[-1] = (dfa, newstate, node) 183 184 def push(self, type, newdfa, newstate, context): 185 """Push a nonterminal. (Internal)""" 186 dfa, state, node = self.stack[-1] 187 newnode = (type, None, context, []) 188 self.stack[-1] = (dfa, newstate, node) 189 self.stack.append((newdfa, 0, newnode)) 190 191 def pop(self): 192 """Pop a nonterminal. (Internal)""" 193 popdfa, popstate, popnode = self.stack.pop() 194 newnode = self.convert(self.grammar, popnode) 195 if newnode is not None: 196 if self.stack: 197 dfa, state, node = self.stack[-1] 198 node[-1].append(newnode) 199 else: 200 self.rootnode = newnode 201 self.rootnode.used_names = self.used_names 202