1# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved. 2# Licensed to PSF under a Contributor Agreement. 3 4"""Parser engine for the grammar tables generated by pgen. 5 6The grammar table must be loaded first. 7 8See Parser/parser.c in the Python distribution for additional info on 9how this parsing engine works. 10 11""" 12 13# Local imports 14from . import token 15 16class ParseError(Exception): 17 """Exception to signal the parser is stuck.""" 18 19 def __init__(self, msg, type, value, context): 20 Exception.__init__(self, "%s: type=%r, value=%r, context=%r" % 21 (msg, type, value, context)) 22 self.msg = msg 23 self.type = type 24 self.value = value 25 self.context = context 26 27 def __reduce__(self): 28 return type(self), (self.msg, self.type, self.value, self.context) 29 30class Parser(object): 31 """Parser engine. 32 33 The proper usage sequence is: 34 35 p = Parser(grammar, [converter]) # create instance 36 p.setup([start]) # prepare for parsing 37 <for each input token>: 38 if p.addtoken(...): # parse a token; may raise ParseError 39 break 40 root = p.rootnode # root of abstract syntax tree 41 42 A Parser instance may be reused by calling setup() repeatedly. 43 44 A Parser instance contains state pertaining to the current token 45 sequence, and should not be used concurrently by different threads 46 to parse separate token sequences. 47 48 See driver.py for how to get input tokens by tokenizing a file or 49 string. 50 51 Parsing is complete when addtoken() returns True; the root of the 52 abstract syntax tree can then be retrieved from the rootnode 53 instance variable. When a syntax error occurs, addtoken() raises 54 the ParseError exception. There is no error recovery; the parser 55 cannot be used after a syntax error was reported (but it can be 56 reinitialized by calling setup()). 57 58 """ 59 60 def __init__(self, grammar, convert=None): 61 """Constructor. 62 63 The grammar argument is a grammar.Grammar instance; see the 64 grammar module for more information. 65 66 The parser is not ready yet for parsing; you must call the 67 setup() method to get it started. 68 69 The optional convert argument is a function mapping concrete 70 syntax tree nodes to abstract syntax tree nodes. If not 71 given, no conversion is done and the syntax tree produced is 72 the concrete syntax tree. If given, it must be a function of 73 two arguments, the first being the grammar (a grammar.Grammar 74 instance), and the second being the concrete syntax tree node 75 to be converted. The syntax tree is converted from the bottom 76 up. 77 78 A concrete syntax tree node is a (type, value, context, nodes) 79 tuple, where type is the node type (a token or symbol number), 80 value is None for symbols and a string for tokens, context is 81 None or an opaque value used for error reporting (typically a 82 (lineno, offset) pair), and nodes is a list of children for 83 symbols, and None for tokens. 84 85 An abstract syntax tree node may be anything; this is entirely 86 up to the converter function. 87 88 """ 89 self.grammar = grammar 90 self.convert = convert or (lambda grammar, node: node) 91 92 def setup(self, start=None): 93 """Prepare for parsing. 94 95 This *must* be called before starting to parse. 96 97 The optional argument is an alternative start symbol; it 98 defaults to the grammar's start symbol. 99 100 You can use a Parser instance to parse any number of programs; 101 each time you call setup() the parser is reset to an initial 102 state determined by the (implicit or explicit) start symbol. 103 104 """ 105 if start is None: 106 start = self.grammar.start 107 # Each stack entry is a tuple: (dfa, state, node). 108 # A node is a tuple: (type, value, context, children), 109 # where children is a list of nodes or None, and context may be None. 110 newnode = (start, None, None, []) 111 stackentry = (self.grammar.dfas[start], 0, newnode) 112 self.stack = [stackentry] 113 self.rootnode = None 114 self.used_names = set() # Aliased to self.rootnode.used_names in pop() 115 116 def addtoken(self, type, value, context): 117 """Add a token; return True iff this is the end of the program.""" 118 # Map from token to label 119 ilabel = self.classify(type, value, context) 120 # Loop until the token is shifted; may raise exceptions 121 while True: 122 dfa, state, node = self.stack[-1] 123 states, first = dfa 124 arcs = states[state] 125 # Look for a state with this label 126 for i, newstate in arcs: 127 t, v = self.grammar.labels[i] 128 if ilabel == i: 129 # Look it up in the list of labels 130 assert t < 256 131 # Shift a token; we're done with it 132 self.shift(type, value, newstate, context) 133 # Pop while we are in an accept-only state 134 state = newstate 135 while states[state] == [(0, state)]: 136 self.pop() 137 if not self.stack: 138 # Done parsing! 139 return True 140 dfa, state, node = self.stack[-1] 141 states, first = dfa 142 # Done with this token 143 return False 144 elif t >= 256: 145 # See if it's a symbol and if we're in its first set 146 itsdfa = self.grammar.dfas[t] 147 itsstates, itsfirst = itsdfa 148 if ilabel in itsfirst: 149 # Push a symbol 150 self.push(t, self.grammar.dfas[t], newstate, context) 151 break # To continue the outer while loop 152 else: 153 if (0, state) in arcs: 154 # An accepting state, pop it and try something else 155 self.pop() 156 if not self.stack: 157 # Done parsing, but another token is input 158 raise ParseError("too much input", 159 type, value, context) 160 else: 161 # No success finding a transition 162 raise ParseError("bad input", type, value, context) 163 164 def classify(self, type, value, context): 165 """Turn a token into a label. (Internal)""" 166 if type == token.NAME: 167 # Keep a listing of all used names 168 self.used_names.add(value) 169 # Check for reserved words 170 ilabel = self.grammar.keywords.get(value) 171 if ilabel is not None: 172 return ilabel 173 ilabel = self.grammar.tokens.get(type) 174 if ilabel is None: 175 raise ParseError("bad token", type, value, context) 176 return ilabel 177 178 def shift(self, type, value, newstate, context): 179 """Shift a token. (Internal)""" 180 dfa, state, node = self.stack[-1] 181 newnode = (type, value, context, None) 182 newnode = self.convert(self.grammar, newnode) 183 if newnode is not None: 184 node[-1].append(newnode) 185 self.stack[-1] = (dfa, newstate, node) 186 187 def push(self, type, newdfa, newstate, context): 188 """Push a nonterminal. (Internal)""" 189 dfa, state, node = self.stack[-1] 190 newnode = (type, None, context, []) 191 self.stack[-1] = (dfa, newstate, node) 192 self.stack.append((newdfa, 0, newnode)) 193 194 def pop(self): 195 """Pop a nonterminal. (Internal)""" 196 popdfa, popstate, popnode = self.stack.pop() 197 newnode = self.convert(self.grammar, popnode) 198 if newnode is not None: 199 if self.stack: 200 dfa, state, node = self.stack[-1] 201 node[-1].append(newnode) 202 else: 203 self.rootnode = newnode 204 self.rootnode.used_names = self.used_names 205