1# Copyright 2015 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""pytree-related utilities. 15 16This module collects various utilities related to the parse trees produced by 17the lib2to3 library. 18 19 NodeName(): produces a string name for pytree nodes. 20 ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse 21 a given string with code to a pytree. 22 InsertNodeBefore(): insert a node before another in a pytree. 23 InsertNodeAfter(): insert a node after another in a pytree. 24 {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes. 25""" 26 27import ast 28 29from lib2to3 import pygram 30from lib2to3 import pytree 31from lib2to3.pgen2 import driver 32from lib2to3.pgen2 import parse 33from lib2to3.pgen2 import token 34 35# TODO(eliben): We may want to get rid of this filtering at some point once we 36# have a better understanding of what information we need from the tree. Then, 37# these tokens may be filtered out from the tree before the tree gets to the 38# unwrapper. 39NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER']) 40 41OPENING_BRACKETS = frozenset({'(', '[', '{'}) 42CLOSING_BRACKETS = frozenset({')', ']', '}'}) 43 44 45class Annotation(object): 46 """Annotation names associated with pytrees.""" 47 CHILD_INDENT = 'child_indent' 48 NEWLINES = 'newlines' 49 MUST_SPLIT = 'must_split' 50 SPLIT_PENALTY = 'split_penalty' 51 SUBTYPE = 'subtype' 52 53 54def NodeName(node): 55 """Produce a string name for a given node. 56 57 For a Leaf this is the token name, and for a Node this is the type. 58 59 Arguments: 60 node: a tree node 61 62 Returns: 63 Name as a string. 64 """ 65 # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols. 66 if node.type < 256: 67 return token.tok_name[node.type] 68 else: 69 return pygram.python_grammar.number2symbol[node.type] 70 71 72def FirstLeafNode(node): 73 if isinstance(node, pytree.Leaf): 74 return node 75 return FirstLeafNode(node.children[0]) 76 77 78def LastLeafNode(node): 79 if isinstance(node, pytree.Leaf): 80 return node 81 return LastLeafNode(node.children[-1]) 82 83 84# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for 85# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a 86# context where a keyword is disallowed). 87# It forgets to do the same for 'exec' though. Luckily, Python is amenable to 88# monkey-patching. 89_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy() 90del _GRAMMAR_FOR_PY3.keywords['exec'] 91 92_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy() 93del _GRAMMAR_FOR_PY2.keywords['nonlocal'] 94 95 96def ParseCodeToTree(code): 97 """Parse the given code to a lib2to3 pytree. 98 99 Arguments: 100 code: a string with the code to parse. 101 102 Raises: 103 SyntaxError if the code is invalid syntax. 104 parse.ParseError if some other parsing failure. 105 106 Returns: 107 The root node of the parsed tree. 108 """ 109 # This function is tiny, but the incantation for invoking the parser correctly 110 # is sufficiently magical to be worth abstracting away. 111 try: 112 # Try to parse using a Python 3 grammar, which is more permissive (print and 113 # exec are not keywords). 114 parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert) 115 tree = parser_driver.parse_string(code, debug=False) 116 except parse.ParseError: 117 # Now try to parse using a Python 2 grammar; If this fails, then 118 # there's something else wrong with the code. 119 try: 120 parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert) 121 tree = parser_driver.parse_string(code, debug=False) 122 except parse.ParseError: 123 # Raise a syntax error if the code is invalid python syntax. 124 try: 125 ast.parse(code) 126 except SyntaxError as e: 127 raise e 128 else: 129 raise 130 return _WrapEndMarker(tree) 131 132 133def _WrapEndMarker(tree): 134 """Wrap a single ENDMARKER token in a "file_input" node. 135 136 Arguments: 137 tree: (pytree.Node) The root node of the parsed tree. 138 139 Returns: 140 The root node of the parsed tree. If the tree is a single ENDMARKER node, 141 then that node is wrapped in a "file_input" node. That will ensure we don't 142 skip comments attached to that node. 143 """ 144 if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER: 145 return pytree.Node(pygram.python_symbols.file_input, [tree]) 146 return tree 147 148 149def InsertNodesBefore(new_nodes, target): 150 """Insert new_nodes before the given target location in the tree. 151 152 Arguments: 153 new_nodes: a sequence of new nodes to insert (the nodes should not be in the 154 tree). 155 target: the target node before which the new node node will be inserted. 156 157 Raises: 158 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 159 """ 160 for node in new_nodes: 161 _InsertNodeAt(node, target, after=False) 162 163 164def InsertNodesAfter(new_nodes, target): 165 """Insert new_nodes after the given target location in the tree. 166 167 Arguments: 168 new_nodes: a sequence of new nodes to insert (the nodes should not be in the 169 tree). 170 target: the target node after which the new node node will be inserted. 171 172 Raises: 173 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 174 """ 175 for node in reversed(new_nodes): 176 _InsertNodeAt(node, target, after=True) 177 178 179def _InsertNodeAt(new_node, target, after=False): 180 """Underlying implementation for node insertion. 181 182 Arguments: 183 new_node: a new node to insert (this node should not be in the tree). 184 target: the target node. 185 after: if True, new_node is inserted after target. Otherwise, it's inserted 186 before target. 187 188 Returns: 189 nothing 190 191 Raises: 192 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 193 """ 194 195 # Protect against attempts to insert nodes which already belong to some tree. 196 if new_node.parent is not None: 197 raise RuntimeError('inserting node which already has a parent', 198 (new_node, new_node.parent)) 199 200 # The code here is based on pytree.Base.next_sibling 201 parent_of_target = target.parent 202 if parent_of_target is None: 203 raise RuntimeError('expected target node to have a parent', (target,)) 204 205 for i, child in enumerate(parent_of_target.children): 206 if child is target: 207 insertion_index = i + 1 if after else i 208 parent_of_target.insert_child(insertion_index, new_node) 209 return 210 211 raise RuntimeError('unable to find insertion point for target node', 212 (target,)) 213 214 215# The following constant and functions implement a simple custom annotation 216# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute 217# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be 218# managed through GetNodeAnnotation and SetNodeAnnotation. 219_NODE_ANNOTATION_PREFIX = '_yapf_annotation_' 220 221 222def GetNodeAnnotation(node, annotation, default=None): 223 """Get annotation value from a node. 224 225 Arguments: 226 node: the node. 227 annotation: annotation name - a string. 228 default: the default value to return if there's no annotation. 229 230 Returns: 231 Value of the annotation in the given node. If the node doesn't have this 232 particular annotation name yet, returns default. 233 """ 234 return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default) 235 236 237def SetNodeAnnotation(node, annotation, value): 238 """Set annotation value on a node. 239 240 Arguments: 241 node: the node. 242 annotation: annotation name - a string. 243 value: annotation value to set. 244 """ 245 setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value) 246 247 248def AppendNodeAnnotation(node, annotation, value): 249 """Appends an annotation value to a list of annotations on the node. 250 251 Arguments: 252 node: the node. 253 annotation: annotation name - a string. 254 value: annotation value to set. 255 """ 256 attr = GetNodeAnnotation(node, annotation, set()) 257 attr.add(value) 258 SetNodeAnnotation(node, annotation, attr) 259 260 261def RemoveSubtypeAnnotation(node, value): 262 """Removes an annotation value from the subtype annotations on the node. 263 264 Arguments: 265 node: the node. 266 value: annotation value to remove. 267 """ 268 attr = GetNodeAnnotation(node, Annotation.SUBTYPE) 269 if attr and value in attr: 270 attr.remove(value) 271 SetNodeAnnotation(node, Annotation.SUBTYPE, attr) 272 273 274def GetOpeningBracket(node): 275 """Get opening bracket value from a node. 276 277 Arguments: 278 node: the node. 279 280 Returns: 281 The opening bracket node or None if it couldn't find one. 282 """ 283 return getattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', None) 284 285 286def SetOpeningBracket(node, bracket): 287 """Set opening bracket value for a node. 288 289 Arguments: 290 node: the node. 291 bracket: opening bracket to set. 292 """ 293 setattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', bracket) 294 295 296def DumpNodeToString(node): 297 """Dump a string representation of the given node. For debugging. 298 299 Arguments: 300 node: the node. 301 302 Returns: 303 The string representation. 304 """ 305 if isinstance(node, pytree.Leaf): 306 fmt = ('{name}({value}) [lineno={lineno}, column={column}, ' 307 'prefix={prefix}, penalty={penalty}]') 308 return fmt.format( 309 name=NodeName(node), 310 value=_PytreeNodeRepr(node), 311 lineno=node.lineno, 312 column=node.column, 313 prefix=repr(node.prefix), 314 penalty=GetNodeAnnotation(node, Annotation.SPLIT_PENALTY, None)) 315 else: 316 fmt = '{node} [{len} children] [child_indent="{indent}"]' 317 return fmt.format( 318 node=NodeName(node), 319 len=len(node.children), 320 indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT)) 321 322 323def _PytreeNodeRepr(node): 324 """Like pytree.Node.__repr__, but names instead of numbers for tokens.""" 325 if isinstance(node, pytree.Node): 326 return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), 327 [_PytreeNodeRepr(c) for c in node.children]) 328 if isinstance(node, pytree.Leaf): 329 return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value) 330 331 332def IsCommentStatement(node): 333 return (NodeName(node) == 'simple_stmt' and 334 node.children[0].type == token.COMMENT) 335