1# Copyright 2015 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""pytree-related utilities. 15 16This module collects various utilities related to the parse trees produced by 17the lib2to3 library. 18 19 NodeName(): produces a string name for pytree nodes. 20 ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse 21 a given string with code to a pytree. 22 InsertNodeBefore(): insert a node before another in a pytree. 23 InsertNodeAfter(): insert a node after another in a pytree. 24 {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes. 25""" 26 27import ast 28import os 29 30from lib2to3 import pygram 31from lib2to3 import pytree 32from lib2to3.pgen2 import driver 33from lib2to3.pgen2 import parse 34from lib2to3.pgen2 import token 35 36# TODO(eliben): We may want to get rid of this filtering at some point once we 37# have a better understanding of what information we need from the tree. Then, 38# these tokens may be filtered out from the tree before the tree gets to the 39# unwrapper. 40NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER']) 41 42OPENING_BRACKETS = frozenset({'(', '[', '{'}) 43CLOSING_BRACKETS = frozenset({')', ']', '}'}) 44 45 46class Annotation(object): 47 """Annotation names associated with pytrees.""" 48 CHILD_INDENT = 'child_indent' 49 NEWLINES = 'newlines' 50 MUST_SPLIT = 'must_split' 51 SPLIT_PENALTY = 'split_penalty' 52 SUBTYPE = 'subtype' 53 54 55def NodeName(node): 56 """Produce a string name for a given node. 57 58 For a Leaf this is the token name, and for a Node this is the type. 59 60 Arguments: 61 node: a tree node 62 63 Returns: 64 Name as a string. 65 """ 66 # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols. 67 if node.type < 256: 68 return token.tok_name[node.type] 69 else: 70 return pygram.python_grammar.number2symbol[node.type] 71 72 73def FirstLeafNode(node): 74 if isinstance(node, pytree.Leaf): 75 return node 76 return FirstLeafNode(node.children[0]) 77 78 79def LastLeafNode(node): 80 if isinstance(node, pytree.Leaf): 81 return node 82 return LastLeafNode(node.children[-1]) 83 84 85# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for 86# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a 87# context where a keyword is disallowed). 88# It forgets to do the same for 'exec' though. Luckily, Python is amenable to 89# monkey-patching. 90_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy() 91del _GRAMMAR_FOR_PY3.keywords['exec'] 92 93_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy() 94del _GRAMMAR_FOR_PY2.keywords['nonlocal'] 95 96 97def ParseCodeToTree(code): 98 """Parse the given code to a lib2to3 pytree. 99 100 Arguments: 101 code: a string with the code to parse. 102 103 Raises: 104 SyntaxError if the code is invalid syntax. 105 parse.ParseError if some other parsing failure. 106 107 Returns: 108 The root node of the parsed tree. 109 """ 110 # This function is tiny, but the incantation for invoking the parser correctly 111 # is sufficiently magical to be worth abstracting away. 112 if not code.endswith(os.linesep): 113 code += os.linesep 114 115 try: 116 # Try to parse using a Python 3 grammar, which is more permissive (print and 117 # exec are not keywords). 118 parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert) 119 tree = parser_driver.parse_string(code, debug=False) 120 except parse.ParseError: 121 # Now try to parse using a Python 2 grammar; If this fails, then 122 # there's something else wrong with the code. 123 try: 124 parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert) 125 tree = parser_driver.parse_string(code, debug=False) 126 except parse.ParseError: 127 # Raise a syntax error if the code is invalid python syntax. 128 try: 129 ast.parse(code) 130 except SyntaxError as e: 131 raise e 132 else: 133 raise 134 return _WrapEndMarker(tree) 135 136 137def _WrapEndMarker(tree): 138 """Wrap a single ENDMARKER token in a "file_input" node. 139 140 Arguments: 141 tree: (pytree.Node) The root node of the parsed tree. 142 143 Returns: 144 The root node of the parsed tree. If the tree is a single ENDMARKER node, 145 then that node is wrapped in a "file_input" node. That will ensure we don't 146 skip comments attached to that node. 147 """ 148 if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER: 149 return pytree.Node(pygram.python_symbols.file_input, [tree]) 150 return tree 151 152 153def InsertNodesBefore(new_nodes, target): 154 """Insert new_nodes before the given target location in the tree. 155 156 Arguments: 157 new_nodes: a sequence of new nodes to insert (the nodes should not be in the 158 tree). 159 target: the target node before which the new node node will be inserted. 160 161 Raises: 162 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 163 """ 164 for node in new_nodes: 165 _InsertNodeAt(node, target, after=False) 166 167 168def InsertNodesAfter(new_nodes, target): 169 """Insert new_nodes after the given target location in the tree. 170 171 Arguments: 172 new_nodes: a sequence of new nodes to insert (the nodes should not be in the 173 tree). 174 target: the target node after which the new node node will be inserted. 175 176 Raises: 177 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 178 """ 179 for node in reversed(new_nodes): 180 _InsertNodeAt(node, target, after=True) 181 182 183def _InsertNodeAt(new_node, target, after=False): 184 """Underlying implementation for node insertion. 185 186 Arguments: 187 new_node: a new node to insert (this node should not be in the tree). 188 target: the target node. 189 after: if True, new_node is inserted after target. Otherwise, it's inserted 190 before target. 191 192 Returns: 193 nothing 194 195 Raises: 196 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 197 """ 198 199 # Protect against attempts to insert nodes which already belong to some tree. 200 if new_node.parent is not None: 201 raise RuntimeError('inserting node which already has a parent', 202 (new_node, new_node.parent)) 203 204 # The code here is based on pytree.Base.next_sibling 205 parent_of_target = target.parent 206 if parent_of_target is None: 207 raise RuntimeError('expected target node to have a parent', (target,)) 208 209 for i, child in enumerate(parent_of_target.children): 210 if child is target: 211 insertion_index = i + 1 if after else i 212 parent_of_target.insert_child(insertion_index, new_node) 213 return 214 215 raise RuntimeError('unable to find insertion point for target node', 216 (target,)) 217 218 219# The following constant and functions implement a simple custom annotation 220# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute 221# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be 222# managed through GetNodeAnnotation and SetNodeAnnotation. 223_NODE_ANNOTATION_PREFIX = '_yapf_annotation_' 224 225 226def CopyYapfAnnotations(src, dst): 227 """Copy all YAPF annotations from the source node to the destination node. 228 229 Arguments: 230 src: the source node. 231 dst: the destination node. 232 """ 233 for annotation in dir(src): 234 if annotation.startswith(_NODE_ANNOTATION_PREFIX): 235 setattr(dst, annotation, getattr(src, annotation, None)) 236 237 238def GetNodeAnnotation(node, annotation, default=None): 239 """Get annotation value from a node. 240 241 Arguments: 242 node: the node. 243 annotation: annotation name - a string. 244 default: the default value to return if there's no annotation. 245 246 Returns: 247 Value of the annotation in the given node. If the node doesn't have this 248 particular annotation name yet, returns default. 249 """ 250 return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default) 251 252 253def SetNodeAnnotation(node, annotation, value): 254 """Set annotation value on a node. 255 256 Arguments: 257 node: the node. 258 annotation: annotation name - a string. 259 value: annotation value to set. 260 """ 261 setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value) 262 263 264def AppendNodeAnnotation(node, annotation, value): 265 """Appends an annotation value to a list of annotations on the node. 266 267 Arguments: 268 node: the node. 269 annotation: annotation name - a string. 270 value: annotation value to set. 271 """ 272 attr = GetNodeAnnotation(node, annotation, set()) 273 attr.add(value) 274 SetNodeAnnotation(node, annotation, attr) 275 276 277def RemoveSubtypeAnnotation(node, value): 278 """Removes an annotation value from the subtype annotations on the node. 279 280 Arguments: 281 node: the node. 282 value: annotation value to remove. 283 """ 284 attr = GetNodeAnnotation(node, Annotation.SUBTYPE) 285 if attr and value in attr: 286 attr.remove(value) 287 SetNodeAnnotation(node, Annotation.SUBTYPE, attr) 288 289 290def GetOpeningBracket(node): 291 """Get opening bracket value from a node. 292 293 Arguments: 294 node: the node. 295 296 Returns: 297 The opening bracket node or None if it couldn't find one. 298 """ 299 return getattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', None) 300 301 302def SetOpeningBracket(node, bracket): 303 """Set opening bracket value for a node. 304 305 Arguments: 306 node: the node. 307 bracket: opening bracket to set. 308 """ 309 setattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', bracket) 310 311 312def DumpNodeToString(node): 313 """Dump a string representation of the given node. For debugging. 314 315 Arguments: 316 node: the node. 317 318 Returns: 319 The string representation. 320 """ 321 if isinstance(node, pytree.Leaf): 322 fmt = ('{name}({value}) [lineno={lineno}, column={column}, ' 323 'prefix={prefix}, penalty={penalty}]') 324 return fmt.format( 325 name=NodeName(node), 326 value=_PytreeNodeRepr(node), 327 lineno=node.lineno, 328 column=node.column, 329 prefix=repr(node.prefix), 330 penalty=GetNodeAnnotation(node, Annotation.SPLIT_PENALTY, None)) 331 else: 332 fmt = '{node} [{len} children] [child_indent="{indent}"]' 333 return fmt.format( 334 node=NodeName(node), 335 len=len(node.children), 336 indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT)) 337 338 339def _PytreeNodeRepr(node): 340 """Like pytree.Node.__repr__, but names instead of numbers for tokens.""" 341 if isinstance(node, pytree.Node): 342 return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), 343 [_PytreeNodeRepr(c) for c in node.children]) 344 if isinstance(node, pytree.Leaf): 345 return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value) 346 347 348def IsCommentStatement(node): 349 return (NodeName(node) == 'simple_stmt' and 350 node.children[0].type == token.COMMENT) 351