1# Copyright 2015 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""pytree-related utilities. 15 16This module collects various utilities related to the parse trees produced by 17the lib2to3 library. 18 19 NodeName(): produces a string name for pytree nodes. 20 ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse 21 a given string with code to a pytree. 22 InsertNodeBefore(): insert a node before another in a pytree. 23 InsertNodeAfter(): insert a node after another in a pytree. 24 {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes. 25""" 26 27import ast 28import os 29 30from yapf_third_party._ylib2to3 import pygram 31from yapf_third_party._ylib2to3 import pytree 32from yapf_third_party._ylib2to3.pgen2 import driver 33from yapf_third_party._ylib2to3.pgen2 import parse 34from yapf_third_party._ylib2to3.pgen2 import token 35 36# TODO(eliben): We may want to get rid of this filtering at some point once we 37# have a better understanding of what information we need from the tree. Then, 38# these tokens may be filtered out from the tree before the tree gets to the 39# unwrapper. 40NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER']) 41 42 43class Annotation(object): 44 """Annotation names associated with pytrees.""" 45 CHILD_INDENT = 'child_indent' 46 NEWLINES = 'newlines' 47 MUST_SPLIT = 'must_split' 48 SPLIT_PENALTY = 'split_penalty' 49 SUBTYPE = 'subtype' 50 51 52def NodeName(node): 53 """Produce a string name for a given node. 54 55 For a Leaf this is the token name, and for a Node this is the type. 56 57 Arguments: 58 node: a tree node 59 60 Returns: 61 Name as a string. 62 """ 63 # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols. 64 if node.type < 256: 65 return token.tok_name[node.type] 66 else: 67 return pygram.python_grammar.number2symbol[node.type] 68 69 70def FirstLeafNode(node): 71 if isinstance(node, pytree.Leaf): 72 return node 73 return FirstLeafNode(node.children[0]) 74 75 76def LastLeafNode(node): 77 if isinstance(node, pytree.Leaf): 78 return node 79 return LastLeafNode(node.children[-1]) 80 81 82# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for 83# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a 84# context where a keyword is disallowed). 85# It forgets to do the same for 'exec' though. Luckily, Python is amenable to 86# monkey-patching. 87# Note that pygram.python_grammar_no_print_and_exec_statement with "_and_exec" 88# will require Python >=3.8. 89_PYTHON_GRAMMAR = pygram.python_grammar_no_print_statement.copy() 90del _PYTHON_GRAMMAR.keywords['exec'] 91 92 93def ParseCodeToTree(code): 94 """Parse the given code to a lib2to3 pytree. 95 96 Arguments: 97 code: a string with the code to parse. 98 99 Raises: 100 SyntaxError if the code is invalid syntax. 101 parse.ParseError if some other parsing failure. 102 103 Returns: 104 The root node of the parsed tree. 105 """ 106 # This function is tiny, but the incantation for invoking the parser correctly 107 # is sufficiently magical to be worth abstracting away. 108 if not code.endswith(os.linesep): 109 code += os.linesep 110 111 try: 112 parser_driver = driver.Driver(_PYTHON_GRAMMAR, convert=pytree.convert) 113 tree = parser_driver.parse_string(code, debug=False) 114 except parse.ParseError: 115 # Raise a syntax error if the code is invalid python syntax. 116 ast.parse(code) 117 raise 118 return _WrapEndMarker(tree) 119 120 121def _WrapEndMarker(tree): 122 """Wrap a single ENDMARKER token in a "file_input" node. 123 124 Arguments: 125 tree: (pytree.Node) The root node of the parsed tree. 126 127 Returns: 128 The root node of the parsed tree. If the tree is a single ENDMARKER node, 129 then that node is wrapped in a "file_input" node. That will ensure we don't 130 skip comments attached to that node. 131 """ 132 if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER: 133 return pytree.Node(pygram.python_symbols.file_input, [tree]) 134 return tree 135 136 137def InsertNodesBefore(new_nodes, target): 138 """Insert new_nodes before the given target location in the tree. 139 140 Arguments: 141 new_nodes: a sequence of new nodes to insert (the nodes should not be in the 142 tree). 143 target: the target node before which the new node node will be inserted. 144 145 Raises: 146 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 147 """ 148 for node in new_nodes: 149 _InsertNodeAt(node, target, after=False) 150 151 152def InsertNodesAfter(new_nodes, target): 153 """Insert new_nodes after the given target location in the tree. 154 155 Arguments: 156 new_nodes: a sequence of new nodes to insert (the nodes should not be in the 157 tree). 158 target: the target node after which the new node node will be inserted. 159 160 Raises: 161 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 162 """ 163 for node in reversed(new_nodes): 164 _InsertNodeAt(node, target, after=True) 165 166 167def _InsertNodeAt(new_node, target, after=False): 168 """Underlying implementation for node insertion. 169 170 Arguments: 171 new_node: a new node to insert (this node should not be in the tree). 172 target: the target node. 173 after: if True, new_node is inserted after target. Otherwise, it's inserted 174 before target. 175 176 Returns: 177 nothing 178 179 Raises: 180 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 181 """ 182 183 # Protect against attempts to insert nodes which already belong to some tree. 184 if new_node.parent is not None: 185 raise RuntimeError('inserting node which already has a parent', 186 (new_node, new_node.parent)) 187 188 # The code here is based on pytree.Base.next_sibling 189 parent_of_target = target.parent 190 if parent_of_target is None: 191 raise RuntimeError('expected target node to have a parent', (target,)) 192 193 for i, child in enumerate(parent_of_target.children): 194 if child is target: 195 insertion_index = i + 1 if after else i 196 parent_of_target.insert_child(insertion_index, new_node) 197 return 198 199 raise RuntimeError('unable to find insertion point for target node', 200 (target,)) 201 202 203# The following constant and functions implement a simple custom annotation 204# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute 205# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be 206# managed through GetNodeAnnotation and SetNodeAnnotation. 207_NODE_ANNOTATION_PREFIX = '_yapf_annotation_' 208 209 210def CopyYapfAnnotations(src, dst): 211 """Copy all YAPF annotations from the source node to the destination node. 212 213 Arguments: 214 src: the source node. 215 dst: the destination node. 216 """ 217 for annotation in dir(src): 218 if annotation.startswith(_NODE_ANNOTATION_PREFIX): 219 setattr(dst, annotation, getattr(src, annotation, None)) 220 221 222def GetNodeAnnotation(node, annotation, default=None): 223 """Get annotation value from a node. 224 225 Arguments: 226 node: the node. 227 annotation: annotation name - a string. 228 default: the default value to return if there's no annotation. 229 230 Returns: 231 Value of the annotation in the given node. If the node doesn't have this 232 particular annotation name yet, returns default. 233 """ 234 return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default) 235 236 237def SetNodeAnnotation(node, annotation, value): 238 """Set annotation value on a node. 239 240 Arguments: 241 node: the node. 242 annotation: annotation name - a string. 243 value: annotation value to set. 244 """ 245 setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value) 246 247 248def AppendNodeAnnotation(node, annotation, value): 249 """Appends an annotation value to a list of annotations on the node. 250 251 Arguments: 252 node: the node. 253 annotation: annotation name - a string. 254 value: annotation value to set. 255 """ 256 attr = GetNodeAnnotation(node, annotation, set()) 257 attr.add(value) 258 SetNodeAnnotation(node, annotation, attr) 259 260 261def RemoveSubtypeAnnotation(node, value): 262 """Removes an annotation value from the subtype annotations on the node. 263 264 Arguments: 265 node: the node. 266 value: annotation value to remove. 267 """ 268 attr = GetNodeAnnotation(node, Annotation.SUBTYPE) 269 if attr and value in attr: 270 attr.remove(value) 271 SetNodeAnnotation(node, Annotation.SUBTYPE, attr) 272 273 274def GetOpeningBracket(node): 275 """Get opening bracket value from a node. 276 277 Arguments: 278 node: the node. 279 280 Returns: 281 The opening bracket node or None if it couldn't find one. 282 """ 283 return getattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', None) 284 285 286def SetOpeningBracket(node, bracket): 287 """Set opening bracket value for a node. 288 289 Arguments: 290 node: the node. 291 bracket: opening bracket to set. 292 """ 293 setattr(node, _NODE_ANNOTATION_PREFIX + 'container_bracket', bracket) 294 295 296def DumpNodeToString(node): 297 """Dump a string representation of the given node. For debugging. 298 299 Arguments: 300 node: the node. 301 302 Returns: 303 The string representation. 304 """ 305 if isinstance(node, pytree.Leaf): 306 fmt = ('{name}({value}) [lineno={lineno}, column={column}, ' 307 'prefix={prefix}, penalty={penalty}]') 308 return fmt.format( 309 name=NodeName(node), 310 value=_PytreeNodeRepr(node), 311 lineno=node.lineno, 312 column=node.column, 313 prefix=repr(node.prefix), 314 penalty=GetNodeAnnotation(node, Annotation.SPLIT_PENALTY, None)) 315 else: 316 fmt = '{node} [{len} children] [child_indent="{indent}"]' 317 return fmt.format( 318 node=NodeName(node), 319 len=len(node.children), 320 indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT)) 321 322 323def _PytreeNodeRepr(node): 324 """Like pytree.Node.__repr__, but names instead of numbers for tokens.""" 325 if isinstance(node, pytree.Node): 326 return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), 327 [_PytreeNodeRepr(c) for c in node.children]) 328 if isinstance(node, pytree.Leaf): 329 return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value) 330 331 332def IsCommentStatement(node): 333 return (NodeName(node) == 'simple_stmt' and 334 node.children[0].type == token.COMMENT) 335