1# Copyright 2015-2017 Google Inc. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14"""pytree-related utilities. 15 16This module collects various utilities related to the parse trees produced by 17the lib2to3 library. 18 19 NodeName(): produces a string name for pytree nodes. 20 ParseCodeToTree(): convenience wrapper around lib2to3 interfaces to parse 21 a given string with code to a pytree. 22 InsertNodeBefore(): insert a node before another in a pytree. 23 InsertNodeAfter(): insert a node after another in a pytree. 24 {Get,Set}NodeAnnotation(): manage custom annotations on pytree nodes. 25""" 26 27import ast 28from lib2to3 import pygram 29from lib2to3 import pytree 30from lib2to3.pgen2 import driver 31from lib2to3.pgen2 import parse 32from lib2to3.pgen2 import token 33 34# TODO(eliben): We may want to get rid of this filtering at some point once we 35# have a better understanding of what information we need from the tree. Then, 36# these tokens may be filtered out from the tree before the tree gets to the 37# unwrapper. 38NONSEMANTIC_TOKENS = frozenset(['DEDENT', 'INDENT', 'NEWLINE', 'ENDMARKER']) 39 40OPENING_BRACKETS = frozenset({'(', '[', '{'}) 41CLOSING_BRACKETS = frozenset({')', ']', '}'}) 42 43 44class Annotation(object): 45 """Annotation names associated with pytrees.""" 46 CHILD_INDENT = 'child_indent' 47 NEWLINES = 'newlines' 48 MUST_SPLIT = 'must_split' 49 SPLIT_PENALTY = 'split_penalty' 50 SUBTYPE = 'subtype' 51 52 53def NodeName(node): 54 """Produce a string name for a given node. 55 56 For a Leaf this is the token name, and for a Node this is the type. 57 58 Arguments: 59 node: a tree node 60 61 Returns: 62 Name as a string. 63 """ 64 # Nodes with values < 256 are tokens. Values >= 256 are grammar symbols. 65 if node.type < 256: 66 return token.tok_name[node.type] 67 else: 68 return pygram.python_grammar.number2symbol[node.type] 69 70 71# lib2to3 thoughtfully provides pygram.python_grammar_no_print_statement for 72# parsing Python 3 code that wouldn't parse otherwise (when 'print' is used in a 73# context where a keyword is disallowed). 74# It forgets to do the same for 'exec' though. Luckily, Python is amenable to 75# monkey-patching. 76_GRAMMAR_FOR_PY3 = pygram.python_grammar_no_print_statement.copy() 77del _GRAMMAR_FOR_PY3.keywords['exec'] 78 79_GRAMMAR_FOR_PY2 = pygram.python_grammar.copy() 80del _GRAMMAR_FOR_PY2.keywords['nonlocal'] 81 82 83def ParseCodeToTree(code): 84 """Parse the given code to a lib2to3 pytree. 85 86 Arguments: 87 code: a string with the code to parse. 88 89 Raises: 90 SyntaxError if the code is invalid syntax. 91 parse.ParseError if some other parsing failure. 92 93 Returns: 94 The root node of the parsed tree. 95 """ 96 # This function is tiny, but the incantation for invoking the parser correctly 97 # is sufficiently magical to be worth abstracting away. 98 try: 99 # Try to parse using a Python 3 grammar, which is more permissive (print and 100 # exec are not keywords). 101 parser_driver = driver.Driver(_GRAMMAR_FOR_PY3, convert=pytree.convert) 102 tree = parser_driver.parse_string(code, debug=False) 103 except parse.ParseError: 104 # Now try to parse using a Python 2 grammar; If this fails, then 105 # there's something else wrong with the code. 106 try: 107 parser_driver = driver.Driver(_GRAMMAR_FOR_PY2, convert=pytree.convert) 108 tree = parser_driver.parse_string(code, debug=False) 109 except parse.ParseError: 110 # Raise a syntax error if the code is invalid python syntax. 111 try: 112 ast.parse(code) 113 except SyntaxError as e: 114 raise e 115 else: 116 raise 117 return _WrapEndMarker(tree) 118 119 120def _WrapEndMarker(tree): 121 """Wrap a single ENDMARKER token in a "file_input" node. 122 123 Arguments: 124 tree: (pytree.Node) The root node of the parsed tree. 125 126 Returns: 127 The root node of the parsed tree. If the tree is a single ENDMARKER node, 128 then that node is wrapped in a "file_input" node. That will ensure we don't 129 skip comments attached to that node. 130 """ 131 if isinstance(tree, pytree.Leaf) and tree.type == token.ENDMARKER: 132 return pytree.Node(pygram.python_symbols.file_input, [tree]) 133 return tree 134 135 136def InsertNodesBefore(new_nodes, target): 137 """Insert new_nodes before the given target location in the tree. 138 139 Arguments: 140 new_nodes: a sequence of new nodes to insert (the nodes should not be in the 141 tree). 142 target: the target node before which the new node node will be inserted. 143 144 Raises: 145 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 146 """ 147 for node in new_nodes: 148 _InsertNodeAt(node, target, after=False) 149 150 151def InsertNodesAfter(new_nodes, target): 152 """Insert new_nodes after the given target location in the tree. 153 154 Arguments: 155 new_nodes: a sequence of new nodes to insert (the nodes should not be in the 156 tree). 157 target: the target node after which the new node node will be inserted. 158 159 Raises: 160 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 161 """ 162 for node in reversed(new_nodes): 163 _InsertNodeAt(node, target, after=True) 164 165 166def _InsertNodeAt(new_node, target, after=False): 167 """Underlying implementation for node insertion. 168 169 Arguments: 170 new_node: a new node to insert (this node should not be in the tree). 171 target: the target node. 172 after: if True, new_node is inserted after target. Otherwise, it's inserted 173 before target. 174 175 Returns: 176 nothing 177 178 Raises: 179 RuntimeError: if the tree is corrupted, or the insertion would corrupt it. 180 """ 181 182 # Protect against attempts to insert nodes which already belong to some tree. 183 if new_node.parent is not None: 184 raise RuntimeError('inserting node which already has a parent', 185 (new_node, new_node.parent)) 186 187 # The code here is based on pytree.Base.next_sibling 188 parent_of_target = target.parent 189 if parent_of_target is None: 190 raise RuntimeError('expected target node to have a parent', (target,)) 191 192 for i, child in enumerate(parent_of_target.children): 193 if child is target: 194 insertion_index = i + 1 if after else i 195 parent_of_target.insert_child(insertion_index, new_node) 196 return 197 198 raise RuntimeError('unable to find insertion point for target node', 199 (target,)) 200 201 202# The following constant and functions implement a simple custom annotation 203# mechanism for pytree nodes. We attach new attributes to nodes. Each attribute 204# is prefixed with _NODE_ANNOTATION_PREFIX. These annotations should only be 205# managed through GetNodeAnnotation and SetNodeAnnotation. 206_NODE_ANNOTATION_PREFIX = '_yapf_annotation_' 207 208 209def GetNodeAnnotation(node, annotation, default=None): 210 """Get annotation value from a node. 211 212 Arguments: 213 node: the node. 214 annotation: annotation name - a string. 215 default: the default value to return if there's no annotation. 216 217 Returns: 218 Value of the annotation in the given node. If the node doesn't have this 219 particular annotation name yet, returns default. 220 """ 221 return getattr(node, _NODE_ANNOTATION_PREFIX + annotation, default) 222 223 224def SetNodeAnnotation(node, annotation, value): 225 """Set annotation value on a node. 226 227 Arguments: 228 node: the node. 229 annotation: annotation name - a string. 230 value: annotation value to set. 231 """ 232 setattr(node, _NODE_ANNOTATION_PREFIX + annotation, value) 233 234 235def AppendNodeAnnotation(node, annotation, value): 236 """Appends an annotation value to a list of annotations on the node. 237 238 Arguments: 239 node: the node. 240 annotation: annotation name - a string. 241 value: annotation value to set. 242 """ 243 attr = GetNodeAnnotation(node, annotation, set()) 244 attr.add(value) 245 SetNodeAnnotation(node, annotation, attr) 246 247 248def RemoveSubtypeAnnotation(node, value): 249 """Removes an annotation value from the subtype annotations on the node. 250 251 Arguments: 252 node: the node. 253 value: annotation value to remove. 254 """ 255 attr = GetNodeAnnotation(node, Annotation.SUBTYPE) 256 if attr and value in attr: 257 attr.remove(value) 258 SetNodeAnnotation(node, Annotation.SUBTYPE, attr) 259 260 261def DumpNodeToString(node): 262 """Dump a string representation of the given node. For debugging. 263 264 Arguments: 265 node: the node. 266 267 Returns: 268 The string representation. 269 """ 270 if isinstance(node, pytree.Leaf): 271 fmt = '{name}({value}) [lineno={lineno}, column={column}, prefix={prefix}]' 272 return fmt.format( 273 name=NodeName(node), 274 value=_PytreeNodeRepr(node), 275 lineno=node.lineno, 276 column=node.column, 277 prefix=repr(node.prefix)) 278 else: 279 fmt = '{node} [{len} children] [child_indent="{indent}"]' 280 return fmt.format( 281 node=NodeName(node), 282 len=len(node.children), 283 indent=GetNodeAnnotation(node, Annotation.CHILD_INDENT)) 284 285 286def _PytreeNodeRepr(node): 287 """Like pytree.Node.__repr__, but names instead of numbers for tokens.""" 288 if isinstance(node, pytree.Node): 289 return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), 290 [_PytreeNodeRepr(c) for c in node.children]) 291 if isinstance(node, pytree.Leaf): 292 return '%s(%s, %r)' % (node.__class__.__name__, NodeName(node), node.value) 293 294 295def IsCommentStatement(node): 296 return (NodeName(node) == 'simple_stmt' and 297 node.children[0].type == token.COMMENT) 298