1# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2# 3# Licensed under the Apache License, Version 2.0 (the "License"); 4# you may not use this file except in compliance with the License. 5# You may obtain a copy of the License at 6# 7# http://www.apache.org/licenses/LICENSE-2.0 8# 9# Unless required by applicable law or agreed to in writing, software 10# distributed under the License is distributed on an "AS IS" BASIS, 11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12# See the License for the specific language governing permissions and 13# limitations under the License. 14# ============================================================================== 15"""Control flow graph (CFG) structure for Python AST representation. 16 17The CFG is a digraph with edges representing valid control flow. Each 18node is associated with exactly one AST node, but not all AST nodes may have 19a corresponding CFG counterpart. 20 21Once built, the CFG itself is immutable, but the values it holds need not be; 22they are usually annotated with information extracted by walking the graph. 23 24Tip: Use `Graph.as_dot` to visualize the CFG using any DOT viewer. 25 26Note: the CFG tries to include all code paths that MAY be taken, with a single 27notable exception: 28 * function calls do not generate edges corresponding to exceptions they may 29 raise (i.e. a function call in the middle of a block does not return or jump 30 to any except or finally block) 31TODO(mdan): Consider adding the edges above. They'd only add ~O(n) edges. 32TODO(mdan): Alternatively, consider adding an edge from try to all its excepts. 33""" 34 35# TODO(mdan): The notion of 'statements' below is inaccurate. 36# They should rather be called 'block statements', because they include 37# statements that may have a body, e.g. if and while. 38 39from __future__ import absolute_import 40from __future__ import division 41from __future__ import print_function 42 43import collections 44import weakref 45from enum import Enum 46 47import gast 48import six 49 50from tensorflow.python.autograph.pyct import anno 51from tensorflow.python.autograph.pyct import parser 52 53 54class Node(object): 55 """A node in the CFG. 56 57 Although new instances of this class are mutable, the objects that a user 58 finds in the CFG are typically not. 59 60 The nodes represent edges in the CFG graph, and maintain pointers to allow 61 efficient walking in both forward and reverse order. The following property 62 holds for all nodes: "child in node.next" iff "node in child.prev". 63 64 Attributes: 65 next: FrozenSet[Node, ...], the nodes that follow this node, in control 66 flow order 67 prev: FrozenSet[Node, ...], the nodes that precede this node, in reverse 68 control flow order 69 ast_node: ast.AST, the AST node corresponding to this CFG node 70 """ 71 72 def __init__(self, next_, prev, ast_node): 73 self.next = next_ 74 self.prev = prev 75 self.ast_node = ast_node 76 77 def freeze(self): 78 self.next = frozenset(self.next) 79 # Assumption: All CFG nodes have identical life spans, because the graph 80 # owns them. Nodes should never be used outside the context of an existing 81 # graph. 82 self.prev = weakref.WeakSet(self.prev) 83 84 def __repr__(self): 85 if isinstance(self.ast_node, gast.FunctionDef): 86 return 'def %s' % self.ast_node.name 87 elif isinstance(self.ast_node, gast.ClassDef): 88 return 'class %s' % self.ast_node.name 89 elif isinstance(self.ast_node, gast.withitem): 90 return parser.unparse( 91 self.ast_node.context_expr, include_encoding_marker=False).strip() 92 return parser.unparse(self.ast_node, include_encoding_marker=False).strip() 93 94 95class Graph( 96 collections.namedtuple( 97 'Graph', 98 ['entry', 'exit', 'error', 'index', 'stmt_prev', 'stmt_next'])): 99 """A Control Flow Graph. 100 101 The CFG maintains an index to allow looking up a CFG node by the AST node to 102 which it is associated. The index can also be enumerated in top-down, depth 103 first order. 104 105 Walking the graph in forward or reverse order is supported by double 106 parent-child links. 107 108 Note: the error nodes are not wired to their corresponding finally guards, 109 because these are shared, and wiring them would create a reverse path from 110 normal control flow into the error nodes, which we want to avoid. 111 112 The graph also maintains edges corresponding to higher level statements 113 like for-else loops. A node is considered successor of a statement if there 114 is an edge from a node that is lexically a child of that statement to a node 115 that is not. Statement predecessors are analogously defined. 116 117 Attributes: 118 entry: Node, the entry node 119 exit: FrozenSet[Node, ...], the exit nodes 120 error: FrozenSet[Node, ...], nodes that exit due to an explicitly raised 121 error (errors propagated from function calls are not accounted) 122 index: Dict[ast.Node, Node], mapping AST nodes to the respective CFG 123 node 124 stmt_prev: Dict[ast.Node, FrozenSet[Node, ...]], mapping statement AST 125 nodes to their predecessor CFG nodes 126 stmt_next: Dict[ast.Node, FrozenSet[Node, ...]], mapping statement AST 127 nodes to their successor CFG nodes 128 """ 129 130 def __repr__(self): 131 return self.as_dot() 132 133 def as_dot(self): 134 """Print CFG in DOT format.""" 135 result = 'digraph CFG {\n' 136 for node in self.index.values(): 137 result += ' %s [label="%s"];\n' % (id(node), node) 138 for node in self.index.values(): 139 for next_ in node.next: 140 result += ' %s -> %s;\n' % (id(node), id(next_)) 141 result += '}' 142 return result 143 144 145class _WalkMode(Enum): 146 FORWARD = 1 147 REVERSE = 2 148 149 150# TODO(mdan): Rename to DataFlowAnalyzer. 151# TODO(mdan): Consider specializations that use gen/kill/transfer abstractions. 152class GraphVisitor(object): 153 """Base class for a CFG visitors. 154 155 This implementation is not thread safe. 156 157 The visitor has some facilities to simplify dataflow analyses. In particular, 158 it allows revisiting the nodes at the decision of the subclass. This can be 159 used to visit the graph until the state reaches a fixed point. 160 161 For more details on dataflow analysis, see 162 https://www.seas.harvard.edu/courses/cs252/2011sp/slides/Lec02-Dataflow.pdf 163 164 Note: the literature generally suggests visiting successor nodes only when the 165 state of the current node changed, regardless of whether that successor has 166 ever been visited. This implementation visits every successor at least once. 167 168 Attributes: 169 graph: Graph 170 in_: Dict[Node, Any], stores node-keyed state during a visit 171 out: Dict[Node, Any], stores node-keyed state during a visit 172 """ 173 174 def __init__(self, graph): 175 self.graph = graph 176 self.reset() 177 178 def init_state(self, node): 179 """State initialization function. Optional to overload. 180 181 An in/out state slot will be created for each node in the graph. Subclasses 182 must overload this to control what that is initialized to. 183 184 Args: 185 node: Node 186 """ 187 raise NotImplementedError('Subclasses must implement this.') 188 189 # TODO(mdan): Rename to flow? 190 def visit_node(self, node): 191 """Visitor function. 192 193 Args: 194 node: Node 195 Returns: 196 bool, whether the node should be revisited; subclasses can visit every 197 reachable node exactly once by always returning False 198 """ 199 raise NotImplementedError('Subclasses must implement this.') 200 201 def reset(self): 202 self.in_ = { 203 node: self.init_state(node) for node in self.graph.index.values() 204 } 205 self.out = { 206 node: self.init_state(node) for node in self.graph.index.values() 207 } 208 209 def can_ignore(self, node): 210 """Returns True if the node can safely be assumed not to touch variables.""" 211 ast_node = node.ast_node 212 if anno.hasanno(ast_node, anno.Basic.SKIP_PROCESSING): 213 return True 214 if six.PY2: 215 if (isinstance(ast_node, gast.Name) and 216 ast_node.id in ('None', 'True', 'False')): 217 return True 218 return isinstance(ast_node, 219 (gast.Break, gast.Continue, gast.Raise, gast.Pass)) 220 221 def _visit_internal(self, mode): 222 """Visits the CFG, breadth-first.""" 223 assert mode in (_WalkMode.FORWARD, _WalkMode.REVERSE) 224 if mode == _WalkMode.FORWARD: 225 open_ = [self.graph.entry] 226 elif mode == _WalkMode.REVERSE: 227 open_ = list(self.graph.exit) 228 closed = set() 229 230 while open_: 231 node = open_.pop(0) 232 closed.add(node) 233 234 should_revisit = self.visit_node(node) 235 236 if mode == _WalkMode.FORWARD: 237 children = node.next 238 elif mode == _WalkMode.REVERSE: 239 children = node.prev 240 241 for next_ in children: 242 if should_revisit or next_ not in closed: 243 open_.append(next_) 244 245 def visit_forward(self): 246 self._visit_internal(_WalkMode.FORWARD) 247 248 def visit_reverse(self): 249 self._visit_internal(_WalkMode.REVERSE) 250 251 252class GraphBuilder(object): 253 """Builder that constructs a CFG from a given AST. 254 255 This GraphBuilder facilitates constructing the DAG that forms the CFG when 256 nodes 257 are supplied in lexical order (i.e., top-down, depth first). Under these 258 conditions, it supports building patterns found in typical structured 259 programs. 260 261 This builder ignores the flow generated by exceptions, which are assumed to 262 always be catastrophic and present purely for diagnostic purposes (e.g. to 263 print debug information). Statements like raise and try/catch sections are 264 allowed and will generate control flow edges, but ordinary statements are 265 assumed not to raise exceptions. 266 267 Finally sections are also correctly interleaved between break/continue/return 268 nodes and their subsequent statements. 269 270 Important concepts: 271 * nodes - nodes refer to CFG nodes; AST nodes are qualified explicitly 272 * leaf set - since the graph is constructed gradually, a leaf set maintains 273 the CFG nodes that will precede the node that the builder expects to 274 receive next; when an ordinary node is added, it is connected to the 275 existing leaves and it in turn becomes the new leaf 276 * jump nodes - nodes that should generate edges other than what 277 ordinary nodes would; these correspond to break, continue and return 278 statements 279 * sections - logical delimiters for subgraphs that require special 280 edges; there are various types of nodes, each admitting various 281 types of jump nodes; sections are identified by their corresponding AST 282 node 283 """ 284 285 # TODO(mdan): Perhaps detail this in a markdown doc. 286 # TODO(mdan): Add exception support. 287 288 def __init__(self, parent_ast_node): 289 self.reset() 290 self.parent = parent_ast_node 291 292 def reset(self): 293 """Resets the state of this factory.""" 294 self.head = None 295 self.errors = set() 296 self.node_index = {} 297 298 # TODO(mdan): Too many primitives. Use classes. 299 self.leaves = set() 300 301 # Note: This mechanism requires that nodes are added in lexical order (top 302 # to bottom, depth first). 303 self.active_stmts = set() 304 self.owners = {} # type: Set[any] 305 self.forward_edges = set() # type: Tuple[Node, Node] # (from, to) 306 307 self.finally_sections = {} 308 # Dict values represent (entry, exits) 309 self.finally_section_subgraphs = { 310 } # type: Dict[ast.AST, Tuple[Node, Set[Node]]] 311 # Whether the guard section can be reached from the statement that precedes 312 # it. 313 self.finally_section_has_direct_flow = {} 314 # Finally sections that await their first node. 315 self.pending_finally_sections = set() 316 317 # Exit jumps keyed by the section they affect. 318 self.exits = {} 319 320 # The entry of loop sections, keyed by the section. 321 self.section_entry = {} 322 # Continue jumps keyed by the section they affect. 323 self.continues = {} 324 325 # Raise jumps keyed by the except section guarding them. 326 self.raises = {} 327 328 # The entry of conditional sections, keyed by the section. 329 self.cond_entry = {} 330 # Lists of leaf nodes corresponding to each branch in the section. 331 self.cond_leaves = {} 332 333 def _connect_nodes(self, first, second): 334 """Connects nodes to signify that control flows from first to second. 335 336 Args: 337 first: Union[Set[Node, ...], Node] 338 second: Node 339 """ 340 if isinstance(first, Node): 341 first.next.add(second) 342 second.prev.add(first) 343 self.forward_edges.add((first, second)) 344 else: 345 for node in first: 346 self._connect_nodes(node, second) 347 348 def _add_new_node(self, ast_node): 349 """Grows the graph by adding a CFG node following the current leaves.""" 350 if ast_node is self.node_index: 351 raise ValueError('%s added twice' % ast_node) 352 # Assumption: All CFG nodes have identical life spans, because the graph 353 # owns them. Nodes should never be used outside the context of an existing 354 # graph. 355 node = Node(next_=set(), prev=weakref.WeakSet(), ast_node=ast_node) 356 self.node_index[ast_node] = node 357 self.owners[node] = frozenset(self.active_stmts) 358 359 if self.head is None: 360 self.head = node 361 362 for leaf in self.leaves: 363 self._connect_nodes(leaf, node) 364 365 # If any finally section awaits its first node, populate it. 366 for section_id in self.pending_finally_sections: 367 self.finally_section_subgraphs[section_id][0] = node 368 self.pending_finally_sections = set() 369 370 return node 371 372 def begin_statement(self, stmt): 373 """Marks the beginning of a statement. 374 375 Args: 376 stmt: Hashable, a key by which the statement can be identified in 377 the CFG's stmt_prev and stmt_next attributes 378 """ 379 self.active_stmts.add(stmt) 380 381 def end_statement(self, stmt): 382 """Marks the end of a statement. 383 384 Args: 385 stmt: Hashable, a key by which the statement can be identified in 386 the CFG's stmt_prev and stmt_next attributes; must match a key 387 previously passed to begin_statement. 388 """ 389 self.active_stmts.remove(stmt) 390 391 def add_ordinary_node(self, ast_node): 392 """Grows the graph by adding an ordinary CFG node. 393 394 Ordinary nodes are followed by the next node, in lexical order, that is, 395 they become the new leaf set. 396 397 Args: 398 ast_node: ast.AST 399 Returns: 400 Node 401 """ 402 node = self._add_new_node(ast_node) 403 self.leaves = set((node,)) 404 return node 405 406 def _add_jump_node(self, ast_node, guards): 407 """Grows the graph by adding a jump node. 408 409 Jump nodes are added to the current leaf set, and the leaf set becomes 410 empty. If the jump node is the last in a cond section, then it may be added 411 back to the leaf set by a separate mechanism. 412 413 Args: 414 ast_node: ast.AST 415 guards: Tuple[ast.AST, ...], the finally sections active for this node 416 Returns: 417 Node 418 """ 419 node = self._add_new_node(ast_node) 420 self.leaves = set() 421 # The guards themselves may not yet be complete, and will be wired later. 422 self.finally_sections[node] = guards 423 return node 424 425 def _connect_jump_to_finally_sections(self, node): 426 """Connects a jump node to the finally sections protecting it.""" 427 cursor = set((node,)) 428 if node not in self.finally_sections: 429 return cursor 430 for guard_section_id in self.finally_sections[node]: 431 guard_begin, guard_ends = self.finally_section_subgraphs[guard_section_id] 432 self._connect_nodes(cursor, guard_begin) 433 cursor = guard_ends 434 del self.finally_sections[node] 435 # TODO(mdan): Should garbage-collect finally_section_subgraphs. 436 return cursor 437 438 def add_exit_node(self, ast_node, section_id, guards): 439 """Grows the graph by adding an exit node. 440 441 This node becomes an exit for the current section. 442 443 Args: 444 ast_node: ast.AST 445 section_id: Hashable, the node for which ast_node should be considered 446 to be an exit node 447 guards: Tuple[ast.AST, ...], the finally sections that guard ast_node 448 Returns: 449 Node 450 """ 451 node = self._add_jump_node(ast_node, guards) 452 self.exits[section_id].add(node) 453 return node 454 455 def add_continue_node(self, ast_node, section_id, guards): 456 """Grows the graph by adding a reentry node. 457 458 This node causes control flow to go back to the loop section's entry. 459 460 Args: 461 ast_node: ast.AST 462 section_id: Hashable, the node for which ast_node should be considered 463 to be an exit node 464 guards: Tuple[ast.AST, ...], the finally sections that guard ast_node 465 """ 466 node = self._add_jump_node(ast_node, guards) 467 self.continues[section_id].add(node) 468 469 def connect_raise_node(self, node, except_guards): 470 """Adds extra connection between a raise node and containing except guards. 471 472 The node is a graph node, not an ast node. 473 474 Args: 475 node: Node 476 except_guards: Tuple[ast.AST, ...], the except sections that guard node 477 """ 478 for guard in except_guards: 479 if guard in self.raises: 480 self.raises[guard].append(node) 481 else: 482 self.raises[guard] = [node] 483 484 def enter_section(self, section_id): 485 """Enters a regular section. 486 487 Regular sections admit exit jumps, which end the section. 488 489 Args: 490 section_id: Hashable, the same node that will be used in calls to the 491 ast_node arg passed to add_exit_node 492 """ 493 assert section_id not in self.exits 494 self.exits[section_id] = set() 495 496 def exit_section(self, section_id): 497 """Exits a regular section.""" 498 499 # Exits are jump nodes, which may be protected. 500 for exit_ in self.exits[section_id]: 501 self.leaves |= self._connect_jump_to_finally_sections(exit_) 502 503 del self.exits[section_id] 504 505 def enter_loop_section(self, section_id, entry_node): 506 """Enters a loop section. 507 508 Loop sections define an entry node. The end of the section always flows back 509 to the entry node. These admit continue jump nodes which also flow to the 510 entry node. 511 512 Args: 513 section_id: Hashable, the same node that will be used in calls to the 514 ast_node arg passed to add_continue_node 515 entry_node: ast.AST, the entry node into the loop (e.g. the test node 516 for while loops) 517 """ 518 assert section_id not in self.section_entry 519 assert section_id not in self.continues 520 self.continues[section_id] = set() 521 node = self.add_ordinary_node(entry_node) 522 self.section_entry[section_id] = node 523 524 def exit_loop_section(self, section_id): 525 """Exits a loop section.""" 526 self._connect_nodes(self.leaves, self.section_entry[section_id]) 527 528 # continues are jump nodes, which may be protected. 529 for reentry in self.continues[section_id]: 530 guard_ends = self._connect_jump_to_finally_sections(reentry) 531 self._connect_nodes(guard_ends, self.section_entry[section_id]) 532 533 # Loop nodes always loop back. 534 self.leaves = set((self.section_entry[section_id],)) 535 536 del self.continues[section_id] 537 del self.section_entry[section_id] 538 539 def enter_cond_section(self, section_id): 540 """Enters a conditional section. 541 542 Conditional sections define an entry node, and one or more branches. 543 544 Args: 545 section_id: Hashable, the same node that will be used in calls to the 546 section_id arg passed to new_cond_branch 547 """ 548 549 assert section_id not in self.cond_entry 550 assert section_id not in self.cond_leaves 551 self.cond_leaves[section_id] = [] 552 553 def new_cond_branch(self, section_id): 554 """Begins a new branch in a cond section.""" 555 assert section_id in self.cond_leaves 556 557 if section_id in self.cond_entry: 558 # Subsequent splits move back to the split point, and memorize the 559 # current leaves. 560 self.cond_leaves[section_id].append(self.leaves) 561 self.leaves = self.cond_entry[section_id] 562 else: 563 # If this is the first time we split a section, just remember the split 564 # point. 565 self.cond_entry[section_id] = self.leaves 566 567 def exit_cond_section(self, section_id): 568 """Exits a conditional section.""" 569 for split in self.cond_leaves[section_id]: 570 self.leaves |= split 571 del self.cond_entry[section_id] 572 del self.cond_leaves[section_id] 573 574 def enter_except_section(self, section_id): 575 """Enters an except section.""" 576 if section_id in self.raises: 577 self.leaves.update(self.raises[section_id]) 578 579 def enter_finally_section(self, section_id): 580 """Enters a finally section.""" 581 # TODO(mdan): This, not the caller, should track the active sections. 582 self.finally_section_subgraphs[section_id] = [None, None] 583 if self.leaves: 584 self.finally_section_has_direct_flow[section_id] = True 585 else: 586 self.finally_section_has_direct_flow[section_id] = False 587 self.pending_finally_sections.add(section_id) 588 589 def exit_finally_section(self, section_id): 590 """Exits a finally section.""" 591 assert section_id not in self.pending_finally_sections, 'Empty finally?' 592 self.finally_section_subgraphs[section_id][1] = self.leaves 593 # If the guard can only be reached by a jump, then it will not flow 594 # into the statement that follows it. 595 if not self.finally_section_has_direct_flow[section_id]: 596 self.leaves = set() 597 del self.finally_section_has_direct_flow[section_id] 598 599 def build(self): 600 """Returns the CFG accumulated so far and resets the builder. 601 602 Returns: 603 Graph 604 """ 605 # Freeze the nodes. 606 for node in self.node_index.values(): 607 node.freeze() 608 609 # Build the statement edges. 610 stmt_next = {} 611 stmt_prev = {} 612 613 for node in self.node_index.values(): 614 for stmt in self.owners[node]: 615 if stmt not in stmt_prev: 616 stmt_prev[stmt] = set() 617 if stmt not in stmt_next: 618 stmt_next[stmt] = set() 619 620 for first, second in self.forward_edges: 621 stmts_exited = self.owners[first] - self.owners[second] 622 for stmt in stmts_exited: 623 stmt_next[stmt].add(second) 624 stmts_entered = self.owners[second] - self.owners[first] 625 for stmt in stmts_entered: 626 stmt_prev[stmt].add(first) 627 for stmt in stmt_next: 628 stmt_next[stmt] = frozenset(stmt_next[stmt]) 629 for stmt in stmt_prev: 630 stmt_prev[stmt] = frozenset(stmt_prev[stmt]) 631 632 # Construct the final graph object. 633 result = Graph( 634 entry=self.head, 635 exit=self.leaves, 636 error=self.errors, 637 index=self.node_index, 638 stmt_prev=stmt_prev, 639 stmt_next=stmt_next) 640 641 # Reset the state. 642 self.reset() 643 644 return result 645 646 647class AstToCfg(gast.NodeVisitor): 648 """Converts an AST to CFGs. 649 650 A separate CFG will be constructed for each function. 651 """ 652 653 def __init__(self): 654 super(AstToCfg, self).__init__() 655 656 self.builder_stack = [] 657 self.builder = None 658 self.cfgs = {} 659 660 self.lexical_scopes = [] 661 662 def _enter_lexical_scope(self, node): 663 self.lexical_scopes.append(node) 664 665 def _exit_lexical_scope(self, node): 666 leaving_node = self.lexical_scopes.pop() 667 assert node == leaving_node 668 669 def _get_enclosing_finally_scopes(self, stop_at): 670 included = [] 671 for node in reversed(self.lexical_scopes): 672 if isinstance(node, gast.Try) and node.finalbody: 673 included.append(node) 674 if isinstance(node, stop_at): 675 return node, included 676 return None, included 677 678 def _get_enclosing_except_scopes(self, stop_at): 679 included = [] 680 for node in reversed(self.lexical_scopes): 681 if isinstance(node, gast.Try) and node.handlers: 682 included.extend(node.handlers) 683 if isinstance(node, stop_at): 684 break 685 return included 686 687 def _process_basic_statement(self, node): 688 self.generic_visit(node) 689 self.builder.add_ordinary_node(node) 690 691 def _process_exit_statement( 692 self, node, exits_nodes_of_type, may_exit_via_except=False): 693 self.generic_visit(node) 694 # Note: this is safe because we process functions separately. 695 try_node, guards = self._get_enclosing_finally_scopes(exits_nodes_of_type) 696 assert try_node is not None, '{} that is not enclosed by any of {}'.format( 697 node, exits_nodes_of_type) 698 699 node = self.builder.add_exit_node(node, try_node, guards) 700 701 if may_exit_via_except: 702 except_guards = self._get_enclosing_except_scopes(exits_nodes_of_type) 703 self.builder.connect_raise_node(node, except_guards) 704 705 def _process_continue_statement(self, node, *loops_to_nodes_of_type): 706 # Note: this is safe because we process functions separately. 707 try_node, guards = self._get_enclosing_finally_scopes( 708 tuple(loops_to_nodes_of_type)) 709 if try_node is None: 710 raise ValueError('%s that is not enclosed by any of %s' % 711 (node, loops_to_nodes_of_type)) 712 self.builder.add_continue_node(node, try_node, guards) 713 714 def visit_ClassDef(self, node): 715 # We also keep the ClassDef node in the CFG, since it technically is a 716 # statement. 717 # For example, this is legal and allows executing user code: 718 # 719 # class Foo(bar()): 720 # pass 721 # 722 # It also has a scope: 723 # 724 # class Bar(object): 725 # a = 1 726 if self.builder is None: 727 self.generic_visit(node) 728 return 729 730 self.builder.add_ordinary_node(node) 731 732 self.builder_stack.append(self.builder) 733 self.builder = GraphBuilder(node) 734 self._enter_lexical_scope(node) 735 736 self._process_basic_statement(node) 737 738 self._exit_lexical_scope(node) 739 # TODO(mdan): Track the CFG local to the class definition as well? 740 self.builder = self.builder_stack.pop() 741 742 def _process_function_def(self, node, is_lambda): 743 # The function body is stored in a separate graph, because function 744 # definitions have effects very different from function calls. 745 if self.builder is not None: 746 self.builder.add_ordinary_node(node) 747 748 self.builder_stack.append(self.builder) 749 self.builder = GraphBuilder(node) 750 751 self._enter_lexical_scope(node) 752 self.builder.enter_section(node) 753 754 self._process_basic_statement(node.args) 755 if is_lambda: 756 self._process_exit_statement(node.body, (gast.Lambda,)) 757 else: 758 for stmt in node.body: 759 self.visit(stmt) 760 761 self.builder.exit_section(node) 762 self._exit_lexical_scope(node) 763 764 self.cfgs[node] = self.builder.build() 765 self.builder = self.builder_stack.pop() 766 767 def visit_FunctionDef(self, node): 768 self._process_function_def(node, is_lambda=False) 769 770 def visit_Lambda(self, node): 771 self._process_function_def(node, is_lambda=True) 772 773 def visit_Return(self, node): 774 self._process_exit_statement(node, (gast.FunctionDef,)) 775 776 def visit_Import(self, node): 777 self._process_basic_statement(node) 778 779 def visit_ImportFrom(self, node): 780 self._process_basic_statement(node) 781 782 def visit_Expr(self, node): 783 self._process_basic_statement(node) 784 785 def visit_Assign(self, node): 786 self._process_basic_statement(node) 787 788 def visit_AnnAssign(self, node): 789 self._process_basic_statement(node) 790 791 def visit_AugAssign(self, node): 792 self._process_basic_statement(node) 793 794 def visit_Pass(self, node): 795 self._process_basic_statement(node) 796 797 def visit_Global(self, node): 798 self._process_basic_statement(node) 799 800 def visit_Nonlocal(self, node): 801 self._process_basic_statement(node) 802 803 def visit_Print(self, node): 804 self._process_basic_statement(node) 805 806 def visit_Raise(self, node): 807 self._process_exit_statement( 808 node, (gast.FunctionDef,), may_exit_via_except=True) 809 self.builder.errors.add(node) 810 811 def visit_Assert(self, node): 812 # Ignoring the effect of exceptions. 813 self._process_basic_statement(node) 814 815 def visit_Delete(self, node): 816 self._process_basic_statement(node) 817 818 def visit_If(self, node): 819 # No need to track ifs as lexical scopes, for now. 820 # Lexical scopes are generally tracked in order to be able to resolve the 821 # targets of jump statements like break/continue/etc. Since there is no 822 # statement that can interrupt a conditional, we don't need to track their 823 # lexical scope. That may change in the future. 824 self.builder.begin_statement(node) 825 826 self.builder.enter_cond_section(node) 827 self._process_basic_statement(node.test) 828 829 self.builder.new_cond_branch(node) 830 for stmt in node.body: 831 self.visit(stmt) 832 833 self.builder.new_cond_branch(node) 834 for stmt in node.orelse: 835 self.visit(stmt) 836 837 self.builder.exit_cond_section(node) 838 self.builder.end_statement(node) 839 840 def visit_While(self, node): 841 self.builder.begin_statement(node) 842 self._enter_lexical_scope(node) 843 844 self.builder.enter_section(node) 845 846 self.generic_visit(node.test) 847 self.builder.enter_loop_section(node, node.test) 848 for stmt in node.body: 849 self.visit(stmt) 850 self.builder.exit_loop_section(node) 851 852 # Note: although the orelse is technically part of the loop node, 853 # the statements inside it don't affect the loop itself. For example, a 854 # break in the loop's orelse will not affect the loop itself. 855 self._exit_lexical_scope(node) 856 857 for stmt in node.orelse: 858 self.visit(stmt) 859 860 self.builder.exit_section(node) 861 self.builder.end_statement(node) 862 863 def visit_For(self, node): 864 self.builder.begin_statement(node) 865 self._enter_lexical_scope(node) 866 867 self.builder.enter_section(node) 868 869 # Note: Strictly speaking, this should be node.target + node.iter. 870 # However, the activity analysis accounts for this inconsistency, 871 # so dataflow analysis produces the correct values. 872 self.generic_visit(node.iter) 873 self.builder.enter_loop_section(node, node.iter) 874 # Also include the "extra loop test" annotation, to capture things like the 875 # control variable for return and break in for loops. 876 if anno.hasanno(node, anno.Basic.EXTRA_LOOP_TEST): 877 self._process_basic_statement( 878 anno.getanno(node, anno.Basic.EXTRA_LOOP_TEST)) 879 for stmt in node.body: 880 self.visit(stmt) 881 self.builder.exit_loop_section(node) 882 883 # Note: although the orelse is technically part of the loop node, 884 # they don't count as loop bodies. For example, a break in the loop's 885 # orelse will affect the parent loop, not the current one. 886 self._exit_lexical_scope(node) 887 888 for stmt in node.orelse: 889 self.visit(stmt) 890 891 self.builder.exit_section(node) 892 self.builder.end_statement(node) 893 894 def visit_Break(self, node): 895 self._process_exit_statement(node, (gast.While, gast.For,)) 896 897 def visit_Continue(self, node): 898 self._process_continue_statement(node, (gast.While, gast.For,)) 899 900 def visit_ExceptHandler(self, node): 901 self.builder.begin_statement(node) 902 self.builder.enter_except_section(node) 903 904 if node.type is not None: 905 self.visit(node.type) 906 if node.name is not None: 907 self.visit(node.name) 908 909 for stmt in node.body: 910 self.visit(stmt) 911 912 self.builder.end_statement(node) 913 914 def visit_Try(self, node): 915 self.builder.begin_statement(node) 916 self._enter_lexical_scope(node) 917 918 # Note: the current simplification is that the try block fully executes 919 # regardless of whether an exception triggers or not. This is consistent 920 # with blocks free of try/except, which also don't account for the 921 # possibility of an exception being raised mid-block. 922 923 for stmt in node.body: 924 self.visit(stmt) 925 # The orelse is an optional continuation of the body. 926 if node.orelse: 927 block_representative = node.orelse[0] 928 self.builder.enter_cond_section(block_representative) 929 self.builder.new_cond_branch(block_representative) 930 for stmt in node.orelse: 931 self.visit(stmt) 932 self.builder.new_cond_branch(block_representative) 933 self.builder.exit_cond_section(block_representative) 934 935 self._exit_lexical_scope(node) 936 937 if node.handlers: 938 # Using node would be inconsistent. Using the first handler node is also 939 # inconsistent, but less so. 940 block_representative = node.handlers[0] 941 self.builder.enter_cond_section(block_representative) 942 for block in node.handlers: 943 self.builder.new_cond_branch(block_representative) 944 self.visit(block) 945 self.builder.new_cond_branch(block_representative) 946 self.builder.exit_cond_section(block_representative) 947 948 if node.finalbody: 949 self.builder.enter_finally_section(node) 950 for stmt in node.finalbody: 951 self.visit(stmt) 952 self.builder.exit_finally_section(node) 953 954 self.builder.end_statement(node) 955 956 def visit_With(self, node): 957 # TODO(mdan): Mark the context manager's exit call as exit guard. 958 for item in node.items: 959 self._process_basic_statement(item) 960 for stmt in node.body: 961 self.visit(stmt) 962 963 964def build(node): 965 visitor = AstToCfg() 966 visitor.visit(node) 967 return visitor.cfgs 968