1"""A bottom-up tree matching algorithm implementation meant to speed 2up 2to3's matching process. After the tree patterns are reduced to 3their rarest linear path, a linear Aho-Corasick automaton is 4created. The linear automaton traverses the linear paths from the 5leaves to the root of the AST and returns a set of nodes for further 6matching. This reduces significantly the number of candidate nodes.""" 7 8__author__ = "George Boutsioukis <gboutsioukis@gmail.com>" 9 10import logging 11import itertools 12from collections import defaultdict 13 14from . import pytree 15from .btm_utils import reduce_tree 16 17class BMNode(object): 18 """Class for a node of the Aho-Corasick automaton used in matching""" 19 count = itertools.count() 20 def __init__(self): 21 self.transition_table = {} 22 self.fixers = [] 23 self.id = next(BMNode.count) 24 self.content = '' 25 26class BottomMatcher(object): 27 """The main matcher class. After instantiating the patterns should 28 be added using the add_fixer method""" 29 30 def __init__(self): 31 self.match = set() 32 self.root = BMNode() 33 self.nodes = [self.root] 34 self.fixers = [] 35 self.logger = logging.getLogger("RefactoringTool") 36 37 def add_fixer(self, fixer): 38 """Reduces a fixer's pattern tree to a linear path and adds it 39 to the matcher(a common Aho-Corasick automaton). The fixer is 40 appended on the matching states and called when they are 41 reached""" 42 self.fixers.append(fixer) 43 tree = reduce_tree(fixer.pattern_tree) 44 linear = tree.get_linear_subpattern() 45 match_nodes = self.add(linear, start=self.root) 46 for match_node in match_nodes: 47 match_node.fixers.append(fixer) 48 49 def add(self, pattern, start): 50 "Recursively adds a linear pattern to the AC automaton" 51 #print("adding pattern", pattern, "to", start) 52 if not pattern: 53 #print("empty pattern") 54 return [start] 55 if isinstance(pattern[0], tuple): 56 #alternatives 57 #print("alternatives") 58 match_nodes = [] 59 for alternative in pattern[0]: 60 #add all alternatives, and add the rest of the pattern 61 #to each end node 62 end_nodes = self.add(alternative, start=start) 63 for end in end_nodes: 64 match_nodes.extend(self.add(pattern[1:], end)) 65 return match_nodes 66 else: 67 #single token 68 #not last 69 if pattern[0] not in start.transition_table: 70 #transition did not exist, create new 71 next_node = BMNode() 72 start.transition_table[pattern[0]] = next_node 73 else: 74 #transition exists already, follow 75 next_node = start.transition_table[pattern[0]] 76 77 if pattern[1:]: 78 end_nodes = self.add(pattern[1:], start=next_node) 79 else: 80 end_nodes = [next_node] 81 return end_nodes 82 83 def run(self, leaves): 84 """The main interface with the bottom matcher. The tree is 85 traversed from the bottom using the constructed 86 automaton. Nodes are only checked once as the tree is 87 retraversed. When the automaton fails, we give it one more 88 shot(in case the above tree matches as a whole with the 89 rejected leaf), then we break for the next leaf. There is the 90 special case of multiple arguments(see code comments) where we 91 recheck the nodes 92 93 Args: 94 The leaves of the AST tree to be matched 95 96 Returns: 97 A dictionary of node matches with fixers as the keys 98 """ 99 current_ac_node = self.root 100 results = defaultdict(list) 101 for leaf in leaves: 102 current_ast_node = leaf 103 while current_ast_node: 104 current_ast_node.was_checked = True 105 for child in current_ast_node.children: 106 # multiple statements, recheck 107 if isinstance(child, pytree.Leaf) and child.value == ";": 108 current_ast_node.was_checked = False 109 break 110 if current_ast_node.type == 1: 111 #name 112 node_token = current_ast_node.value 113 else: 114 node_token = current_ast_node.type 115 116 if node_token in current_ac_node.transition_table: 117 #token matches 118 current_ac_node = current_ac_node.transition_table[node_token] 119 for fixer in current_ac_node.fixers: 120 results[fixer].append(current_ast_node) 121 else: 122 #matching failed, reset automaton 123 current_ac_node = self.root 124 if (current_ast_node.parent is not None 125 and current_ast_node.parent.was_checked): 126 #the rest of the tree upwards has been checked, next leaf 127 break 128 129 #recheck the rejected node once from the root 130 if node_token in current_ac_node.transition_table: 131 #token matches 132 current_ac_node = current_ac_node.transition_table[node_token] 133 for fixer in current_ac_node.fixers: 134 results[fixer].append(current_ast_node) 135 136 current_ast_node = current_ast_node.parent 137 return results 138 139 def print_ac(self): 140 "Prints a graphviz diagram of the BM automaton(for debugging)" 141 print("digraph g{") 142 def print_node(node): 143 for subnode_key in node.transition_table.keys(): 144 subnode = node.transition_table[subnode_key] 145 print("%d -> %d [label=%s] //%s" % 146 (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) 147 if subnode_key == 1: 148 print(subnode.content) 149 print_node(subnode) 150 print_node(self.root) 151 print("}") 152 153# taken from pytree.py for debugging; only used by print_ac 154_type_reprs = {} 155def type_repr(type_num): 156 global _type_reprs 157 if not _type_reprs: 158 from .pygram import python_symbols 159 # printing tokens is possible but not as useful 160 # from .pgen2 import token // token.__dict__.items(): 161 for name, val in python_symbols.__dict__.items(): 162 if type(val) == int: _type_reprs[val] = name 163 return _type_reprs.setdefault(type_num, type_num) 164