1"""A bottom-up tree matching algorithm implementation meant to speed 2up 2to3's matching process. After the tree patterns are reduced to 3their rarest linear path, a linear Aho-Corasick automaton is 4created. The linear automaton traverses the linear paths from the 5leaves to the root of the AST and returns a set of nodes for further 6matching. This reduces significantly the number of candidate nodes.""" 7 8__author__ = "George Boutsioukis <gboutsioukis@gmail.com>" 9 10import logging 11import itertools 12from collections import defaultdict 13 14from . import pytree 15from .btm_utils import reduce_tree 16 17class BMNode(object): 18 """Class for a node of the Aho-Corasick automaton used in matching""" 19 count = itertools.count() 20 def __init__(self): 21 self.transition_table = {} 22 self.fixers = [] 23 self.id = next(BMNode.count) 24 self.content = '' 25 26class BottomMatcher(object): 27 """The main matcher class. After instantiating the patterns should 28 be added using the add_fixer method""" 29 30 def __init__(self): 31 self.match = set() 32 self.root = BMNode() 33 self.nodes = [self.root] 34 self.fixers = [] 35 self.logger = logging.getLogger("RefactoringTool") 36 37 def add_fixer(self, fixer): 38 """Reduces a fixer's pattern tree to a linear path and adds it 39 to the matcher(a common Aho-Corasick automaton). The fixer is 40 appended on the matching states and called when they are 41 reached""" 42 self.fixers.append(fixer) 43 tree = reduce_tree(fixer.pattern_tree) 44 linear = tree.get_linear_subpattern() 45 match_nodes = self.add(linear, start=self.root) 46 for match_node in match_nodes: 47 match_node.fixers.append(fixer) 48 49 def add(self, pattern, start): 50 "Recursively adds a linear pattern to the AC automaton" 51 #print("adding pattern", pattern, "to", start) 52 if not pattern: 53 #print("empty pattern") 54 return [start] 55 if isinstance(pattern[0], tuple): 56 #alternatives 57 #print("alternatives") 58 match_nodes = [] 59 for alternative in pattern[0]: 60 #add all alternatives, and add the rest of the pattern 61 #to each end node 62 end_nodes = self.add(alternative, start=start) 63 for end in end_nodes: 64 match_nodes.extend(self.add(pattern[1:], end)) 65 return match_nodes 66 else: 67 #single token 68 #not last 69 if pattern[0] not in start.transition_table: 70 #transition did not exist, create new 71 next_node = BMNode() 72 start.transition_table[pattern[0]] = next_node 73 else: 74 #transition exists already, follow 75 next_node = start.transition_table[pattern[0]] 76 77 if pattern[1:]: 78 end_nodes = self.add(pattern[1:], start=next_node) 79 else: 80 end_nodes = [next_node] 81 return end_nodes 82 83 def run(self, leaves): 84 """The main interface with the bottom matcher. The tree is 85 traversed from the bottom using the constructed 86 automaton. Nodes are only checked once as the tree is 87 retraversed. When the automaton fails, we give it one more 88 shot(in case the above tree matches as a whole with the 89 rejected leaf), then we break for the next leaf. There is the 90 special case of multiple arguments(see code comments) where we 91 recheck the nodes 92 93 Args: 94 The leaves of the AST tree to be matched 95 96 Returns: 97 A dictionary of node matches with fixers as the keys 98 """ 99 current_ac_node = self.root 100 results = defaultdict(list) 101 for leaf in leaves: 102 current_ast_node = leaf 103 while current_ast_node: 104 current_ast_node.was_checked = True 105 for child in current_ast_node.children: 106 # multiple statements, recheck 107 if isinstance(child, pytree.Leaf) and child.value == u";": 108 current_ast_node.was_checked = False 109 break 110 if current_ast_node.type == 1: 111 #name 112 node_token = current_ast_node.value 113 else: 114 node_token = current_ast_node.type 115 116 if node_token in current_ac_node.transition_table: 117 #token matches 118 current_ac_node = current_ac_node.transition_table[node_token] 119 for fixer in current_ac_node.fixers: 120 if not fixer in results: 121 results[fixer] = [] 122 results[fixer].append(current_ast_node) 123 124 else: 125 #matching failed, reset automaton 126 current_ac_node = self.root 127 if (current_ast_node.parent is not None 128 and current_ast_node.parent.was_checked): 129 #the rest of the tree upwards has been checked, next leaf 130 break 131 132 #recheck the rejected node once from the root 133 if node_token in current_ac_node.transition_table: 134 #token matches 135 current_ac_node = current_ac_node.transition_table[node_token] 136 for fixer in current_ac_node.fixers: 137 if not fixer in results.keys(): 138 results[fixer] = [] 139 results[fixer].append(current_ast_node) 140 141 current_ast_node = current_ast_node.parent 142 return results 143 144 def print_ac(self): 145 "Prints a graphviz diagram of the BM automaton(for debugging)" 146 print("digraph g{") 147 def print_node(node): 148 for subnode_key in node.transition_table.keys(): 149 subnode = node.transition_table[subnode_key] 150 print("%d -> %d [label=%s] //%s" % 151 (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers))) 152 if subnode_key == 1: 153 print(subnode.content) 154 print_node(subnode) 155 print_node(self.root) 156 print("}") 157 158# taken from pytree.py for debugging; only used by print_ac 159_type_reprs = {} 160def type_repr(type_num): 161 global _type_reprs 162 if not _type_reprs: 163 from .pygram import python_symbols 164 # printing tokens is possible but not as useful 165 # from .pgen2 import token // token.__dict__.items(): 166 for name, val in python_symbols.__dict__.items(): 167 if type(val) == int: _type_reprs[val] = name 168 return _type_reprs.setdefault(type_num, type_num) 169