• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""A bottom-up tree matching algorithm implementation meant to speed
2up 2to3's matching process. After the tree patterns are reduced to
3their rarest linear path, a linear Aho-Corasick automaton is
4created. The linear automaton traverses the linear paths from the
5leaves to the root of the AST and returns a set of nodes for further
6matching. This reduces significantly the number of candidate nodes."""
7
8__author__ = "George Boutsioukis <gboutsioukis@gmail.com>"
9
10import logging
11import itertools
12from collections import defaultdict
13
14from . import pytree
15from .btm_utils import reduce_tree
16
17class BMNode(object):
18    """Class for a node of the Aho-Corasick automaton used in matching"""
19    count = itertools.count()
20    def __init__(self):
21        self.transition_table = {}
22        self.fixers = []
23        self.id = next(BMNode.count)
24        self.content = ''
25
26class BottomMatcher(object):
27    """The main matcher class. After instantiating the patterns should
28    be added using the add_fixer method"""
29
30    def __init__(self):
31        self.match = set()
32        self.root = BMNode()
33        self.nodes = [self.root]
34        self.fixers = []
35        self.logger = logging.getLogger("RefactoringTool")
36
37    def add_fixer(self, fixer):
38        """Reduces a fixer's pattern tree to a linear path and adds it
39        to the matcher(a common Aho-Corasick automaton). The fixer is
40        appended on the matching states and called when they are
41        reached"""
42        self.fixers.append(fixer)
43        tree = reduce_tree(fixer.pattern_tree)
44        linear = tree.get_linear_subpattern()
45        match_nodes = self.add(linear, start=self.root)
46        for match_node in match_nodes:
47            match_node.fixers.append(fixer)
48
49    def add(self, pattern, start):
50        "Recursively adds a linear pattern to the AC automaton"
51        #print("adding pattern", pattern, "to", start)
52        if not pattern:
53            #print("empty pattern")
54            return [start]
55        if isinstance(pattern[0], tuple):
56            #alternatives
57            #print("alternatives")
58            match_nodes = []
59            for alternative in pattern[0]:
60                #add all alternatives, and add the rest of the pattern
61                #to each end node
62                end_nodes = self.add(alternative, start=start)
63                for end in end_nodes:
64                    match_nodes.extend(self.add(pattern[1:], end))
65            return match_nodes
66        else:
67            #single token
68            #not last
69            if pattern[0] not in start.transition_table:
70                #transition did not exist, create new
71                next_node = BMNode()
72                start.transition_table[pattern[0]] = next_node
73            else:
74                #transition exists already, follow
75                next_node = start.transition_table[pattern[0]]
76
77            if pattern[1:]:
78                end_nodes = self.add(pattern[1:], start=next_node)
79            else:
80                end_nodes = [next_node]
81            return end_nodes
82
83    def run(self, leaves):
84        """The main interface with the bottom matcher. The tree is
85        traversed from the bottom using the constructed
86        automaton. Nodes are only checked once as the tree is
87        retraversed. When the automaton fails, we give it one more
88        shot(in case the above tree matches as a whole with the
89        rejected leaf), then we break for the next leaf. There is the
90        special case of multiple arguments(see code comments) where we
91        recheck the nodes
92
93        Args:
94           The leaves of the AST tree to be matched
95
96        Returns:
97           A dictionary of node matches with fixers as the keys
98        """
99        current_ac_node = self.root
100        results = defaultdict(list)
101        for leaf in leaves:
102            current_ast_node = leaf
103            while current_ast_node:
104                current_ast_node.was_checked = True
105                for child in current_ast_node.children:
106                    # multiple statements, recheck
107                    if isinstance(child, pytree.Leaf) and child.value == u";":
108                        current_ast_node.was_checked = False
109                        break
110                if current_ast_node.type == 1:
111                    #name
112                    node_token = current_ast_node.value
113                else:
114                    node_token = current_ast_node.type
115
116                if node_token in current_ac_node.transition_table:
117                    #token matches
118                    current_ac_node = current_ac_node.transition_table[node_token]
119                    for fixer in current_ac_node.fixers:
120                        if not fixer in results:
121                            results[fixer] = []
122                        results[fixer].append(current_ast_node)
123
124                else:
125                    #matching failed, reset automaton
126                    current_ac_node = self.root
127                    if (current_ast_node.parent is not None
128                        and current_ast_node.parent.was_checked):
129                        #the rest of the tree upwards has been checked, next leaf
130                        break
131
132                    #recheck the rejected node once from the root
133                    if node_token in current_ac_node.transition_table:
134                        #token matches
135                        current_ac_node = current_ac_node.transition_table[node_token]
136                        for fixer in current_ac_node.fixers:
137                            if not fixer in results.keys():
138                                results[fixer] = []
139                            results[fixer].append(current_ast_node)
140
141                current_ast_node = current_ast_node.parent
142        return results
143
144    def print_ac(self):
145        "Prints a graphviz diagram of the BM automaton(for debugging)"
146        print("digraph g{")
147        def print_node(node):
148            for subnode_key in node.transition_table.keys():
149                subnode = node.transition_table[subnode_key]
150                print("%d -> %d [label=%s] //%s" %
151                      (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))
152                if subnode_key == 1:
153                    print(subnode.content)
154                print_node(subnode)
155        print_node(self.root)
156        print("}")
157
158# taken from pytree.py for debugging; only used by print_ac
159_type_reprs = {}
160def type_repr(type_num):
161    global _type_reprs
162    if not _type_reprs:
163        from .pygram import python_symbols
164        # printing tokens is possible but not as useful
165        # from .pgen2 import token // token.__dict__.items():
166        for name, val in python_symbols.__dict__.items():
167            if type(val) == int: _type_reprs[val] = name
168    return _type_reprs.setdefault(type_num, type_num)
169