• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"Utility functions used by the btm_matcher module"
2
3from . import pytree
4from .pgen2 import grammar, token
5from .pygram import pattern_symbols, python_symbols
6
7syms = pattern_symbols
8pysyms = python_symbols
9tokens = grammar.opmap
10token_labels = token
11
12TYPE_ANY = -1
13TYPE_ALTERNATIVES = -2
14TYPE_GROUP = -3
15
16class MinNode(object):
17    """This class serves as an intermediate representation of the
18    pattern tree during the conversion to sets of leaf-to-root
19    subpatterns"""
20
21    def __init__(self, type=None, name=None):
22        self.type = type
23        self.name = name
24        self.children = []
25        self.leaf = False
26        self.parent = None
27        self.alternatives = []
28        self.group = []
29
30    def __repr__(self):
31        return str(self.type) + ' ' + str(self.name)
32
33    def leaf_to_root(self):
34        """Internal method. Returns a characteristic path of the
35        pattern tree. This method must be run for all leaves until the
36        linear subpatterns are merged into a single"""
37        node = self
38        subp = []
39        while node:
40            if node.type == TYPE_ALTERNATIVES:
41                node.alternatives.append(subp)
42                if len(node.alternatives) == len(node.children):
43                    #last alternative
44                    subp = [tuple(node.alternatives)]
45                    node.alternatives = []
46                    node = node.parent
47                    continue
48                else:
49                    node = node.parent
50                    subp = None
51                    break
52
53            if node.type == TYPE_GROUP:
54                node.group.append(subp)
55                #probably should check the number of leaves
56                if len(node.group) == len(node.children):
57                    subp = get_characteristic_subpattern(node.group)
58                    node.group = []
59                    node = node.parent
60                    continue
61                else:
62                    node = node.parent
63                    subp = None
64                    break
65
66            if node.type == token_labels.NAME and node.name:
67                #in case of type=name, use the name instead
68                subp.append(node.name)
69            else:
70                subp.append(node.type)
71
72            node = node.parent
73        return subp
74
75    def get_linear_subpattern(self):
76        """Drives the leaf_to_root method. The reason that
77        leaf_to_root must be run multiple times is because we need to
78        reject 'group' matches; for example the alternative form
79        (a | b c) creates a group [b c] that needs to be matched. Since
80        matching multiple linear patterns overcomes the automaton's
81        capabilities, leaf_to_root merges each group into a single
82        choice based on 'characteristic'ity,
83
84        i.e. (a|b c) -> (a|b) if b more characteristic than c
85
86        Returns: The most 'characteristic'(as defined by
87          get_characteristic_subpattern) path for the compiled pattern
88          tree.
89        """
90
91        for l in self.leaves():
92            subp = l.leaf_to_root()
93            if subp:
94                return subp
95
96    def leaves(self):
97        "Generator that returns the leaves of the tree"
98        for child in self.children:
99            for x in child.leaves():
100                yield x
101        if not self.children:
102            yield self
103
104def reduce_tree(node, parent=None):
105    """
106    Internal function. Reduces a compiled pattern tree to an
107    intermediate representation suitable for feeding the
108    automaton. This also trims off any optional pattern elements(like
109    [a], a*).
110    """
111
112    new_node = None
113    #switch on the node type
114    if node.type == syms.Matcher:
115        #skip
116        node = node.children[0]
117
118    if node.type == syms.Alternatives  :
119        #2 cases
120        if len(node.children) <= 2:
121            #just a single 'Alternative', skip this node
122            new_node = reduce_tree(node.children[0], parent)
123        else:
124            #real alternatives
125            new_node = MinNode(type=TYPE_ALTERNATIVES)
126            #skip odd children('|' tokens)
127            for child in node.children:
128                if node.children.index(child)%2:
129                    continue
130                reduced = reduce_tree(child, new_node)
131                if reduced is not None:
132                    new_node.children.append(reduced)
133    elif node.type == syms.Alternative:
134        if len(node.children) > 1:
135
136            new_node = MinNode(type=TYPE_GROUP)
137            for child in node.children:
138                reduced = reduce_tree(child, new_node)
139                if reduced:
140                    new_node.children.append(reduced)
141            if not new_node.children:
142                # delete the group if all of the children were reduced to None
143                new_node = None
144
145        else:
146            new_node = reduce_tree(node.children[0], parent)
147
148    elif node.type == syms.Unit:
149        if (isinstance(node.children[0], pytree.Leaf) and
150            node.children[0].value == '('):
151            #skip parentheses
152            return reduce_tree(node.children[1], parent)
153        if ((isinstance(node.children[0], pytree.Leaf) and
154               node.children[0].value == '[')
155               or
156               (len(node.children)>1 and
157               hasattr(node.children[1], "value") and
158               node.children[1].value == '[')):
159            #skip whole unit if its optional
160            return None
161
162        leaf = True
163        details_node = None
164        alternatives_node = None
165        has_repeater = False
166        repeater_node = None
167        has_variable_name = False
168
169        for child in node.children:
170            if child.type == syms.Details:
171                leaf = False
172                details_node = child
173            elif child.type == syms.Repeater:
174                has_repeater = True
175                repeater_node = child
176            elif child.type == syms.Alternatives:
177                alternatives_node = child
178            if hasattr(child, 'value') and child.value == '=': # variable name
179                has_variable_name = True
180
181        #skip variable name
182        if has_variable_name:
183            #skip variable name, '='
184            name_leaf = node.children[2]
185            if hasattr(name_leaf, 'value') and name_leaf.value == '(':
186                # skip parenthesis
187                name_leaf = node.children[3]
188        else:
189            name_leaf = node.children[0]
190
191        #set node type
192        if name_leaf.type == token_labels.NAME:
193            #(python) non-name or wildcard
194            if name_leaf.value == 'any':
195                new_node = MinNode(type=TYPE_ANY)
196            else:
197                if hasattr(token_labels, name_leaf.value):
198                    new_node = MinNode(type=getattr(token_labels, name_leaf.value))
199                else:
200                    new_node = MinNode(type=getattr(pysyms, name_leaf.value))
201
202        elif name_leaf.type == token_labels.STRING:
203            #(python) name or character; remove the apostrophes from
204            #the string value
205            name = name_leaf.value.strip("'")
206            if name in tokens:
207                new_node = MinNode(type=tokens[name])
208            else:
209                new_node = MinNode(type=token_labels.NAME, name=name)
210        elif name_leaf.type == syms.Alternatives:
211            new_node = reduce_tree(alternatives_node, parent)
212
213        #handle repeaters
214        if has_repeater:
215            if repeater_node.children[0].value == '*':
216                #reduce to None
217                new_node = None
218            elif repeater_node.children[0].value == '+':
219                #reduce to a single occurrence i.e. do nothing
220                pass
221            else:
222                #TODO: handle {min, max} repeaters
223                raise NotImplementedError
224                pass
225
226        #add children
227        if details_node and new_node is not None:
228            for child in details_node.children[1:-1]:
229                #skip '<', '>' markers
230                reduced = reduce_tree(child, new_node)
231                if reduced is not None:
232                    new_node.children.append(reduced)
233    if new_node:
234        new_node.parent = parent
235    return new_node
236
237
238def get_characteristic_subpattern(subpatterns):
239    """Picks the most characteristic from a list of linear patterns
240    Current order used is:
241    names > common_names > common_chars
242    """
243    if not isinstance(subpatterns, list):
244        return subpatterns
245    if len(subpatterns)==1:
246        return subpatterns[0]
247
248    # first pick out the ones containing variable names
249    subpatterns_with_names = []
250    subpatterns_with_common_names = []
251    common_names = ['in', 'for', 'if' , 'not', 'None']
252    subpatterns_with_common_chars = []
253    common_chars = "[]().,:"
254    for subpattern in subpatterns:
255        if any(rec_test(subpattern, lambda x: type(x) is str)):
256            if any(rec_test(subpattern,
257                            lambda x: isinstance(x, str) and x in common_chars)):
258                subpatterns_with_common_chars.append(subpattern)
259            elif any(rec_test(subpattern,
260                              lambda x: isinstance(x, str) and x in common_names)):
261                subpatterns_with_common_names.append(subpattern)
262
263            else:
264                subpatterns_with_names.append(subpattern)
265
266    if subpatterns_with_names:
267        subpatterns = subpatterns_with_names
268    elif subpatterns_with_common_names:
269        subpatterns = subpatterns_with_common_names
270    elif subpatterns_with_common_chars:
271        subpatterns = subpatterns_with_common_chars
272    # of the remaining subpatterns pick out the longest one
273    return max(subpatterns, key=len)
274
275def rec_test(sequence, test_func):
276    """Tests test_func on all items of sequence and items of included
277    sub-iterables"""
278    for x in sequence:
279        if isinstance(x, (list, tuple)):
280            for y in rec_test(x, test_func):
281                yield y
282        else:
283            yield test_func(x)
284