• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1"""Classes representing state-machine concepts"""
2
3class NFA:
4    """A non deterministic finite automata
5
6    A non deterministic automata is a form of a finite state
7    machine. An NFA's rules are less restrictive than a DFA.
8    The NFA rules are:
9
10      * A transition can be non-deterministic and can result in
11        nothing, one, or two or more states.
12
13      * An epsilon transition consuming empty input is valid.
14        Transitions consuming labeled symbols are also permitted.
15
16    This class assumes that there is only one starting state and one
17    accepting (ending) state.
18
19    Attributes:
20        name (str): The name of the rule the NFA is representing.
21        start (NFAState): The starting state.
22        end (NFAState): The ending state
23    """
24
25    def __init__(self, start, end):
26        self.name = start.rule_name
27        self.start = start
28        self.end = end
29
30    def __repr__(self):
31        return "NFA(start={}, end={})".format(self.start, self.end)
32
33    def dump(self, writer=print):
34        """Dump a graphical representation of the NFA"""
35        todo = [self.start]
36        for i, state in enumerate(todo):
37            writer("  State", i, state is self.end and "(final)" or "")
38            for arc in state.arcs:
39                label = arc.label
40                next = arc.target
41                if next in todo:
42                    j = todo.index(next)
43                else:
44                    j = len(todo)
45                    todo.append(next)
46                if label is None:
47                    writer("    -> %d" % j)
48                else:
49                    writer("    %s -> %d" % (label, j))
50
51    def dump_graph(self, writer):
52        """Dump a DOT representation of the NFA"""
53        writer('digraph %s_nfa {\n' % self.name)
54        todo = [self.start]
55        for i, state in enumerate(todo):
56            writer(' %d [label="State %d %s"];\n' % (i, i, state is self.end and "(final)" or ""))
57            for arc in state.arcs:
58                label = arc.label
59                next = arc.target
60                if next in todo:
61                    j = todo.index(next)
62                else:
63                    j = len(todo)
64                    todo.append(next)
65                if label is None:
66                    writer(" %d -> %d [style=dotted label=ε];\n" % (i, j))
67                else:
68                    writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"')))
69        writer('}\n')
70
71
72class NFAArc:
73    """An arc representing a transition between two NFA states.
74
75    NFA states can be connected via two ways:
76
77        * A label transition: An input equal to the label must
78          be consumed to perform the transition.
79        * An epsilon transition: The transition can be taken without
80          consuming any input symbol.
81
82        Attributes:
83            target (NFAState): The ending state of the transition arc.
84            label (Optional[str]): The label that must be consumed to make
85                the transition. An epsilon transition is represented
86                using `None`.
87    """
88
89    def __init__(self, target, label):
90        self.target = target
91        self.label = label
92
93    def __repr__(self):
94        return "<%s: %s>" % (self.__class__.__name__, self.label)
95
96
97class NFAState:
98    """A state of a NFA, non deterministic finite automata.
99
100    Attributes:
101        target (rule_name): The name of the rule used to represent the NFA's
102            ending state after a transition.
103        arcs (Dict[Optional[str], NFAState]): A mapping representing transitions
104            between the current NFA state and another NFA state via following
105            a label.
106    """
107
108    def __init__(self, rule_name):
109        self.rule_name = rule_name
110        self.arcs = []
111
112    def add_arc(self, target, label=None):
113        """Add a new arc to connect the state to a target state within the NFA
114
115        The method adds a new arc to the list of arcs available as transitions
116        from the present state. An optional label indicates a named transition
117        that consumes an input while the absence of a label represents an epsilon
118        transition.
119
120        Attributes:
121            target (NFAState): The end of the transition that the arc represents.
122            label (Optional[str]): The label that must be consumed for making
123                the transition. If the label is not provided the transition is assumed
124                to be an epsilon-transition.
125        """
126        assert label is None or isinstance(label, str)
127        assert isinstance(target, NFAState)
128        self.arcs.append(NFAArc(target, label))
129
130    def __repr__(self):
131        return "<%s: from %s>" % (self.__class__.__name__, self.rule_name)
132
133
134class DFA:
135    """A deterministic finite automata
136
137    A deterministic finite automata is a form of a finite state machine
138    that obeys the following rules:
139
140       * Each of the transitions is uniquely determined by
141         the source state and input symbol
142       * Reading an input symbol is required for each state
143         transition (no epsilon transitions).
144
145    The finite-state machine will accept or reject a string of symbols
146    and only produces a unique computation of the automaton for each input
147    string. The DFA must have a unique starting state (represented as the first
148    element in the list of states) but can have multiple final states.
149
150    Attributes:
151        name (str): The name of the rule the DFA is representing.
152        states (List[DFAState]): A collection of DFA states.
153    """
154
155    def __init__(self, name, states):
156        self.name = name
157        self.states = states
158
159    @classmethod
160    def from_nfa(cls, nfa):
161        """Constructs a DFA from a NFA using the Rabin–Scott construction algorithm.
162
163        To simulate the operation of a DFA on a given input string, it's
164        necessary to keep track of a single state at any time, or more precisely,
165        the state that the automaton will reach after seeing a prefix of the
166        input. In contrast, to simulate an NFA, it's necessary to keep track of
167        a set of states: all of the states that the automaton could reach after
168        seeing the same prefix of the input, according to the nondeterministic
169        choices made by the automaton. There are two possible sources of
170        non-determinism:
171
172        1) Multiple (one or more) transitions with the same label
173
174                         'A'     +-------+
175                    +----------->+ State +----------->+
176                    |            |   2   |
177            +-------+            +-------+
178            | State |
179            |   1   |            +-------+
180            +-------+            | State |
181                    +----------->+   3   +----------->+
182                         'A'     +-------+
183
184        2) Epsilon transitions (transitions that can be taken without consuming any input)
185
186            +-------+            +-------+
187            | State |     ε      | State |
188            |   1   +----------->+   2   +----------->+
189            +-------+            +-------+
190
191        Looking at the first case above, we can't determine which transition should be
192        followed when given an input A. We could choose whether or not to follow the
193        transition while in the second case the problem is that we can choose both to
194        follow the transition or not doing it. To solve this problem we can imagine that
195        we follow all possibilities at the same time and we construct new states from the
196        set of all possible reachable states. For every case in the previous example:
197
198
199        1) For multiple transitions with the same label we colapse all of the
200           final states under the same one
201
202            +-------+            +-------+
203            | State |     'A'    | State |
204            |   1   +----------->+  2-3  +----------->+
205            +-------+            +-------+
206
207        2) For epsilon transitions we collapse all epsilon-reachable states
208           into the same one
209
210            +-------+
211            | State |
212            |  1-2  +----------->
213            +-------+
214
215        Because the DFA states consist of sets of NFA states, an n-state NFA
216        may be converted to a DFA with at most 2**n states. Notice that the
217        constructed DFA is not minimal and can be simplified or reduced
218        afterwards.
219
220        Parameters:
221            name (NFA): The NFA to transform to DFA.
222        """
223        assert isinstance(nfa, NFA)
224
225        def add_closure(nfa_state, base_nfa_set):
226            """Calculate the epsilon-closure of a given state
227
228            Add to the *base_nfa_set* all the states that are
229            reachable from *nfa_state* via epsilon-transitions.
230            """
231            assert isinstance(nfa_state, NFAState)
232            if nfa_state in base_nfa_set:
233                return
234            base_nfa_set.add(nfa_state)
235            for nfa_arc in nfa_state.arcs:
236                if nfa_arc.label is None:
237                    add_closure(nfa_arc.target, base_nfa_set)
238
239        # Calculate the epsilon-closure of the starting state
240        base_nfa_set = set()
241        add_closure(nfa.start, base_nfa_set)
242
243        # Start by visiting the NFA starting state (there is only one).
244        states = [DFAState(nfa.name, base_nfa_set, nfa.end)]
245
246        for state in states:  # NB states grow while we're iterating
247
248            # Find transitions from the current state to other reachable states
249            # and store them in mapping that correlates the label to all the
250            # possible reachable states that can be obtained by consuming a
251            # token equal to the label. Each set of all the states that can
252            # be reached after following a label will be the a DFA state.
253            arcs = {}
254            for nfa_state in state.nfa_set:
255                for nfa_arc in nfa_state.arcs:
256                    if nfa_arc.label is not None:
257                        nfa_set = arcs.setdefault(nfa_arc.label, set())
258                        # All states that can be reached by epsilon-transitions
259                        # are also included in the set of reachable states.
260                        add_closure(nfa_arc.target, nfa_set)
261
262            # Now create new DFAs by visiting all posible transitions between
263            # the current DFA state and the new power-set states (each nfa_set)
264            # via the different labels. As the nodes are appended to *states* this
265            # is performing a breadth-first search traversal over the power-set of
266            # the states of the original NFA.
267            for label, nfa_set in sorted(arcs.items()):
268                for exisisting_state in states:
269                    if exisisting_state.nfa_set == nfa_set:
270                        # The DFA state already exists for this rule.
271                        next_state = exisisting_state
272                        break
273                else:
274                    next_state = DFAState(nfa.name, nfa_set, nfa.end)
275                    states.append(next_state)
276
277                # Add a transition between the current DFA state and the new
278                # DFA state (the power-set state) via the current label.
279                state.add_arc(next_state, label)
280
281        return cls(nfa.name, states)
282
283    def __iter__(self):
284        return iter(self.states)
285
286    def simplify(self):
287        """Attempt to reduce the number of states of the DFA
288
289        Transform the DFA into an equivalent DFA that has fewer states. Two
290        classes of states can be removed or merged from the original DFA without
291        affecting the language it accepts to minimize it:
292
293            * Unreachable states can not be reached from the initial
294              state of the DFA, for any input string.
295            * Nondistinguishable states are those that cannot be distinguished
296              from one another for any input string.
297
298        This algorithm does not achieve the optimal fully-reduced solution, but it
299        works well enough for the particularities of the Python grammar. The
300        algorithm repeatedly looks for two states that have the same set of
301        arcs (same labels pointing to the same nodes) and unifies them, until
302        things stop changing.
303        """
304        changes = True
305        while changes:
306            changes = False
307            for i, state_i in enumerate(self.states):
308                for j in range(i + 1, len(self.states)):
309                    state_j = self.states[j]
310                    if state_i == state_j:
311                        del self.states[j]
312                        for state in self.states:
313                            state.unifystate(state_j, state_i)
314                        changes = True
315                        break
316
317    def dump(self, writer=print):
318        """Dump a graphical representation of the DFA"""
319        for i, state in enumerate(self.states):
320            writer("  State", i, state.is_final and "(final)" or "")
321            for label, next in sorted(state.arcs.items()):
322                writer("    %s -> %d" % (label, self.states.index(next)))
323
324    def dump_graph(self, writer):
325        """Dump a DOT representation of the DFA"""
326        writer('digraph %s_dfa {\n' % self.name)
327        for i, state in enumerate(self.states):
328            writer(' %d [label="State %d %s"];\n' % (i, i, state.is_final and "(final)" or ""))
329            for label, next in sorted(state.arcs.items()):
330                writer(" %d -> %d [label=%s];\n" % (i, self.states.index(next), label.replace("'", '"')))
331        writer('}\n')
332
333
334class DFAState(object):
335    """A state of a DFA
336
337    Attributes:
338        rule_name (rule_name): The name of the DFA rule containing the represented state.
339        nfa_set (Set[NFAState]): The set of NFA states used to create this state.
340        final (bool): True if the state represents an accepting state of the DFA
341            containing this state.
342        arcs (Dict[label, DFAState]): A mapping representing transitions between
343            the current DFA state and another DFA state via following a label.
344    """
345
346    def __init__(self, rule_name, nfa_set, final):
347        assert isinstance(nfa_set, set)
348        assert isinstance(next(iter(nfa_set)), NFAState)
349        assert isinstance(final, NFAState)
350        self.rule_name = rule_name
351        self.nfa_set = nfa_set
352        self.arcs = {}  # map from terminals/nonterminals to DFAState
353        self.is_final = final in nfa_set
354
355    def add_arc(self, target, label):
356        """Add a new arc to the current state.
357
358        Parameters:
359            target (DFAState): The DFA state at the end of the arc.
360            label (str): The label respresenting the token that must be consumed
361                to perform this transition.
362        """
363        assert isinstance(label, str)
364        assert label not in self.arcs
365        assert isinstance(target, DFAState)
366        self.arcs[label] = target
367
368    def unifystate(self, old, new):
369        """Replace all arcs from the current node to *old* with *new*.
370
371        Parameters:
372            old (DFAState): The  DFA state to remove from all existing arcs.
373            new (DFAState): The DFA state to replace in all existing arcs.
374        """
375        for label, next_ in self.arcs.items():
376            if next_ is old:
377                self.arcs[label] = new
378
379    def __eq__(self, other):
380        # The nfa_set does not matter for  equality
381        assert isinstance(other, DFAState)
382        if self.is_final != other.is_final:
383            return False
384        # We cannot just return self.arcs == other.arcs because that
385        # would invoke this method recursively if there are any cycles.
386        if len(self.arcs) != len(other.arcs):
387            return False
388        for label, next_ in self.arcs.items():
389            if next_ is not other.arcs.get(label):
390                return False
391        return True
392
393    __hash__ = None  # For Py3 compatibility.
394
395    def __repr__(self):
396        return "<%s: %s is_final=%s>" % (
397            self.__class__.__name__,
398            self.rule_name,
399            self.is_final,
400        )
401