1"""Classes representing state-machine concepts""" 2 3class NFA: 4 """A non deterministic finite automata 5 6 A non deterministic automata is a form of a finite state 7 machine. An NFA's rules are less restrictive than a DFA. 8 The NFA rules are: 9 10 * A transition can be non-deterministic and can result in 11 nothing, one, or two or more states. 12 13 * An epsilon transition consuming empty input is valid. 14 Transitions consuming labeled symbols are also permitted. 15 16 This class assumes that there is only one starting state and one 17 accepting (ending) state. 18 19 Attributes: 20 name (str): The name of the rule the NFA is representing. 21 start (NFAState): The starting state. 22 end (NFAState): The ending state 23 """ 24 25 def __init__(self, start, end): 26 self.name = start.rule_name 27 self.start = start 28 self.end = end 29 30 def __repr__(self): 31 return "NFA(start={}, end={})".format(self.start, self.end) 32 33 def dump(self, writer=print): 34 """Dump a graphical representation of the NFA""" 35 todo = [self.start] 36 for i, state in enumerate(todo): 37 writer(" State", i, state is self.end and "(final)" or "") 38 for arc in state.arcs: 39 label = arc.label 40 next = arc.target 41 if next in todo: 42 j = todo.index(next) 43 else: 44 j = len(todo) 45 todo.append(next) 46 if label is None: 47 writer(" -> %d" % j) 48 else: 49 writer(" %s -> %d" % (label, j)) 50 51 def dump_graph(self, writer): 52 """Dump a DOT representation of the NFA""" 53 writer('digraph %s_nfa {\n' % self.name) 54 todo = [self.start] 55 for i, state in enumerate(todo): 56 writer(' %d [label="State %d %s"];\n' % (i, i, state is self.end and "(final)" or "")) 57 for arc in state.arcs: 58 label = arc.label 59 next = arc.target 60 if next in todo: 61 j = todo.index(next) 62 else: 63 j = len(todo) 64 todo.append(next) 65 if label is None: 66 writer(" %d -> %d [style=dotted label=ε];\n" % (i, j)) 67 else: 68 writer(" %d -> %d [label=%s];\n" % (i, j, label.replace("'", '"'))) 69 writer('}\n') 70 71 72class NFAArc: 73 """An arc representing a transition between two NFA states. 74 75 NFA states can be connected via two ways: 76 77 * A label transition: An input equal to the label must 78 be consumed to perform the transition. 79 * An epsilon transition: The transition can be taken without 80 consuming any input symbol. 81 82 Attributes: 83 target (NFAState): The ending state of the transition arc. 84 label (Optional[str]): The label that must be consumed to make 85 the transition. An epsilon transition is represented 86 using `None`. 87 """ 88 89 def __init__(self, target, label): 90 self.target = target 91 self.label = label 92 93 def __repr__(self): 94 return "<%s: %s>" % (self.__class__.__name__, self.label) 95 96 97class NFAState: 98 """A state of a NFA, non deterministic finite automata. 99 100 Attributes: 101 target (rule_name): The name of the rule used to represent the NFA's 102 ending state after a transition. 103 arcs (Dict[Optional[str], NFAState]): A mapping representing transitions 104 between the current NFA state and another NFA state via following 105 a label. 106 """ 107 108 def __init__(self, rule_name): 109 self.rule_name = rule_name 110 self.arcs = [] 111 112 def add_arc(self, target, label=None): 113 """Add a new arc to connect the state to a target state within the NFA 114 115 The method adds a new arc to the list of arcs available as transitions 116 from the present state. An optional label indicates a named transition 117 that consumes an input while the absence of a label represents an epsilon 118 transition. 119 120 Attributes: 121 target (NFAState): The end of the transition that the arc represents. 122 label (Optional[str]): The label that must be consumed for making 123 the transition. If the label is not provided the transition is assumed 124 to be an epsilon-transition. 125 """ 126 assert label is None or isinstance(label, str) 127 assert isinstance(target, NFAState) 128 self.arcs.append(NFAArc(target, label)) 129 130 def __repr__(self): 131 return "<%s: from %s>" % (self.__class__.__name__, self.rule_name) 132 133 134class DFA: 135 """A deterministic finite automata 136 137 A deterministic finite automata is a form of a finite state machine 138 that obeys the following rules: 139 140 * Each of the transitions is uniquely determined by 141 the source state and input symbol 142 * Reading an input symbol is required for each state 143 transition (no epsilon transitions). 144 145 The finite-state machine will accept or reject a string of symbols 146 and only produces a unique computation of the automaton for each input 147 string. The DFA must have a unique starting state (represented as the first 148 element in the list of states) but can have multiple final states. 149 150 Attributes: 151 name (str): The name of the rule the DFA is representing. 152 states (List[DFAState]): A collection of DFA states. 153 """ 154 155 def __init__(self, name, states): 156 self.name = name 157 self.states = states 158 159 @classmethod 160 def from_nfa(cls, nfa): 161 """Constructs a DFA from a NFA using the Rabin–Scott construction algorithm. 162 163 To simulate the operation of a DFA on a given input string, it's 164 necessary to keep track of a single state at any time, or more precisely, 165 the state that the automaton will reach after seeing a prefix of the 166 input. In contrast, to simulate an NFA, it's necessary to keep track of 167 a set of states: all of the states that the automaton could reach after 168 seeing the same prefix of the input, according to the nondeterministic 169 choices made by the automaton. There are two possible sources of 170 non-determinism: 171 172 1) Multiple (one or more) transitions with the same label 173 174 'A' +-------+ 175 +----------->+ State +----------->+ 176 | | 2 | 177 +-------+ +-------+ 178 | State | 179 | 1 | +-------+ 180 +-------+ | State | 181 +----------->+ 3 +----------->+ 182 'A' +-------+ 183 184 2) Epsilon transitions (transitions that can be taken without consuming any input) 185 186 +-------+ +-------+ 187 | State | ε | State | 188 | 1 +----------->+ 2 +----------->+ 189 +-------+ +-------+ 190 191 Looking at the first case above, we can't determine which transition should be 192 followed when given an input A. We could choose whether or not to follow the 193 transition while in the second case the problem is that we can choose both to 194 follow the transition or not doing it. To solve this problem we can imagine that 195 we follow all possibilities at the same time and we construct new states from the 196 set of all possible reachable states. For every case in the previous example: 197 198 199 1) For multiple transitions with the same label we colapse all of the 200 final states under the same one 201 202 +-------+ +-------+ 203 | State | 'A' | State | 204 | 1 +----------->+ 2-3 +----------->+ 205 +-------+ +-------+ 206 207 2) For epsilon transitions we collapse all epsilon-reachable states 208 into the same one 209 210 +-------+ 211 | State | 212 | 1-2 +-----------> 213 +-------+ 214 215 Because the DFA states consist of sets of NFA states, an n-state NFA 216 may be converted to a DFA with at most 2**n states. Notice that the 217 constructed DFA is not minimal and can be simplified or reduced 218 afterwards. 219 220 Parameters: 221 name (NFA): The NFA to transform to DFA. 222 """ 223 assert isinstance(nfa, NFA) 224 225 def add_closure(nfa_state, base_nfa_set): 226 """Calculate the epsilon-closure of a given state 227 228 Add to the *base_nfa_set* all the states that are 229 reachable from *nfa_state* via epsilon-transitions. 230 """ 231 assert isinstance(nfa_state, NFAState) 232 if nfa_state in base_nfa_set: 233 return 234 base_nfa_set.add(nfa_state) 235 for nfa_arc in nfa_state.arcs: 236 if nfa_arc.label is None: 237 add_closure(nfa_arc.target, base_nfa_set) 238 239 # Calculate the epsilon-closure of the starting state 240 base_nfa_set = set() 241 add_closure(nfa.start, base_nfa_set) 242 243 # Start by visiting the NFA starting state (there is only one). 244 states = [DFAState(nfa.name, base_nfa_set, nfa.end)] 245 246 for state in states: # NB states grow while we're iterating 247 248 # Find transitions from the current state to other reachable states 249 # and store them in mapping that correlates the label to all the 250 # possible reachable states that can be obtained by consuming a 251 # token equal to the label. Each set of all the states that can 252 # be reached after following a label will be the a DFA state. 253 arcs = {} 254 for nfa_state in state.nfa_set: 255 for nfa_arc in nfa_state.arcs: 256 if nfa_arc.label is not None: 257 nfa_set = arcs.setdefault(nfa_arc.label, set()) 258 # All states that can be reached by epsilon-transitions 259 # are also included in the set of reachable states. 260 add_closure(nfa_arc.target, nfa_set) 261 262 # Now create new DFAs by visiting all posible transitions between 263 # the current DFA state and the new power-set states (each nfa_set) 264 # via the different labels. As the nodes are appended to *states* this 265 # is performing a breadth-first search traversal over the power-set of 266 # the states of the original NFA. 267 for label, nfa_set in sorted(arcs.items()): 268 for exisisting_state in states: 269 if exisisting_state.nfa_set == nfa_set: 270 # The DFA state already exists for this rule. 271 next_state = exisisting_state 272 break 273 else: 274 next_state = DFAState(nfa.name, nfa_set, nfa.end) 275 states.append(next_state) 276 277 # Add a transition between the current DFA state and the new 278 # DFA state (the power-set state) via the current label. 279 state.add_arc(next_state, label) 280 281 return cls(nfa.name, states) 282 283 def __iter__(self): 284 return iter(self.states) 285 286 def simplify(self): 287 """Attempt to reduce the number of states of the DFA 288 289 Transform the DFA into an equivalent DFA that has fewer states. Two 290 classes of states can be removed or merged from the original DFA without 291 affecting the language it accepts to minimize it: 292 293 * Unreachable states can not be reached from the initial 294 state of the DFA, for any input string. 295 * Nondistinguishable states are those that cannot be distinguished 296 from one another for any input string. 297 298 This algorithm does not achieve the optimal fully-reduced solution, but it 299 works well enough for the particularities of the Python grammar. The 300 algorithm repeatedly looks for two states that have the same set of 301 arcs (same labels pointing to the same nodes) and unifies them, until 302 things stop changing. 303 """ 304 changes = True 305 while changes: 306 changes = False 307 for i, state_i in enumerate(self.states): 308 for j in range(i + 1, len(self.states)): 309 state_j = self.states[j] 310 if state_i == state_j: 311 del self.states[j] 312 for state in self.states: 313 state.unifystate(state_j, state_i) 314 changes = True 315 break 316 317 def dump(self, writer=print): 318 """Dump a graphical representation of the DFA""" 319 for i, state in enumerate(self.states): 320 writer(" State", i, state.is_final and "(final)" or "") 321 for label, next in sorted(state.arcs.items()): 322 writer(" %s -> %d" % (label, self.states.index(next))) 323 324 def dump_graph(self, writer): 325 """Dump a DOT representation of the DFA""" 326 writer('digraph %s_dfa {\n' % self.name) 327 for i, state in enumerate(self.states): 328 writer(' %d [label="State %d %s"];\n' % (i, i, state.is_final and "(final)" or "")) 329 for label, next in sorted(state.arcs.items()): 330 writer(" %d -> %d [label=%s];\n" % (i, self.states.index(next), label.replace("'", '"'))) 331 writer('}\n') 332 333 334class DFAState(object): 335 """A state of a DFA 336 337 Attributes: 338 rule_name (rule_name): The name of the DFA rule containing the represented state. 339 nfa_set (Set[NFAState]): The set of NFA states used to create this state. 340 final (bool): True if the state represents an accepting state of the DFA 341 containing this state. 342 arcs (Dict[label, DFAState]): A mapping representing transitions between 343 the current DFA state and another DFA state via following a label. 344 """ 345 346 def __init__(self, rule_name, nfa_set, final): 347 assert isinstance(nfa_set, set) 348 assert isinstance(next(iter(nfa_set)), NFAState) 349 assert isinstance(final, NFAState) 350 self.rule_name = rule_name 351 self.nfa_set = nfa_set 352 self.arcs = {} # map from terminals/nonterminals to DFAState 353 self.is_final = final in nfa_set 354 355 def add_arc(self, target, label): 356 """Add a new arc to the current state. 357 358 Parameters: 359 target (DFAState): The DFA state at the end of the arc. 360 label (str): The label respresenting the token that must be consumed 361 to perform this transition. 362 """ 363 assert isinstance(label, str) 364 assert label not in self.arcs 365 assert isinstance(target, DFAState) 366 self.arcs[label] = target 367 368 def unifystate(self, old, new): 369 """Replace all arcs from the current node to *old* with *new*. 370 371 Parameters: 372 old (DFAState): The DFA state to remove from all existing arcs. 373 new (DFAState): The DFA state to replace in all existing arcs. 374 """ 375 for label, next_ in self.arcs.items(): 376 if next_ is old: 377 self.arcs[label] = new 378 379 def __eq__(self, other): 380 # The nfa_set does not matter for equality 381 assert isinstance(other, DFAState) 382 if self.is_final != other.is_final: 383 return False 384 # We cannot just return self.arcs == other.arcs because that 385 # would invoke this method recursively if there are any cycles. 386 if len(self.arcs) != len(other.arcs): 387 return False 388 for label, next_ in self.arcs.items(): 389 if next_ is not other.arcs.get(label): 390 return False 391 return True 392 393 __hash__ = None # For Py3 compatibility. 394 395 def __repr__(self): 396 return "<%s: %s is_final=%s>" % ( 397 self.__class__.__name__, 398 self.rule_name, 399 self.is_final, 400 ) 401