1 // connect.h
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 //
16 // \file
17 // Classes and functions to remove unsuccessful paths from an Fst.
18
19 #ifndef FST_LIB_CONNECT_H__
20 #define FST_LIB_CONNECT_H__
21
22 #include "fst/lib/mutable-fst.h"
23
24 namespace fst {
25
26 // Finds and returns strongly-connected components, accessible and
27 // coaccessible states and related properties. Uses Tarzan's single
28 // DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
29 // Algorithms", 189pp).
30 template <class A>
31 class SccVisitor {
32 public:
33 typedef A Arc;
34 typedef typename Arc::Weight Weight;
35 typedef typename A::StateId StateId;
36
37 // scc[i]: strongly-connected component number for state i.
38 // SCC numbers will be in topological order for acyclic input.
39 // access[i]: accessibility of state i.
40 // coaccess[i]: coaccessibility of state i.
41 // Any of above can be NULL.
42 // props: related property bits (cyclicity, initial cyclicity,
43 // accessibiliity, coaccessiblity) set/cleared (o.w. unchanged).
SccVisitor(vector<StateId> * scc,vector<bool> * access,vector<bool> * coaccess,uint64 * props)44 SccVisitor(vector<StateId> *scc, vector<bool> *access,
45 vector<bool> *coaccess, uint64 *props)
46 : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {}
SccVisitor(uint64 * props)47 SccVisitor(uint64 *props)
48 : scc_(0), access_(0), coaccess_(0), props_(props) {}
49
InitVisit(const Fst<A> & fst)50 void InitVisit(const Fst<A> &fst) {
51 if (scc_)
52 scc_->clear();
53 if (access_)
54 access_->clear();
55 if (coaccess_) {
56 coaccess_->clear();
57 coaccess_internal_ = false;
58 } else {
59 coaccess_ = new vector<bool>;
60 coaccess_internal_ = true;
61 }
62 *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
63 *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible);
64 fst_ = &fst;
65 start_ = fst.Start();
66 nstates_ = 0;
67 nscc_ = 0;
68 dfnumber_ = new vector<StateId>;
69 lowlink_ = new vector<StateId>;
70 onstack_ = new vector<bool>;
71 scc_stack_ = new vector<StateId>;
72 }
73
InitState(StateId s,StateId root)74 bool InitState(StateId s, StateId root) {
75 scc_stack_->push_back(s);
76 while ((StateId)dfnumber_->size() <= s) {
77 if (scc_)
78 scc_->push_back(-1);
79 if (access_)
80 access_->push_back(false);
81 coaccess_->push_back(false);
82 dfnumber_->push_back(-1);
83 lowlink_->push_back(-1);
84 onstack_->push_back(false);
85 }
86 (*dfnumber_)[s] = nstates_;
87 (*lowlink_)[s] = nstates_;
88 (*onstack_)[s] = true;
89 if (root == start_) {
90 if (access_)
91 (*access_)[s] = true;
92 } else {
93 if (access_)
94 (*access_)[s] = false;
95 *props_ |= kNotAccessible;
96 *props_ &= ~kAccessible;
97 }
98 ++nstates_;
99 return true;
100 }
101
TreeArc(StateId s,const A & arc)102 bool TreeArc(StateId s, const A &arc) { return true; }
103
BackArc(StateId s,const A & arc)104 bool BackArc(StateId s, const A &arc) {
105 StateId t = arc.nextstate;
106 if ((*dfnumber_)[t] < (*lowlink_)[s])
107 (*lowlink_)[s] = (*dfnumber_)[t];
108 if ((*coaccess_)[t])
109 (*coaccess_)[s] = true;
110 *props_ |= kCyclic;
111 *props_ &= ~kAcyclic;
112 if (arc.nextstate == start_) {
113 *props_ |= kInitialCyclic;
114 *props_ &= ~kInitialAcyclic;
115 }
116 return true;
117 }
118
ForwardOrCrossArc(StateId s,const A & arc)119 bool ForwardOrCrossArc(StateId s, const A &arc) {
120 StateId t = arc.nextstate;
121 if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ &&
122 (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s])
123 (*lowlink_)[s] = (*dfnumber_)[t];
124 if ((*coaccess_)[t])
125 (*coaccess_)[s] = true;
126 return true;
127 }
128
FinishState(StateId s,StateId p,const A *)129 void FinishState(StateId s, StateId p, const A *) {
130 if (fst_->Final(s) != Weight::Zero())
131 (*coaccess_)[s] = true;
132 if ((*dfnumber_)[s] == (*lowlink_)[s]) { // root of new SCC
133 bool scc_coaccess = false;
134 size_t i = scc_stack_->size();
135 StateId t;
136 do {
137 t = (*scc_stack_)[--i];
138 if ((*coaccess_)[t])
139 scc_coaccess = true;
140 } while (s != t);
141 do {
142 t = scc_stack_->back();
143 if (scc_)
144 (*scc_)[t] = nscc_;
145 if (scc_coaccess)
146 (*coaccess_)[t] = true;
147 (*onstack_)[t] = false;
148 scc_stack_->pop_back();
149 } while (s != t);
150 if (!scc_coaccess) {
151 *props_ |= kNotCoAccessible;
152 *props_ &= ~kCoAccessible;
153 }
154 ++nscc_;
155 }
156 if (p != kNoStateId) {
157 if ((*coaccess_)[s])
158 (*coaccess_)[p] = true;
159 if ((*lowlink_)[s] < (*lowlink_)[p])
160 (*lowlink_)[p] = (*lowlink_)[s];
161 }
162 }
163
FinishVisit()164 void FinishVisit() {
165 // Numbers SCC's in topological order when acyclic.
166 if (scc_)
167 for (StateId i = 0; i < (StateId)scc_->size(); ++i)
168 (*scc_)[i] = nscc_ - 1 - (*scc_)[i];
169 if (coaccess_internal_)
170 delete coaccess_;
171 delete dfnumber_;
172 delete lowlink_;
173 delete onstack_;
174 delete scc_stack_;
175 }
176
177 private:
178 vector<StateId> *scc_; // State's scc number
179 vector<bool> *access_; // State's accessibility
180 vector<bool> *coaccess_; // State's coaccessibility
181 uint64 *props_;
182 const Fst<A> *fst_;
183 StateId start_;
184 StateId nstates_; // State count
185 StateId nscc_; // SCC count
186 bool coaccess_internal_;
187 vector<StateId> *dfnumber_; // state discovery times
188 vector<StateId> *lowlink_; // lowlink[s] == dfnumber[s] => SCC root
189 vector<bool> *onstack_; // is a state on the SCC stack
190 vector<StateId> *scc_stack_; // SCC stack (w/ random access)
191 };
192
193
194 // Trims an FST, removing states and arcs that are not on successful
195 // paths. This version modifies its input.
196 //
197 // Complexity:
198 // - Time: O(V + E)
199 // - Space: O(V + E)
200 // where V = # of states and E = # of arcs.
201 template<class Arc>
Connect(MutableFst<Arc> * fst)202 void Connect(MutableFst<Arc> *fst) {
203 typedef typename Arc::StateId StateId;
204
205 vector<bool> access;
206 vector<bool> coaccess;
207 uint64 props = 0;
208 SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props);
209 DfsVisit(*fst, &scc_visitor);
210 vector<StateId> dstates;
211 for (StateId s = 0; s < (StateId)access.size(); ++s)
212 if (!access[s] || !coaccess[s])
213 dstates.push_back(s);
214 fst->DeleteStates(dstates);
215 fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible);
216 }
217
218 } // namespace fst
219
220 #endif // FST_LIB_CONNECT_H__
221