• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // connect.h
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 //
16 // \file
17 // Classes and functions to remove unsuccessful paths from an Fst.
18 
19 #ifndef FST_LIB_CONNECT_H__
20 #define FST_LIB_CONNECT_H__
21 
22 #include "fst/lib/mutable-fst.h"
23 
24 namespace fst {
25 
26 // Finds and returns strongly-connected components, accessible and
27 // coaccessible states and related properties. Uses Tarzan's single
28 // DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer
29 // Algorithms", 189pp).
30 template <class A>
31 class SccVisitor {
32  public:
33   typedef A Arc;
34   typedef typename Arc::Weight Weight;
35   typedef typename A::StateId StateId;
36 
37   // scc[i]: strongly-connected component number for state i.
38   //   SCC numbers will be in topological order for acyclic input.
39   // access[i]: accessibility of state i.
40   // coaccess[i]: coaccessibility of state i.
41   // Any of above can be NULL.
42   // props: related property bits (cyclicity, initial cyclicity,
43   //   accessibiliity, coaccessiblity) set/cleared (o.w. unchanged).
SccVisitor(vector<StateId> * scc,vector<bool> * access,vector<bool> * coaccess,uint64 * props)44   SccVisitor(vector<StateId> *scc, vector<bool> *access,
45              vector<bool> *coaccess, uint64 *props)
46       : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {}
SccVisitor(uint64 * props)47   SccVisitor(uint64 *props)
48       : scc_(0), access_(0), coaccess_(0), props_(props) {}
49 
InitVisit(const Fst<A> & fst)50   void InitVisit(const Fst<A> &fst) {
51     if (scc_)
52       scc_->clear();
53     if (access_)
54       access_->clear();
55     if (coaccess_) {
56       coaccess_->clear();
57       coaccess_internal_ = false;
58     } else {
59       coaccess_ = new vector<bool>;
60       coaccess_internal_ = true;
61     }
62     *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible;
63     *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible);
64     fst_ = &fst;
65     start_ = fst.Start();
66     nstates_ = 0;
67     nscc_ = 0;
68     dfnumber_ = new vector<StateId>;
69     lowlink_ = new vector<StateId>;
70     onstack_ = new vector<bool>;
71     scc_stack_ = new vector<StateId>;
72   }
73 
InitState(StateId s,StateId root)74   bool InitState(StateId s, StateId root) {
75     scc_stack_->push_back(s);
76     while ((StateId)dfnumber_->size() <= s) {
77       if (scc_)
78         scc_->push_back(-1);
79       if (access_)
80         access_->push_back(false);
81       coaccess_->push_back(false);
82       dfnumber_->push_back(-1);
83       lowlink_->push_back(-1);
84       onstack_->push_back(false);
85     }
86     (*dfnumber_)[s] = nstates_;
87     (*lowlink_)[s] = nstates_;
88     (*onstack_)[s] = true;
89     if (root == start_) {
90       if (access_)
91         (*access_)[s] = true;
92     } else {
93       if (access_)
94         (*access_)[s] = false;
95       *props_ |= kNotAccessible;
96       *props_ &= ~kAccessible;
97     }
98     ++nstates_;
99     return true;
100   }
101 
TreeArc(StateId s,const A & arc)102   bool TreeArc(StateId s, const A &arc) { return true; }
103 
BackArc(StateId s,const A & arc)104   bool BackArc(StateId s, const A &arc) {
105     StateId t = arc.nextstate;
106     if ((*dfnumber_)[t] < (*lowlink_)[s])
107       (*lowlink_)[s] = (*dfnumber_)[t];
108     if ((*coaccess_)[t])
109       (*coaccess_)[s] = true;
110     *props_ |= kCyclic;
111     *props_ &= ~kAcyclic;
112     if (arc.nextstate == start_) {
113       *props_ |= kInitialCyclic;
114       *props_ &= ~kInitialAcyclic;
115     }
116     return true;
117   }
118 
ForwardOrCrossArc(StateId s,const A & arc)119   bool ForwardOrCrossArc(StateId s, const A &arc) {
120     StateId t = arc.nextstate;
121     if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ &&
122         (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s])
123       (*lowlink_)[s] = (*dfnumber_)[t];
124     if ((*coaccess_)[t])
125       (*coaccess_)[s] = true;
126     return true;
127   }
128 
FinishState(StateId s,StateId p,const A *)129   void FinishState(StateId s, StateId p, const A *) {
130     if (fst_->Final(s) != Weight::Zero())
131       (*coaccess_)[s] = true;
132     if ((*dfnumber_)[s] == (*lowlink_)[s]) {  // root of new SCC
133       bool scc_coaccess = false;
134       size_t i = scc_stack_->size();
135       StateId t;
136       do {
137         t = (*scc_stack_)[--i];
138         if ((*coaccess_)[t])
139           scc_coaccess = true;
140       } while (s != t);
141       do {
142         t = scc_stack_->back();
143         if (scc_)
144           (*scc_)[t] = nscc_;
145         if (scc_coaccess)
146           (*coaccess_)[t] = true;
147         (*onstack_)[t] = false;
148         scc_stack_->pop_back();
149       } while (s != t);
150       if (!scc_coaccess) {
151         *props_ |= kNotCoAccessible;
152         *props_ &= ~kCoAccessible;
153       }
154       ++nscc_;
155     }
156     if (p != kNoStateId) {
157       if ((*coaccess_)[s])
158         (*coaccess_)[p] = true;
159       if ((*lowlink_)[s] < (*lowlink_)[p])
160         (*lowlink_)[p] = (*lowlink_)[s];
161     }
162   }
163 
FinishVisit()164   void FinishVisit() {
165     // Numbers SCC's in topological order when acyclic.
166     if (scc_)
167       for (StateId i = 0; i < (StateId)scc_->size(); ++i)
168         (*scc_)[i] = nscc_ - 1 - (*scc_)[i];
169     if (coaccess_internal_)
170       delete coaccess_;
171     delete dfnumber_;
172     delete lowlink_;
173     delete onstack_;
174     delete scc_stack_;
175   }
176 
177  private:
178   vector<StateId> *scc_;        // State's scc number
179   vector<bool> *access_;        // State's accessibility
180   vector<bool> *coaccess_;      // State's coaccessibility
181   uint64 *props_;
182   const Fst<A> *fst_;
183   StateId start_;
184   StateId nstates_;             // State count
185   StateId nscc_;                // SCC count
186   bool coaccess_internal_;
187   vector<StateId> *dfnumber_;   // state discovery times
188   vector<StateId> *lowlink_;    // lowlink[s] == dfnumber[s] => SCC root
189   vector<bool> *onstack_;       // is a state on the SCC stack
190   vector<StateId> *scc_stack_;  // SCC stack (w/ random access)
191 };
192 
193 
194 // Trims an FST, removing states and arcs that are not on successful
195 // paths. This version modifies its input.
196 //
197 // Complexity:
198 // - Time:  O(V + E)
199 // - Space: O(V + E)
200 // where V = # of states and E = # of arcs.
201 template<class Arc>
Connect(MutableFst<Arc> * fst)202 void Connect(MutableFst<Arc> *fst) {
203   typedef typename Arc::StateId StateId;
204 
205   vector<bool> access;
206   vector<bool> coaccess;
207   uint64 props = 0;
208   SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props);
209   DfsVisit(*fst, &scc_visitor);
210   vector<StateId> dstates;
211   for (StateId s = 0; s < (StateId)access.size(); ++s)
212     if (!access[s] || !coaccess[s])
213       dstates.push_back(s);
214   fst->DeleteStates(dstates);
215   fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible);
216 }
217 
218 }  // namespace fst
219 
220 #endif  // FST_LIB_CONNECT_H__
221