• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // info.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Class to compute various information about FSTs, helper class for fstinfo.cc
20 
21 #ifndef FST_SCRIPT_INFO_IMPL_H_
22 #define FST_SCRIPT_INFO_IMPL_H_
23 
24 #include <string>
25 #include <vector>
26 using std::vector;
27 
28 #include <fst/connect.h>
29 #include <fst/dfs-visit.h>
30 #include <fst/fst.h>
31 #include <fst/lookahead-matcher.h>
32 #include <fst/matcher.h>
33 #include <fst/queue.h>
34 #include <fst/test-properties.h>
35 #include <fst/verify.h>
36 #include <fst/visit.h>
37 
38 namespace fst {
39 
40 // Compute various information about FSTs, helper class for fstinfo.cc.
41 // WARNING: Stand-alone use of this class is not recommended, most code
42 // should call directly the relevant library functions: Fst<A>::NumStates,
43 // Fst<A>::NumArcs, TestProperties, ...
44 template <class A> class FstInfo {
45  public:
46   typedef A Arc;
47   typedef typename A::StateId StateId;
48   typedef typename A::Label Label;
49   typedef typename A::Weight Weight;
50 
51   // When info_type is "short" (or "auto" and not an ExpandedFst)
52   // then only minimal info is computed and can be requested.
53   FstInfo(const Fst<A> &fst, bool test_properties,
54           const string &arc_filter_type = "any",
55           string info_type = "auto", bool verify = true)
56       : fst_type_(fst.Type()),
57         input_symbols_(fst.InputSymbols() ?
58                        fst.InputSymbols()->Name() : "none"),
59         output_symbols_(fst.OutputSymbols() ?
60                         fst.OutputSymbols()->Name() : "none"),
61         nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0),
62         nepsilons_(0), niepsilons_(0), noepsilons_(0),
63         naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0),
64         input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE),
65         input_lookahead_(false), output_lookahead_(false),
66         properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) {
67     if (info_type == "long") {
68       long_info_ = true;
69     } else if (info_type == "short") {
70       long_info_ = false;
71     } else if (info_type == "auto") {
72       long_info_ = fst.Properties(kExpanded, false);
73     } else {
74       FSTERROR() << "Bad info type: " << info_type;
75       return;
76     }
77 
78     if (!long_info_)
79       return;
80 
81     // If the FST is not sane, we return.
82     if (verify && !Verify(fst)) {
83       FSTERROR() << "FstInfo: Verify: FST not well-formed.";
84       return;
85     }
86 
87     start_ = fst.Start();
88     properties_ = fst.Properties(kFstProperties, test_properties);
89 
90     for (StateIterator< Fst<A> > siter(fst);
91          !siter.Done();
92          siter.Next()) {
93       ++nstates_;
94       StateId s = siter.Value();
95       if (fst.Final(s) != Weight::Zero())
96         ++nfinal_;
97       for (ArcIterator< Fst<A> > aiter(fst, s);
98            !aiter.Done();
99            aiter.Next()) {
100         const A &arc = aiter.Value();
101         ++narcs_;
102         if (arc.ilabel == 0 && arc.olabel == 0)
103           ++nepsilons_;
104         if (arc.ilabel == 0)
105           ++niepsilons_;
106         if (arc.olabel == 0)
107           ++noepsilons_;
108       }
109     }
110 
111     {
112       vector<StateId> cc;
113       CcVisitor<Arc> cc_visitor(&cc);
114       FifoQueue<StateId> fifo_queue;
115       if (arc_filter_type == "any") {
116         Visit(fst, &cc_visitor, &fifo_queue);
117       } else if (arc_filter_type == "epsilon") {
118         Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>());
119       } else if (arc_filter_type == "iepsilon") {
120         Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>());
121       } else if (arc_filter_type == "oepsilon") {
122         Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>());
123       } else {
124         FSTERROR() << "Bad arc filter type: " << arc_filter_type;
125         return;
126       }
127 
128       for (StateId s = 0; s < cc.size(); ++s) {
129         if (cc[s] >= ncc_)
130           ncc_ = cc[s] + 1;
131       }
132     }
133 
134     {
135       vector<StateId> scc;
136       vector<bool> access, coaccess;
137       uint64 props = 0;
138       SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props);
139       if (arc_filter_type == "any") {
140         DfsVisit(fst, &scc_visitor);
141       } else if (arc_filter_type == "epsilon") {
142         DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>());
143       } else if (arc_filter_type == "iepsilon") {
144         DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>());
145       } else if (arc_filter_type == "oepsilon") {
146         DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>());
147       } else {
148         FSTERROR() << "Bad arc filter type: " << arc_filter_type;
149         return;
150       }
151 
152       for (StateId s = 0; s < scc.size(); ++s) {
153         if (access[s])
154           ++naccess_;
155         if (coaccess[s])
156           ++ncoaccess_;
157         if (access[s] && coaccess[s])
158           ++nconnect_;
159         if (scc[s] >= nscc_)
160           nscc_ = scc[s] + 1;
161       }
162     }
163 
164     LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT);
165     input_match_type_ =  imatcher.Type(test_properties);
166     input_lookahead_ =  imatcher.Flags() & kInputLookAheadMatcher;
167 
168     LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT);
169     output_match_type_ =  omatcher.Type(test_properties);
170     output_lookahead_ =  omatcher.Flags() & kOutputLookAheadMatcher;
171   }
172 
173   // Short info
FstType()174   const string& FstType() const { return fst_type_; }
ArcType()175   const string& ArcType() const { return A::Type(); }
InputSymbols()176   const string& InputSymbols() const { return input_symbols_; }
OutputSymbols()177   const string& OutputSymbols() const { return output_symbols_; }
LongInfo()178   const bool LongInfo() const { return long_info_; }
ArcFilterType()179   const string& ArcFilterType() const { return arc_filter_type_; }
180 
181   // Long info
InputMatchType()182   MatchType InputMatchType() const { CheckLong(); return input_match_type_; }
OutputMatchType()183   MatchType OutputMatchType() const { CheckLong(); return output_match_type_; }
InputLookAhead()184   bool InputLookAhead() const { CheckLong(); return input_lookahead_; }
OutputLookAhead()185   bool OutputLookAhead() const { CheckLong();  return output_lookahead_; }
NumStates()186   int64 NumStates() const { CheckLong();  return nstates_; }
NumArcs()187   int64 NumArcs() const { CheckLong();  return narcs_; }
Start()188   int64 Start() const { CheckLong();  return start_; }
NumFinal()189   int64 NumFinal() const { CheckLong();  return nfinal_; }
NumEpsilons()190   int64 NumEpsilons() const { CheckLong();  return nepsilons_; }
NumInputEpsilons()191   int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; }
NumOutputEpsilons()192   int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; }
NumAccessible()193   int64 NumAccessible() const { CheckLong(); return naccess_; }
NumCoAccessible()194   int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; }
NumConnected()195   int64 NumConnected() const { CheckLong(); return nconnect_; }
NumCc()196   int64 NumCc() const { CheckLong(); return ncc_; }
NumScc()197   int64 NumScc() const { CheckLong(); return nscc_; }
Properties()198   uint64 Properties() const { CheckLong(); return properties_; }
199 
200  private:
CheckLong()201   void CheckLong() const {
202     if (!long_info_)
203       FSTERROR() << "FstInfo: method only available with long info version";
204   }
205 
206   string fst_type_;
207   string input_symbols_;
208   string output_symbols_;
209   int64 nstates_;
210   int64 narcs_;
211   int64 start_;
212   int64 nfinal_;
213   int64 nepsilons_;
214   int64 niepsilons_;
215   int64 noepsilons_;
216   int64 naccess_;
217   int64 ncoaccess_;
218   int64 nconnect_;
219   int64 ncc_;
220   int64 nscc_;
221   MatchType input_match_type_;
222   MatchType output_match_type_;
223   bool input_lookahead_;
224   bool output_lookahead_;
225   uint64 properties_;
226   string arc_filter_type_;
227   bool long_info_;
228   DISALLOW_COPY_AND_ASSIGN(FstInfo);
229 };
230 
231 template <class A>
232 void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) {
233   ostream &os = pipe ? cerr : cout;
234 
235   ios_base::fmtflags old = os.setf(ios::left);
236   os.width(50);
237   os << "fst type" <<  fstinfo.FstType() << endl;
238   os.width(50);
239   os << "arc type" << fstinfo.ArcType() << endl;
240   os.width(50);
241   os << "input symbol table" << fstinfo.InputSymbols() << endl;
242   os.width(50);
243   os << "output symbol table" << fstinfo.OutputSymbols() << endl;
244 
245   if (!fstinfo.LongInfo()) {
246     os.setf(old);
247     return;
248   }
249 
250   os.width(50);
251   os << "# of states" << fstinfo.NumStates() << endl;
252   os.width(50);
253   os << "# of arcs" << fstinfo.NumArcs() << endl;
254   os.width(50);
255   os << "initial state" << fstinfo.Start() << endl;
256   os.width(50);
257   os << "# of final states" << fstinfo.NumFinal() << endl;
258   os.width(50);
259   os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl;
260   os.width(50);
261   os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl;
262   os.width(50);
263   os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl;
264   os.width(50);
265 
266   string arc_type = "";
267   if (fstinfo.ArcFilterType() == "epsilon")
268     arc_type = "epsilon ";
269   else if (fstinfo.ArcFilterType() == "iepsilon")
270     arc_type = "input-epsilon ";
271   else if (fstinfo.ArcFilterType() == "oepsilon")
272     arc_type = "output-epsilon ";
273 
274   string accessible_label = "# of " +  arc_type + "accessible states";
275   os.width(50);
276   os << accessible_label << fstinfo.NumAccessible() << endl;
277   string coaccessible_label = "# of " +  arc_type + "coaccessible states";
278   os.width(50);
279   os << coaccessible_label << fstinfo.NumCoAccessible() << endl;
280   string connected_label = "# of " +  arc_type + "connected states";
281   os.width(50);
282   os << connected_label << fstinfo.NumConnected() << endl;
283   string numcc_label = "# of " +  arc_type + "connected components";
284   os.width(50);
285   os << numcc_label << fstinfo.NumCc() << endl;
286   string numscc_label = "# of " +  arc_type + "strongly conn components";
287   os.width(50);
288   os << numscc_label << fstinfo.NumScc() << endl;
289 
290   os.width(50);
291   os << "input matcher"
292      << (fstinfo.InputMatchType() == MATCH_INPUT ? 'y' :
293          fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
294   os.width(50);
295   os << "output matcher"
296      << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' :
297          fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl;
298   os.width(50);
299   os << "input lookahead"
300      << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl;
301   os.width(50);
302   os << "output lookahead"
303      << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl;
304 
305   uint64 prop = 1;
306   for (int i = 0; i < 64; ++i, prop <<= 1) {
307     if (prop & kBinaryProperties) {
308       char value = 'n';
309       if (fstinfo.Properties() & prop) value = 'y';
310       os.width(50);
311       os << PropertyNames[i] << value << endl;
312     } else if (prop & kPosTrinaryProperties) {
313       char value = '?';
314       if (fstinfo.Properties() & prop) value = 'y';
315       else if (fstinfo.Properties() & prop << 1) value = 'n';
316       os.width(50);
317       os << PropertyNames[i] << value << endl;
318     }
319   }
320   os.setf(old);
321 }
322 
323 }  // namespace fst
324 
325 #endif  // FST_SCRIPT_INFO_IMPL_H_
326