1 // info.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Prints information about a PDT.
20
21 #ifndef FST_EXTENSIONS_PDT_INFO_H__
22 #define FST_EXTENSIONS_PDT_INFO_H__
23
24 #include <tr1/unordered_map>
25 using std::tr1::unordered_map;
26 using std::tr1::unordered_multimap;
27 #include <tr1/unordered_set>
28 using std::tr1::unordered_set;
29 using std::tr1::unordered_multiset;
30 #include <vector>
31 using std::vector;
32
33 #include <fst/fst.h>
34 #include <fst/extensions/pdt/pdt.h>
35
36 namespace fst {
37
38 // Compute various information about PDTs, helper class for pdtinfo.cc.
39 template <class A> class PdtInfo {
40 public:
41 typedef A Arc;
42 typedef typename A::StateId StateId;
43 typedef typename A::Label Label;
44 typedef typename A::Weight Weight;
45
46 PdtInfo(const Fst<A> &fst,
47 const vector<pair<typename A::Label,
48 typename A::Label> > &parens);
49
FstType()50 const string& FstType() const { return fst_type_; }
ArcType()51 const string& ArcType() const { return A::Type(); }
52
NumStates()53 int64 NumStates() const { return nstates_; }
NumArcs()54 int64 NumArcs() const { return narcs_; }
NumOpenParens()55 int64 NumOpenParens() const { return nopen_parens_; }
NumCloseParens()56 int64 NumCloseParens() const { return nclose_parens_; }
NumUniqueOpenParens()57 int64 NumUniqueOpenParens() const { return nuniq_open_parens_; }
NumUniqueCloseParens()58 int64 NumUniqueCloseParens() const { return nuniq_close_parens_; }
NumOpenParenStates()59 int64 NumOpenParenStates() const { return nopen_paren_states_; }
NumCloseParenStates()60 int64 NumCloseParenStates() const { return nclose_paren_states_; }
61
62 private:
63 string fst_type_;
64 int64 nstates_;
65 int64 narcs_;
66 int64 nopen_parens_;
67 int64 nclose_parens_;
68 int64 nuniq_open_parens_;
69 int64 nuniq_close_parens_;
70 int64 nopen_paren_states_;
71 int64 nclose_paren_states_;
72
73 DISALLOW_COPY_AND_ASSIGN(PdtInfo);
74 };
75
76 template <class A>
PdtInfo(const Fst<A> & fst,const vector<pair<typename A::Label,typename A::Label>> & parens)77 PdtInfo<A>::PdtInfo(const Fst<A> &fst,
78 const vector<pair<typename A::Label,
79 typename A::Label> > &parens)
80 : fst_type_(fst.Type()),
81 nstates_(0),
82 narcs_(0),
83 nopen_parens_(0),
84 nclose_parens_(0),
85 nuniq_open_parens_(0),
86 nuniq_close_parens_(0),
87 nopen_paren_states_(0),
88 nclose_paren_states_(0) {
89 unordered_map<Label, size_t> paren_map;
90 unordered_set<Label> paren_set;
91 unordered_set<StateId> open_paren_state_set;
92 unordered_set<StateId> close_paren_state_set;
93
94 for (size_t i = 0; i < parens.size(); ++i) {
95 const pair<Label, Label> &p = parens[i];
96 paren_map[p.first] = i;
97 paren_map[p.second] = i;
98 }
99
100 for (StateIterator< Fst<A> > siter(fst);
101 !siter.Done();
102 siter.Next()) {
103 ++nstates_;
104 StateId s = siter.Value();
105 for (ArcIterator< Fst<A> > aiter(fst, s);
106 !aiter.Done();
107 aiter.Next()) {
108 const A &arc = aiter.Value();
109 ++narcs_;
110 typename unordered_map<Label, size_t>::const_iterator pit
111 = paren_map.find(arc.ilabel);
112 if (pit != paren_map.end()) {
113 Label open_paren = parens[pit->second].first;
114 Label close_paren = parens[pit->second].second;
115 if (arc.ilabel == open_paren) {
116 ++nopen_parens_;
117 if (!paren_set.count(open_paren)) {
118 ++nuniq_open_parens_;
119 paren_set.insert(open_paren);
120 }
121 if (!open_paren_state_set.count(arc.nextstate)) {
122 ++nopen_paren_states_;
123 open_paren_state_set.insert(arc.nextstate);
124 }
125 } else {
126 ++nclose_parens_;
127 if (!paren_set.count(close_paren)) {
128 ++nuniq_close_parens_;
129 paren_set.insert(close_paren);
130 }
131 if (!close_paren_state_set.count(s)) {
132 ++nclose_paren_states_;
133 close_paren_state_set.insert(s);
134 }
135
136 }
137 }
138 }
139 }
140 }
141
142
143 template <class A>
PrintPdtInfo(const PdtInfo<A> & pdtinfo)144 void PrintPdtInfo(const PdtInfo<A> &pdtinfo) {
145 ios_base::fmtflags old = cout.setf(ios::left);
146 cout.width(50);
147 cout << "fst type" << pdtinfo.FstType().c_str() << endl;
148 cout.width(50);
149 cout << "arc type" << pdtinfo.ArcType().c_str() << endl;
150 cout.width(50);
151 cout << "# of states" << pdtinfo.NumStates() << endl;
152 cout.width(50);
153 cout << "# of arcs" << pdtinfo.NumArcs() << endl;
154 cout.width(50);
155 cout << "# of open parentheses" << pdtinfo.NumOpenParens() << endl;
156 cout.width(50);
157 cout << "# of close parentheses" << pdtinfo.NumCloseParens() << endl;
158 cout.width(50);
159 cout << "# of unique open parentheses"
160 << pdtinfo.NumUniqueOpenParens() << endl;
161 cout.width(50);
162 cout << "# of unique close parentheses"
163 << pdtinfo.NumUniqueCloseParens() << endl;
164 cout.width(50);
165 cout << "# of open parenthesis dest. states"
166 << pdtinfo.NumOpenParenStates() << endl;
167 cout.width(50);
168 cout << "# of close parenthesis source states"
169 << pdtinfo.NumCloseParenStates() << endl;
170 cout.setf(old);
171 }
172
173 } // namespace fst
174
175 #endif // FST_EXTENSIONS_PDT_INFO_H__
176