1 // const-fst.h
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 //
16 // \file
17 // Simple concrete immutable FST whose states and arcs are each stored
18 // in single arrays.
19
20 #ifndef FST_LIB_CONST_FST_H__
21 #define FST_LIB_CONST_FST_H__
22
23 #include "fst/lib/expanded-fst.h"
24 #include "fst/lib/test-properties.h"
25
26 namespace fst {
27
28 template <class A> class ConstFst;
29
30 // States and arcs each implemented by single arrays, templated on the
31 // Arc definition.
32 template <class A>
33 class ConstFstImpl : public FstImpl<A> {
34 public:
35 using FstImpl<A>::SetType;
36 using FstImpl<A>::SetProperties;
37 using FstImpl<A>::Properties;
38 using FstImpl<A>::WriteHeaderAndSymbols;
39
40 typedef typename A::Weight Weight;
41 typedef typename A::StateId StateId;
42
ConstFstImpl()43 ConstFstImpl()
44 : states_(0), arcs_(0), nstates_(0), narcs_(0), start_(kNoStateId) {
45 SetType("const");
46 SetProperties(kNullProperties | kStaticProperties);
47 }
48
49 explicit ConstFstImpl(const Fst<A> &fst);
50
~ConstFstImpl()51 ~ConstFstImpl() {
52 delete[] states_;
53 delete[] arcs_;
54 }
55
Start()56 StateId Start() const { return start_; }
57
Final(StateId s)58 Weight Final(StateId s) const { return states_[s].final; }
59
NumStates()60 StateId NumStates() const { return nstates_; }
61
NumArcs(StateId s)62 size_t NumArcs(StateId s) const { return states_[s].narcs; }
63
NumInputEpsilons(StateId s)64 size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; }
65
NumOutputEpsilons(StateId s)66 size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; }
67
68 static ConstFstImpl<A> *Read(istream &strm, const FstReadOptions &opts);
69
70 bool Write(ostream &strm, const FstWriteOptions &opts) const;
71
Arcs(StateId s)72 A *Arcs(StateId s) { return arcs_ + states_[s].pos; }
73
74 // Provide information needed for generic state iterator
InitStateIterator(StateIteratorData<A> * data)75 void InitStateIterator(StateIteratorData<A> *data) const {
76 data->base = 0;
77 data->nstates = nstates_;
78 }
79
80 // Provide information needed for the generic arc iterator
InitArcIterator(StateId s,ArcIteratorData<A> * data)81 void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
82 data->base = 0;
83 data->arcs = arcs_ + states_[s].pos;
84 data->narcs = states_[s].narcs;
85 data->ref_count = 0;
86 }
87
88 private:
89 // States implemented by array *states_ below, arcs by (single) *arcs_.
90 struct State {
91 Weight final; // Final weight
92 uint32 pos; // Start of state's arcs in *arcs_
93 uint32 narcs; // Number of arcs (per state)
94 uint32 niepsilons; // # of input epsilons
95 uint32 noepsilons; // # of output epsilons
StateState96 State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {}
97 };
98
99 // Properties always true of this Fst class
100 static const uint64 kStaticProperties = kExpanded;
101 // Current file format version
102 static const int kFileVersion = 1;
103 // Minimum file format version supported
104 static const int kMinFileVersion = 1;
105 // Byte alignment for states and arcs in file format
106 static const int kFileAlign = 16;
107
108 State *states_; // States represenation
109 A *arcs_; // Arcs representation
110 StateId nstates_; // Number of states
111 size_t narcs_; // Number of arcs (per FST)
112 StateId start_; // Initial state
113
114 DISALLOW_EVIL_CONSTRUCTORS(ConstFstImpl);
115 };
116
117 template<class A>
ConstFstImpl(const Fst<A> & fst)118 ConstFstImpl<A>::ConstFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0) {
119 SetType("const");
120 uint64 copy_properties = fst.Properties(kCopyProperties, true);
121 SetProperties(copy_properties | kStaticProperties);
122 this->SetInputSymbols(fst.InputSymbols());
123 this->SetOutputSymbols(fst.OutputSymbols());
124 start_ = fst.Start();
125
126 // count # of states and arcs
127 for (StateIterator< Fst<A> > siter(fst);
128 !siter.Done();
129 siter.Next()) {
130 ++nstates_;
131 StateId s = siter.Value();
132 for (ArcIterator< Fst<A> > aiter(fst, s);
133 !aiter.Done();
134 aiter.Next())
135 ++narcs_;
136 }
137 states_ = new State[nstates_];
138 arcs_ = new A[narcs_];
139 size_t pos = 0;
140 for (StateId s = 0; s < nstates_; ++s) {
141 states_[s].final = fst.Final(s);
142 states_[s].pos = pos;
143 states_[s].narcs = 0;
144 states_[s].niepsilons = 0;
145 states_[s].noepsilons = 0;
146 for (ArcIterator< Fst<A> > aiter(fst, s);
147 !aiter.Done();
148 aiter.Next()) {
149 const A &arc = aiter.Value();
150 ++states_[s].narcs;
151 if (arc.ilabel == 0)
152 ++states_[s].niepsilons;
153 if (arc.olabel == 0)
154 ++states_[s].noepsilons;
155 arcs_[pos++] = arc;
156 }
157 }
158 }
159
160 template<class A>
Read(istream & strm,const FstReadOptions & opts)161 ConstFstImpl<A> *ConstFstImpl<A>::Read(istream &strm,
162 const FstReadOptions &opts) {
163 ConstFstImpl<A> *impl = new ConstFstImpl<A>;
164 FstHeader hdr;
165 if (!impl->ReadHeaderAndSymbols(strm, opts, kMinFileVersion, &hdr))
166 return 0;
167 impl->start_ = hdr.Start();
168 impl->nstates_ = hdr.NumStates();
169 impl->narcs_ = hdr.NumArcs();
170 impl->states_ = new State[impl->nstates_];
171 impl->arcs_ = new A[impl->narcs_];
172
173 char c;
174 for (int i = 0; i < kFileAlign && strm.tellg() % kFileAlign; ++i)
175 strm.read(&c, 1);
176 // TODO: memory map this
177 size_t b = impl->nstates_ * sizeof(typename ConstFstImpl<A>::State);
178 strm.read(reinterpret_cast<char *>(impl->states_), b);
179 if (!strm) {
180 LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
181 return 0;
182 }
183 // TODO: memory map this
184 b = impl->narcs_ * sizeof(A);
185 for (int i = 0; i < kFileAlign && strm.tellg() % kFileAlign; ++i)
186 strm.read(&c, 1);
187 strm.read(reinterpret_cast<char *>(impl->arcs_), b);
188 if (!strm) {
189 LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source;
190 return 0;
191 }
192 return impl;
193 }
194
195 template<class A>
Write(ostream & strm,const FstWriteOptions & opts)196 bool ConstFstImpl<A>::Write(ostream &strm,
197 const FstWriteOptions &opts) const {
198 FstHeader hdr;
199 hdr.SetStart(start_);
200 hdr.SetNumStates(nstates_);
201 hdr.SetNumArcs(narcs_);
202 WriteHeaderAndSymbols(strm, opts, kFileVersion, &hdr);
203 if (!strm)
204 return false;
205
206 for (int i = 0; i < kFileAlign && strm.tellp() % kFileAlign; ++i)
207 strm.write("", 1);
208 strm.write(reinterpret_cast<char *>(states_),
209 nstates_ * sizeof(State));
210 for (int i = 0; i < kFileAlign && strm.tellp() % kFileAlign; ++i)
211 strm.write("", 1);
212 strm.write(reinterpret_cast<char *>(arcs_), narcs_ * sizeof(A));
213 strm.flush();
214 if (!strm)
215 LOG(ERROR) << "ConstFst::Write: Write failed: " << opts.source;
216 return strm;
217 }
218
219 // Simple concrete immutable FST. This class attaches interface to
220 // implementation and handles reference counting.
221 template <class A>
222 class ConstFst : public ExpandedFst<A> {
223 public:
224 friend class StateIterator< ConstFst<A> >;
225 friend class ArcIterator< ConstFst<A> >;
226
227 typedef A Arc;
228 typedef typename A::Weight Weight;
229 typedef typename A::StateId StateId;
230
ConstFst()231 ConstFst() : impl_(new ConstFstImpl<A>()) {}
232
ConstFst(const ConstFst<A> & fst)233 ConstFst(const ConstFst<A> &fst) : impl_(fst.impl_) {
234 impl_->IncrRefCount();
235 }
236
ConstFst(const Fst<A> & fst)237 explicit ConstFst(const Fst<A> &fst) : impl_(new ConstFstImpl<A>(fst)) {}
238
~ConstFst()239 virtual ~ConstFst() { if (!impl_->DecrRefCount()) delete impl_; }
240
Start()241 virtual StateId Start() const { return impl_->Start(); }
242
Final(StateId s)243 virtual Weight Final(StateId s) const { return impl_->Final(s); }
244
NumStates()245 StateId NumStates() const { return impl_->NumStates(); }
246
NumArcs(StateId s)247 size_t NumArcs(StateId s) const { return impl_->NumArcs(s); }
248
NumInputEpsilons(StateId s)249 size_t NumInputEpsilons(StateId s) const {
250 return impl_->NumInputEpsilons(s);
251 }
252
NumOutputEpsilons(StateId s)253 size_t NumOutputEpsilons(StateId s) const {
254 return impl_->NumOutputEpsilons(s);
255 }
256
Properties(uint64 mask,bool test)257 virtual uint64 Properties(uint64 mask, bool test) const {
258 if (test) {
259 uint64 known, test = TestProperties(*this, mask, &known);
260 impl_->SetProperties(test, known);
261 return test & mask;
262 } else {
263 return impl_->Properties(mask);
264 }
265 }
266
Type()267 virtual const string& Type() const { return impl_->Type(); }
268
269 // Get a copy of this ConstFst
Copy()270 virtual ConstFst<A> *Copy() const {
271 impl_->IncrRefCount();
272 return new ConstFst<A>(impl_);
273 }
274
275 // Read a ConstFst from an input stream; return NULL on error
Read(istream & strm,const FstReadOptions & opts)276 static ConstFst<A> *Read(istream &strm, const FstReadOptions &opts) {
277 ConstFstImpl<A>* impl = ConstFstImpl<A>::Read(strm, opts);
278 return impl ? new ConstFst<A>(impl) : 0;
279 }
280
281 // Read a ConstFst from a file; returno NULL on error
Read(const string & filename)282 static ConstFst<A> *Read(const string &filename) {
283 ifstream strm(filename.c_str());
284 if (!strm) {
285 LOG(ERROR) << "ConstFst::Write: Can't open file: " << filename;
286 return 0;
287 }
288 return Read(strm, FstReadOptions(filename));
289 }
290
291 // Write a ConstFst to an output stream; return false on error
Write(ostream & strm,const FstWriteOptions & opts)292 virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
293 return impl_->Write(strm, opts);
294 }
295
296 // Write a ConstFst to a file; return false on error
Write(const string & filename)297 virtual bool Write(const string &filename) const {
298 if (!filename.empty()) {
299 ofstream strm(filename.c_str());
300 if (!strm) {
301 LOG(ERROR) << "ConstrFst::Write: Can't open file: " << filename;
302 return false;
303 }
304 return Write(strm, FstWriteOptions(filename));
305 } else {
306 return Write(std::cout, FstWriteOptions("standard output"));
307 }
308 }
309
InputSymbols()310 virtual const SymbolTable* InputSymbols() const {
311 return impl_->InputSymbols();
312 }
313
OutputSymbols()314 virtual const SymbolTable* OutputSymbols() const {
315 return impl_->OutputSymbols();
316 }
317
InitStateIterator(StateIteratorData<A> * data)318 virtual void InitStateIterator(StateIteratorData<A> *data) const {
319 impl_->InitStateIterator(data);
320 }
321
InitArcIterator(StateId s,ArcIteratorData<A> * data)322 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
323 impl_->InitArcIterator(s, data);
324 }
325
326 private:
ConstFst(ConstFstImpl<A> * impl)327 ConstFst(ConstFstImpl<A> *impl) : impl_(impl) {}
328
329 ConstFstImpl<A> *impl_; // FST's impl
330
331 void operator=(const ConstFst<A> &fst); // disallow
332 };
333
334 // Specialization for ConstFst; see generic version in fst.h
335 // for sample usage (but use the ConstFst type!). This version
336 // should inline.
337 template <class A>
338 class StateIterator< ConstFst<A> > {
339 public:
340 typedef typename A::StateId StateId;
341
StateIterator(const ConstFst<A> & fst)342 explicit StateIterator(const ConstFst<A> &fst)
343 : nstates_(fst.impl_->NumStates()), s_(0) {}
344
Done()345 bool Done() const { return s_ >= nstates_; }
346
Value()347 StateId Value() const { return s_; }
348
Next()349 void Next() { ++s_; }
350
Reset()351 void Reset() { s_ = 0; }
352
353 private:
354 StateId nstates_;
355 StateId s_;
356
357 DISALLOW_EVIL_CONSTRUCTORS(StateIterator);
358 };
359
360 // Specialization for ConstFst; see generic version in fst.h
361 // for sample usage (but use the ConstFst type!). This version
362 // should inline.
363 template <class A>
364 class ArcIterator< ConstFst<A> > {
365 public:
366 typedef typename A::StateId StateId;
367
ArcIterator(const ConstFst<A> & fst,StateId s)368 ArcIterator(const ConstFst<A> &fst, StateId s)
369 : arcs_(fst.impl_->Arcs(s)), narcs_(fst.impl_->NumArcs(s)), i_(0) {}
370
Done()371 bool Done() const { return i_ >= narcs_; }
372
Value()373 const A& Value() const { return arcs_[i_]; }
374
Next()375 void Next() { ++i_; }
376
Reset()377 void Reset() { i_ = 0; }
378
Seek(size_t a)379 void Seek(size_t a) { i_ = a; }
380
381 private:
382 const A *arcs_;
383 size_t narcs_;
384 size_t i_;
385
386 DISALLOW_EVIL_CONSTRUCTORS(ArcIterator);
387 };
388
389 // A useful alias when using StdArc.
390 typedef ConstFst<StdArc> StdConstFst;
391
392 } // namespace fst;
393
394 #endif // FST_LIB_CONST_FST_H__
395