1 // compact-fst.h
2
3
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // Copyright 2005-2010 Google, Inc.
17 // Author: allauzen@google.com (Cyril Allauzen)
18 //
19 // \file
20 // FST Class for memory-efficient representation of common types of
21 // FSTs: linear automata, acceptors, unweighted FSTs, ...
22
23 #ifndef FST_LIB_COMPACT_FST_H__
24 #define FST_LIB_COMPACT_FST_H__
25
26 #include <iterator>
27 #include <utility>
28 using std::pair; using std::make_pair;
29 #include <vector>
30 using std::vector;
31
32 #include <fst/cache.h>
33 #include <fst/expanded-fst.h>
34 #include <fst/fst-decl.h> // For optional argument declarations
35 #include <fst/mapped-file.h>
36 #include <fst/matcher.h>
37 #include <fst/test-properties.h>
38 #include <fst/util.h>
39
40
41 namespace fst {
42
43 struct CompactFstOptions : public CacheOptions {
44 // CompactFst default caching behaviour is to do no caching. Most
45 // compactors are cheap and therefore we save memory by not doing
46 // caching.
CompactFstOptionsCompactFstOptions47 CompactFstOptions() : CacheOptions(true, 0) {}
CompactFstOptionsCompactFstOptions48 CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
49 };
50
51 // Compactor Interface - class determinies how arcs and final weights
52 // are compacted and expanded.
53 //
54 // Final weights are treated as transitions to the superfinal state,
55 // i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
56 //
57 // There are two types of compactors:
58 //
59 // * Fixed out-degree compactors: 'compactor.Size()' returns a
60 // positive integer 's'. An FST can be compacted by this compactor
61 // only if each state has exactly 's' outgoing transitions (counting a
62 // non-Zero() final weight as a transition). A typical example is a
63 // compactor for string FSTs, i.e. 's == 1'.
64 //
65 // * Variable out-degree compactors: 'compactor.Size() == -1'. There
66 // are no out-degree restrictions for these compactors.
67 //
68 //
69 // class Compactor {
70 // public:
71 // // Element is the type of the compacted transitions.
72 // typedef ... Element;
73 // // Return the compacted representation of a transition 'arc'
74 // // at a state 's'.
75 // Element Compact(StateId s, const Arc &arc);
76 // // Return the transition at state 's' represented by the compacted
77 // // transition 'e'.
78 // Arc Expand(StateId s, const Element &e);
79 // // Return -1 for variable out-degree compactors, and the mandatory
80 // // out-degree otherwise.
81 // ssize_t Size();
82 // // Test whether 'fst' can be compacted by this compactor.
83 // bool Compatible(const Fst<A> &fst);
84 // // Return the properties that are always true for an fst
85 // // compacted using this compactor
86 // uint64 Properties();
87 // // Return a string identifying the type of compactor.
88 // static const string &Type();
89 // // Write a compactor to a file.
90 // bool Write(ostream &strm);
91 // // Read a compactor from a file.
92 // static Compactor *Read(istream &strm);
93 // // Default constructor (optional, see comment below).
94 // Compactor();
95 // };
96 //
97 // The default constructor is only required for FST_REGISTER to work
98 // (i.e. enabling Convert() and the command-line utilities to work
99 // with this new compactor). However, a default constructor always
100 // needs to be specify for this code to compile, but one can have it
101 // simply raised an error when called:
102 //
103 // Compactor::Compactor() {
104 // FSTERROR() << "Compactor: no default constructor";
105 // }
106
107
108 // Implementation data for Compact Fst, which can shared between otherwise
109 // independent copies.
110 //
111 // The implementation contains two arrays: 'states_' and 'compacts_'.
112 //
113 // For fixed out-degree compactors, the 'states_' array is unallocated.
114 // The 'compacts_' contains the compacted transitions. Its size is
115 // 'ncompacts_'. The outgoing transitions at a given state are stored
116 // consecutively. For a given state 's', its 'compactor.Size()' outgoing
117 // transitions (including superfinal transition when 's' is final), are
118 // stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()').
119 //
120 // For variable out-degree compactors, the states_ array has size
121 // 'nstates_ + 1' and contains pointers to positions into 'compacts_'.
122 // For a given state 's', the compacted transitions of 's' are
123 // stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'.
124 // By convention, 'states_[nstates_] == ncompacts_'.
125 //
126 // In both cases, the superfinal transitons (when 's' is final, i.e.
127 // 'Final(s) != Weight::Zero()') is stored first.
128 //
129 // The unsigned type U is used to represent indices into the compacts_
130 // array.
131 template <class E, class U>
132 class CompactFstData {
133 public:
134 typedef E CompactElement;
135 typedef U Unsigned;
136
CompactFstData()137 CompactFstData()
138 : states_region_(0),
139 compacts_region_(0),
140 states_(0),
141 compacts_(0),
142 nstates_(0),
143 ncompacts_(0),
144 narcs_(0),
145 start_(kNoStateId),
146 error_(false) {}
147
148 template <class A, class Compactor>
149 CompactFstData(const Fst<A> &fst, const Compactor &compactor);
150
151 template <class Iterator, class Compactor>
152 CompactFstData(const Iterator &begin, const Iterator &end,
153 const Compactor &compactor);
154
~CompactFstData()155 ~CompactFstData() {
156 if (states_region_ == NULL) {
157 delete [] states_;
158 }
159 delete states_region_;
160 if (compacts_region_ == NULL) {
161 delete [] compacts_;
162 }
163 delete compacts_region_;
164 }
165
166 template <class Compactor>
167 static CompactFstData<E, U> *Read(istream &strm,
168 const FstReadOptions &opts,
169 const FstHeader &hdr,
170 const Compactor &compactor);
171
172 bool Write(ostream &strm, const FstWriteOptions &opts) const;
173
States(ssize_t i)174 Unsigned States(ssize_t i) const { return states_[i]; }
Compacts(size_t i)175 const CompactElement &Compacts(size_t i) const { return compacts_[i]; }
NumStates()176 size_t NumStates() const { return nstates_; }
NumCompacts()177 size_t NumCompacts() const { return ncompacts_; }
NumArcs()178 size_t NumArcs() const { return narcs_; }
Start()179 ssize_t Start() const { return start_; }
180
RefCount()181 int RefCount() const { return ref_count_.count(); }
IncrRefCount()182 int IncrRefCount() { return ref_count_.Incr(); }
DecrRefCount()183 int DecrRefCount() { return ref_count_.Decr(); }
184
Error()185 bool Error() const { return error_; }
186
187 private:
188 MappedFile *states_region_;
189 MappedFile *compacts_region_;
190 Unsigned *states_;
191 CompactElement *compacts_;
192 size_t nstates_;
193 size_t ncompacts_;
194 size_t narcs_;
195 ssize_t start_;
196 RefCounter ref_count_;
197 bool error_;
198 };
199
200 template <class E, class U>
201 template <class A, class C>
CompactFstData(const Fst<A> & fst,const C & compactor)202 CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor)
203 : states_region_(0),
204 compacts_region_(0),
205 states_(0),
206 compacts_(0),
207 nstates_(0),
208 ncompacts_(0),
209 narcs_(0),
210 start_(kNoStateId),
211 error_(false) {
212 typedef typename A::StateId StateId;
213 typedef typename A::Weight Weight;
214 start_ = fst.Start();
215 // Count # of states and arcs.
216 StateId nfinals = 0;
217 for (StateIterator< Fst<A> > siter(fst);
218 !siter.Done();
219 siter.Next()) {
220 ++nstates_;
221 StateId s = siter.Value();
222 for (ArcIterator< Fst<A> > aiter(fst, s);
223 !aiter.Done();
224 aiter.Next())
225 ++narcs_;
226 if (fst.Final(s) != Weight::Zero()) ++nfinals;
227 }
228 if (compactor.Size() == -1) {
229 states_ = new Unsigned[nstates_ + 1];
230 ncompacts_ = narcs_ + nfinals;
231 compacts_ = new CompactElement[ncompacts_];
232 states_[nstates_] = ncompacts_;
233 } else {
234 states_ = 0;
235 ncompacts_ = nstates_ * compactor.Size();
236 if ((narcs_ + nfinals) != ncompacts_) {
237 FSTERROR() << "CompactFstData: compactor incompatible with fst";
238 error_ = true;
239 return;
240 }
241 compacts_ = new CompactElement[ncompacts_];
242 }
243 size_t pos = 0, fpos = 0;
244 for (StateId s = 0; s < nstates_; ++s) {
245 fpos = pos;
246 if (compactor.Size() == -1)
247 states_[s] = pos;
248 if (fst.Final(s) != Weight::Zero())
249 compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel,
250 fst.Final(s), kNoStateId));
251 for (ArcIterator< Fst<A> > aiter(fst, s);
252 !aiter.Done();
253 aiter.Next()) {
254 compacts_[pos++] = compactor.Compact(s, aiter.Value());
255 }
256 if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) {
257 FSTERROR() << "CompactFstData: compactor incompatible with fst";
258 error_ = true;
259 return;
260 }
261 }
262 if (pos != ncompacts_) {
263 FSTERROR() << "CompactFstData: compactor incompatible with fst";
264 error_ = true;
265 return;
266 }
267 }
268
269 template <class E, class U>
270 template <class Iterator, class C>
CompactFstData(const Iterator & begin,const Iterator & end,const C & compactor)271 CompactFstData<E, U>::CompactFstData(const Iterator &begin,
272 const Iterator &end,
273 const C &compactor)
274 : states_region_(0),
275 compacts_region_(0),
276 states_(0),
277 compacts_(0),
278 nstates_(0),
279 ncompacts_(0),
280 narcs_(0),
281 start_(kNoStateId),
282 error_(false) {
283 typedef typename C::Arc Arc;
284 typedef typename Arc::Weight Weight;
285 if (compactor.Size() != -1) {
286 ncompacts_ = distance(begin, end);
287 if (compactor.Size() == 1) {
288 // For strings, allow implicit final weight.
289 // Empty input is the empty string.
290 if (ncompacts_ == 0) {
291 ++ncompacts_;
292 } else {
293 Arc arc = compactor.Expand(ncompacts_ - 1,
294 *(begin + (ncompacts_ - 1)));
295 if (arc.ilabel != kNoLabel)
296 ++ncompacts_;
297 }
298 }
299 if (ncompacts_ % compactor.Size()) {
300 FSTERROR() << "CompactFstData: size of input container incompatible"
301 << " with compactor";
302 error_ = true;
303 return;
304 }
305 if (ncompacts_ == 0)
306 return;
307 start_ = 0;
308 nstates_ = ncompacts_ / compactor.Size();
309 compacts_ = new CompactElement[ncompacts_];
310 size_t i = 0;
311 Iterator it = begin;
312 for(; it != end; ++it, ++i){
313 compacts_[i] = *it;
314 if (compactor.Expand(i, *it).ilabel != kNoLabel)
315 ++narcs_;
316 }
317 if (i < ncompacts_)
318 compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel,
319 Weight::One(), kNoStateId));
320 } else {
321 if (distance(begin, end) == 0)
322 return;
323 // Count # of states, arcs and compacts.
324 Iterator it = begin;
325 for(size_t i = 0; it != end; ++it, ++i) {
326 Arc arc = compactor.Expand(i, *it);
327 if (arc.ilabel != kNoLabel) {
328 ++narcs_;
329 ++ncompacts_;
330 } else {
331 ++nstates_;
332 if (arc.weight != Weight::Zero())
333 ++ncompacts_;
334 }
335 }
336 start_ = 0;
337 compacts_ = new CompactElement[ncompacts_];
338 states_ = new Unsigned[nstates_ + 1];
339 states_[nstates_] = ncompacts_;
340 size_t i = 0, s = 0;
341 for(it = begin; it != end; ++it) {
342 Arc arc = compactor.Expand(i, *it);
343 if (arc.ilabel != kNoLabel) {
344 compacts_[i++] = *it;
345 } else {
346 states_[s++] = i;
347 if (arc.weight != Weight::Zero())
348 compacts_[i++] = *it;
349 }
350 }
351 if ((s != nstates_) || (i != ncompacts_)) {
352 FSTERROR() << "CompactFstData: ill-formed input container";
353 error_ = true;
354 return;
355 }
356 }
357 }
358
359 template <class E, class U>
360 template <class C>
Read(istream & strm,const FstReadOptions & opts,const FstHeader & hdr,const C & compactor)361 CompactFstData<E, U> *CompactFstData<E, U>::Read(
362 istream &strm,
363 const FstReadOptions &opts,
364 const FstHeader &hdr,
365 const C &compactor) {
366 CompactFstData<E, U> *data = new CompactFstData<E, U>();
367 data->start_ = hdr.Start();
368 data->nstates_ = hdr.NumStates();
369 data->narcs_ = hdr.NumArcs();
370
371 if (compactor.Size() == -1) {
372 if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
373 LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
374 delete data;
375 return 0;
376 }
377 size_t b = (data->nstates_ + 1) * sizeof(Unsigned);
378 data->states_region_ = MappedFile::Map(&strm, opts, b);
379 if (!strm || data->states_region_ == NULL) {
380 LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
381 delete data;
382 return 0;
383 }
384 data->states_ = static_cast<Unsigned *>(
385 data->states_region_->mutable_data());
386 } else {
387 data->states_ = 0;
388 }
389 data->ncompacts_ = compactor.Size() == -1
390 ? data->states_[data->nstates_]
391 : data->nstates_ * compactor.Size();
392 if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
393 LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
394 delete data;
395 return 0;
396 }
397 size_t b = data->ncompacts_ * sizeof(CompactElement);
398 data->compacts_region_ = MappedFile::Map(&strm, opts, b);
399 if (!strm || data->compacts_region_ == NULL) {
400 LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
401 delete data;
402 return 0;
403 }
404 data->compacts_ = static_cast<CompactElement *>(
405 data->compacts_region_->mutable_data());
406 return data;
407 }
408
409 template<class E, class U>
Write(ostream & strm,const FstWriteOptions & opts)410 bool CompactFstData<E, U>::Write(ostream &strm,
411 const FstWriteOptions &opts) const {
412 if (states_) {
413 if (opts.align && !AlignOutput(strm)) {
414 LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
415 return false;
416 }
417 strm.write(reinterpret_cast<char *>(states_),
418 (nstates_ + 1) * sizeof(Unsigned));
419 }
420 if (opts.align && !AlignOutput(strm)) {
421 LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
422 return false;
423 }
424 strm.write(reinterpret_cast<char *>(compacts_),
425 ncompacts_ * sizeof(CompactElement));
426
427 strm.flush();
428 if (!strm) {
429 LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
430 return false;
431 }
432 return true;
433 }
434
435 template <class A, class C, class U> class CompactFst;
436 template <class F, class G> void Cast(const F &, G *);
437
438 // Implementation class for CompactFst, which contains CompactFstData
439 // and Fst cache.
440 template <class A, class C, class U>
441 class CompactFstImpl : public CacheImpl<A> {
442 public:
443 using FstImpl<A>::SetType;
444 using FstImpl<A>::SetProperties;
445 using FstImpl<A>::Properties;
446 using FstImpl<A>::SetInputSymbols;
447 using FstImpl<A>::SetOutputSymbols;
448 using FstImpl<A>::WriteHeader;
449
450 using CacheImpl<A>::PushArc;
451 using CacheImpl<A>::HasArcs;
452 using CacheImpl<A>::HasFinal;
453 using CacheImpl<A>::HasStart;
454 using CacheImpl<A>::SetArcs;
455 using CacheImpl<A>::SetFinal;
456 using CacheImpl<A>::SetStart;
457
458 typedef A Arc;
459 typedef typename A::Weight Weight;
460 typedef typename A::StateId StateId;
461 typedef C Compactor;
462 typedef typename C::Element CompactElement;
463 typedef U Unsigned;
464
CompactFstImpl()465 CompactFstImpl()
466 : CacheImpl<A>(CompactFstOptions()),
467 compactor_(0),
468 own_compactor_(false),
469 data_(0) {
470 string type = "compact";
471 if (sizeof(U) != sizeof(uint32)) {
472 string size;
473 Int64ToStr(8 * sizeof(U), &size);
474 type += size;
475 }
476 type += "_";
477 type += C::Type();
478 SetType(type);
479 SetProperties(kNullProperties | kStaticProperties);
480 }
481
CompactFstImpl(const Fst<Arc> & fst,const C & compactor,const CompactFstOptions & opts)482 CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
483 const CompactFstOptions &opts)
484 : CacheImpl<A>(opts),
485 compactor_(new C(compactor)),
486 own_compactor_(true),
487 data_(0) {
488 Init(fst);
489 }
490
CompactFstImpl(const Fst<Arc> & fst,C * compactor,const CompactFstOptions & opts)491 CompactFstImpl(const Fst<Arc> &fst, C *compactor,
492 const CompactFstOptions &opts)
493 : CacheImpl<A>(opts),
494 compactor_(compactor),
495 own_compactor_(false),
496 data_(0) {
497 Init(fst);
498 }
499
500 template <class Iterator>
CompactFstImpl(const Iterator & b,const Iterator & e,const C & compactor,const CompactFstOptions & opts)501 CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
502 const CompactFstOptions &opts)
503 : CacheImpl<A>(opts),
504 compactor_(new C(compactor)),
505 own_compactor_(true),
506 data_(0) {
507 Init(b, e);
508 }
509
510 template <class Iterator>
CompactFstImpl(const Iterator & b,const Iterator & e,C * compactor,const CompactFstOptions & opts)511 CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
512 const CompactFstOptions &opts)
513 : CacheImpl<A>(opts),
514 compactor_(compactor),
515 own_compactor_(false),
516 data_(0) {
517 Init(b, e);
518 }
519
CompactFstImpl(const CompactFstImpl<A,C,U> & impl)520 CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
521 : CacheImpl<A>(impl),
522 compactor_(new C(*impl.compactor_)),
523 own_compactor_(true),
524 data_(impl.data_) {
525 if (data_)
526 data_->IncrRefCount();
527 SetType(impl.Type());
528 SetProperties(impl.Properties());
529 SetInputSymbols(impl.InputSymbols());
530 SetOutputSymbols(impl.OutputSymbols());
531 }
532
~CompactFstImpl()533 ~CompactFstImpl(){
534 if (own_compactor_)
535 delete compactor_;
536 if (data_ && !data_->DecrRefCount())
537 delete data_;
538 }
539
Start()540 StateId Start() {
541 if (!HasStart()) {
542 SetStart(data_->Start());
543 }
544 return CacheImpl<A>::Start();
545 }
546
Final(StateId s)547 Weight Final(StateId s) {
548 if (HasFinal(s))
549 return CacheImpl<A>::Final(s);
550 Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
551 if ((compactor_->Size() != -1) ||
552 (data_->States(s) != data_->States(s + 1)))
553 arc = ComputeArc(s,
554 compactor_->Size() == -1
555 ? data_->States(s)
556 : s * compactor_->Size());
557 return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero();
558 }
559
NumStates()560 StateId NumStates() const {
561 if (Properties(kError)) return 0;
562 return data_->NumStates();
563 }
564
NumArcs(StateId s)565 size_t NumArcs(StateId s) {
566 if (HasArcs(s))
567 return CacheImpl<A>::NumArcs(s);
568 Unsigned i, num_arcs;
569 if (compactor_->Size() == -1) {
570 i = data_->States(s);
571 num_arcs = data_->States(s + 1) - i;
572 } else {
573 i = s * compactor_->Size();
574 num_arcs = compactor_->Size();
575 }
576 if (num_arcs > 0) {
577 const A &arc = ComputeArc(s, i, kArcILabelValue);
578 if (arc.ilabel == kNoStateId) {
579 --num_arcs;
580 }
581 }
582 return num_arcs;
583 }
584
NumInputEpsilons(StateId s)585 size_t NumInputEpsilons(StateId s) {
586 if (!HasArcs(s) && !Properties(kILabelSorted))
587 Expand(s);
588 if (HasArcs(s))
589 return CacheImpl<A>::NumInputEpsilons(s);
590 return CountEpsilons(s, false);
591 }
592
NumOutputEpsilons(StateId s)593 size_t NumOutputEpsilons(StateId s) {
594 if (!HasArcs(s) && !Properties(kOLabelSorted))
595 Expand(s);
596 if (HasArcs(s))
597 return CacheImpl<A>::NumOutputEpsilons(s);
598 return CountEpsilons(s, true);
599 }
600
CountEpsilons(StateId s,bool output_epsilons)601 size_t CountEpsilons(StateId s, bool output_epsilons) {
602 size_t begin = compactor_->Size() == -1 ?
603 data_->States(s) : s * compactor_->Size();
604 size_t end = compactor_->Size() == -1 ?
605 data_->States(s + 1) : (s + 1) * compactor_->Size();
606 size_t num_eps = 0;
607 for (size_t i = begin; i < end; ++i) {
608 const A &arc = ComputeArc(
609 s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
610 const typename A::Label &label =
611 (output_epsilons ? arc.olabel : arc.ilabel);
612 if (label == kNoLabel)
613 continue;
614 else if (label > 0)
615 break;
616 ++num_eps;
617 }
618 return num_eps;
619 }
620
Read(istream & strm,const FstReadOptions & opts)621 static CompactFstImpl<A, C, U> *Read(istream &strm,
622 const FstReadOptions &opts) {
623 CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
624 FstHeader hdr;
625 if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
626 delete impl;
627 return 0;
628 }
629
630 // Ensures compatibility
631 if (hdr.Version() == kAlignedFileVersion)
632 hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
633
634 impl->compactor_ = C::Read(strm);
635 if (!impl->compactor_) {
636 delete impl;
637 return 0;
638 }
639 impl->own_compactor_ = true;
640 impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr,
641 *impl->compactor_);
642 if (!impl->data_) {
643 delete impl;
644 return 0;
645 }
646 return impl;
647 }
648
Write(ostream & strm,const FstWriteOptions & opts)649 bool Write(ostream &strm, const FstWriteOptions &opts) const {
650 FstHeader hdr;
651 hdr.SetStart(data_->Start());
652 hdr.SetNumStates(data_->NumStates());
653 hdr.SetNumArcs(data_->NumArcs());
654
655 // Ensures compatibility
656 int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
657 WriteHeader(strm, opts, file_version, &hdr);
658 compactor_->Write(strm);
659 return data_->Write(strm, opts);
660 }
661
662 // Provide information needed for generic state iterator
InitStateIterator(StateIteratorData<A> * data)663 void InitStateIterator(StateIteratorData<A> *data) const {
664 data->base = 0;
665 data->nstates = data_->NumStates();
666 }
667
InitArcIterator(StateId s,ArcIteratorData<A> * data)668 void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
669 if (!HasArcs(s))
670 Expand(s);
671 CacheImpl<A>::InitArcIterator(s, data);
672 }
673
674 Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
675 return compactor_->Expand(s, data_->Compacts(i), f);
676 }
677
Expand(StateId s)678 void Expand(StateId s) {
679 size_t begin = compactor_->Size() == -1 ?
680 data_->States(s) : s * compactor_->Size();
681 size_t end = compactor_->Size() == -1 ?
682 data_->States(s + 1) : (s + 1) * compactor_->Size();
683 for (size_t i = begin; i < end; ++i) {
684 const Arc &arc = ComputeArc(s, i);
685 if (arc.ilabel == kNoLabel)
686 SetFinal(s, arc.weight);
687 else
688 PushArc(s, arc);
689 }
690 if (!HasFinal(s))
691 SetFinal(s, Weight::Zero());
692 SetArcs(s);
693 }
694
695 template <class Iterator>
SetCompactElements(const Iterator & b,const Iterator & e)696 void SetCompactElements(const Iterator &b, const Iterator &e) {
697 if (data_ && !data_->DecrRefCount())
698 delete data_;
699 data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
700 }
701
GetCompactor()702 C *GetCompactor() const { return compactor_; }
Data()703 CompactFstData<CompactElement, U> *Data() const { return data_; }
704
705 // Properties always true of this Fst class
706 static const uint64 kStaticProperties = kExpanded;
707
708 protected:
709 template <class B, class D>
CompactFstImpl(const CompactFstImpl<B,D,U> & impl)710 explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
711 : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
712 compactor_(new C(*impl.GetCompactor())),
713 own_compactor_(true),
714 data_(impl.Data()) {
715 if (data_)
716 data_->IncrRefCount();
717 SetType(impl.Type());
718 SetProperties(impl.Properties());
719 SetInputSymbols(impl.InputSymbols());
720 SetOutputSymbols(impl.OutputSymbols());
721 }
722
723 private:
724 friend class CompactFst<A, C, U>; // allow access during write.
725
Init(const Fst<Arc> & fst)726 void Init(const Fst<Arc> &fst) {
727 string type = "compact";
728 if (sizeof(U) != sizeof(uint32)) {
729 string size;
730 Int64ToStr(8 * sizeof(U), &size);
731 type += size;
732 }
733 type += "_";
734 type += compactor_->Type();
735 SetType(type);
736 SetInputSymbols(fst.InputSymbols());
737 SetOutputSymbols(fst.OutputSymbols());
738 data_ = new CompactFstData<CompactElement, U>(fst, *compactor_);
739 if (data_->Error())
740 SetProperties(kError, kError);
741 uint64 copy_properties = fst.Properties(kCopyProperties, true);
742 if ((copy_properties & kError) || !compactor_->Compatible(fst)) {
743 FSTERROR() << "CompactFstImpl: input fst incompatible with compactor";
744 SetProperties(kError, kError);
745 return;
746 }
747 SetProperties(copy_properties | kStaticProperties);
748 }
749
750 template <class Iterator>
Init(const Iterator & b,const Iterator & e)751 void Init(const Iterator &b, const Iterator &e) {
752 string type = "compact";
753 if (sizeof(U) != sizeof(uint32)) {
754 string size;
755 Int64ToStr(8 * sizeof(U), &size);
756 type += size;
757 }
758 type += "_";
759 type += compactor_->Type();
760 SetType(type);
761 SetProperties(kStaticProperties | compactor_->Properties());
762 data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
763 if (data_->Error())
764 SetProperties(kError, kError);
765 }
766
767 // Current unaligned file format version
768 static const int kFileVersion = 2;
769 // Current aligned file format version
770 static const int kAlignedFileVersion = 1;
771 // Minimum file format version supported
772 static const int kMinFileVersion = 1;
773
774 C *compactor_;
775 bool own_compactor_;
776 CompactFstData<CompactElement, U> *data_;
777 };
778
779 template <class A, class C, class U>
780 const uint64 CompactFstImpl<A, C, U>::kStaticProperties;
781 template <class A, class C, class U>
782 const int CompactFstImpl<A, C, U>::kFileVersion;
783 template <class A, class C, class U>
784 const int CompactFstImpl<A, C, U>::kAlignedFileVersion;
785 template <class A, class C, class U>
786 const int CompactFstImpl<A, C, U>::kMinFileVersion;
787
788
789 // CompactFst. This class attaches interface to implementation and
790 // handles reference counting, delegating most methods to
791 // ImplToExpandedFst. The unsigned type U is used to represent indices
792 // into the compact arc array (uint32 by default, declared in
793 // fst-decl.h).
794 template <class A, class C, class U>
795 class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
796 public:
797 friend class StateIterator< CompactFst<A, C, U> >;
798 friend class ArcIterator< CompactFst<A, C, U> >;
799 template <class F, class G> void friend Cast(const F &, G *);
800
801 typedef A Arc;
802 typedef typename A::StateId StateId;
803 typedef CompactFstImpl<A, C, U> Impl;
804 typedef CacheState<A> State;
805 typedef U Unsigned;
806
CompactFst()807 CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}
808
809 explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),
810 const CompactFstOptions &opts = CompactFstOptions())
811 : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
812
813 CompactFst(const Fst<A> &fst, C *compactor,
814 const CompactFstOptions &opts = CompactFstOptions())
815 : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
816
817 // The following 2 constructors take as input two iterators delimiting
818 // a set of (already) compacted transitions, starting with the
819 // transitions out of the initial state. The format of the input
820 // differs for fixed out-degree and variable out-degree compactors.
821 //
822 // - For fixed out-degree compactors, the final weight (encoded as a
823 // compacted transition) needs to be given only for final
824 // states. All strings (compactor of size 1) will be assume to be
825 // terminated by a final state even when the final state is not
826 // implicitely given.
827 //
828 // - For variable out-degree compactors, the final weight (encoded
829 // as a compacted transition) needs to be given for all states and
830 // must appeared first in the list (for state s, final weight of s,
831 // followed by outgoing transitons in s).
832 //
833 // These 2 constructors allows the direct construction of a CompactFst
834 // without first creating a more memory hungry 'regular' FST. This
835 // is useful when memory usage is severely constrained.
836 template <class Iterator>
837 explicit CompactFst(const Iterator &begin, const Iterator &end,
838 const C &compactor = C(),
839 const CompactFstOptions &opts = CompactFstOptions())
840 : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
841
842 template <class Iterator>
843 CompactFst(const Iterator &begin, const Iterator &end,
844 C *compactor, const CompactFstOptions &opts = CompactFstOptions())
845 : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
846
847 // See Fst<>::Copy() for doc.
848 CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
849 : ImplToExpandedFst<Impl>(fst, safe) {}
850
851 // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
852 virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
853 return new CompactFst<A, C, U>(*this, safe);
854 }
855
856 // Read a CompactFst from an input stream; return NULL on error
Read(istream & strm,const FstReadOptions & opts)857 static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
858 Impl* impl = Impl::Read(strm, opts);
859 return impl ? new CompactFst<A, C, U>(impl) : 0;
860 }
861
862 // Read a CompactFst from a file; return NULL on error
863 // Empty filename reads from standard input
Read(const string & filename)864 static CompactFst<A, C, U> *Read(const string &filename) {
865 Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
866 return impl ? new CompactFst<A, C, U>(impl) : 0;
867 }
868
Write(ostream & strm,const FstWriteOptions & opts)869 virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
870 return GetImpl()->Write(strm, opts);
871 }
872
Write(const string & filename)873 virtual bool Write(const string &filename) const {
874 return Fst<A>::WriteFile(filename);
875 }
876
877 template <class F>
878 static bool WriteFst(const F &fst, const C &compactor, ostream &strm,
879 const FstWriteOptions &opts);
880
InitStateIterator(StateIteratorData<A> * data)881 virtual void InitStateIterator(StateIteratorData<A> *data) const {
882 GetImpl()->InitStateIterator(data);
883 }
884
InitArcIterator(StateId s,ArcIteratorData<A> * data)885 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
886 GetImpl()->InitArcIterator(s, data);
887 }
888
InitMatcher(MatchType match_type)889 virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
890 return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);
891 }
892
893 template <class Iterator>
SetCompactElements(const Iterator & b,const Iterator & e)894 void SetCompactElements(const Iterator &b, const Iterator &e) {
895 GetImpl()->SetCompactElements(b, e);
896 }
897
898 private:
CompactFst(Impl * impl)899 CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
900
901 // Makes visible to friends.
GetImpl()902 Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
903
904 void SetImpl(Impl *impl, bool own_impl = false) {
905 ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
906 }
907
908 // Use overloading to extract the type of the argument.
GetImplIfCompactFst(const CompactFst<A,C,U> & compact_fst)909 static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) {
910 return compact_fst.GetImpl();
911 }
912
913 // This does not give privileged treatment to subclasses of CompactFst.
914 template<typename NonCompactFst>
GetImplIfCompactFst(const NonCompactFst & fst)915 static Impl* GetImplIfCompactFst(const NonCompactFst& fst) {
916 return NULL;
917 }
918
919 void operator=(const CompactFst<A, C, U> &fst); // disallow
920 };
921
922 // Writes Fst in Compact format, potentially with a pass over the machine
923 // before writing to compute the number of states and arcs.
924 //
925 template <class A, class C, class U>
926 template <class F>
WriteFst(const F & fst,const C & compactor,ostream & strm,const FstWriteOptions & opts)927 bool CompactFst<A, C, U>::WriteFst(const F &fst,
928 const C &compactor,
929 ostream &strm,
930 const FstWriteOptions &opts) {
931 typedef U Unsigned;
932 typedef typename C::Element CompactElement;
933 typedef typename A::Weight Weight;
934 int file_version = opts.align ?
935 CompactFstImpl<A, C, U>::kAlignedFileVersion :
936 CompactFstImpl<A, C, U>::kFileVersion;
937 size_t num_arcs = -1, num_states = -1, num_compacts = -1;
938 C first_pass_compactor = compactor;
939 if (Impl* impl = GetImplIfCompactFst(fst)) {
940 num_arcs = impl->Data()->NumArcs();
941 num_states = impl->Data()->NumStates();
942 num_compacts = impl->Data()->NumCompacts();
943 first_pass_compactor = *impl->GetCompactor();
944 } else {
945 // A first pass is needed to compute the state of the compactor, which
946 // is saved ahead of the rest of the data structures. This unfortunately
947 // means forcing a complete double compaction when writing in this format.
948 // TODO(allauzen): eliminate mutable state from compactors.
949 num_arcs = 0;
950 num_states = 0;
951 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
952 const StateId s = siter.Value();
953 ++num_states;
954 if (fst.Final(s) != Weight::Zero()) {
955 first_pass_compactor.Compact(
956 s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
957 }
958 for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
959 ++num_arcs;
960 first_pass_compactor.Compact(s, aiter.Value());
961 }
962 }
963 }
964 FstHeader hdr;
965 hdr.SetStart(fst.Start());
966 hdr.SetNumStates(num_states);
967 hdr.SetNumArcs(num_arcs);
968 string type = "compact";
969 if (sizeof(U) != sizeof(uint32)) {
970 string size;
971 Int64ToStr(8 * sizeof(U), &size);
972 type += size;
973 }
974 type += "_";
975 type += C::Type();
976 uint64 copy_properties = fst.Properties(kCopyProperties, true);
977 if ((copy_properties & kError) || !compactor.Compatible(fst)) {
978 LOG(ERROR) << "fst incompatible with compactor";
979 return false;
980 }
981 uint64 properties = copy_properties |
982 CompactFstImpl<A, C, U>::kStaticProperties;
983 FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
984 &hdr);
985 first_pass_compactor.Write(strm);
986 if (first_pass_compactor.Size() == -1) {
987 if (opts.align && !AlignOutput(strm)) {
988 LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
989 return false;
990 }
991 Unsigned compacts = 0;
992 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
993 const StateId s = siter.Value();
994 strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
995 if (fst.Final(s) != Weight::Zero()) {
996 ++compacts;
997 }
998 compacts += fst.NumArcs(s);
999 }
1000 strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
1001 }
1002 if (opts.align && !AlignOutput(strm)) {
1003 LOG(ERROR) << "Could not align file during write after writing states";
1004 }
1005 C second_pass_compactor = compactor;
1006 CompactElement element;
1007 for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
1008 const StateId s = siter.Value();
1009 if (fst.Final(s) != Weight::Zero()) {
1010 element = second_pass_compactor.Compact(
1011 s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
1012 strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
1013 }
1014 for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
1015 element = second_pass_compactor.Compact(s, aiter.Value());
1016 strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
1017 }
1018 }
1019 strm.flush();
1020 if (!strm) {
1021 LOG(ERROR) << "CompactFst write failed: " << opts.source;
1022 return false;
1023 }
1024 return true;
1025 }
1026
1027
1028 // Specialization for CompactFst; see generic version in fst.h
1029 // for sample usage (but use the CompactFst type!). This version
1030 // should inline.
1031 template <class A, class C, class U>
1032 class StateIterator< CompactFst<A, C, U> > {
1033 public:
1034 typedef typename A::StateId StateId;
1035
StateIterator(const CompactFst<A,C,U> & fst)1036 explicit StateIterator(const CompactFst<A, C, U> &fst)
1037 : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
1038
Done()1039 bool Done() const { return s_ >= nstates_; }
1040
Value()1041 StateId Value() const { return s_; }
1042
Next()1043 void Next() { ++s_; }
1044
Reset()1045 void Reset() { s_ = 0; }
1046
1047 private:
1048 StateId nstates_;
1049 StateId s_;
1050
1051 DISALLOW_COPY_AND_ASSIGN(StateIterator);
1052 };
1053
1054 // Specialization for CompactFst.
1055 // Never caches, always iterates over the underlying compact elements.
1056 template <class A, class C, class U>
1057 class ArcIterator< CompactFst<A, C, U> > {
1058 public:
1059 typedef typename A::StateId StateId;
1060 typedef typename C::Element CompactElement;
1061
ArcIterator(const CompactFst<A,C,U> & fst,StateId s)1062 ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
1063 : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0),
1064 pos_(0), flags_(kArcValueFlags) {
1065
1066 const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data();
1067 size_t offset;
1068 if (compactor_->Size() == -1) { // Variable out-degree compactor
1069 offset = data->States(s);
1070 num_arcs_ = data->States(s + 1) - offset;
1071 } else { // Fixed out-degree compactor
1072 offset = s * compactor_->Size();
1073 num_arcs_ = compactor_->Size();
1074 }
1075 if (num_arcs_ > 0) {
1076 compacts_ = &(data->Compacts(offset));
1077 arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue);
1078 if (arc_.ilabel == kNoStateId) {
1079 ++compacts_;
1080 --num_arcs_;
1081 }
1082 }
1083 }
1084
~ArcIterator()1085 ~ArcIterator() {}
1086
Done()1087 bool Done() const { return pos_ >= num_arcs_; }
1088
Value()1089 const A& Value() const {
1090 arc_ = compactor_->Expand(state_, compacts_[pos_], flags_);
1091 return arc_;
1092 }
1093
Next()1094 void Next() { ++pos_; }
1095
Position()1096 size_t Position() const { return pos_; }
1097
Reset()1098 void Reset() { pos_ = 0; }
1099
Seek(size_t pos)1100 void Seek(size_t pos) { pos_ = pos; }
1101
Flags()1102 uint32 Flags() const { return flags_; }
1103
SetFlags(uint32 f,uint32 m)1104 void SetFlags(uint32 f, uint32 m) {
1105 flags_ &= ~m;
1106 flags_ |= (f & kArcValueFlags);
1107 }
1108
1109 private:
1110 C *compactor_;
1111 StateId state_;
1112 const CompactElement *compacts_;
1113 size_t pos_;
1114 size_t num_arcs_;
1115 mutable A arc_;
1116 uint32 flags_;
1117
1118 DISALLOW_COPY_AND_ASSIGN(ArcIterator);
1119 };
1120
1121 // // Specialization for CompactFst.
1122 // // This is an optionally caching arc iterator.
1123 // // TODO(allauzen): implements the kArcValueFlags, the current
1124 // // implementation only implements the kArcNoCache flag.
1125 // template <class A, class C, class U>
1126 // class ArcIterator< CompactFst<A, C, U> > {
1127 // public:
1128 // typedef typename A::StateId StateId;
1129
1130 // ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
1131 // : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0),
1132 // flags_(kArcValueFlags) {
1133 // cache_data_.ref_count = 0;
1134
1135 // if (fst_.GetImpl()->HasArcs(state_)) {
1136 // fst_.GetImpl()->InitArcIterator(s, &cache_data_);
1137 // num_arcs_ = cache_data_.narcs;
1138 // return;
1139 // }
1140
1141 // const C *compactor = fst_.GetImpl()->GetCompactor();
1142 // const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data();
1143 // if (compactor->Size() == -1) { // Variable out-degree compactor
1144 // offset_ = data->States(s);
1145 // num_arcs_ = data->States(s + 1) - offset_;
1146 // } else { // Fixed out-degree compactor
1147 // offset_ = s * compactor->Size();
1148 // num_arcs_ = compactor->Size();
1149 // }
1150 // if (num_arcs_ > 0) {
1151 // const A &arc = fst_.GetImpl()->ComputeArc(s, offset_);
1152 // if (arc.ilabel == kNoStateId) {
1153 // ++offset_;
1154 // --num_arcs_;
1155 // }
1156 // }
1157 // }
1158
1159
1160 // ~ArcIterator() {
1161 // if (cache_data_.ref_count)
1162 // --(*cache_data_.ref_count);
1163 // }
1164
1165 // bool Done() const { return pos_ >= num_arcs_; }
1166
1167 // const A& Value() const {
1168 // if (cache_data_.ref_count == 0) {
1169 // if (flags_ & kArcNoCache) {
1170 // arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_);
1171 // return arc_;
1172 // } else {
1173 // fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
1174 // }
1175 // }
1176 // return cache_data_.arcs[pos_];
1177 // }
1178
1179 // void Next() { ++pos_; }
1180
1181 // size_t Position() const { return pos_; }
1182
1183 // void Reset() { pos_ = 0; }
1184
1185 // void Seek(size_t pos) { pos_ = pos; }
1186
1187 // uint32 Flags() const { return flags_; }
1188
1189 // void SetFlags(uint32 f, uint32 m) {
1190 // flags_ &= ~m;
1191 // flags_ |= f;
1192
1193 // if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0)
1194 // fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
1195 // }
1196
1197 // private:
1198 // mutable const CompactFst<A, C, U> &fst_;
1199 // StateId state_;
1200 // size_t pos_;
1201 // size_t num_arcs_;
1202 // size_t offset_;
1203 // uint32 flags_;
1204 // mutable A arc_;
1205 // mutable ArcIteratorData<A> cache_data_;
1206
1207 // DISALLOW_COPY_AND_ASSIGN(ArcIterator);
1208 // };
1209
1210
1211 //
1212 // Utility Compactors
1213 //
1214
1215 // Compactor for unweighted string FSTs
1216 template <class A>
1217 class StringCompactor {
1218 public:
1219 typedef A Arc;
1220 typedef typename A::Label Element;
1221 typedef typename A::Label Label;
1222 typedef typename A::StateId StateId;
1223 typedef typename A::Weight Weight;
1224
Compact(StateId s,const A & arc)1225 Element Compact(StateId s, const A &arc) const { return arc.ilabel; }
1226
1227 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1228 return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId);
1229 }
1230
Size()1231 ssize_t Size() const { return 1; }
1232
Properties()1233 uint64 Properties() const {
1234 return kString | kAcceptor | kUnweighted;
1235 }
1236
Compatible(const Fst<A> & fst)1237 bool Compatible(const Fst<A> &fst) const {
1238 uint64 props = Properties();
1239 return fst.Properties(props, true) == props;
1240 }
1241
Type()1242 static const string &Type() {
1243 static const string type = "string";
1244 return type;
1245 }
1246
Write(ostream & strm)1247 bool Write(ostream &strm) const { return true; }
1248
Read(istream & strm)1249 static StringCompactor *Read(istream &strm) {
1250 return new StringCompactor;
1251 }
1252 };
1253
1254
1255 // Compactor for weighted string FSTs
1256 template <class A>
1257 class WeightedStringCompactor {
1258 public:
1259 typedef A Arc;
1260 typedef typename A::Label Label;
1261 typedef typename A::StateId StateId;
1262 typedef typename A::Weight Weight;
1263 typedef pair<Label, Weight> Element;
1264
Compact(StateId s,const A & arc)1265 Element Compact(StateId s, const A &arc) const {
1266 return make_pair(arc.ilabel, arc.weight);
1267 }
1268
1269 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1270 return Arc(p.first, p.first, p.second,
1271 p.first != kNoLabel ? s + 1 : kNoStateId);
1272 }
1273
Size()1274 ssize_t Size() const { return 1;}
1275
Properties()1276 uint64 Properties() const {
1277 return kString | kAcceptor;
1278 }
1279
Compatible(const Fst<A> & fst)1280 bool Compatible(const Fst<A> &fst) const {
1281 uint64 props = Properties();
1282 return fst.Properties(props, true) == props;
1283 }
1284
Type()1285 static const string &Type() {
1286 static const string type = "weighted_string";
1287 return type;
1288 }
1289
Write(ostream & strm)1290 bool Write(ostream &strm) const { return true; }
1291
Read(istream & strm)1292 static WeightedStringCompactor *Read(istream &strm) {
1293 return new WeightedStringCompactor;
1294 }
1295 };
1296
1297
1298 // Compactor for unweighted acceptor FSTs
1299 template <class A>
1300 class UnweightedAcceptorCompactor {
1301 public:
1302 typedef A Arc;
1303 typedef typename A::Label Label;
1304 typedef typename A::StateId StateId;
1305 typedef typename A::Weight Weight;
1306 typedef pair<Label, StateId> Element;
1307
Compact(StateId s,const A & arc)1308 Element Compact(StateId s, const A &arc) const {
1309 return make_pair(arc.ilabel, arc.nextstate);
1310 }
1311
1312 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1313 return Arc(p.first, p.first, Weight::One(), p.second);
1314 }
1315
Size()1316 ssize_t Size() const { return -1;}
1317
Properties()1318 uint64 Properties() const {
1319 return kAcceptor | kUnweighted;
1320 }
1321
Compatible(const Fst<A> & fst)1322 bool Compatible(const Fst<A> &fst) const {
1323 uint64 props = Properties();
1324 return fst.Properties(props, true) == props;
1325 }
1326
Type()1327 static const string &Type() {
1328 static const string type = "unweighted_acceptor";
1329 return type;
1330 }
1331
Write(ostream & strm)1332 bool Write(ostream &strm) const { return true; }
1333
Read(istream & istrm)1334 static UnweightedAcceptorCompactor *Read(istream &istrm) {
1335 return new UnweightedAcceptorCompactor;
1336 }
1337 };
1338
1339
1340 // Compactor for weighted acceptor FSTs
1341 template <class A>
1342 class AcceptorCompactor {
1343 public:
1344 typedef A Arc;
1345 typedef typename A::Label Label;
1346 typedef typename A::StateId StateId;
1347 typedef typename A::Weight Weight;
1348 typedef pair< pair<Label, Weight>, StateId > Element;
1349
Compact(StateId s,const A & arc)1350 Element Compact(StateId s, const A &arc) const {
1351 return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate);
1352 }
1353
1354 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1355 return Arc(p.first.first, p.first.first, p.first.second, p.second);
1356 }
1357
Size()1358 ssize_t Size() const { return -1;}
1359
Properties()1360 uint64 Properties() const {
1361 return kAcceptor;
1362 }
1363
Compatible(const Fst<A> & fst)1364 bool Compatible(const Fst<A> &fst) const {
1365 uint64 props = Properties();
1366 return fst.Properties(props, true) == props;
1367 }
1368
Type()1369 static const string &Type() {
1370 static const string type = "acceptor";
1371 return type;
1372 }
1373
Write(ostream & strm)1374 bool Write(ostream &strm) const { return true; }
1375
Read(istream & strm)1376 static AcceptorCompactor *Read(istream &strm) {
1377 return new AcceptorCompactor;
1378 }
1379 };
1380
1381
1382 // Compactor for unweighted FSTs
1383 template <class A>
1384 class UnweightedCompactor {
1385 public:
1386 typedef A Arc;
1387 typedef typename A::Label Label;
1388 typedef typename A::StateId StateId;
1389 typedef typename A::Weight Weight;
1390 typedef pair< pair<Label, Label>, StateId > Element;
1391
Compact(StateId s,const A & arc)1392 Element Compact(StateId s, const A &arc) const {
1393 return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate);
1394 }
1395
1396 Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1397 return Arc(p.first.first, p.first.second, Weight::One(), p.second);
1398 }
1399
Size()1400 ssize_t Size() const { return -1; }
1401
Properties()1402 uint64 Properties() const {
1403 return kUnweighted;
1404 }
1405
Compatible(const Fst<A> & fst)1406 bool Compatible(const Fst<A> &fst) const {
1407 uint64 props = Properties();
1408 return fst.Properties(props, true) == props;
1409 }
1410
Type()1411 static const string &Type() {
1412 static const string type = "unweighted";
1413 return type;
1414 }
1415
Write(ostream & strm)1416 bool Write(ostream &strm) const { return true; }
1417
Read(istream & strm)1418 static UnweightedCompactor *Read(istream &strm) {
1419 return new UnweightedCompactor;
1420 }
1421 };
1422
1423
1424 // Uselful aliases when using StdArc
1425 typedef CompactFst< StdArc, StringCompactor<StdArc> >
1426 StdCompactStringFst;
1427 typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
1428 StdCompactWeightedStringFst;
1429 typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
1430 StdCompactAcceptorFst;
1431 typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
1432 StdCompactUnweightedFst;
1433 typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
1434 StdCompactUnweightedAcceptorFst;
1435
1436 } // namespace fst
1437
1438 #endif // FST_LIB_COMPACT_FST_H__
1439