• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // compact-fst.h
2 
3 
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // Copyright 2005-2010 Google, Inc.
17 // Author: allauzen@google.com (Cyril Allauzen)
18 //
19 // \file
20 // FST Class for memory-efficient representation of common types of
21 // FSTs: linear automata, acceptors, unweighted FSTs, ...
22 
23 #ifndef FST_LIB_COMPACT_FST_H__
24 #define FST_LIB_COMPACT_FST_H__
25 
26 #include <iterator>
27 #include <utility>
28 using std::pair; using std::make_pair;
29 #include <vector>
30 using std::vector;
31 
32 #include <fst/cache.h>
33 #include <fst/expanded-fst.h>
34 #include <fst/fst-decl.h>  // For optional argument declarations
35 #include <fst/mapped-file.h>
36 #include <fst/matcher.h>
37 #include <fst/test-properties.h>
38 #include <fst/util.h>
39 
40 
41 namespace fst {
42 
43 struct CompactFstOptions : public CacheOptions {
44   // CompactFst default caching behaviour is to do no caching. Most
45   // compactors are cheap and therefore we save memory by not doing
46   // caching.
CompactFstOptionsCompactFstOptions47   CompactFstOptions() : CacheOptions(true, 0) {}
CompactFstOptionsCompactFstOptions48   CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
49 };
50 
51 // Compactor Interface - class determinies how arcs and final weights
52 // are compacted and expanded.
53 //
54 // Final weights are treated as transitions to the superfinal state,
55 // i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
56 //
57 // There are two types of compactors:
58 //
59 // * Fixed out-degree compactors: 'compactor.Size()' returns a
60 // positive integer 's'. An FST can be compacted by this compactor
61 // only if each state has exactly 's' outgoing transitions (counting a
62 // non-Zero() final weight as a transition). A typical example is a
63 // compactor for string FSTs, i.e. 's == 1'.
64 //
65 // * Variable out-degree compactors: 'compactor.Size() == -1'. There
66 // are no out-degree restrictions for these compactors.
67 //
68 //
69 // class Compactor {
70 //  public:
71 //   // Element is the type of the compacted transitions.
72 //   typedef ... Element;
73 //   // Return the compacted representation of a transition 'arc'
74 //   // at a state 's'.
75 //   Element Compact(StateId s, const Arc &arc);
76 //   // Return the transition at state 's' represented by the compacted
77 //   // transition 'e'.
78 //   Arc Expand(StateId s, const Element &e);
79 //   // Return -1 for variable out-degree compactors, and the mandatory
80 //   // out-degree otherwise.
81 //   ssize_t Size();
82 //   // Test whether 'fst' can be compacted by this compactor.
83 //   bool Compatible(const Fst<A> &fst);
84 //   // Return the properties that are always true for an fst
85 //   // compacted using this compactor
86 //   uint64 Properties();
87 //   // Return a string identifying the type of compactor.
88 //   static const string &Type();
89 //   // Write a compactor to a file.
90 //   bool Write(ostream &strm);
91 //   // Read a compactor from a file.
92 //   static Compactor *Read(istream &strm);
93 //   // Default constructor (optional, see comment below).
94 //   Compactor();
95 // };
96 //
97 // The default constructor is only required for FST_REGISTER to work
98 // (i.e. enabling Convert() and the command-line utilities to work
99 // with this new compactor). However, a default constructor always
100 // needs to be specify for this code to compile, but one can have it
101 // simply raised an error when called:
102 //
103 // Compactor::Compactor() {
104 //   FSTERROR() << "Compactor: no default constructor";
105 // }
106 
107 
108 // Implementation data for Compact Fst, which can shared between otherwise
109 // independent copies.
110 //
111 // The implementation contains two arrays: 'states_' and 'compacts_'.
112 //
113 // For fixed out-degree compactors, the 'states_' array is unallocated.
114 // The 'compacts_' contains the compacted transitions. Its size is
115 // 'ncompacts_'. The outgoing transitions at a given state are stored
116 // consecutively. For a given state 's', its 'compactor.Size()' outgoing
117 // transitions (including superfinal transition when 's' is final), are
118 // stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()').
119 //
120 // For variable out-degree compactors, the states_ array has size
121 // 'nstates_ + 1' and contains pointers to positions into 'compacts_'.
122 // For a given state 's', the compacted transitions of 's' are
123 // stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'.
124 // By convention, 'states_[nstates_] == ncompacts_'.
125 //
126 // In both cases, the superfinal transitons (when 's' is final, i.e.
127 // 'Final(s) != Weight::Zero()') is stored first.
128 //
129 // The unsigned type U is used to represent indices into the compacts_
130 // array.
131 template <class E, class U>
132 class CompactFstData {
133   public:
134   typedef E CompactElement;
135   typedef U Unsigned;
136 
CompactFstData()137   CompactFstData()
138       : states_region_(0),
139         compacts_region_(0),
140         states_(0),
141         compacts_(0),
142         nstates_(0),
143         ncompacts_(0),
144         narcs_(0),
145         start_(kNoStateId),
146         error_(false) {}
147 
148   template <class A, class Compactor>
149   CompactFstData(const Fst<A> &fst, const Compactor &compactor);
150 
151   template <class Iterator, class Compactor>
152   CompactFstData(const Iterator &begin, const Iterator &end,
153                  const Compactor &compactor);
154 
~CompactFstData()155   ~CompactFstData() {
156     if (states_region_ == NULL) {
157       delete [] states_;
158     }
159     delete states_region_;
160     if (compacts_region_ == NULL) {
161       delete [] compacts_;
162     }
163     delete compacts_region_;
164   }
165 
166   template <class Compactor>
167   static CompactFstData<E, U> *Read(istream &strm,
168                                        const FstReadOptions &opts,
169                                        const FstHeader &hdr,
170                                        const Compactor &compactor);
171 
172   bool Write(ostream &strm, const FstWriteOptions &opts) const;
173 
States(ssize_t i)174   Unsigned States(ssize_t i) const { return states_[i]; }
Compacts(size_t i)175   const CompactElement &Compacts(size_t i) const { return compacts_[i]; }
NumStates()176   size_t NumStates() const { return nstates_; }
NumCompacts()177   size_t NumCompacts() const { return ncompacts_; }
NumArcs()178   size_t NumArcs() const { return narcs_; }
Start()179   ssize_t Start() const { return start_; }
180 
RefCount()181   int RefCount() const { return ref_count_.count(); }
IncrRefCount()182   int IncrRefCount() { return ref_count_.Incr(); }
DecrRefCount()183   int DecrRefCount() { return ref_count_.Decr(); }
184 
Error()185   bool Error() const { return error_; }
186 
187  private:
188   MappedFile *states_region_;
189   MappedFile *compacts_region_;
190   Unsigned *states_;
191   CompactElement *compacts_;
192   size_t nstates_;
193   size_t ncompacts_;
194   size_t narcs_;
195   ssize_t start_;
196   RefCounter ref_count_;
197   bool error_;
198 };
199 
200 template <class E, class U>
201 template <class A, class C>
CompactFstData(const Fst<A> & fst,const C & compactor)202 CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor)
203     : states_region_(0),
204       compacts_region_(0),
205       states_(0),
206       compacts_(0),
207       nstates_(0),
208       ncompacts_(0),
209       narcs_(0),
210       start_(kNoStateId),
211       error_(false) {
212   typedef typename A::StateId StateId;
213   typedef typename A::Weight Weight;
214   start_ = fst.Start();
215   // Count # of states and arcs.
216   StateId nfinals = 0;
217   for (StateIterator< Fst<A> > siter(fst);
218        !siter.Done();
219        siter.Next()) {
220     ++nstates_;
221     StateId s = siter.Value();
222     for (ArcIterator< Fst<A> > aiter(fst, s);
223          !aiter.Done();
224          aiter.Next())
225       ++narcs_;
226     if (fst.Final(s) != Weight::Zero()) ++nfinals;
227   }
228   if (compactor.Size() == -1) {
229     states_ = new Unsigned[nstates_ + 1];
230     ncompacts_ = narcs_ + nfinals;
231     compacts_ = new CompactElement[ncompacts_];
232     states_[nstates_] = ncompacts_;
233   } else {
234     states_ = 0;
235     ncompacts_ = nstates_ * compactor.Size();
236     if ((narcs_ + nfinals) != ncompacts_) {
237       FSTERROR() << "CompactFstData: compactor incompatible with fst";
238       error_ = true;
239       return;
240     }
241     compacts_ = new CompactElement[ncompacts_];
242   }
243   size_t pos = 0, fpos = 0;
244   for (StateId s = 0; s < nstates_; ++s) {
245     fpos = pos;
246     if (compactor.Size() == -1)
247       states_[s] = pos;
248     if (fst.Final(s) != Weight::Zero())
249       compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel,
250                                                 fst.Final(s), kNoStateId));
251     for (ArcIterator< Fst<A> > aiter(fst, s);
252          !aiter.Done();
253          aiter.Next()) {
254       compacts_[pos++] = compactor.Compact(s, aiter.Value());
255     }
256     if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) {
257       FSTERROR() << "CompactFstData: compactor incompatible with fst";
258       error_ = true;
259       return;
260     }
261   }
262   if (pos != ncompacts_) {
263     FSTERROR() << "CompactFstData: compactor incompatible with fst";
264     error_ = true;
265     return;
266   }
267 }
268 
269 template <class E, class U>
270 template <class Iterator, class C>
CompactFstData(const Iterator & begin,const Iterator & end,const C & compactor)271 CompactFstData<E, U>::CompactFstData(const Iterator &begin,
272                                      const Iterator &end,
273                                      const C &compactor)
274     : states_region_(0),
275       compacts_region_(0),
276       states_(0),
277       compacts_(0),
278       nstates_(0),
279       ncompacts_(0),
280       narcs_(0),
281       start_(kNoStateId),
282       error_(false) {
283   typedef typename C::Arc Arc;
284   typedef typename Arc::Weight Weight;
285   if (compactor.Size() != -1) {
286     ncompacts_ = distance(begin, end);
287     if (compactor.Size() == 1) {
288       // For strings, allow implicit final weight.
289       // Empty input is the empty string.
290       if (ncompacts_ == 0) {
291         ++ncompacts_;
292       } else {
293         Arc arc = compactor.Expand(ncompacts_ - 1,
294                                       *(begin + (ncompacts_ - 1)));
295         if (arc.ilabel != kNoLabel)
296           ++ncompacts_;
297       }
298     }
299     if (ncompacts_ % compactor.Size()) {
300       FSTERROR() << "CompactFstData: size of input container incompatible"
301                  << " with compactor";
302       error_ = true;
303       return;
304     }
305     if (ncompacts_ == 0)
306       return;
307     start_ = 0;
308     nstates_ = ncompacts_ / compactor.Size();
309     compacts_ = new CompactElement[ncompacts_];
310     size_t i = 0;
311     Iterator it = begin;
312     for(; it != end; ++it, ++i){
313       compacts_[i] = *it;
314       if (compactor.Expand(i, *it).ilabel != kNoLabel)
315         ++narcs_;
316     }
317     if (i < ncompacts_)
318       compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel,
319                                               Weight::One(), kNoStateId));
320   } else {
321     if (distance(begin, end) == 0)
322       return;
323     // Count # of states, arcs and compacts.
324     Iterator it = begin;
325     for(size_t i = 0; it != end; ++it, ++i) {
326       Arc arc = compactor.Expand(i, *it);
327       if (arc.ilabel != kNoLabel) {
328         ++narcs_;
329         ++ncompacts_;
330       } else {
331         ++nstates_;
332         if (arc.weight != Weight::Zero())
333           ++ncompacts_;
334       }
335     }
336     start_ = 0;
337     compacts_ = new CompactElement[ncompacts_];
338     states_ = new Unsigned[nstates_ + 1];
339     states_[nstates_] = ncompacts_;
340     size_t i = 0, s = 0;
341     for(it = begin; it != end; ++it) {
342       Arc arc = compactor.Expand(i, *it);
343       if (arc.ilabel != kNoLabel) {
344         compacts_[i++] = *it;
345       } else {
346         states_[s++] = i;
347         if (arc.weight != Weight::Zero())
348           compacts_[i++] = *it;
349       }
350     }
351     if ((s != nstates_) || (i != ncompacts_)) {
352       FSTERROR() << "CompactFstData: ill-formed input container";
353       error_ = true;
354       return;
355     }
356   }
357 }
358 
359 template <class E, class U>
360 template <class C>
Read(istream & strm,const FstReadOptions & opts,const FstHeader & hdr,const C & compactor)361 CompactFstData<E, U> *CompactFstData<E, U>::Read(
362     istream &strm,
363     const FstReadOptions &opts,
364     const FstHeader &hdr,
365     const C &compactor) {
366   CompactFstData<E, U> *data = new CompactFstData<E, U>();
367   data->start_ = hdr.Start();
368   data->nstates_ = hdr.NumStates();
369   data->narcs_ = hdr.NumArcs();
370 
371   if (compactor.Size() == -1) {
372     if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
373       LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
374       delete data;
375       return 0;
376     }
377     size_t b = (data->nstates_ + 1) * sizeof(Unsigned);
378     data->states_region_ = MappedFile::Map(&strm, opts, b);
379     if (!strm || data->states_region_ == NULL) {
380       LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
381       delete data;
382       return 0;
383     }
384     data->states_ = static_cast<Unsigned *>(
385         data->states_region_->mutable_data());
386   } else {
387     data->states_ = 0;
388   }
389   data->ncompacts_ = compactor.Size() == -1
390       ? data->states_[data->nstates_]
391       : data->nstates_ * compactor.Size();
392   if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
393     LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
394     delete data;
395     return 0;
396   }
397   size_t b = data->ncompacts_ * sizeof(CompactElement);
398   data->compacts_region_ = MappedFile::Map(&strm, opts, b);
399   if (!strm || data->compacts_region_ == NULL) {
400     LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
401     delete data;
402     return 0;
403   }
404   data->compacts_ = static_cast<CompactElement *>(
405       data->compacts_region_->mutable_data());
406   return data;
407 }
408 
409 template<class E, class U>
Write(ostream & strm,const FstWriteOptions & opts)410 bool CompactFstData<E, U>::Write(ostream &strm,
411                                  const FstWriteOptions &opts) const {
412   if (states_) {
413     if (opts.align && !AlignOutput(strm)) {
414       LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
415       return false;
416     }
417     strm.write(reinterpret_cast<char *>(states_),
418                (nstates_ + 1) * sizeof(Unsigned));
419   }
420   if (opts.align && !AlignOutput(strm)) {
421     LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
422     return false;
423   }
424   strm.write(reinterpret_cast<char *>(compacts_),
425              ncompacts_ * sizeof(CompactElement));
426 
427   strm.flush();
428   if (!strm) {
429     LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
430     return false;
431   }
432   return true;
433 }
434 
435 template <class A, class C, class U> class CompactFst;
436 template <class F, class G> void Cast(const F &, G *);
437 
438 // Implementation class for CompactFst, which contains CompactFstData
439 // and Fst cache.
440 template <class A, class C, class U>
441 class CompactFstImpl : public CacheImpl<A> {
442  public:
443   using FstImpl<A>::SetType;
444   using FstImpl<A>::SetProperties;
445   using FstImpl<A>::Properties;
446   using FstImpl<A>::SetInputSymbols;
447   using FstImpl<A>::SetOutputSymbols;
448   using FstImpl<A>::WriteHeader;
449 
450   using CacheImpl<A>::PushArc;
451   using CacheImpl<A>::HasArcs;
452   using CacheImpl<A>::HasFinal;
453   using CacheImpl<A>::HasStart;
454   using CacheImpl<A>::SetArcs;
455   using CacheImpl<A>::SetFinal;
456   using CacheImpl<A>::SetStart;
457 
458   typedef A Arc;
459   typedef typename A::Weight Weight;
460   typedef typename A::StateId StateId;
461   typedef C Compactor;
462   typedef typename C::Element CompactElement;
463   typedef U Unsigned;
464 
CompactFstImpl()465   CompactFstImpl()
466       :  CacheImpl<A>(CompactFstOptions()),
467          compactor_(0),
468          own_compactor_(false),
469          data_(0) {
470     string type = "compact";
471     if (sizeof(U) != sizeof(uint32)) {
472       string size;
473       Int64ToStr(8 * sizeof(U), &size);
474       type += size;
475     }
476     type += "_";
477     type += C::Type();
478     SetType(type);
479     SetProperties(kNullProperties | kStaticProperties);
480   }
481 
CompactFstImpl(const Fst<Arc> & fst,const C & compactor,const CompactFstOptions & opts)482   CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
483                  const CompactFstOptions &opts)
484       : CacheImpl<A>(opts),
485         compactor_(new C(compactor)),
486         own_compactor_(true),
487         data_(0) {
488     Init(fst);
489   }
490 
CompactFstImpl(const Fst<Arc> & fst,C * compactor,const CompactFstOptions & opts)491   CompactFstImpl(const Fst<Arc> &fst, C *compactor,
492                  const CompactFstOptions &opts)
493       : CacheImpl<A>(opts),
494         compactor_(compactor),
495         own_compactor_(false),
496         data_(0) {
497     Init(fst);
498   }
499 
500   template <class Iterator>
CompactFstImpl(const Iterator & b,const Iterator & e,const C & compactor,const CompactFstOptions & opts)501   CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
502                  const CompactFstOptions &opts)
503       : CacheImpl<A>(opts),
504         compactor_(new C(compactor)),
505         own_compactor_(true),
506         data_(0) {
507     Init(b, e);
508   }
509 
510   template <class Iterator>
CompactFstImpl(const Iterator & b,const Iterator & e,C * compactor,const CompactFstOptions & opts)511   CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
512                  const CompactFstOptions &opts)
513       : CacheImpl<A>(opts),
514         compactor_(compactor),
515         own_compactor_(false),
516         data_(0) {
517     Init(b, e);
518   }
519 
CompactFstImpl(const CompactFstImpl<A,C,U> & impl)520   CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
521       : CacheImpl<A>(impl),
522         compactor_(new C(*impl.compactor_)),
523         own_compactor_(true),
524         data_(impl.data_) {
525     if (data_)
526       data_->IncrRefCount();
527     SetType(impl.Type());
528     SetProperties(impl.Properties());
529     SetInputSymbols(impl.InputSymbols());
530     SetOutputSymbols(impl.OutputSymbols());
531   }
532 
~CompactFstImpl()533   ~CompactFstImpl(){
534     if (own_compactor_)
535       delete compactor_;
536     if (data_ && !data_->DecrRefCount())
537       delete data_;
538   }
539 
Start()540   StateId Start() {
541     if (!HasStart()) {
542       SetStart(data_->Start());
543     }
544     return CacheImpl<A>::Start();
545   }
546 
Final(StateId s)547   Weight Final(StateId s) {
548     if (HasFinal(s))
549       return CacheImpl<A>::Final(s);
550     Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
551     if ((compactor_->Size() != -1) ||
552         (data_->States(s) != data_->States(s + 1)))
553       arc = ComputeArc(s,
554                        compactor_->Size() == -1
555                        ? data_->States(s)
556                        : s * compactor_->Size());
557     return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero();
558   }
559 
NumStates()560   StateId NumStates() const {
561     if (Properties(kError)) return 0;
562     return data_->NumStates();
563   }
564 
NumArcs(StateId s)565   size_t NumArcs(StateId s) {
566     if (HasArcs(s))
567       return CacheImpl<A>::NumArcs(s);
568     Unsigned i, num_arcs;
569     if (compactor_->Size() == -1) {
570       i = data_->States(s);
571       num_arcs = data_->States(s + 1) - i;
572     } else {
573       i =  s * compactor_->Size();
574       num_arcs = compactor_->Size();
575     }
576     if (num_arcs > 0) {
577       const A &arc = ComputeArc(s, i, kArcILabelValue);
578       if (arc.ilabel == kNoStateId) {
579         --num_arcs;
580       }
581     }
582     return num_arcs;
583   }
584 
NumInputEpsilons(StateId s)585   size_t NumInputEpsilons(StateId s) {
586     if (!HasArcs(s) && !Properties(kILabelSorted))
587       Expand(s);
588     if (HasArcs(s))
589       return CacheImpl<A>::NumInputEpsilons(s);
590     return CountEpsilons(s, false);
591   }
592 
NumOutputEpsilons(StateId s)593   size_t NumOutputEpsilons(StateId s) {
594     if (!HasArcs(s) && !Properties(kOLabelSorted))
595       Expand(s);
596     if (HasArcs(s))
597       return CacheImpl<A>::NumOutputEpsilons(s);
598     return CountEpsilons(s, true);
599   }
600 
CountEpsilons(StateId s,bool output_epsilons)601   size_t CountEpsilons(StateId s, bool output_epsilons) {
602     size_t begin =  compactor_->Size() == -1 ?
603         data_->States(s) : s * compactor_->Size();
604     size_t end = compactor_->Size() == -1 ?
605         data_->States(s + 1) : (s + 1) * compactor_->Size();
606     size_t num_eps = 0;
607     for (size_t i = begin; i < end; ++i) {
608       const A &arc = ComputeArc(
609           s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
610       const typename A::Label &label =
611           (output_epsilons ? arc.olabel : arc.ilabel);
612       if (label == kNoLabel)
613         continue;
614       else if (label > 0)
615         break;
616       ++num_eps;
617     }
618     return num_eps;
619   }
620 
Read(istream & strm,const FstReadOptions & opts)621   static CompactFstImpl<A, C, U> *Read(istream &strm,
622                                        const FstReadOptions &opts) {
623     CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
624     FstHeader hdr;
625     if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
626       delete impl;
627       return 0;
628     }
629 
630     // Ensures compatibility
631     if (hdr.Version() == kAlignedFileVersion)
632       hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
633 
634     impl->compactor_ = C::Read(strm);
635     if (!impl->compactor_) {
636       delete impl;
637       return 0;
638     }
639     impl->own_compactor_ = true;
640     impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr,
641                                                           *impl->compactor_);
642     if (!impl->data_) {
643       delete impl;
644       return 0;
645     }
646     return impl;
647   }
648 
Write(ostream & strm,const FstWriteOptions & opts)649   bool Write(ostream &strm, const FstWriteOptions &opts) const {
650     FstHeader hdr;
651     hdr.SetStart(data_->Start());
652     hdr.SetNumStates(data_->NumStates());
653     hdr.SetNumArcs(data_->NumArcs());
654 
655     // Ensures compatibility
656     int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
657     WriteHeader(strm, opts, file_version, &hdr);
658     compactor_->Write(strm);
659     return data_->Write(strm, opts);
660   }
661 
662   // Provide information needed for generic state iterator
InitStateIterator(StateIteratorData<A> * data)663   void InitStateIterator(StateIteratorData<A> *data) const {
664     data->base = 0;
665     data->nstates = data_->NumStates();
666   }
667 
InitArcIterator(StateId s,ArcIteratorData<A> * data)668   void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
669     if (!HasArcs(s))
670       Expand(s);
671     CacheImpl<A>::InitArcIterator(s, data);
672   }
673 
674   Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
675     return compactor_->Expand(s, data_->Compacts(i), f);
676   }
677 
Expand(StateId s)678   void Expand(StateId s) {
679     size_t begin =  compactor_->Size() == -1 ?
680         data_->States(s) : s * compactor_->Size();
681     size_t end = compactor_->Size() == -1 ?
682         data_->States(s + 1) : (s + 1) * compactor_->Size();
683     for (size_t i = begin; i < end; ++i) {
684       const Arc &arc = ComputeArc(s, i);
685       if (arc.ilabel == kNoLabel)
686         SetFinal(s, arc.weight);
687       else
688         PushArc(s, arc);
689     }
690     if (!HasFinal(s))
691       SetFinal(s, Weight::Zero());
692     SetArcs(s);
693   }
694 
695   template <class Iterator>
SetCompactElements(const Iterator & b,const Iterator & e)696   void SetCompactElements(const Iterator &b, const Iterator &e) {
697     if (data_ && !data_->DecrRefCount())
698       delete data_;
699     data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
700   }
701 
GetCompactor()702   C *GetCompactor() const { return compactor_; }
Data()703   CompactFstData<CompactElement, U> *Data() const { return data_; }
704 
705   // Properties always true of this Fst class
706   static const uint64 kStaticProperties = kExpanded;
707 
708  protected:
709   template <class B, class D>
CompactFstImpl(const CompactFstImpl<B,D,U> & impl)710   explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
711       : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
712         compactor_(new C(*impl.GetCompactor())),
713         own_compactor_(true),
714         data_(impl.Data()) {
715     if (data_)
716       data_->IncrRefCount();
717     SetType(impl.Type());
718     SetProperties(impl.Properties());
719     SetInputSymbols(impl.InputSymbols());
720     SetOutputSymbols(impl.OutputSymbols());
721   }
722 
723  private:
724   friend class CompactFst<A, C, U>;  // allow access during write.
725 
Init(const Fst<Arc> & fst)726   void Init(const Fst<Arc> &fst) {
727     string type = "compact";
728     if (sizeof(U) != sizeof(uint32)) {
729       string size;
730       Int64ToStr(8 * sizeof(U), &size);
731       type += size;
732     }
733     type += "_";
734     type += compactor_->Type();
735     SetType(type);
736     SetInputSymbols(fst.InputSymbols());
737     SetOutputSymbols(fst.OutputSymbols());
738     data_ = new CompactFstData<CompactElement, U>(fst, *compactor_);
739     if (data_->Error())
740       SetProperties(kError, kError);
741     uint64 copy_properties = fst.Properties(kCopyProperties, true);
742     if ((copy_properties & kError) || !compactor_->Compatible(fst)) {
743       FSTERROR() << "CompactFstImpl: input fst incompatible with compactor";
744       SetProperties(kError, kError);
745       return;
746     }
747     SetProperties(copy_properties | kStaticProperties);
748   }
749 
750   template <class Iterator>
Init(const Iterator & b,const Iterator & e)751   void Init(const Iterator &b, const Iterator &e) {
752     string type = "compact";
753     if (sizeof(U) != sizeof(uint32)) {
754       string size;
755       Int64ToStr(8 * sizeof(U), &size);
756       type += size;
757     }
758     type += "_";
759     type += compactor_->Type();
760     SetType(type);
761     SetProperties(kStaticProperties | compactor_->Properties());
762     data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
763     if (data_->Error())
764       SetProperties(kError, kError);
765   }
766 
767   // Current unaligned file format version
768   static const int kFileVersion = 2;
769   // Current aligned file format version
770   static const int kAlignedFileVersion = 1;
771   // Minimum file format version supported
772   static const int kMinFileVersion = 1;
773 
774   C *compactor_;
775   bool own_compactor_;
776   CompactFstData<CompactElement, U> *data_;
777 };
778 
779 template <class A, class C, class U>
780 const uint64 CompactFstImpl<A, C, U>::kStaticProperties;
781 template <class A, class C, class U>
782 const int CompactFstImpl<A, C, U>::kFileVersion;
783 template <class A, class C, class U>
784 const int CompactFstImpl<A, C, U>::kAlignedFileVersion;
785 template <class A, class C, class U>
786 const int CompactFstImpl<A, C, U>::kMinFileVersion;
787 
788 
789 // CompactFst.  This class attaches interface to implementation and
790 // handles reference counting, delegating most methods to
791 // ImplToExpandedFst. The unsigned type U is used to represent indices
792 // into the compact arc array (uint32 by default, declared in
793 // fst-decl.h).
794 template <class A, class C, class U>
795 class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
796  public:
797   friend class StateIterator< CompactFst<A, C, U> >;
798   friend class ArcIterator< CompactFst<A, C, U> >;
799   template <class F, class G> void friend Cast(const F &, G *);
800 
801   typedef A Arc;
802   typedef typename A::StateId StateId;
803   typedef CompactFstImpl<A, C, U> Impl;
804   typedef CacheState<A> State;
805   typedef U Unsigned;
806 
CompactFst()807   CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}
808 
809   explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),
810                       const CompactFstOptions &opts = CompactFstOptions())
811       : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
812 
813   CompactFst(const Fst<A> &fst, C *compactor,
814              const CompactFstOptions &opts = CompactFstOptions())
815       : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
816 
817   // The following 2 constructors take as input two iterators delimiting
818   // a set of (already) compacted transitions, starting with the
819   // transitions out of the initial state. The format of the input
820   // differs for fixed out-degree and variable out-degree compactors.
821   //
822   // - For fixed out-degree compactors, the final weight (encoded as a
823   // compacted transition) needs to be given only for final
824   // states. All strings (compactor of size 1) will be assume to be
825   // terminated by a final state even when the final state is not
826   // implicitely given.
827   //
828   // - For variable out-degree compactors, the final weight (encoded
829   // as a compacted transition) needs to be given for all states and
830   // must appeared first in the list (for state s, final weight of s,
831   // followed by outgoing transitons in s).
832   //
833   // These 2 constructors allows the direct construction of a CompactFst
834   // without first creating a more memory hungry 'regular' FST. This
835   // is useful when memory usage is severely constrained.
836   template <class Iterator>
837   explicit CompactFst(const Iterator &begin, const Iterator &end,
838                       const C &compactor = C(),
839                       const CompactFstOptions &opts = CompactFstOptions())
840       : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
841 
842   template <class Iterator>
843   CompactFst(const Iterator &begin, const Iterator &end,
844              C *compactor, const CompactFstOptions &opts = CompactFstOptions())
845       : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
846 
847   // See Fst<>::Copy() for doc.
848   CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
849       : ImplToExpandedFst<Impl>(fst, safe) {}
850 
851   // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
852   virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
853     return new CompactFst<A, C, U>(*this, safe);
854   }
855 
856   // Read a CompactFst from an input stream; return NULL on error
Read(istream & strm,const FstReadOptions & opts)857   static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
858     Impl* impl = Impl::Read(strm, opts);
859     return impl ? new CompactFst<A, C, U>(impl) : 0;
860   }
861 
862   // Read a CompactFst from a file; return NULL on error
863   // Empty filename reads from standard input
Read(const string & filename)864   static CompactFst<A, C, U> *Read(const string &filename) {
865     Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
866     return impl ? new CompactFst<A, C, U>(impl) : 0;
867   }
868 
Write(ostream & strm,const FstWriteOptions & opts)869   virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
870     return GetImpl()->Write(strm, opts);
871   }
872 
Write(const string & filename)873   virtual bool Write(const string &filename) const {
874     return Fst<A>::WriteFile(filename);
875   }
876 
877   template <class F>
878   static bool WriteFst(const F &fst, const C &compactor, ostream &strm,
879                        const FstWriteOptions &opts);
880 
InitStateIterator(StateIteratorData<A> * data)881   virtual void InitStateIterator(StateIteratorData<A> *data) const {
882     GetImpl()->InitStateIterator(data);
883   }
884 
InitArcIterator(StateId s,ArcIteratorData<A> * data)885   virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
886     GetImpl()->InitArcIterator(s, data);
887   }
888 
InitMatcher(MatchType match_type)889   virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
890     return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);
891   }
892 
893   template <class Iterator>
SetCompactElements(const Iterator & b,const Iterator & e)894   void SetCompactElements(const Iterator &b, const Iterator &e) {
895     GetImpl()->SetCompactElements(b, e);
896   }
897 
898  private:
CompactFst(Impl * impl)899   CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
900 
901   // Makes visible to friends.
GetImpl()902   Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
903 
904   void SetImpl(Impl *impl, bool own_impl = false) {
905     ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
906   }
907 
908   // Use overloading to extract the type of the argument.
GetImplIfCompactFst(const CompactFst<A,C,U> & compact_fst)909   static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) {
910     return compact_fst.GetImpl();
911   }
912 
913   // This does not give privileged treatment to subclasses of CompactFst.
914   template<typename NonCompactFst>
GetImplIfCompactFst(const NonCompactFst & fst)915   static Impl* GetImplIfCompactFst(const NonCompactFst& fst) {
916     return NULL;
917   }
918 
919   void operator=(const CompactFst<A, C, U> &fst);  // disallow
920 };
921 
922 // Writes Fst in Compact format, potentially with a pass over the machine
923 // before writing to compute the number of states and arcs.
924 //
925 template <class A, class C, class U>
926 template <class F>
WriteFst(const F & fst,const C & compactor,ostream & strm,const FstWriteOptions & opts)927 bool CompactFst<A, C, U>::WriteFst(const F &fst,
928                                    const C &compactor,
929                                    ostream &strm,
930                                    const FstWriteOptions &opts) {
931   typedef U Unsigned;
932   typedef typename C::Element CompactElement;
933   typedef typename A::Weight Weight;
934   int file_version = opts.align ?
935       CompactFstImpl<A, C, U>::kAlignedFileVersion :
936       CompactFstImpl<A, C, U>::kFileVersion;
937   size_t num_arcs = -1, num_states = -1, num_compacts = -1;
938   C first_pass_compactor = compactor;
939   if (Impl* impl = GetImplIfCompactFst(fst)) {
940     num_arcs = impl->Data()->NumArcs();
941     num_states = impl->Data()->NumStates();
942     num_compacts = impl->Data()->NumCompacts();
943     first_pass_compactor = *impl->GetCompactor();
944   } else {
945     // A first pass is needed to compute the state of the compactor, which
946     // is saved ahead of the rest of the data structures.  This unfortunately
947     // means forcing a complete double compaction when writing in this format.
948     // TODO(allauzen): eliminate mutable state from compactors.
949     num_arcs = 0;
950     num_states = 0;
951     for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
952       const StateId s = siter.Value();
953       ++num_states;
954       if (fst.Final(s) != Weight::Zero()) {
955         first_pass_compactor.Compact(
956             s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
957       }
958       for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
959         ++num_arcs;
960         first_pass_compactor.Compact(s, aiter.Value());
961       }
962     }
963   }
964   FstHeader hdr;
965   hdr.SetStart(fst.Start());
966   hdr.SetNumStates(num_states);
967   hdr.SetNumArcs(num_arcs);
968   string type = "compact";
969   if (sizeof(U) != sizeof(uint32)) {
970     string size;
971     Int64ToStr(8 * sizeof(U), &size);
972     type += size;
973   }
974   type += "_";
975   type += C::Type();
976   uint64 copy_properties = fst.Properties(kCopyProperties, true);
977   if ((copy_properties & kError) || !compactor.Compatible(fst)) {
978     LOG(ERROR) << "fst incompatible with compactor";
979     return false;
980   }
981   uint64 properties = copy_properties |
982       CompactFstImpl<A, C, U>::kStaticProperties;
983   FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
984                              &hdr);
985   first_pass_compactor.Write(strm);
986   if (first_pass_compactor.Size() == -1)  {
987     if (opts.align && !AlignOutput(strm)) {
988       LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
989       return false;
990     }
991     Unsigned compacts = 0;
992     for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
993       const StateId s = siter.Value();
994       strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
995       if (fst.Final(s) != Weight::Zero()) {
996         ++compacts;
997       }
998       compacts += fst.NumArcs(s);
999     }
1000     strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts));
1001   }
1002   if (opts.align && !AlignOutput(strm)) {
1003     LOG(ERROR) << "Could not align file during write after writing states";
1004   }
1005   C second_pass_compactor = compactor;
1006   CompactElement element;
1007   for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
1008     const StateId s = siter.Value();
1009     if (fst.Final(s) != Weight::Zero()) {
1010       element = second_pass_compactor.Compact(
1011           s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
1012       strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
1013     }
1014     for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
1015       element = second_pass_compactor.Compact(s, aiter.Value());
1016       strm.write(reinterpret_cast<const char *>(&element), sizeof(element));
1017     }
1018   }
1019   strm.flush();
1020   if (!strm) {
1021     LOG(ERROR) << "CompactFst write failed: " << opts.source;
1022     return false;
1023   }
1024   return true;
1025 }
1026 
1027 
1028 // Specialization for CompactFst; see generic version in fst.h
1029 // for sample usage (but use the CompactFst type!). This version
1030 // should inline.
1031 template <class A, class C, class U>
1032 class StateIterator< CompactFst<A, C, U> > {
1033  public:
1034   typedef typename A::StateId StateId;
1035 
StateIterator(const CompactFst<A,C,U> & fst)1036   explicit StateIterator(const CompactFst<A, C, U> &fst)
1037       : nstates_(fst.GetImpl()->NumStates()), s_(0) {}
1038 
Done()1039   bool Done() const { return s_ >= nstates_; }
1040 
Value()1041   StateId Value() const { return s_; }
1042 
Next()1043   void Next() { ++s_; }
1044 
Reset()1045   void Reset() { s_ = 0; }
1046 
1047  private:
1048   StateId nstates_;
1049   StateId s_;
1050 
1051   DISALLOW_COPY_AND_ASSIGN(StateIterator);
1052 };
1053 
1054 // Specialization for CompactFst.
1055 // Never caches, always iterates over the underlying compact elements.
1056 template <class A, class C, class U>
1057 class ArcIterator< CompactFst<A, C, U> > {
1058  public:
1059   typedef typename A::StateId StateId;
1060   typedef typename C::Element CompactElement;
1061 
ArcIterator(const CompactFst<A,C,U> & fst,StateId s)1062   ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
1063       : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0),
1064         pos_(0), flags_(kArcValueFlags) {
1065 
1066     const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data();
1067     size_t offset;
1068     if (compactor_->Size() == -1) {  // Variable out-degree compactor
1069       offset = data->States(s);
1070       num_arcs_ = data->States(s + 1) - offset;
1071     } else {  // Fixed out-degree compactor
1072       offset =  s * compactor_->Size();
1073       num_arcs_ = compactor_->Size();
1074     }
1075     if (num_arcs_ > 0) {
1076       compacts_ = &(data->Compacts(offset));
1077       arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue);
1078       if (arc_.ilabel == kNoStateId) {
1079         ++compacts_;
1080         --num_arcs_;
1081       }
1082     }
1083   }
1084 
~ArcIterator()1085   ~ArcIterator() {}
1086 
Done()1087   bool Done() const { return pos_ >= num_arcs_; }
1088 
Value()1089   const A& Value() const {
1090     arc_ = compactor_->Expand(state_, compacts_[pos_], flags_);
1091     return arc_;
1092   }
1093 
Next()1094   void Next() { ++pos_; }
1095 
Position()1096   size_t Position() const { return pos_; }
1097 
Reset()1098   void Reset() { pos_ = 0;  }
1099 
Seek(size_t pos)1100   void Seek(size_t pos) { pos_ = pos; }
1101 
Flags()1102   uint32 Flags() const { return flags_; }
1103 
SetFlags(uint32 f,uint32 m)1104   void SetFlags(uint32 f, uint32 m) {
1105     flags_ &= ~m;
1106     flags_ |= (f & kArcValueFlags);
1107   }
1108 
1109  private:
1110   C *compactor_;
1111   StateId state_;
1112   const CompactElement *compacts_;
1113   size_t pos_;
1114   size_t num_arcs_;
1115   mutable A arc_;
1116   uint32 flags_;
1117 
1118   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
1119 };
1120 
1121 // // Specialization for CompactFst.
1122 // // This is an optionally caching arc iterator.
1123 // // TODO(allauzen): implements the kArcValueFlags, the current
1124 // // implementation only implements the kArcNoCache flag.
1125 // template <class A, class C, class U>
1126 // class ArcIterator< CompactFst<A, C, U> > {
1127 //  public:
1128 //   typedef typename A::StateId StateId;
1129 
1130 //   ArcIterator(const CompactFst<A, C, U> &fst, StateId s)
1131 //       : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0),
1132 //         flags_(kArcValueFlags) {
1133 //     cache_data_.ref_count = 0;
1134 
1135 //     if (fst_.GetImpl()->HasArcs(state_)) {
1136 //       fst_.GetImpl()->InitArcIterator(s, &cache_data_);
1137 //       num_arcs_ = cache_data_.narcs;
1138 //       return;
1139 //     }
1140 
1141 //     const C *compactor = fst_.GetImpl()->GetCompactor();
1142 //     const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data();
1143 //     if (compactor->Size() == -1) {  // Variable out-degree compactor
1144 //       offset_ = data->States(s);
1145 //       num_arcs_ = data->States(s + 1) - offset_;
1146 //     } else {  // Fixed out-degree compactor
1147 //       offset_ =  s * compactor->Size();
1148 //       num_arcs_ = compactor->Size();
1149 //     }
1150 //     if (num_arcs_ > 0) {
1151 //       const A &arc = fst_.GetImpl()->ComputeArc(s, offset_);
1152 //       if (arc.ilabel == kNoStateId) {
1153 //         ++offset_;
1154 //         --num_arcs_;
1155 //       }
1156 //     }
1157 //   }
1158 
1159 
1160 //   ~ArcIterator() {
1161 //     if (cache_data_.ref_count)
1162 //       --(*cache_data_.ref_count);
1163 //   }
1164 
1165 //   bool Done() const { return pos_ >= num_arcs_; }
1166 
1167 //   const A& Value() const {
1168 //     if (cache_data_.ref_count == 0) {
1169 //       if (flags_ & kArcNoCache) {
1170 //         arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_);
1171 //         return arc_;
1172 //       } else {
1173 //         fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
1174 //       }
1175 //     }
1176 //     return cache_data_.arcs[pos_];
1177 //   }
1178 
1179 //   void Next() { ++pos_; }
1180 
1181 //   size_t Position() const { return pos_; }
1182 
1183 //   void Reset() { pos_ = 0;  }
1184 
1185 //   void Seek(size_t pos) { pos_ = pos; }
1186 
1187 //   uint32 Flags() const { return flags_; }
1188 
1189 //   void SetFlags(uint32 f, uint32 m) {
1190 //     flags_ &= ~m;
1191 //     flags_ |= f;
1192 
1193 //     if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0)
1194 //       fst_.GetImpl()->InitArcIterator(state_, &cache_data_);
1195 //   }
1196 
1197 //  private:
1198 //   mutable const CompactFst<A, C, U> &fst_;
1199 //   StateId state_;
1200 //   size_t pos_;
1201 //   size_t num_arcs_;
1202 //   size_t offset_;
1203 //   uint32 flags_;
1204 //   mutable A arc_;
1205 //   mutable ArcIteratorData<A> cache_data_;
1206 
1207 //   DISALLOW_COPY_AND_ASSIGN(ArcIterator);
1208 // };
1209 
1210 
1211 //
1212 // Utility Compactors
1213 //
1214 
1215 // Compactor for unweighted string FSTs
1216 template <class A>
1217 class StringCompactor {
1218  public:
1219   typedef A Arc;
1220   typedef typename A::Label Element;
1221   typedef typename A::Label Label;
1222   typedef typename A::StateId StateId;
1223   typedef typename A::Weight Weight;
1224 
Compact(StateId s,const A & arc)1225   Element Compact(StateId s, const A &arc) const { return arc.ilabel; }
1226 
1227   Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1228     return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId);
1229   }
1230 
Size()1231   ssize_t Size() const { return 1; }
1232 
Properties()1233   uint64 Properties() const {
1234     return kString | kAcceptor | kUnweighted;
1235   }
1236 
Compatible(const Fst<A> & fst)1237   bool Compatible(const Fst<A> &fst) const {
1238     uint64 props = Properties();
1239     return fst.Properties(props, true) == props;
1240   }
1241 
Type()1242   static const string &Type() {
1243     static const string type = "string";
1244     return type;
1245   }
1246 
Write(ostream & strm)1247   bool Write(ostream &strm) const { return true; }
1248 
Read(istream & strm)1249   static StringCompactor *Read(istream &strm) {
1250     return new StringCompactor;
1251   }
1252 };
1253 
1254 
1255 // Compactor for weighted string FSTs
1256 template <class A>
1257 class WeightedStringCompactor {
1258  public:
1259   typedef A Arc;
1260   typedef typename A::Label Label;
1261   typedef typename A::StateId StateId;
1262   typedef typename A::Weight Weight;
1263   typedef pair<Label, Weight> Element;
1264 
Compact(StateId s,const A & arc)1265   Element Compact(StateId s, const A &arc) const {
1266     return make_pair(arc.ilabel, arc.weight);
1267   }
1268 
1269   Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1270     return Arc(p.first, p.first, p.second,
1271                p.first != kNoLabel ? s + 1 : kNoStateId);
1272   }
1273 
Size()1274   ssize_t Size() const { return 1;}
1275 
Properties()1276   uint64 Properties() const {
1277     return kString | kAcceptor;
1278   }
1279 
Compatible(const Fst<A> & fst)1280   bool Compatible(const Fst<A> &fst) const {
1281     uint64 props = Properties();
1282     return fst.Properties(props, true) == props;
1283   }
1284 
Type()1285   static const string &Type() {
1286     static const string type = "weighted_string";
1287     return type;
1288   }
1289 
Write(ostream & strm)1290   bool Write(ostream &strm) const { return true; }
1291 
Read(istream & strm)1292   static WeightedStringCompactor *Read(istream &strm) {
1293     return new WeightedStringCompactor;
1294   }
1295 };
1296 
1297 
1298 // Compactor for unweighted acceptor FSTs
1299 template <class A>
1300 class UnweightedAcceptorCompactor {
1301  public:
1302   typedef A Arc;
1303   typedef typename A::Label Label;
1304   typedef typename A::StateId StateId;
1305   typedef typename A::Weight Weight;
1306   typedef pair<Label, StateId> Element;
1307 
Compact(StateId s,const A & arc)1308   Element Compact(StateId s, const A &arc) const {
1309     return make_pair(arc.ilabel, arc.nextstate);
1310   }
1311 
1312   Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1313     return Arc(p.first, p.first, Weight::One(), p.second);
1314   }
1315 
Size()1316   ssize_t Size() const { return -1;}
1317 
Properties()1318   uint64 Properties() const {
1319     return kAcceptor | kUnweighted;
1320   }
1321 
Compatible(const Fst<A> & fst)1322   bool Compatible(const Fst<A> &fst) const {
1323     uint64 props = Properties();
1324     return fst.Properties(props, true) == props;
1325   }
1326 
Type()1327   static const string &Type() {
1328     static const string type = "unweighted_acceptor";
1329     return type;
1330   }
1331 
Write(ostream & strm)1332   bool Write(ostream &strm) const { return true; }
1333 
Read(istream & istrm)1334   static UnweightedAcceptorCompactor *Read(istream &istrm) {
1335     return new UnweightedAcceptorCompactor;
1336   }
1337 };
1338 
1339 
1340 // Compactor for weighted acceptor FSTs
1341 template <class A>
1342 class AcceptorCompactor {
1343  public:
1344   typedef A Arc;
1345   typedef typename A::Label Label;
1346   typedef typename A::StateId StateId;
1347   typedef typename A::Weight Weight;
1348   typedef pair< pair<Label, Weight>, StateId > Element;
1349 
Compact(StateId s,const A & arc)1350   Element Compact(StateId s, const A &arc) const {
1351     return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate);
1352   }
1353 
1354   Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1355     return Arc(p.first.first, p.first.first, p.first.second, p.second);
1356   }
1357 
Size()1358   ssize_t Size() const { return -1;}
1359 
Properties()1360   uint64 Properties() const {
1361     return kAcceptor;
1362   }
1363 
Compatible(const Fst<A> & fst)1364   bool Compatible(const Fst<A> &fst) const {
1365     uint64 props = Properties();
1366     return fst.Properties(props, true) == props;
1367   }
1368 
Type()1369   static const string &Type() {
1370     static const string type = "acceptor";
1371     return type;
1372   }
1373 
Write(ostream & strm)1374   bool Write(ostream &strm) const { return true; }
1375 
Read(istream & strm)1376   static AcceptorCompactor *Read(istream &strm) {
1377     return new AcceptorCompactor;
1378   }
1379 };
1380 
1381 
1382 // Compactor for unweighted FSTs
1383 template <class A>
1384 class UnweightedCompactor {
1385  public:
1386   typedef A Arc;
1387   typedef typename A::Label Label;
1388   typedef typename A::StateId StateId;
1389   typedef typename A::Weight Weight;
1390   typedef pair< pair<Label, Label>, StateId > Element;
1391 
Compact(StateId s,const A & arc)1392   Element Compact(StateId s, const A &arc) const {
1393     return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate);
1394   }
1395 
1396   Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const {
1397     return Arc(p.first.first, p.first.second, Weight::One(), p.second);
1398   }
1399 
Size()1400   ssize_t Size() const { return -1; }
1401 
Properties()1402   uint64 Properties() const {
1403     return kUnweighted;
1404   }
1405 
Compatible(const Fst<A> & fst)1406   bool Compatible(const Fst<A> &fst) const {
1407     uint64 props = Properties();
1408     return fst.Properties(props, true) == props;
1409   }
1410 
Type()1411   static const string &Type() {
1412     static const string type = "unweighted";
1413     return type;
1414   }
1415 
Write(ostream & strm)1416   bool Write(ostream &strm) const { return true; }
1417 
Read(istream & strm)1418   static UnweightedCompactor *Read(istream &strm) {
1419     return new UnweightedCompactor;
1420   }
1421 };
1422 
1423 
1424 // Uselful aliases when using StdArc
1425 typedef CompactFst< StdArc, StringCompactor<StdArc> >
1426 StdCompactStringFst;
1427 typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
1428 StdCompactWeightedStringFst;
1429 typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
1430 StdCompactAcceptorFst;
1431 typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
1432 StdCompactUnweightedFst;
1433 typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
1434 StdCompactUnweightedAcceptorFst;
1435 
1436 }  // namespace fst
1437 
1438 #endif // FST_LIB_COMPACT_FST_H__
1439