• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // state-table.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Classes for representing the mapping between state tuples and state Ids.
20 
21 #ifndef FST_LIB_STATE_TABLE_H__
22 #define FST_LIB_STATE_TABLE_H__
23 
24 #include <deque>
25 #include <vector>
26 using std::vector;
27 
28 #include <fst/bi-table.h>
29 #include <fst/expanded-fst.h>
30 
31 
32 namespace fst {
33 
34 // STATE TABLES - these determine the bijective mapping between state
35 // tuples (e.g. in composition triples of two FST states and a
36 // composition filter state) and their corresponding state IDs.
37 // They are classes, templated on state tuples, of the form:
38 //
39 // template <class T>
40 // class StateTable {
41 //  public:
42 //   typedef typename T StateTuple;
43 //
44 //   // Required constructors.
45 //   StateTable();
46 //
47 //   // Lookup state ID by tuple. If it doesn't exist, then add it.
48 //   StateId FindState(const StateTuple &);
49 //   // Lookup state tuple by state ID.
50 //   const StateTuple<StateId> &Tuple(StateId) const;
51 //   // # of stored tuples.
52 //   StateId Size() const;
53 // };
54 //
55 // A state tuple has the form:
56 //
57 // template <class S>
58 // struct StateTuple {
59 //   typedef typename S StateId;
60 //
61 //   // Required constructor.
62 //   StateTuple();
63 // };
64 
65 
66 // An implementation using a hash map for the tuple to state ID mapping.
67 // The state tuple T must have == defined and the default constructor
68 // must produce a tuple that will never be seen. H is the hash function.
69 template <class T, class H>
70 class HashStateTable : public HashBiTable<typename T::StateId, T, H> {
71  public:
72   typedef T StateTuple;
73   typedef typename StateTuple::StateId StateId;
74   using HashBiTable<StateId, T, H>::FindId;
75   using HashBiTable<StateId, T, H>::FindEntry;
76   using HashBiTable<StateId, T, H>::Size;
77 
HashStateTable()78   HashStateTable() : HashBiTable<StateId, T, H>() {}
FindState(const StateTuple & tuple)79   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
Tuple(StateId s)80   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
81 };
82 
83 
84 // An implementation using a hash set for the tuple to state ID
85 // mapping. The state tuple T must have == defined and the default
86 // constructor must produce a tuple that will never be seen. H is the
87 // hash function.
88 template <class T, class H>
89 class CompactHashStateTable
90     : public CompactHashBiTable<typename T::StateId, T, H> {
91  public:
92   typedef T StateTuple;
93   typedef typename StateTuple::StateId StateId;
94   using CompactHashBiTable<StateId, T, H>::FindId;
95   using CompactHashBiTable<StateId, T, H>::FindEntry;
96   using CompactHashBiTable<StateId, T, H>::Size;
97 
CompactHashStateTable()98   CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {}
99 
100   // Reserves space for table_size elements.
CompactHashStateTable(size_t table_size)101   explicit CompactHashStateTable(size_t table_size)
102       : CompactHashBiTable<StateId, T, H>(table_size) {}
103 
FindState(const StateTuple & tuple)104   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
Tuple(StateId s)105   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
106 };
107 
108 // An implementation using a vector for the tuple to state mapping.
109 // It is passed a function object FP that should fingerprint tuples
110 // uniquely to an integer that can used as a vector index. Normally,
111 // VectorStateTable constructs the FP object.  The user can instead
112 // pass in this object; in that case, VectorStateTable takes its
113 // ownership.
114 template <class T, class FP>
115 class VectorStateTable
116     : public VectorBiTable<typename T::StateId, T, FP> {
117  public:
118   typedef T StateTuple;
119   typedef typename StateTuple::StateId StateId;
120   using VectorBiTable<StateId, T, FP>::FindId;
121   using VectorBiTable<StateId, T, FP>::FindEntry;
122   using VectorBiTable<StateId, T, FP>::Size;
123   using VectorBiTable<StateId, T, FP>::Fingerprint;
124 
125   explicit VectorStateTable(FP *fp = 0) : VectorBiTable<StateId, T, FP>(fp) {}
FindState(const StateTuple & tuple)126   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
Tuple(StateId s)127   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
128 };
129 
130 
131 // An implementation using a vector and a compact hash table. The
132 // selecting functor S returns true for tuples to be hashed in the
133 // vector.  The fingerprinting functor FP returns a unique fingerprint
134 // for each tuple to be hashed in the vector (these need to be
135 // suitable for indexing in a vector).  The hash functor H is used when
136 // hashing tuple into the compact hash table.
137 template <class T, class S, class FP, class H>
138 class VectorHashStateTable
139     : public VectorHashBiTable<typename T::StateId, T, S, FP, H> {
140  public:
141   typedef T StateTuple;
142   typedef typename StateTuple::StateId StateId;
143   using VectorHashBiTable<StateId, T, S, FP, H>::FindId;
144   using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry;
145   using VectorHashBiTable<StateId, T, S, FP, H>::Size;
146   using VectorHashBiTable<StateId, T, S, FP, H>::Selector;
147   using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint;
148   using VectorHashBiTable<StateId, T, S, FP, H>::Hash;
149 
150   VectorHashStateTable(S *s, FP *fp, H *h,
151                        size_t vector_size = 0,
152                        size_t tuple_size = 0)
153       : VectorHashBiTable<StateId, T, S, FP, H>(
154           s, fp, h, vector_size, tuple_size) {}
155 
FindState(const StateTuple & tuple)156   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
Tuple(StateId s)157   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
158 };
159 
160 
161 // An implementation using a hash map for the tuple to state ID
162 // mapping. This version permits erasing of states.  The state tuple T
163 // must have == defined and its default constructor must produce a
164 // tuple that will never be seen. F is the hash function.
165 template <class T, class F>
166 class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> {
167  public:
168   typedef T StateTuple;
169   typedef typename StateTuple::StateId StateId;
170   using ErasableBiTable<StateId, T, F>::FindId;
171   using ErasableBiTable<StateId, T, F>::FindEntry;
172   using ErasableBiTable<StateId, T, F>::Size;
173   using ErasableBiTable<StateId, T, F>::Erase;
174 
ErasableStateTable()175   ErasableStateTable() : ErasableBiTable<StateId, T, F>() {}
FindState(const StateTuple & tuple)176   StateId FindState(const StateTuple &tuple) { return FindId(tuple); }
Tuple(StateId s)177   const StateTuple &Tuple(StateId s) const { return FindEntry(s); }
178 };
179 
180 //
181 // COMPOSITION STATE TUPLES AND TABLES
182 //
183 // The composition state table has the form:
184 //
185 // template <class A, class F>
186 // class ComposeStateTable {
187 //  public:
188 //   typedef A Arc;
189 //   typedef F FilterState;
190 //   typedef typename A::StateId StateId;
191 //   typedef ComposeStateTuple<StateId> StateTuple;
192 //
193 //   // Required constructors. Copy constructor does not copy state.
194 //   ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2);
195 //   ComposeStateTable(const ComposeStateTable<A, F> &table);
196 //   // Lookup state ID by tuple. If it doesn't exist, then add it.
197 //   StateId FindState(const StateTuple &);
198 //   // Lookup state tuple by state ID.
199 //   const StateTuple<StateId> &Tuple(StateId) const;
200 //   // # of stored tuples.
201 //   StateId Size() const;
202 //   // Return true if error encountered
203 //   bool Error() const;
204 // };
205 
206 // Represents the composition state.
207 template <typename S, typename F>
208 struct ComposeStateTuple {
209   typedef S StateId;
210   typedef F FilterState;
211 
ComposeStateTupleComposeStateTuple212   ComposeStateTuple()
213       : state_id1(kNoStateId), state_id2(kNoStateId),
214         filter_state(FilterState::NoState()) {}
215 
ComposeStateTupleComposeStateTuple216   ComposeStateTuple(StateId s1, StateId s2, const FilterState &f)
217       : state_id1(s1), state_id2(s2), filter_state(f) {}
218 
219   StateId state_id1;         // State Id on fst1
220   StateId state_id2;         // State Id on fst2
221   FilterState filter_state;  // State of composition filter
222 };
223 
224 // Equality of composition state tuples.
225 template <typename S, typename F>
226 inline bool operator==(const ComposeStateTuple<S, F>& x,
227                        const ComposeStateTuple<S, F>& y) {
228   if (&x == &y)
229     return true;
230   return x.state_id1 == y.state_id1 &&
231       x.state_id2 == y.state_id2 &&
232       x.filter_state == y.filter_state;
233 }
234 
235 
236 // Hashing of composition state tuples.
237 template <typename S, typename F>
238 class ComposeHash {
239  public:
operator()240   size_t operator()(const ComposeStateTuple<S, F>& t) const {
241     return t.state_id1 + t.state_id2 * kPrime0 +
242         t.filter_state.Hash() * kPrime1;
243   }
244  private:
245   static const size_t kPrime0;
246   static const size_t kPrime1;
247 };
248 
249 template <typename S, typename F>
250 const size_t ComposeHash<S, F>::kPrime0 = 7853;
251 
252 template <typename S, typename F>
253 const size_t ComposeHash<S, F>::kPrime1 = 7867;
254 
255 
256 // A HashStateTable over composition tuples.
257 template <typename A,
258           typename F,
259           typename H =
260           CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>,
261                                 ComposeHash<typename A::StateId, F> > >
262 class GenericComposeStateTable : public H {
263  public:
264   typedef A Arc;
265   typedef typename A::StateId StateId;
266   typedef F FilterState;
267   typedef ComposeStateTuple<StateId, F> StateTuple;
268 
GenericComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)269   GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
270 
GenericComposeStateTable(const GenericComposeStateTable<A,F> & table)271   GenericComposeStateTable(const GenericComposeStateTable<A, F> &table) {}
272 
Error()273   bool Error() const { return false; }
274 
275  private:
276   void operator=(const GenericComposeStateTable<A, F> &table);  // disallow
277 };
278 
279 
280 //  Fingerprint for general composition tuples.
281 template  <typename S, typename F>
282 class ComposeFingerprint {
283  public:
284   typedef S StateId;
285   typedef F FilterState;
286   typedef ComposeStateTuple<S, F> StateTuple;
287 
288   // Required but suboptimal constructor.
ComposeFingerprint()289   ComposeFingerprint() : mult1_(8192), mult2_(8192) {
290     LOG(WARNING) << "TupleFingerprint: # of FST states should be provided.";
291   }
292 
293   // Constructor is provided the sizes of the input FSTs
ComposeFingerprint(StateId nstates1,StateId nstates2)294   ComposeFingerprint(StateId nstates1, StateId nstates2)
295       : mult1_(nstates1), mult2_(nstates1 * nstates2) { }
296 
operator()297   size_t operator()(const StateTuple &tuple) {
298     return tuple.state_id1 + tuple.state_id2 * mult1_ +
299         tuple.filter_state.Hash() * mult2_;
300   }
301 
302  private:
303   ssize_t mult1_;
304   ssize_t mult2_;
305 };
306 
307 
308 // Useful when the first composition state determines the tuple.
309 template  <typename S, typename F>
310 class ComposeState1Fingerprint {
311  public:
312   typedef S StateId;
313   typedef F FilterState;
314   typedef ComposeStateTuple<S, F> StateTuple;
315 
operator()316   size_t operator()(const StateTuple &tuple) { return tuple.state_id1; }
317 };
318 
319 
320 // Useful when the second composition state determines the tuple.
321 template  <typename S, typename F>
322 class ComposeState2Fingerprint {
323  public:
324   typedef S StateId;
325   typedef F FilterState;
326   typedef ComposeStateTuple<S, F> StateTuple;
327 
operator()328   size_t operator()(const StateTuple &tuple) { return tuple.state_id2; }
329 };
330 
331 
332 // A VectorStateTable over composition tuples.  This can be used when
333 // the product of number of states in FST1 and FST2 (and the
334 // composition filter state hash) is manageable. If the FSTs are not
335 // expanded Fsts, they will first have their states counted.
336 template  <typename A, typename F>
337 class ProductComposeStateTable : public
338 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
339                  ComposeFingerprint<typename A::StateId, F> > {
340  public:
341   typedef A Arc;
342   typedef typename A::StateId StateId;
343   typedef F FilterState;
344   typedef ComposeStateTuple<StateId, F> StateTuple;
345 
ProductComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)346   ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
347       : VectorStateTable<ComposeStateTuple<StateId, F>,
348                          ComposeFingerprint<StateId, F> >
349   (new ComposeFingerprint<StateId, F>(CountStates(fst1),
350                                       CountStates(fst2))) { }
351 
ProductComposeStateTable(const ProductComposeStateTable<A,F> & table)352   ProductComposeStateTable(const ProductComposeStateTable<A, F> &table)
353       : VectorStateTable<ComposeStateTuple<StateId, F>,
354                          ComposeFingerprint<StateId, F> >
355         (new ComposeFingerprint<StateId, F>(table.Fingerprint())) {}
356 
Error()357   bool Error() const { return false; }
358 
359  private:
360   void operator=(const ProductComposeStateTable<A, F> &table);  // disallow
361 };
362 
363 // A VectorStateTable over composition tuples.  This can be used when
364 // FST1 is a string (satisfies kStringProperties) and FST2 is
365 // epsilon-free and deterministic. It should be used with a
366 // composition filter that creates at most one filter state per tuple
367 // under these conditions (e.g. SequenceComposeFilter or
368 // MatchComposeFilter).
369 template <typename A, typename F>
370 class StringDetComposeStateTable : public
371 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
372                  ComposeState1Fingerprint<typename A::StateId, F> > {
373  public:
374   typedef A Arc;
375   typedef typename A::StateId StateId;
376   typedef F FilterState;
377   typedef ComposeStateTuple<StateId, F> StateTuple;
378 
StringDetComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)379   StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
380       : error_(false) {
381     uint64 props1 = kString;
382     uint64 props2 = kIDeterministic | kNoIEpsilons;
383     if (fst1.Properties(props1, true) != props1 ||
384         fst2.Properties(props2, true) != props2) {
385       FSTERROR() << "StringDetComposeStateTable: fst1 not a string or"
386                  << " fst2 not input deterministic and epsilon-free";
387       error_ = true;
388     }
389   }
390 
StringDetComposeStateTable(const StringDetComposeStateTable<A,F> & table)391   StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table)
392    : error_(table.error_) {}
393 
Error()394   bool Error() const { return error_; }
395 
396  private:
397   bool error_;
398 
399   void operator=(const StringDetComposeStateTable<A, F> &table);  // disallow
400 };
401 
402 
403 // A VectorStateTable over composition tuples.  This can be used when
404 // FST2 is a string (satisfies kStringProperties) and FST1 is
405 // epsilon-free and deterministic. It should be used with a
406 // composition filter that creates at most one filter state per tuple
407 // under these conditions (e.g. SequenceComposeFilter or
408 // MatchComposeFilter).
409 template <typename A, typename F>
410 class DetStringComposeStateTable : public
411 VectorStateTable<ComposeStateTuple<typename A::StateId, F>,
412                  ComposeState1Fingerprint<typename A::StateId, F> > {
413  public:
414   typedef A Arc;
415   typedef typename A::StateId StateId;
416   typedef F FilterState;
417   typedef ComposeStateTuple<StateId, F> StateTuple;
418 
DetStringComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)419   DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2)
420       :error_(false) {
421     uint64 props1 = kODeterministic | kNoOEpsilons;
422     uint64 props2 = kString;
423     if (fst1.Properties(props1, true) != props1 ||
424         fst2.Properties(props2, true) != props2) {
425       FSTERROR() << "StringDetComposeStateTable: fst2 not a string or"
426                  << " fst1 not output deterministic and epsilon-free";
427       error_ = true;
428     }
429   }
430 
DetStringComposeStateTable(const DetStringComposeStateTable<A,F> & table)431   DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table)
432       : error_(table.error_) {}
433 
Error()434   bool Error() const { return error_; }
435 
436  private:
437   bool error_;
438 
439   void operator=(const DetStringComposeStateTable<A, F> &table);  // disallow
440 };
441 
442 
443 // An ErasableStateTable over composition tuples. The Erase(StateId) method
444 // can be called if the user either is sure that composition will never return
445 // to that tuple or doesn't care that if it does, it is assigned a new
446 // state ID.
447 template <typename A, typename F>
448 class ErasableComposeStateTable : public
449 ErasableStateTable<ComposeStateTuple<typename A::StateId, F>,
450                    ComposeHash<typename A::StateId, F> > {
451  public:
452   typedef A Arc;
453   typedef typename A::StateId StateId;
454   typedef F FilterState;
455   typedef ComposeStateTuple<StateId, F> StateTuple;
456 
ErasableComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)457   ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {}
458 
ErasableComposeStateTable(const ErasableComposeStateTable<A,F> & table)459   ErasableComposeStateTable(const ErasableComposeStateTable<A, F> &table) {}
460 
Error()461   bool Error() const { return false; }
462 
463  private:
464   void operator=(const ErasableComposeStateTable<A, F> &table);  // disallow
465 };
466 
467 }  // namespace fst
468 
469 #endif  // FST_LIB_STATE_TABLE_H__
470