1 // state-table.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: riley@google.com (Michael Riley) 17 // 18 // \file 19 // Classes for representing the mapping between state tuples and state Ids. 20 21 #ifndef FST_LIB_STATE_TABLE_H__ 22 #define FST_LIB_STATE_TABLE_H__ 23 24 #include <deque> 25 #include <vector> 26 using std::vector; 27 28 #include <fst/bi-table.h> 29 #include <fst/expanded-fst.h> 30 31 32 namespace fst { 33 34 // STATE TABLES - these determine the bijective mapping between state 35 // tuples (e.g. in composition triples of two FST states and a 36 // composition filter state) and their corresponding state IDs. 37 // They are classes, templated on state tuples, of the form: 38 // 39 // template <class T> 40 // class StateTable { 41 // public: 42 // typedef typename T StateTuple; 43 // 44 // // Required constructors. 45 // StateTable(); 46 // 47 // // Lookup state ID by tuple. If it doesn't exist, then add it. 48 // StateId FindState(const StateTuple &); 49 // // Lookup state tuple by state ID. 50 // const StateTuple<StateId> &Tuple(StateId) const; 51 // // # of stored tuples. 52 // StateId Size() const; 53 // }; 54 // 55 // A state tuple has the form: 56 // 57 // template <class S> 58 // struct StateTuple { 59 // typedef typename S StateId; 60 // 61 // // Required constructor. 62 // StateTuple(); 63 // }; 64 65 66 // An implementation using a hash map for the tuple to state ID mapping. 67 // The state tuple T must have == defined and the default constructor 68 // must produce a tuple that will never be seen. H is the hash function. 69 template <class T, class H> 70 class HashStateTable : public HashBiTable<typename T::StateId, T, H> { 71 public: 72 typedef T StateTuple; 73 typedef typename StateTuple::StateId StateId; 74 using HashBiTable<StateId, T, H>::FindId; 75 using HashBiTable<StateId, T, H>::FindEntry; 76 using HashBiTable<StateId, T, H>::Size; 77 HashStateTable()78 HashStateTable() : HashBiTable<StateId, T, H>() {} FindState(const StateTuple & tuple)79 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } Tuple(StateId s)80 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 81 }; 82 83 84 // An implementation using a hash set for the tuple to state ID 85 // mapping. The state tuple T must have == defined and the default 86 // constructor must produce a tuple that will never be seen. H is the 87 // hash function. 88 template <class T, class H> 89 class CompactHashStateTable 90 : public CompactHashBiTable<typename T::StateId, T, H> { 91 public: 92 typedef T StateTuple; 93 typedef typename StateTuple::StateId StateId; 94 using CompactHashBiTable<StateId, T, H>::FindId; 95 using CompactHashBiTable<StateId, T, H>::FindEntry; 96 using CompactHashBiTable<StateId, T, H>::Size; 97 CompactHashStateTable()98 CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {} 99 100 // Reserves space for table_size elements. CompactHashStateTable(size_t table_size)101 explicit CompactHashStateTable(size_t table_size) 102 : CompactHashBiTable<StateId, T, H>(table_size) {} 103 FindState(const StateTuple & tuple)104 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } Tuple(StateId s)105 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 106 }; 107 108 // An implementation using a vector for the tuple to state mapping. 109 // It is passed a function object FP that should fingerprint tuples 110 // uniquely to an integer that can used as a vector index. Normally, 111 // VectorStateTable constructs the FP object. The user can instead 112 // pass in this object; in that case, VectorStateTable takes its 113 // ownership. 114 template <class T, class FP> 115 class VectorStateTable 116 : public VectorBiTable<typename T::StateId, T, FP> { 117 public: 118 typedef T StateTuple; 119 typedef typename StateTuple::StateId StateId; 120 using VectorBiTable<StateId, T, FP>::FindId; 121 using VectorBiTable<StateId, T, FP>::FindEntry; 122 using VectorBiTable<StateId, T, FP>::Size; 123 using VectorBiTable<StateId, T, FP>::Fingerprint; 124 125 explicit VectorStateTable(FP *fp = 0) : VectorBiTable<StateId, T, FP>(fp) {} FindState(const StateTuple & tuple)126 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } Tuple(StateId s)127 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 128 }; 129 130 131 // An implementation using a vector and a compact hash table. The 132 // selecting functor S returns true for tuples to be hashed in the 133 // vector. The fingerprinting functor FP returns a unique fingerprint 134 // for each tuple to be hashed in the vector (these need to be 135 // suitable for indexing in a vector). The hash functor H is used when 136 // hashing tuple into the compact hash table. 137 template <class T, class S, class FP, class H> 138 class VectorHashStateTable 139 : public VectorHashBiTable<typename T::StateId, T, S, FP, H> { 140 public: 141 typedef T StateTuple; 142 typedef typename StateTuple::StateId StateId; 143 using VectorHashBiTable<StateId, T, S, FP, H>::FindId; 144 using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry; 145 using VectorHashBiTable<StateId, T, S, FP, H>::Size; 146 using VectorHashBiTable<StateId, T, S, FP, H>::Selector; 147 using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint; 148 using VectorHashBiTable<StateId, T, S, FP, H>::Hash; 149 150 VectorHashStateTable(S *s, FP *fp, H *h, 151 size_t vector_size = 0, 152 size_t tuple_size = 0) 153 : VectorHashBiTable<StateId, T, S, FP, H>( 154 s, fp, h, vector_size, tuple_size) {} 155 FindState(const StateTuple & tuple)156 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } Tuple(StateId s)157 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 158 }; 159 160 161 // An implementation using a hash map for the tuple to state ID 162 // mapping. This version permits erasing of states. The state tuple T 163 // must have == defined and its default constructor must produce a 164 // tuple that will never be seen. F is the hash function. 165 template <class T, class F> 166 class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> { 167 public: 168 typedef T StateTuple; 169 typedef typename StateTuple::StateId StateId; 170 using ErasableBiTable<StateId, T, F>::FindId; 171 using ErasableBiTable<StateId, T, F>::FindEntry; 172 using ErasableBiTable<StateId, T, F>::Size; 173 using ErasableBiTable<StateId, T, F>::Erase; 174 ErasableStateTable()175 ErasableStateTable() : ErasableBiTable<StateId, T, F>() {} FindState(const StateTuple & tuple)176 StateId FindState(const StateTuple &tuple) { return FindId(tuple); } Tuple(StateId s)177 const StateTuple &Tuple(StateId s) const { return FindEntry(s); } 178 }; 179 180 // 181 // COMPOSITION STATE TUPLES AND TABLES 182 // 183 // The composition state table has the form: 184 // 185 // template <class A, class F> 186 // class ComposeStateTable { 187 // public: 188 // typedef A Arc; 189 // typedef F FilterState; 190 // typedef typename A::StateId StateId; 191 // typedef ComposeStateTuple<StateId> StateTuple; 192 // 193 // // Required constructors. Copy constructor does not copy state. 194 // ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2); 195 // ComposeStateTable(const ComposeStateTable<A, F> &table); 196 // // Lookup state ID by tuple. If it doesn't exist, then add it. 197 // StateId FindState(const StateTuple &); 198 // // Lookup state tuple by state ID. 199 // const StateTuple<StateId> &Tuple(StateId) const; 200 // // # of stored tuples. 201 // StateId Size() const; 202 // // Return true if error encountered 203 // bool Error() const; 204 // }; 205 206 // Represents the composition state. 207 template <typename S, typename F> 208 struct ComposeStateTuple { 209 typedef S StateId; 210 typedef F FilterState; 211 ComposeStateTupleComposeStateTuple212 ComposeStateTuple() 213 : state_id1(kNoStateId), state_id2(kNoStateId), 214 filter_state(FilterState::NoState()) {} 215 ComposeStateTupleComposeStateTuple216 ComposeStateTuple(StateId s1, StateId s2, const FilterState &f) 217 : state_id1(s1), state_id2(s2), filter_state(f) {} 218 219 StateId state_id1; // State Id on fst1 220 StateId state_id2; // State Id on fst2 221 FilterState filter_state; // State of composition filter 222 }; 223 224 // Equality of composition state tuples. 225 template <typename S, typename F> 226 inline bool operator==(const ComposeStateTuple<S, F>& x, 227 const ComposeStateTuple<S, F>& y) { 228 if (&x == &y) 229 return true; 230 return x.state_id1 == y.state_id1 && 231 x.state_id2 == y.state_id2 && 232 x.filter_state == y.filter_state; 233 } 234 235 236 // Hashing of composition state tuples. 237 template <typename S, typename F> 238 class ComposeHash { 239 public: operator()240 size_t operator()(const ComposeStateTuple<S, F>& t) const { 241 return t.state_id1 + t.state_id2 * kPrime0 + 242 t.filter_state.Hash() * kPrime1; 243 } 244 private: 245 static const size_t kPrime0; 246 static const size_t kPrime1; 247 }; 248 249 template <typename S, typename F> 250 const size_t ComposeHash<S, F>::kPrime0 = 7853; 251 252 template <typename S, typename F> 253 const size_t ComposeHash<S, F>::kPrime1 = 7867; 254 255 256 // A HashStateTable over composition tuples. 257 template <typename A, 258 typename F, 259 typename H = 260 CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>, 261 ComposeHash<typename A::StateId, F> > > 262 class GenericComposeStateTable : public H { 263 public: 264 typedef A Arc; 265 typedef typename A::StateId StateId; 266 typedef F FilterState; 267 typedef ComposeStateTuple<StateId, F> StateTuple; 268 GenericComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)269 GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} 270 GenericComposeStateTable(const GenericComposeStateTable<A,F> & table)271 GenericComposeStateTable(const GenericComposeStateTable<A, F> &table) {} 272 Error()273 bool Error() const { return false; } 274 275 private: 276 void operator=(const GenericComposeStateTable<A, F> &table); // disallow 277 }; 278 279 280 // Fingerprint for general composition tuples. 281 template <typename S, typename F> 282 class ComposeFingerprint { 283 public: 284 typedef S StateId; 285 typedef F FilterState; 286 typedef ComposeStateTuple<S, F> StateTuple; 287 288 // Required but suboptimal constructor. ComposeFingerprint()289 ComposeFingerprint() : mult1_(8192), mult2_(8192) { 290 LOG(WARNING) << "TupleFingerprint: # of FST states should be provided."; 291 } 292 293 // Constructor is provided the sizes of the input FSTs ComposeFingerprint(StateId nstates1,StateId nstates2)294 ComposeFingerprint(StateId nstates1, StateId nstates2) 295 : mult1_(nstates1), mult2_(nstates1 * nstates2) { } 296 operator()297 size_t operator()(const StateTuple &tuple) { 298 return tuple.state_id1 + tuple.state_id2 * mult1_ + 299 tuple.filter_state.Hash() * mult2_; 300 } 301 302 private: 303 ssize_t mult1_; 304 ssize_t mult2_; 305 }; 306 307 308 // Useful when the first composition state determines the tuple. 309 template <typename S, typename F> 310 class ComposeState1Fingerprint { 311 public: 312 typedef S StateId; 313 typedef F FilterState; 314 typedef ComposeStateTuple<S, F> StateTuple; 315 operator()316 size_t operator()(const StateTuple &tuple) { return tuple.state_id1; } 317 }; 318 319 320 // Useful when the second composition state determines the tuple. 321 template <typename S, typename F> 322 class ComposeState2Fingerprint { 323 public: 324 typedef S StateId; 325 typedef F FilterState; 326 typedef ComposeStateTuple<S, F> StateTuple; 327 operator()328 size_t operator()(const StateTuple &tuple) { return tuple.state_id2; } 329 }; 330 331 332 // A VectorStateTable over composition tuples. This can be used when 333 // the product of number of states in FST1 and FST2 (and the 334 // composition filter state hash) is manageable. If the FSTs are not 335 // expanded Fsts, they will first have their states counted. 336 template <typename A, typename F> 337 class ProductComposeStateTable : public 338 VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 339 ComposeFingerprint<typename A::StateId, F> > { 340 public: 341 typedef A Arc; 342 typedef typename A::StateId StateId; 343 typedef F FilterState; 344 typedef ComposeStateTuple<StateId, F> StateTuple; 345 ProductComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)346 ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) 347 : VectorStateTable<ComposeStateTuple<StateId, F>, 348 ComposeFingerprint<StateId, F> > 349 (new ComposeFingerprint<StateId, F>(CountStates(fst1), 350 CountStates(fst2))) { } 351 ProductComposeStateTable(const ProductComposeStateTable<A,F> & table)352 ProductComposeStateTable(const ProductComposeStateTable<A, F> &table) 353 : VectorStateTable<ComposeStateTuple<StateId, F>, 354 ComposeFingerprint<StateId, F> > 355 (new ComposeFingerprint<StateId, F>(table.Fingerprint())) {} 356 Error()357 bool Error() const { return false; } 358 359 private: 360 void operator=(const ProductComposeStateTable<A, F> &table); // disallow 361 }; 362 363 // A VectorStateTable over composition tuples. This can be used when 364 // FST1 is a string (satisfies kStringProperties) and FST2 is 365 // epsilon-free and deterministic. It should be used with a 366 // composition filter that creates at most one filter state per tuple 367 // under these conditions (e.g. SequenceComposeFilter or 368 // MatchComposeFilter). 369 template <typename A, typename F> 370 class StringDetComposeStateTable : public 371 VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 372 ComposeState1Fingerprint<typename A::StateId, F> > { 373 public: 374 typedef A Arc; 375 typedef typename A::StateId StateId; 376 typedef F FilterState; 377 typedef ComposeStateTuple<StateId, F> StateTuple; 378 StringDetComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)379 StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) 380 : error_(false) { 381 uint64 props1 = kString; 382 uint64 props2 = kIDeterministic | kNoIEpsilons; 383 if (fst1.Properties(props1, true) != props1 || 384 fst2.Properties(props2, true) != props2) { 385 FSTERROR() << "StringDetComposeStateTable: fst1 not a string or" 386 << " fst2 not input deterministic and epsilon-free"; 387 error_ = true; 388 } 389 } 390 StringDetComposeStateTable(const StringDetComposeStateTable<A,F> & table)391 StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table) 392 : error_(table.error_) {} 393 Error()394 bool Error() const { return error_; } 395 396 private: 397 bool error_; 398 399 void operator=(const StringDetComposeStateTable<A, F> &table); // disallow 400 }; 401 402 403 // A VectorStateTable over composition tuples. This can be used when 404 // FST2 is a string (satisfies kStringProperties) and FST1 is 405 // epsilon-free and deterministic. It should be used with a 406 // composition filter that creates at most one filter state per tuple 407 // under these conditions (e.g. SequenceComposeFilter or 408 // MatchComposeFilter). 409 template <typename A, typename F> 410 class DetStringComposeStateTable : public 411 VectorStateTable<ComposeStateTuple<typename A::StateId, F>, 412 ComposeState1Fingerprint<typename A::StateId, F> > { 413 public: 414 typedef A Arc; 415 typedef typename A::StateId StateId; 416 typedef F FilterState; 417 typedef ComposeStateTuple<StateId, F> StateTuple; 418 DetStringComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)419 DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) 420 :error_(false) { 421 uint64 props1 = kODeterministic | kNoOEpsilons; 422 uint64 props2 = kString; 423 if (fst1.Properties(props1, true) != props1 || 424 fst2.Properties(props2, true) != props2) { 425 FSTERROR() << "StringDetComposeStateTable: fst2 not a string or" 426 << " fst1 not output deterministic and epsilon-free"; 427 error_ = true; 428 } 429 } 430 DetStringComposeStateTable(const DetStringComposeStateTable<A,F> & table)431 DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table) 432 : error_(table.error_) {} 433 Error()434 bool Error() const { return error_; } 435 436 private: 437 bool error_; 438 439 void operator=(const DetStringComposeStateTable<A, F> &table); // disallow 440 }; 441 442 443 // An ErasableStateTable over composition tuples. The Erase(StateId) method 444 // can be called if the user either is sure that composition will never return 445 // to that tuple or doesn't care that if it does, it is assigned a new 446 // state ID. 447 template <typename A, typename F> 448 class ErasableComposeStateTable : public 449 ErasableStateTable<ComposeStateTuple<typename A::StateId, F>, 450 ComposeHash<typename A::StateId, F> > { 451 public: 452 typedef A Arc; 453 typedef typename A::StateId StateId; 454 typedef F FilterState; 455 typedef ComposeStateTuple<StateId, F> StateTuple; 456 ErasableComposeStateTable(const Fst<A> & fst1,const Fst<A> & fst2)457 ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} 458 ErasableComposeStateTable(const ErasableComposeStateTable<A,F> & table)459 ErasableComposeStateTable(const ErasableComposeStateTable<A, F> &table) {} 460 Error()461 bool Error() const { return false; } 462 463 private: 464 void operator=(const ErasableComposeStateTable<A, F> &table); // disallow 465 }; 466 467 } // namespace fst 468 469 #endif // FST_LIB_STATE_TABLE_H__ 470