1 // equivalent.h 2 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 // 15 // Copyright 2005-2010 Google, Inc. 16 // Author: wojciech@google.com (Wojciech Skut) 17 // 18 // \file Functions and classes to determine the equivalence of two 19 // FSTs. 20 21 #ifndef FST_LIB_EQUIVALENT_H__ 22 #define FST_LIB_EQUIVALENT_H__ 23 24 #include <algorithm> 25 #include <deque> 26 #include <unordered_map> 27 using std::tr1::unordered_map; 28 using std::tr1::unordered_multimap; 29 #include <utility> 30 using std::pair; using std::make_pair; 31 #include <vector> 32 using std::vector; 33 34 #include <fst/encode.h> 35 #include <fst/push.h> 36 #include <fst/union-find.h> 37 #include <fst/vector-fst.h> 38 39 40 namespace fst { 41 42 // Traits-like struct holding utility functions/typedefs/constants for 43 // the equivalence algorithm. 44 // 45 // Encoding device: in order to make the statesets of the two acceptors 46 // disjoint, we map Arc::StateId on the type MappedId. The states of 47 // the first acceptor are mapped on odd numbers (s -> 2s + 1), and 48 // those of the second one on even numbers (s -> 2s + 2). The number 0 49 // is reserved for an implicit (non-final) 'dead state' (required for 50 // the correct treatment of non-coaccessible states; kNoStateId is 51 // mapped to kDeadState for both acceptors). The union-find algorithm 52 // operates on the mapped IDs. 53 template <class Arc> 54 struct EquivalenceUtil { 55 typedef typename Arc::StateId StateId; 56 typedef typename Arc::Weight Weight; 57 typedef StateId MappedId; // ID for an equivalence class. 58 59 // MappedId for an implicit dead state. 60 static const MappedId kDeadState = 0; 61 62 // MappedId for lookup failure. 63 static const MappedId kInvalidId = -1; 64 65 // Maps state ID to the representative of the corresponding 66 // equivalence class. The parameter 'which_fst' takes the values 1 67 // and 2, identifying the input FST. MapStateEquivalenceUtil68 static MappedId MapState(StateId s, int32 which_fst) { 69 return 70 (kNoStateId == s) 71 ? 72 kDeadState 73 : 74 (static_cast<MappedId>(s) << 1) + which_fst; 75 } 76 // Maps set ID to State ID. UnMapStateEquivalenceUtil77 static StateId UnMapState(MappedId id) { 78 return static_cast<StateId>((--id) >> 1); 79 } 80 // Convenience function: checks if state with MappedId 's' is final 81 // in acceptor 'fa'. IsFinalEquivalenceUtil82 static bool IsFinal(const Fst<Arc> &fa, MappedId s) { 83 return 84 (kDeadState == s) ? 85 false : (fa.Final(UnMapState(s)) != Weight::Zero()); 86 } 87 // Convenience function: returns the representative of 'id' in 'sets', 88 // creating a new set if needed. FindSetEquivalenceUtil89 static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) { 90 MappedId repr = sets->FindSet(id); 91 if (repr != kInvalidId) { 92 return repr; 93 } else { 94 sets->MakeSet(id); 95 return id; 96 } 97 } 98 }; 99 100 template <class Arc> const 101 typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState; 102 103 template <class Arc> const 104 typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId; 105 106 107 // Equivalence checking algorithm: determines if the two FSTs 108 // <code>fst1</code> and <code>fst2</code> are equivalent. The input 109 // FSTs must be deterministic input-side epsilon-free acceptors, 110 // unweighted or with weights over a left semiring. Two acceptors are 111 // considered equivalent if they accept exactly the same set of 112 // strings (with the same weights). 113 // 114 // The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and 115 // Analysis of Computer Programs") successively constructs sets of 116 // states that can be reached by the same prefixes, starting with a 117 // set containing the start states of both acceptors. A disjoint tree 118 // forest (the union-find algorithm) is used to represent the sets of 119 // states. The algorithm returns 'false' if one of the constructed 120 // sets contains both final and non-final states. Returns optional error 121 // value (when FLAGS_error_fatal = false). 122 // 123 // Complexity: quasi-linear, i.e. O(n G(n)), where 124 // n = |S1| + |S2| is the number of states in both acceptors 125 // G(n) is a very slowly growing function that can be approximated 126 // by 4 by all practical purposes. 127 // 128 template <class Arc> 129 bool Equivalent(const Fst<Arc> &fst1, 130 const Fst<Arc> &fst2, 131 double delta = kDelta, bool *error = 0) { 132 typedef typename Arc::Weight Weight; 133 if (error) *error = false; 134 135 // Check that the symbol table are compatible 136 if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) || 137 !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) { 138 FSTERROR() << "Equivalent: input/output symbol tables of 1st argument " 139 << "do not match input/output symbol tables of 2nd argument"; 140 if (error) *error = true; 141 return false; 142 } 143 // Check properties first: 144 uint64 props = kNoEpsilons | kIDeterministic | kAcceptor; 145 if (fst1.Properties(props, true) != props) { 146 FSTERROR() << "Equivalent: first argument not an" 147 << " epsilon-free deterministic acceptor"; 148 if (error) *error = true; 149 return false; 150 } 151 if (fst2.Properties(props, true) != props) { 152 FSTERROR() << "Equivalent: second argument not an" 153 << " epsilon-free deterministic acceptor"; 154 if (error) *error = true; 155 return false; 156 } 157 158 if ((fst1.Properties(kUnweighted , true) != kUnweighted) 159 || (fst2.Properties(kUnweighted , true) != kUnweighted)) { 160 VectorFst<Arc> efst1(fst1); 161 VectorFst<Arc> efst2(fst2); 162 Push(&efst1, REWEIGHT_TO_INITIAL, delta); 163 Push(&efst2, REWEIGHT_TO_INITIAL, delta); 164 ArcMap(&efst1, QuantizeMapper<Arc>(delta)); 165 ArcMap(&efst2, QuantizeMapper<Arc>(delta)); 166 EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE); 167 ArcMap(&efst1, &mapper); 168 ArcMap(&efst2, &mapper); 169 return Equivalent(efst1, efst2); 170 } 171 172 // Convenience typedefs: 173 typedef typename Arc::StateId StateId; 174 typedef EquivalenceUtil<Arc> Util; 175 typedef typename Util::MappedId MappedId; 176 enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...) 177 178 MappedId s1 = Util::MapState(fst1.Start(), FST1); 179 MappedId s2 = Util::MapState(fst2.Start(), FST2); 180 181 // The union-find structure. 182 UnionFind<MappedId> eq_classes(1000, Util::kInvalidId); 183 184 // Initialize the union-find structure. 185 eq_classes.MakeSet(s1); 186 eq_classes.MakeSet(s2); 187 188 // Data structure for the (partial) acceptor transition function of 189 // fst1 and fst2: input labels mapped to pairs of MappedId's 190 // representing destination states of the corresponding arcs in fst1 191 // and fst2, respectively. 192 typedef 193 unordered_map<typename Arc::Label, pair<MappedId, MappedId> > 194 Label2StatePairMap; 195 196 Label2StatePairMap arc_pairs; 197 198 // Pairs of MappedId's to be processed, organized in a queue. 199 deque<pair<MappedId, MappedId> > q; 200 201 bool ret = true; 202 // Early return if the start states differ w.r.t. being final. 203 if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) { 204 ret = false; 205 } 206 207 // Main loop: explores the two acceptors in a breadth-first manner, 208 // updating the equivalence relation on the statesets. Loop 209 // invariant: each block of states contains either final states only 210 // or non-final states only. 211 for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) { 212 s1 = q.front().first; 213 s2 = q.front().second; 214 215 // Representatives of the equivalence classes of s1/s2. 216 MappedId rep1 = Util::FindSet(&eq_classes, s1); 217 MappedId rep2 = Util::FindSet(&eq_classes, s2); 218 219 if (rep1 != rep2) { 220 eq_classes.Union(rep1, rep2); 221 arc_pairs.clear(); 222 223 // Copy outgoing arcs starting at s1 into the hashtable. 224 if (Util::kDeadState != s1) { 225 ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1)); 226 for (; !arc_iter.Done(); arc_iter.Next()) { 227 const Arc &arc = arc_iter.Value(); 228 if (arc.weight != Weight::Zero()) { // Zero-weight arcs 229 // are treated as 230 // non-exisitent. 231 arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1); 232 } 233 } 234 } 235 // Copy outgoing arcs starting at s2 into the hashtable. 236 if (Util::kDeadState != s2) { 237 ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2)); 238 for (; !arc_iter.Done(); arc_iter.Next()) { 239 const Arc &arc = arc_iter.Value(); 240 if (arc.weight != Weight::Zero()) { // Zero-weight arcs 241 // are treated as 242 // non-existent. 243 arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2); 244 } 245 } 246 } 247 // Iterate through the hashtable and process pairs of target 248 // states. 249 for (typename Label2StatePairMap::const_iterator 250 arc_iter = arc_pairs.begin(); 251 arc_iter != arc_pairs.end(); 252 ++arc_iter) { 253 const pair<MappedId, MappedId> &p = arc_iter->second; 254 if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) { 255 // Detected inconsistency: return false. 256 ret = false; 257 break; 258 } 259 q.push_back(p); 260 } 261 } 262 } 263 264 if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) { 265 if (error) *error = true; 266 return false; 267 } 268 269 return ret; 270 } 271 272 } // namespace fst 273 274 #endif // FST_LIB_EQUIVALENT_H__ 275