• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // equivalent.h
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 //
16 // \file Functions and classes to determine the equivalence of two
17 // FSTs.
18 
19 #ifndef FST_LIB_EQUIVALENT_H__
20 #define FST_LIB_EQUIVALENT_H__
21 
22 #include <algorithm>
23 
24 #include <ext/hash_map>
25 using __gnu_cxx::hash_map;
26 
27 #include "fst/lib/encode.h"
28 #include "fst/lib/push.h"
29 #include "fst/lib/union-find.h"
30 #include "fst/lib/vector-fst.h"
31 
32 namespace fst {
33 
34 // Traits-like struct holding utility functions/typedefs/constants for
35 // the equivalence algorithm.
36 //
37 // Encoding device: in order to make the statesets of the two acceptors
38 // disjoint, we map Arc::StateId on the type MappedId. The states of
39 // the first acceptor are mapped on odd numbers (s -> 2s + 1), and
40 // those of the second one on even numbers (s -> 2s + 2). The number 0
41 // is reserved for an implicit (non-final) 'dead state' (required for
42 // the correct treatment of non-coaccessible states; kNoStateId is
43 // mapped to kDeadState for both acceptors). The union-find algorithm
44 // operates on the mapped IDs.
45 template <class Arc>
46 struct EquivalenceUtil {
47   typedef typename Arc::StateId StateId;
48   typedef typename Arc::Weight Weight;
49   typedef int32 MappedId;  // ID for an equivalence class.
50 
51   // MappedId for an implicit dead state.
52   static const MappedId kDeadState = 0;
53 
54   // MappedId for lookup failure.
55   static const MappedId kInvalidId = -1;
56 
57   // Maps state ID to the representative of the corresponding
58   // equivalence class. The parameter 'which_fst' takes the values 1
59   // and 2, identifying the input FST.
MapStateEquivalenceUtil60   static MappedId MapState(StateId s, int32 which_fst) {
61     return
62       (kNoStateId == s)
63       ?
64       kDeadState
65       :
66       (static_cast<MappedId>(s) << 1) + which_fst;
67   }
68   // Maps set ID to State ID.
UnMapStateEquivalenceUtil69   static StateId UnMapState(MappedId id) {
70     return static_cast<StateId>((--id) >> 1);
71   }
72   // Convenience function: checks if state with MappedId 's' is final
73   // in acceptor 'fa'.
IsFinalEquivalenceUtil74   static bool IsFinal(const Fst<Arc> &fa, MappedId s) {
75     return
76       (kDeadState == s) ?
77       false : (fa.Final(UnMapState(s)) != Weight::Zero());
78   }
79   // Convenience function: returns the representative of 'id' in 'sets',
80   // creating a new set if needed.
FindSetEquivalenceUtil81   static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) {
82     MappedId repr = sets->FindSet(id);
83     if (repr != kInvalidId) {
84       return repr;
85     } else {
86       sets->MakeSet(id);
87       return id;
88     }
89   }
90 };
91 
92 // Equivalence checking algorithm: determines if the two FSTs
93 // <code>fst1</code> and <code>fst2</code> are equivalent. The input
94 // FSTs must be deterministic input-side epsilon-free acceptors,
95 // unweighted or with weights over a left semiring. Two acceptors are
96 // considered equivalent if they accept exactly the same set of
97 // strings (with the same weights).
98 //
// The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and
// Analysis of Computer Algorithms") successively constructs sets of
101 // states that can be reached by the same prefixes, starting with a
102 // set containing the start states of both acceptors. A disjoint tree
103 // forest (the union-find algorithm) is used to represent the sets of
104 // states. The algorithm returns 'false' if one of the constructed
105 // sets contains both final and non-final states.
106 //
107 // Complexity: quasi-linear, i.e. O(n G(n)), where
108 //   n = |S1| + |S2| is the number of states in both acceptors
109 //   G(n) is a very slowly growing function that can be approximated
//        by 4 for all practical purposes.
111 //
112 template <class Arc>
Equivalent(const Fst<Arc> & fst1,const Fst<Arc> & fst2)113 bool Equivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2) {
114   typedef typename Arc::Weight Weight;
115   // Check properties first:
116   uint64 props = kNoEpsilons | kIDeterministic | kAcceptor;
117   if (fst1.Properties(props, true) != props) {
118     LOG(FATAL) << "Equivalent: first argument not an"
119                << " epsilon-free deterministic acceptor";
120   }
121   if (fst2.Properties(props, true) != props) {
122     LOG(FATAL) << "Equivalent: second argument not an"
123                << " epsilon-free deterministic acceptor";
124   }
125 
126   if ((fst1.Properties(kUnweighted , true) != kUnweighted)
127       || (fst2.Properties(kUnweighted , true) != kUnweighted)) {
128     VectorFst<Arc> efst1(fst1);
129     VectorFst<Arc> efst2(fst2);
130     Push(&efst1, REWEIGHT_TO_INITIAL);
131     Push(&efst2, REWEIGHT_TO_INITIAL);
132     Map(&efst1, QuantizeMapper<Arc>());
133     Map(&efst2, QuantizeMapper<Arc>());
134     EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE);
135     Map(&efst1, &mapper);
136     Map(&efst2, &mapper);
137     return Equivalent(efst1, efst2);
138   }
139 
140   // Convenience typedefs:
141   typedef typename Arc::StateId StateId;
142   typedef EquivalenceUtil<Arc> Util;
143   typedef typename Util::MappedId MappedId;
144   enum { FST1 = 1, FST2 = 2 };  // Required by Util::MapState(...)
145 
146   MappedId s1 = Util::MapState(fst1.Start(), FST1);
147   MappedId s2 = Util::MapState(fst2.Start(), FST2);
148 
149   // The union-find structure.
150   UnionFind<MappedId> eq_classes(1000, Util::kInvalidId);
151 
152   // Initialize the union-find structure.
153   eq_classes.MakeSet(s1);
154   eq_classes.MakeSet(s2);
155 
156   // Early return if the start states differ w.r.t. being final.
157   if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) {
158     return false;
159   }
160   // Data structure for the (partial) acceptor transition function of
161   // fst1 and fst2: input labels mapped to pairs of MappedId's
162   // representing destination states of the corresponding arcs in fst1
163   // and fst2, respectively.
164   typedef
165     hash_map<typename Arc::Label, pair<MappedId, MappedId> >
166     Label2StatePairMap;
167 
168   Label2StatePairMap arc_pairs;
169 
170   // Pairs of MappedId's to be processed, organized in a queue.
171   deque<pair<MappedId, MappedId> > q;
172 
173   // Main loop: explores the two acceptors in a breadth-first manner,
174   // updating the equivalence relation on the statesets. Loop
175   // invariant: each block of states contains either final states only
176   // or non-final states only.
177   for (q.push_back(make_pair(s1, s2)); !q.empty(); q.pop_front()) {
178     s1 = q.front().first;
179     s2 = q.front().second;
180 
181     // Representatives of the equivalence classes of s1/s2.
182     MappedId rep1 = Util::FindSet(&eq_classes, s1);
183     MappedId rep2 = Util::FindSet(&eq_classes, s2);
184 
185     if (rep1 != rep2) {
186       eq_classes.Union(rep1, rep2);
187       arc_pairs.clear();
188 
189       // Copy outgoing arcs starting at s1 into the hashtable.
190       if (Util::kDeadState != s1) {
191         ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1));
192         for (; !arc_iter.Done(); arc_iter.Next()) {
193           const Arc &arc = arc_iter.Value();
194           if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
195                                                    // are treated as
196                                                    // non-exisitent.
197             arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1);
198           }
199         }
200       }
201       // Copy outgoing arcs starting at s2 into the hashtable.
202       if (Util::kDeadState != s2) {
203         ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2));
204         for (; !arc_iter.Done(); arc_iter.Next()) {
205           const Arc &arc = arc_iter.Value();
206           if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
207                                                    // are treated as
208                                                    // non-existent.
209             arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2);
210           }
211         }
212       }
213       // Iterate through the hashtable and process pairs of target
214       // states.
215       for (typename Label2StatePairMap::const_iterator
216              arc_iter = arc_pairs.begin();
217            arc_iter != arc_pairs.end();
218            ++arc_iter) {
219         const pair<MappedId, MappedId> &p = arc_iter->second;
220         if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) {
221           // Detected inconsistency: return false.
222           return false;
223         }
224         q.push_back(p);
225       }
226     }
227   }
228   return true;
229 }
230 
231 }  // namespace fst
232 
233 #endif  // FST_LIB_EQUIVALENT_H__
234