• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // equivalent.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: wojciech@google.com (Wojciech Skut)
17 //
18 // \file Functions and classes to determine the equivalence of two
19 // FSTs.
20 
21 #ifndef FST_LIB_EQUIVALENT_H__
22 #define FST_LIB_EQUIVALENT_H__
23 
24 #include <algorithm>
25 #include <deque>
26 #include <unordered_map>
27 using std::tr1::unordered_map;
28 using std::tr1::unordered_multimap;
29 #include <utility>
30 using std::pair; using std::make_pair;
31 #include <vector>
32 using std::vector;
33 
34 #include <fst/encode.h>
35 #include <fst/push.h>
36 #include <fst/union-find.h>
37 #include <fst/vector-fst.h>
38 
39 
40 namespace fst {
41 
42 // Traits-like struct holding utility functions/typedefs/constants for
43 // the equivalence algorithm.
44 //
45 // Encoding device: in order to make the statesets of the two acceptors
46 // disjoint, we map Arc::StateId on the type MappedId. The states of
47 // the first acceptor are mapped on odd numbers (s -> 2s + 1), and
48 // those of the second one on even numbers (s -> 2s + 2). The number 0
49 // is reserved for an implicit (non-final) 'dead state' (required for
50 // the correct treatment of non-coaccessible states; kNoStateId is
51 // mapped to kDeadState for both acceptors). The union-find algorithm
52 // operates on the mapped IDs.
53 template <class Arc>
54 struct EquivalenceUtil {
55   typedef typename Arc::StateId StateId;
56   typedef typename Arc::Weight Weight;
57   typedef StateId MappedId;  // ID for an equivalence class.
58 
59   // MappedId for an implicit dead state.
60   static const MappedId kDeadState = 0;
61 
62   // MappedId for lookup failure.
63   static const MappedId kInvalidId = -1;
64 
65   // Maps state ID to the representative of the corresponding
66   // equivalence class. The parameter 'which_fst' takes the values 1
67   // and 2, identifying the input FST.
MapStateEquivalenceUtil68   static MappedId MapState(StateId s, int32 which_fst) {
69     return
70       (kNoStateId == s)
71       ?
72       kDeadState
73       :
74       (static_cast<MappedId>(s) << 1) + which_fst;
75   }
76   // Maps set ID to State ID.
UnMapStateEquivalenceUtil77   static StateId UnMapState(MappedId id) {
78     return static_cast<StateId>((--id) >> 1);
79   }
80   // Convenience function: checks if state with MappedId 's' is final
81   // in acceptor 'fa'.
IsFinalEquivalenceUtil82   static bool IsFinal(const Fst<Arc> &fa, MappedId s) {
83     return
84       (kDeadState == s) ?
85       false : (fa.Final(UnMapState(s)) != Weight::Zero());
86   }
87   // Convenience function: returns the representative of 'id' in 'sets',
88   // creating a new set if needed.
FindSetEquivalenceUtil89   static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) {
90     MappedId repr = sets->FindSet(id);
91     if (repr != kInvalidId) {
92       return repr;
93     } else {
94       sets->MakeSet(id);
95       return id;
96     }
97   }
98 };
99 
100 template <class Arc> const
101 typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState;
102 
103 template <class Arc> const
104 typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId;
105 
106 
107 // Equivalence checking algorithm: determines if the two FSTs
108 // <code>fst1</code> and <code>fst2</code> are equivalent. The input
109 // FSTs must be deterministic input-side epsilon-free acceptors,
110 // unweighted or with weights over a left semiring. Two acceptors are
111 // considered equivalent if they accept exactly the same set of
112 // strings (with the same weights).
113 //
114 // The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and
115 // Analysis of Computer Programs") successively constructs sets of
116 // states that can be reached by the same prefixes, starting with a
117 // set containing the start states of both acceptors. A disjoint tree
118 // forest (the union-find algorithm) is used to represent the sets of
119 // states. The algorithm returns 'false' if one of the constructed
120 // sets contains both final and non-final states. Returns optional error
121 // value (when FLAGS_error_fatal = false).
122 //
123 // Complexity: quasi-linear, i.e. O(n G(n)), where
124 //   n = |S1| + |S2| is the number of states in both acceptors
125 //   G(n) is a very slowly growing function that can be approximated
126 //        by 4 by all practical purposes.
127 //
128 template <class Arc>
129 bool Equivalent(const Fst<Arc> &fst1,
130                 const Fst<Arc> &fst2,
131                 double delta = kDelta, bool *error = 0) {
132   typedef typename Arc::Weight Weight;
133   if (error) *error = false;
134 
135   // Check that the symbol table are compatible
136   if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) ||
137       !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) {
138     FSTERROR() << "Equivalent: input/output symbol tables of 1st argument "
139                << "do not match input/output symbol tables of 2nd argument";
140     if (error) *error = true;
141     return false;
142   }
143   // Check properties first:
144   uint64 props = kNoEpsilons | kIDeterministic | kAcceptor;
145   if (fst1.Properties(props, true) != props) {
146     FSTERROR() << "Equivalent: first argument not an"
147                << " epsilon-free deterministic acceptor";
148     if (error) *error = true;
149     return false;
150   }
151   if (fst2.Properties(props, true) != props) {
152     FSTERROR() << "Equivalent: second argument not an"
153                << " epsilon-free deterministic acceptor";
154     if (error) *error = true;
155     return false;
156   }
157 
158   if ((fst1.Properties(kUnweighted , true) != kUnweighted)
159       || (fst2.Properties(kUnweighted , true) != kUnweighted)) {
160     VectorFst<Arc> efst1(fst1);
161     VectorFst<Arc> efst2(fst2);
162     Push(&efst1, REWEIGHT_TO_INITIAL, delta);
163     Push(&efst2, REWEIGHT_TO_INITIAL, delta);
164     ArcMap(&efst1, QuantizeMapper<Arc>(delta));
165     ArcMap(&efst2, QuantizeMapper<Arc>(delta));
166     EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE);
167     ArcMap(&efst1, &mapper);
168     ArcMap(&efst2, &mapper);
169     return Equivalent(efst1, efst2);
170   }
171 
172   // Convenience typedefs:
173   typedef typename Arc::StateId StateId;
174   typedef EquivalenceUtil<Arc> Util;
175   typedef typename Util::MappedId MappedId;
176   enum { FST1 = 1, FST2 = 2 };  // Required by Util::MapState(...)
177 
178   MappedId s1 = Util::MapState(fst1.Start(), FST1);
179   MappedId s2 = Util::MapState(fst2.Start(), FST2);
180 
181   // The union-find structure.
182   UnionFind<MappedId> eq_classes(1000, Util::kInvalidId);
183 
184   // Initialize the union-find structure.
185   eq_classes.MakeSet(s1);
186   eq_classes.MakeSet(s2);
187 
188   // Data structure for the (partial) acceptor transition function of
189   // fst1 and fst2: input labels mapped to pairs of MappedId's
190   // representing destination states of the corresponding arcs in fst1
191   // and fst2, respectively.
192   typedef
193     unordered_map<typename Arc::Label, pair<MappedId, MappedId> >
194     Label2StatePairMap;
195 
196   Label2StatePairMap arc_pairs;
197 
198   // Pairs of MappedId's to be processed, organized in a queue.
199   deque<pair<MappedId, MappedId> > q;
200 
201   bool ret = true;
202   // Early return if the start states differ w.r.t. being final.
203   if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) {
204     ret = false;
205   }
206 
207   // Main loop: explores the two acceptors in a breadth-first manner,
208   // updating the equivalence relation on the statesets. Loop
209   // invariant: each block of states contains either final states only
210   // or non-final states only.
211   for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) {
212     s1 = q.front().first;
213     s2 = q.front().second;
214 
215     // Representatives of the equivalence classes of s1/s2.
216     MappedId rep1 = Util::FindSet(&eq_classes, s1);
217     MappedId rep2 = Util::FindSet(&eq_classes, s2);
218 
219     if (rep1 != rep2) {
220       eq_classes.Union(rep1, rep2);
221       arc_pairs.clear();
222 
223       // Copy outgoing arcs starting at s1 into the hashtable.
224       if (Util::kDeadState != s1) {
225         ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1));
226         for (; !arc_iter.Done(); arc_iter.Next()) {
227           const Arc &arc = arc_iter.Value();
228           if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
229                                                    // are treated as
230                                                    // non-exisitent.
231             arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1);
232           }
233         }
234       }
235       // Copy outgoing arcs starting at s2 into the hashtable.
236       if (Util::kDeadState != s2) {
237         ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2));
238         for (; !arc_iter.Done(); arc_iter.Next()) {
239           const Arc &arc = arc_iter.Value();
240           if (arc.weight != Weight::Zero()) {  // Zero-weight arcs
241                                                    // are treated as
242                                                    // non-existent.
243             arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2);
244           }
245         }
246       }
247       // Iterate through the hashtable and process pairs of target
248       // states.
249       for (typename Label2StatePairMap::const_iterator
250              arc_iter = arc_pairs.begin();
251            arc_iter != arc_pairs.end();
252            ++arc_iter) {
253         const pair<MappedId, MappedId> &p = arc_iter->second;
254         if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) {
255           // Detected inconsistency: return false.
256           ret = false;
257           break;
258         }
259         q.push_back(p);
260       }
261     }
262   }
263 
264   if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) {
265     if (error) *error = true;
266     return false;
267   }
268 
269   return ret;
270 }
271 
272 }  // namespace fst
273 
274 #endif  // FST_LIB_EQUIVALENT_H__
275