1 // compose.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Class to compute the composition of two FSTs
20
21 #ifndef FST_LIB_COMPOSE_H__
22 #define FST_LIB_COMPOSE_H__
23
24 #include <algorithm>
25 #include <string>
26 #include <vector>
27 using std::vector;
28
29 #include <fst/cache.h>
30 #include <fst/compose-filter.h>
31 #include <fst/lookahead-filter.h>
32 #include <fst/matcher.h>
33 #include <fst/state-table.h>
34 #include <fst/test-properties.h>
35
36
37 namespace fst {
38
39 // Delayed composition options templated on the arc type, the matcher,
40 // the composition filter, and the composition state table. By
41 // default, the matchers, filter, and state table are constructed by
42 // composition. If set below, the user can instead pass in these
43 // objects; in that case, ComposeFst takes their ownership. This
44 // version controls composition implemented between generic Fst<Arc>
45 // types and a shared matcher type M for Fst<Arc>. This should be
46 // adequate for most applications, giving a reasonable tradeoff
47 // between efficiency and code sharing (but see ComposeFstImplOptions).
48 template <class A,
49 class M = Matcher<Fst<A> >,
50 class F = SequenceComposeFilter<M>,
51 class T = GenericComposeStateTable<A, typename F::FilterState> >
52 struct ComposeFstOptions : public CacheOptions {
53 M *matcher1; // FST1 matcher (see matcher.h)
54 M *matcher2; // FST2 matcher
55 F *filter; // Composition filter (see compose-filter.h)
56 T *state_table; // Composition state table (see compose-state-table.h)
57
58 explicit ComposeFstOptions(const CacheOptions &opts,
59 M *mat1 = 0, M *mat2 = 0,
60 F *filt = 0, T *sttable= 0)
CacheOptionsComposeFstOptions61 : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
62 filter(filt), state_table(sttable) {}
63
ComposeFstOptionsComposeFstOptions64 ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {}
65 };
66
67
68 // Delayed composition options templated on the two matcher types, the
69 // composition filter, and the composition state table. By default,
70 // the matchers, filter, and state table are constructed by
71 // composition. If set below, the user can instead pass in these
72 // objects; in that case, ComposeFst takes their ownership. This
73 // version controls composition implemented using arbitrary matchers
74 // (of the same Arc type but otherwise arbitrary Fst type). The user
75 // must ensure the matchers are compatible. These options permit the
76 // most efficient use, but shares the least code. This is for advanced
77 // use only in the most demanding or specialized applications that can
78 // benefit from it (o.w. prefer ComposeFstOptions).
79 template <class M1, class M2,
80 class F = SequenceComposeFilter<M1, M2>,
81 class T = GenericComposeStateTable<typename M1::Arc,
82 typename F::FilterState> >
83 struct ComposeFstImplOptions : public CacheOptions {
84 M1 *matcher1; // FST1 matcher (see matcher.h)
85 M2 *matcher2; // FST2 matcher
86 F *filter; // Composition filter (see compose-filter.h)
87 T *state_table; // Composition state table (see compose-state-table.h)
88
89 explicit ComposeFstImplOptions(const CacheOptions &opts,
90 M1 *mat1 = 0, M2 *mat2 = 0,
91 F *filt = 0, T *sttable= 0)
CacheOptionsComposeFstImplOptions92 : CacheOptions(opts), matcher1(mat1), matcher2(mat2),
93 filter(filt), state_table(sttable) {}
94
ComposeFstImplOptionsComposeFstImplOptions95 ComposeFstImplOptions()
96 : matcher1(0), matcher2(0), filter(0), state_table(0) {}
97 };
98
99
100 // Implementation of delayed composition. This base class is
101 // common to the variants with different matchers, composition filters
102 // and state tables.
103 template <class A>
104 class ComposeFstImplBase : public CacheImpl<A> {
105 public:
106 using FstImpl<A>::SetType;
107 using FstImpl<A>::SetProperties;
108 using FstImpl<A>::Properties;
109 using FstImpl<A>::SetInputSymbols;
110 using FstImpl<A>::SetOutputSymbols;
111
112 using CacheBaseImpl< CacheState<A> >::HasStart;
113 using CacheBaseImpl< CacheState<A> >::HasFinal;
114 using CacheBaseImpl< CacheState<A> >::HasArcs;
115 using CacheBaseImpl< CacheState<A> >::SetFinal;
116 using CacheBaseImpl< CacheState<A> >::SetStart;
117
118 typedef typename A::Label Label;
119 typedef typename A::Weight Weight;
120 typedef typename A::StateId StateId;
121 typedef CacheState<A> State;
122
ComposeFstImplBase(const Fst<A> & fst1,const Fst<A> & fst2,const CacheOptions & opts)123 ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2,
124 const CacheOptions &opts)
125 :CacheImpl<A>(opts) {
126 VLOG(2) << "ComposeFst(" << this << "): Begin";
127 SetType("compose");
128
129 if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) {
130 FSTERROR() << "ComposeFst: output symbol table of 1st argument "
131 << "does not match input symbol table of 2nd argument";
132 SetProperties(kError, kError);
133 }
134
135 SetInputSymbols(fst1.InputSymbols());
136 SetOutputSymbols(fst2.OutputSymbols());
137 }
138
ComposeFstImplBase(const ComposeFstImplBase<A> & impl)139 ComposeFstImplBase(const ComposeFstImplBase<A> &impl)
140 : CacheImpl<A>(impl) {
141 SetProperties(impl.Properties(), kCopyProperties);
142 SetInputSymbols(impl.InputSymbols());
143 SetOutputSymbols(impl.OutputSymbols());
144 }
145
146 virtual ComposeFstImplBase<A> *Copy() = 0;
147
~ComposeFstImplBase()148 virtual ~ComposeFstImplBase() {}
149
Start()150 StateId Start() {
151 if (!HasStart()) {
152 StateId start = ComputeStart();
153 if (start != kNoStateId) {
154 SetStart(start);
155 }
156 }
157 return CacheImpl<A>::Start();
158 }
159
Final(StateId s)160 Weight Final(StateId s) {
161 if (!HasFinal(s)) {
162 Weight final = ComputeFinal(s);
163 SetFinal(s, final);
164 }
165 return CacheImpl<A>::Final(s);
166 }
167
168 virtual void Expand(StateId s) = 0;
169
NumArcs(StateId s)170 size_t NumArcs(StateId s) {
171 if (!HasArcs(s))
172 Expand(s);
173 return CacheImpl<A>::NumArcs(s);
174 }
175
NumInputEpsilons(StateId s)176 size_t NumInputEpsilons(StateId s) {
177 if (!HasArcs(s))
178 Expand(s);
179 return CacheImpl<A>::NumInputEpsilons(s);
180 }
181
NumOutputEpsilons(StateId s)182 size_t NumOutputEpsilons(StateId s) {
183 if (!HasArcs(s))
184 Expand(s);
185 return CacheImpl<A>::NumOutputEpsilons(s);
186 }
187
InitArcIterator(StateId s,ArcIteratorData<A> * data)188 void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
189 if (!HasArcs(s))
190 Expand(s);
191 CacheImpl<A>::InitArcIterator(s, data);
192 }
193
194 protected:
195 virtual StateId ComputeStart() = 0;
196 virtual Weight ComputeFinal(StateId s) = 0;
197 };
198
199
200 // Implementaion of delayed composition templated on the matchers (see
201 // matcher.h), composition filter (see compose-filter-inl.h) and
202 // the composition state table (see compose-state-table.h).
203 template <class M1, class M2, class F, class T>
204 class ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> {
205 typedef typename M1::FST FST1;
206 typedef typename M2::FST FST2;
207 typedef typename M1::Arc Arc;
208 typedef typename Arc::StateId StateId;
209 typedef typename Arc::Label Label;
210 typedef typename Arc::Weight Weight;
211 typedef typename F::FilterState FilterState;
212 typedef typename F::Matcher1 Matcher1;
213 typedef typename F::Matcher2 Matcher2;
214
215 using CacheBaseImpl<CacheState<Arc> >::SetArcs;
216 using FstImpl<Arc>::SetType;
217 using FstImpl<Arc>::SetProperties;
218
219 typedef ComposeStateTuple<StateId, FilterState> StateTuple;
220
221 public:
222 ComposeFstImpl(const FST1 &fst1, const FST2 &fst2,
223 const ComposeFstImplOptions<M1, M2, F, T> &opts);
224
ComposeFstImpl(const ComposeFstImpl<M1,M2,F,T> & impl)225 ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl)
226 : ComposeFstImplBase<Arc>(impl),
227 filter_(new F(*impl.filter_, true)),
228 matcher1_(filter_->GetMatcher1()),
229 matcher2_(filter_->GetMatcher2()),
230 fst1_(matcher1_->GetFst()),
231 fst2_(matcher2_->GetFst()),
232 state_table_(new T(*impl.state_table_)),
233 match_type_(impl.match_type_) {}
234
~ComposeFstImpl()235 ~ComposeFstImpl() {
236 VLOG(2) << "ComposeFst(" << this
237 << "): End: # of visited states: " << state_table_->Size();
238
239 delete filter_;
240 delete state_table_;
241 }
242
Copy()243 virtual ComposeFstImpl<M1, M2, F, T> *Copy() {
244 return new ComposeFstImpl<M1, M2, F, T>(*this);
245 }
246
Properties()247 uint64 Properties() const { return Properties(kFstProperties); }
248
249 // Set error if found; return FST impl properties.
Properties(uint64 mask)250 uint64 Properties(uint64 mask) const {
251 if ((mask & kError) &&
252 (fst1_.Properties(kError, false) ||
253 fst2_.Properties(kError, false) ||
254 (matcher1_->Properties(0) & kError) ||
255 (matcher2_->Properties(0) & kError) |
256 (filter_->Properties(0) & kError) ||
257 state_table_->Error())) {
258 SetProperties(kError, kError);
259 }
260 return FstImpl<Arc>::Properties(mask);
261 }
262
263 // Arranges it so that the first arg to OrderedExpand is the Fst
264 // that will be matched on.
Expand(StateId s)265 void Expand(StateId s) {
266 const StateTuple &tuple = state_table_->Tuple(s);
267 StateId s1 = tuple.state_id1;
268 StateId s2 = tuple.state_id2;
269 filter_->SetState(s1, s2, tuple.filter_state);
270 if (match_type_ == MATCH_OUTPUT ||
271 (match_type_ == MATCH_BOTH &&
272 internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2)))
273 OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false);
274 else
275 OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true);
276 }
277
278 private:
279 // This does that actual matching of labels in the composition. The
280 // arguments are ordered so matching is called on state 'sa' of
281 // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg
282 // determines whether the input or output label of arcs at 'sb' is
283 // the one to match on.
284 template <class FST, class Matcher>
OrderedExpand(StateId s,const Fst<Arc> &,StateId sa,const FST & fstb,StateId sb,Matcher * matchera,bool match_input)285 void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa,
286 const FST &fstb, StateId sb,
287 Matcher *matchera, bool match_input) {
288 matchera->SetState(sa);
289
290 // First process non-consuming symbols (e.g., epsilons) on FSTA.
291 Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0,
292 Weight::One(), sb);
293 MatchArc(s, matchera, loop, match_input);
294
295 // Then process matches on FSTB.
296 for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next())
297 MatchArc(s, matchera, iterb.Value(), match_input);
298
299 SetArcs(s);
300 }
301
302 // Matches a single transition from 'fstb' against 'fata' at 's'.
303 template <class Matcher>
MatchArc(StateId s,Matcher * matchera,const Arc & arc,bool match_input)304 void MatchArc(StateId s, Matcher *matchera,
305 const Arc &arc, bool match_input) {
306 if (matchera->Find(match_input ? arc.olabel : arc.ilabel)) {
307 for (; !matchera->Done(); matchera->Next()) {
308 Arc arca = matchera->Value();
309 Arc arcb = arc;
310 if (match_input) {
311 const FilterState &f = filter_->FilterArc(&arcb, &arca);
312 if (f != FilterState::NoState())
313 AddArc(s, arcb, arca, f);
314 } else {
315 const FilterState &f = filter_->FilterArc(&arca, &arcb);
316 if (f != FilterState::NoState())
317 AddArc(s, arca, arcb, f);
318 }
319 }
320 }
321 }
322
323 // Add a matching transition at 's'.
AddArc(StateId s,const Arc & arc1,const Arc & arc2,const FilterState & f)324 void AddArc(StateId s, const Arc &arc1, const Arc &arc2,
325 const FilterState &f) {
326 StateTuple tuple(arc1.nextstate, arc2.nextstate, f);
327 Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight),
328 state_table_->FindState(tuple));
329 CacheImpl<Arc>::PushArc(s, oarc);
330 }
331
ComputeStart()332 StateId ComputeStart() {
333 StateId s1 = fst1_.Start();
334 if (s1 == kNoStateId)
335 return kNoStateId;
336
337 StateId s2 = fst2_.Start();
338 if (s2 == kNoStateId)
339 return kNoStateId;
340
341 const FilterState &f = filter_->Start();
342 StateTuple tuple(s1, s2, f);
343 return state_table_->FindState(tuple);
344 }
345
ComputeFinal(StateId s)346 Weight ComputeFinal(StateId s) {
347 const StateTuple &tuple = state_table_->Tuple(s);
348 StateId s1 = tuple.state_id1;
349 Weight final1 = internal::Final(fst1_, s1);
350 if (final1 == Weight::Zero())
351 return final1;
352
353 StateId s2 = tuple.state_id2;
354 Weight final2 = internal::Final(fst2_, s2);
355 if (final2 == Weight::Zero())
356 return final2;
357
358 filter_->SetState(s1, s2, tuple.filter_state);
359 filter_->FilterFinal(&final1, &final2);
360 return Times(final1, final2);
361 }
362
363 F *filter_;
364 Matcher1 *matcher1_;
365 Matcher2 *matcher2_;
366 const FST1 &fst1_;
367 const FST2 &fst2_;
368 T *state_table_;
369
370 MatchType match_type_;
371
372 void operator=(const ComposeFstImpl<M1, M2, F, T> &); // disallow
373 };
374
375 template <class M1, class M2, class F, class T> inline
ComposeFstImpl(const FST1 & fst1,const FST2 & fst2,const ComposeFstImplOptions<M1,M2,F,T> & opts)376 ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl(
377 const FST1 &fst1, const FST2 &fst2,
378 const ComposeFstImplOptions<M1, M2, F, T> &opts)
379 : ComposeFstImplBase<Arc>(fst1, fst2, opts),
380 filter_(opts.filter ? opts.filter :
381 new F(fst1, fst2, opts.matcher1, opts.matcher2)),
382 matcher1_(filter_->GetMatcher1()),
383 matcher2_(filter_->GetMatcher2()),
384 fst1_(matcher1_->GetFst()),
385 fst2_(matcher2_->GetFst()),
386 state_table_(opts.state_table ? opts.state_table :
387 new T(fst1_, fst2_)) {
388 MatchType type1 = matcher1_->Type(false);
389 MatchType type2 = matcher2_->Type(false);
390 if (type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) {
391 match_type_ = MATCH_BOTH;
392 } else if (type1 == MATCH_OUTPUT) {
393 match_type_ = MATCH_OUTPUT;
394 } else if (type2 == MATCH_INPUT) {
395 match_type_ = MATCH_INPUT;
396 } else if (matcher1_->Type(true) == MATCH_OUTPUT) {
397 match_type_ = MATCH_OUTPUT;
398 } else if (matcher2_->Type(true) == MATCH_INPUT) {
399 match_type_ = MATCH_INPUT;
400 } else {
401 FSTERROR() << "ComposeFst: 1st argument cannot match on output labels "
402 << "and 2nd argument cannot match on input labels (sort?).";
403 SetProperties(kError, kError);
404 }
405 uint64 fprops1 = fst1.Properties(kFstProperties, false);
406 uint64 fprops2 = fst2.Properties(kFstProperties, false);
407 uint64 mprops1 = matcher1_->Properties(fprops1);
408 uint64 mprops2 = matcher2_->Properties(fprops2);
409 uint64 cprops = ComposeProperties(mprops1, mprops2);
410 SetProperties(filter_->Properties(cprops), kCopyProperties);
411 if (state_table_->Error()) SetProperties(kError, kError);
412 VLOG(2) << "ComposeFst(" << this << "): Initialized";
413 }
414
415
416 // Computes the composition of two transducers. This version is a
417 // delayed Fst. If FST1 transduces string x to y with weight a and FST2
418 // transduces y to z with weight b, then their composition transduces
419 // string x to z with weight Times(x, z).
420 //
421 // The output labels of the first transducer or the input labels of
422 // the second transducer must be sorted (with the default matcher).
423 // The weights need to form a commutative semiring (valid for
424 // TropicalWeight and LogWeight).
425 //
426 // Complexity:
427 // Assuming the first FST is unsorted and the second is sorted:
428 // - Time: O(v1 v2 d1 (log d2 + m2)),
429 // - Space: O(v1 v2)
430 // where vi = # of states visited, di = maximum out-degree, and mi the
431 // maximum multiplicity of the states visited for the ith
432 // FST. Constant time and space to visit an input state or arc is
433 // assumed and exclusive of caching.
434 //
435 // Caveats:
436 // - ComposeFst does not trim its output (since it is a delayed operation).
437 // - The efficiency of composition can be strongly affected by several factors:
438 // - the choice of which tnansducer is sorted - prefer sorting the FST
439 // that has the greater average out-degree.
440 // - the amount of non-determinism
441 // - the presence and location of epsilon transitions - avoid epsilon
442 // transitions on the output side of the first transducer or
443 // the input side of the second transducer or prefer placing
444 // them later in a path since they delay matching and can
445 // introduce non-coaccessible states and transitions.
446 //
447 // This class attaches interface to implementation and handles
448 // reference counting, delegating most methods to ImplToFst.
449 template <class A>
450 class ComposeFst : public ImplToFst< ComposeFstImplBase<A> > {
451 public:
452 friend class ArcIterator< ComposeFst<A> >;
453 friend class StateIterator< ComposeFst<A> >;
454
455 typedef A Arc;
456 typedef typename A::Weight Weight;
457 typedef typename A::StateId StateId;
458 typedef CacheState<A> State;
459 typedef ComposeFstImplBase<A> Impl;
460
461 using ImplToFst<Impl>::SetImpl;
462
463 // Compose specifying only caching options.
464 ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
465 const CacheOptions &opts = CacheOptions())
CreateBase(fst1,fst2,opts)466 : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {}
467
468 // Compose specifying one shared matcher type M. Requires input
469 // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for
470 // best code-sharing and matcher compatiblity.
471 template <class M, class F, class T>
ComposeFst(const Fst<A> & fst1,const Fst<A> & fst2,const ComposeFstOptions<A,M,F,T> & opts)472 ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2,
473 const ComposeFstOptions<A, M, F, T> &opts)
474 : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {}
475
476 // Compose specifying two matcher types M1 and M2. Requires input
477 // Fsts (of the same Arc type but o.w. arbitrary) match the
478 // corresponding matcher FST types (M1::FST, M2::FST). Recommended
479 // only for advanced use in demanding or specialized applications
480 // due to potential code bloat and matcher incompatibilities.
481 template <class M1, class M2, class F, class T>
ComposeFst(const typename M1::FST & fst1,const typename M2::FST & fst2,const ComposeFstImplOptions<M1,M2,F,T> & opts)482 ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2,
483 const ComposeFstImplOptions<M1, M2, F, T> &opts)
484 : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {}
485
486 // See Fst<>::Copy() for doc.
487 ComposeFst(const ComposeFst<A> &fst, bool safe = false) {
488 if (safe)
489 SetImpl(fst.GetImpl()->Copy());
490 else
491 SetImpl(fst.GetImpl(), false);
492 }
493
494 // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc.
495 virtual ComposeFst<A> *Copy(bool safe = false) const {
496 return new ComposeFst<A>(*this, safe);
497 }
498
499 virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
500
InitArcIterator(StateId s,ArcIteratorData<A> * data)501 virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
502 GetImpl()->InitArcIterator(s, data);
503 }
504
505 protected:
ComposeFst()506 ComposeFst() {}
507
508 // Create compose implementation specifying two matcher types.
509 template <class M1, class M2, class F, class T>
CreateBase2(const typename M1::FST & fst1,const typename M2::FST & fst2,const ComposeFstImplOptions<M1,M2,F,T> & opts)510 static Impl *CreateBase2(
511 const typename M1::FST &fst1, const typename M2::FST &fst2,
512 const ComposeFstImplOptions<M1, M2, F, T> &opts) {
513 Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts);
514 if (!(Weight::Properties() & kCommutative)) {
515 int64 props1 = fst1.Properties(kUnweighted, true);
516 int64 props2 = fst2.Properties(kUnweighted, true);
517 if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) {
518 FSTERROR() << "ComposeFst: Weights must be a commutative semiring: "
519 << Weight::Type();
520 impl->SetProperties(kError, kError);
521 }
522 }
523 return impl;
524 }
525
526 // Create compose implementation specifying one matcher type.
527 // Requires input Fsts and matcher FST type (M::FST) be Fst<A>
528 template <class M, class F, class T>
CreateBase1(const Fst<A> & fst1,const Fst<A> & fst2,const ComposeFstOptions<A,M,F,T> & opts)529 static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2,
530 const ComposeFstOptions<A, M, F, T> &opts) {
531 ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2,
532 opts.filter, opts.state_table);
533 return CreateBase2(fst1, fst2, nopts);
534 }
535
536 // Create compose implementation specifying no matcher type.
CreateBase(const Fst<A> & fst1,const Fst<A> & fst2,const CacheOptions & opts)537 static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2,
538 const CacheOptions &opts) {
539 switch (LookAheadMatchType(fst1, fst2)) { // Check for lookahead matchers
540 default:
541 case MATCH_NONE: { // Default composition (no look-ahead)
542 ComposeFstOptions<Arc> nopts(opts);
543 return CreateBase1(fst1, fst2, nopts);
544 }
545 case MATCH_OUTPUT: { // Lookahead on fst1
546 typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M;
547 typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F;
548 ComposeFstOptions<Arc, M, F> nopts(opts);
549 return CreateBase1(fst1, fst2, nopts);
550 }
551 case MATCH_INPUT: { // Lookahead on fst2
552 typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M;
553 typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F;
554 ComposeFstOptions<Arc, M, F> nopts(opts);
555 return CreateBase1(fst1, fst2, nopts);
556 }
557 }
558 }
559
560 private:
561 // Makes visible to friends.
GetImpl()562 Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
563
564 void operator=(const ComposeFst<A> &fst); // disallow
565 };
566
567
568 // Specialization for ComposeFst.
569 template<class A>
570 class StateIterator< ComposeFst<A> >
571 : public CacheStateIterator< ComposeFst<A> > {
572 public:
StateIterator(const ComposeFst<A> & fst)573 explicit StateIterator(const ComposeFst<A> &fst)
574 : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {}
575 };
576
577
578 // Specialization for ComposeFst.
579 template <class A>
580 class ArcIterator< ComposeFst<A> >
581 : public CacheArcIterator< ComposeFst<A> > {
582 public:
583 typedef typename A::StateId StateId;
584
ArcIterator(const ComposeFst<A> & fst,StateId s)585 ArcIterator(const ComposeFst<A> &fst, StateId s)
586 : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) {
587 if (!fst.GetImpl()->HasArcs(s))
588 fst.GetImpl()->Expand(s);
589 }
590
591 private:
592 DISALLOW_COPY_AND_ASSIGN(ArcIterator);
593 };
594
595 template <class A> inline
InitStateIterator(StateIteratorData<A> * data)596 void ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const {
597 data->base = new StateIterator< ComposeFst<A> >(*this);
598 }
599
600 // Useful alias when using StdArc.
601 typedef ComposeFst<StdArc> StdComposeFst;
602
603 enum ComposeFilter { AUTO_FILTER, SEQUENCE_FILTER, ALT_SEQUENCE_FILTER,
604 MATCH_FILTER };
605
606 struct ComposeOptions {
607 bool connect; // Connect output
608 ComposeFilter filter_type; // Which pre-defined filter to use
609
610 ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER)
connectComposeOptions611 : connect(c), filter_type(ft) {}
ComposeOptionsComposeOptions612 ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {}
613 };
614
615 // Computes the composition of two transducers. This version writes
616 // the composed FST into a MurableFst. If FST1 transduces string x to
617 // y with weight a and FST2 transduces y to z with weight b, then
618 // their composition transduces string x to z with weight
619 // Times(x, z).
620 //
621 // The output labels of the first transducer or the input labels of
622 // the second transducer must be sorted. The weights need to form a
623 // commutative semiring (valid for TropicalWeight and LogWeight).
624 //
625 // Complexity:
626 // Assuming the first FST is unsorted and the second is sorted:
627 // - Time: O(V1 V2 D1 (log D2 + M2)),
628 // - Space: O(V1 V2 D1 M2)
629 // where Vi = # of states, Di = maximum out-degree, and Mi is
630 // the maximum multiplicity for the ith FST.
631 //
632 // Caveats:
633 // - Compose trims its output.
634 // - The efficiency of composition can be strongly affected by several factors:
635 // - the choice of which tnansducer is sorted - prefer sorting the FST
636 // that has the greater average out-degree.
637 // - the amount of non-determinism
638 // - the presence and location of epsilon transitions - avoid epsilon
639 // transitions on the output side of the first transducer or
640 // the input side of the second transducer or prefer placing
641 // them later in a path since they delay matching and can
642 // introduce non-coaccessible states and transitions.
643 template<class Arc>
644 void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2,
645 MutableFst<Arc> *ofst,
646 const ComposeOptions &opts = ComposeOptions()) {
647 typedef Matcher< Fst<Arc> > M;
648
649 if (opts.filter_type == AUTO_FILTER) {
650 CacheOptions nopts;
651 nopts.gc_limit = 0; // Cache only the last state for fastest copy.
652 *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts);
653 } else if (opts.filter_type == SEQUENCE_FILTER) {
654 ComposeFstOptions<Arc> copts;
655 copts.gc_limit = 0; // Cache only the last state for fastest copy.
656 *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
657 } else if (opts.filter_type == ALT_SEQUENCE_FILTER) {
658 ComposeFstOptions<Arc, M, AltSequenceComposeFilter<M> > copts;
659 copts.gc_limit = 0; // Cache only the last state for fastest copy.
660 *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
661 } else if (opts.filter_type == MATCH_FILTER) {
662 ComposeFstOptions<Arc, M, MatchComposeFilter<M> > copts;
663 copts.gc_limit = 0; // Cache only the last state for fastest copy.
664 *ofst = ComposeFst<Arc>(ifst1, ifst2, copts);
665 }
666
667 if (opts.connect)
668 Connect(ofst);
669 }
670
671 } // namespace fst
672
673 #endif // FST_LIB_COMPOSE_H__
674