• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // string-weight.h
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 //
16 // \file
17 // String weight set and associated semiring operation definitions.
18 
19 #ifndef FST_LIB_STRING_WEIGHT_H__
20 #define FST_LIB_STRING_WEIGHT_H__
21 
22 #include <list>
23 
24 #include "fst/lib/product-weight.h"
25 #include "fst/lib/weight.h"
26 
27 namespace fst {
28 
29 const int kStringInfinity = -1;      // Label for the infinite string
30 const int kStringBad = -2;           // Label for a non-string
31 const char kStringSeparator = '_';   // Label separator in strings
32 
33 // Determines whether to use left or right string semiring.  Includes
34 // restricted versions that signal an error if proper prefixes
35 // (suffixes) would otherwise be returned by Plus, useful with various
36 // algorithms that require functional transducer input with the
37 // string semirings.
38 enum StringType { STRING_LEFT = 0, STRING_RIGHT = 1 ,
39                   STRING_LEFT_RESTRICT = 2, STRING_RIGHT_RESTRICT };
40 
41 #define REVERSE_STRING_TYPE(S)                                  \
42    ((S) == STRING_LEFT ? STRING_RIGHT :                         \
43     ((S) == STRING_RIGHT ? STRING_LEFT :                        \
44      ((S) == STRING_LEFT_RESTRICT ? STRING_RIGHT_RESTRICT :     \
45       STRING_LEFT_RESTRICT)))
46 
47 template <typename L, StringType S = STRING_LEFT>
48 class StringWeight;
49 
50 template <typename L, StringType S = STRING_LEFT>
51 class StringWeightIterator;
52 
53 template <typename L, StringType S = STRING_LEFT>
54 class StringWeightReverseIterator;
55 
56 template <typename L, StringType S>
57 bool operator==(const StringWeight<L, S> &,  const StringWeight<L, S> &);
58 
59 
60 // String semiring: (longest_common_prefix/suffix, ., Infinity, Epsilon)
61 template <typename L, StringType S>
62 class StringWeight {
63  public:
64   typedef L Label;
65   typedef StringWeight<L, REVERSE_STRING_TYPE(S)> ReverseWeight;
66 
67   friend class StringWeightIterator<L, S>;
68   friend class StringWeightReverseIterator<L, S>;
69   friend bool operator==<>(const StringWeight<L, S> &,
70                            const StringWeight<L, S> &);
71 
StringWeight()72   StringWeight() { Init(); }
73 
74   template <typename Iter>
StringWeight(const Iter & begin,const Iter & end)75   StringWeight(const Iter &begin, const Iter &end) {
76     Init();
77     for (Iter iter = begin; iter != end; ++iter)
78       PushBack(*iter);
79   }
80 
StringWeight(L l)81   explicit StringWeight(L l) { Init(); PushBack(l); }
82 
Zero()83   static const StringWeight<L, S> &Zero() {
84     static const StringWeight<L, S> zero(kStringInfinity);
85     return zero;
86   }
87 
One()88   static const StringWeight<L, S> &One() {
89     static const StringWeight<L, S> one;
90     return one;
91   }
92 
Type()93   static const string &Type() {
94     static const string type =
95         S == STRING_LEFT ? "string" :
96         (S == STRING_RIGHT ? "right_string" :
97          (S == STRING_LEFT_RESTRICT ? "restricted_string" :
98           "right_restricted_string"));
99     return type;
100   }
101 
102   bool Member() const;
103 
104   istream &Read(istream &strm);
105 
106   ostream &Write(ostream &strm) const;
107 
108   ssize_t Hash() const;
109 
110   StringWeight<L, S> Quantize(float delta = kDelta) const {
111     return *this;
112   }
113 
114   ReverseWeight Reverse() const;
115 
Properties()116   static uint64 Properties() {
117     return (S == STRING_LEFT || S == STRING_LEFT_RESTRICT ?
118             kLeftSemiring : kRightSemiring) | kIdempotent;
119   }
120 
121   // NB: This needs to be uncommented only if default fails for this impl.
122   // StringWeight<L, S> &operator=(const StringWeight<L, S> &w);
123 
124   // These operations combined with the StringWeightIterator and
125   // StringWeightReverseIterator provide the access and mutation of
126   // the string internal elements.
127 
128   // Common initializer among constructors.
Init()129   void Init() { first_ = 0; }
130 
131   // Clear existing StringWeight.
Clear()132   void Clear() { first_ = 0; rest_.clear(); }
133 
Size()134   Label Size() const { return first_ ? rest_.size() + 1 : 0; }
135 
PushFront(L l)136   void PushFront(L l) {
137     if (first_)
138       rest_.push_front(first_);
139     first_ = l;
140   }
141 
PushBack(L l)142   void PushBack(L l) {
143     if (!first_)
144       first_ = l;
145     else
146       rest_.push_back(l);
147   }
148 
149  private:
150   L first_;         // first label in string (0 if empty)
151   list<L> rest_;    // remaining labels in string
152 };
153 
154 
155 // Traverses string in forward direction.
156 template <typename L, StringType S>
157 class StringWeightIterator {
158  public:
StringWeightIterator(const StringWeight<L,S> & w)159   explicit StringWeightIterator(const StringWeight<L, S>& w)
160       : first_(w.first_), rest_(w.rest_), init_(true),
161         iter_(rest_.begin()) {}
162 
Done()163   bool Done() const {
164     if (init_) return first_ == 0;
165     else return iter_ == rest_.end();
166   }
167 
Value()168   const L& Value() const { return init_ ? first_ : *iter_; }
169 
Next()170   void Next() {
171     if (init_) init_ = false;
172     else  ++iter_;
173   }
174 
Reset()175   void Reset() {
176     init_ = true;
177     iter_ = rest_.begin();
178   }
179 
180  private:
181   const L &first_;
182   const list<L> &rest_;
183   bool init_;   // in the initialized state?
184   typename list<L>::const_iterator iter_;
185 
186   DISALLOW_EVIL_CONSTRUCTORS(StringWeightIterator);
187 };
188 
189 
190 // Traverses string in forward direction.
191 template <typename L, StringType S>
192 class StringWeightReverseIterator {
193  public:
StringWeightReverseIterator(const StringWeight<L,S> & w)194   explicit StringWeightReverseIterator(const StringWeight<L, S>& w)
195       : first_(w.first_), rest_(w.rest_), fin_(first_ == 0),
196         iter_(rest_.rbegin()) {}
197 
Done()198   bool Done() const { return fin_; }
199 
Value()200   const L& Value() const { return iter_ == rest_.rend() ? first_ : *iter_; }
201 
Next()202   void Next() {
203     if (iter_ == rest_.rend()) fin_ = true;
204     else  ++iter_;
205   }
206 
Reset()207   void Reset() {
208     fin_ = false;
209     iter_ = rest_.rbegin();
210   }
211 
212  private:
213   const L &first_;
214   const list<L> &rest_;
215   bool fin_;   // in the final state?
216   typename list<L>::const_reverse_iterator iter_;
217 
218   DISALLOW_EVIL_CONSTRUCTORS(StringWeightReverseIterator);
219 };
220 
221 
222 // StringWeight member functions follow that require
223 // StringWeightIterator or StringWeightReverseIterator.
224 
225 template <typename L, StringType S>
Read(istream & strm)226 inline istream &StringWeight<L, S>::Read(istream &strm) {
227   Clear();
228   int32 size = 0;
229   ReadType(strm, &size);
230   for (int i = 0; i < size; ++i) {
231     L label;
232     ReadType(strm, &label);
233     PushBack(label);
234   }
235   return strm;
236 }
237 
238 template <typename L, StringType S>
Write(ostream & strm)239 inline ostream &StringWeight<L, S>::Write(ostream &strm) const {
240   int32 size =  Size();
241   WriteType(strm, size);
242   for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) {
243     L label = iter.Value();
244     WriteType(strm, label);
245   }
246   return strm;
247 }
248 
249 template <typename L, StringType S>
Member()250 inline bool StringWeight<L, S>::Member() const {
251   if (Size() != 1)
252     return true;
253   StringWeightIterator<L, S> iter(*this);
254   return iter.Value() != kStringBad;
255 }
256 
257 template <typename L, StringType S>
258 inline typename StringWeight<L, S>::ReverseWeight
Reverse()259 StringWeight<L, S>::Reverse() const {
260   ReverseWeight rw;
261   for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next())
262     rw.PushFront(iter.Value());
263   return rw;
264 }
265 
266 template <typename L, StringType S>
Hash()267 inline ssize_t StringWeight<L, S>::Hash() const {
268   size_t h = 0;
269   for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next())
270     h ^= h<<1 ^ iter.Value();
271   return static_cast<ssize_t>(h);
272 }
273 
274 // NB: This needs to be uncommented only if default fails for this the impl.
275 //
276 // template <typename L, StringType S>
277 // inline StringWeight<L, S>
278 // &StringWeight<L, S>::operator=(const StringWeight<L, S> &w) {
279 //   if (this != &w) {
280 //     Clear();
281 //     for (StringWeightIterator<L, S> iter(w); !iter.Done(); iter.Next())
282 //       PushBack(iter.Value());
283 //   }
284 //   return *this;
285 // }
286 
287 template <typename L, StringType S>
288 inline bool operator==(const StringWeight<L, S> &w1,
289                        const StringWeight<L, S> &w2) {
290   if (w1.Size() != w2.Size())
291     return false;
292 
293   StringWeightIterator<L, S> iter1(w1);
294   StringWeightIterator<L, S> iter2(w2);
295 
296   for (; !iter1.Done() ; iter1.Next(), iter2.Next())
297     if (iter1.Value() != iter2.Value())
298       return false;
299 
300   return true;
301 }
302 
303 template <typename L, StringType S>
304 inline bool operator!=(const StringWeight<L, S> &w1,
305                        const StringWeight<L, S> &w2) {
306   return !(w1 == w2);
307 }
308 
309 template <typename L, StringType S>
310 inline bool ApproxEqual(const StringWeight<L, S> &w1,
311                         const StringWeight<L, S> &w2,
312                         float delta = kDelta) {
313   return w1 == w2;
314 }
315 
316 template <typename L, StringType S>
317 inline ostream &operator<<(ostream &strm, const StringWeight<L, S> &w) {
318   StringWeightIterator<L, S> iter(w);
319   if (iter.Done())
320     return strm << "Epsilon";
321   else if (iter.Value() == kStringInfinity)
322     return strm << "Infinity";
323   else if (iter.Value() == kStringBad)
324     return strm << "BadString";
325   else
326     for (size_t i = 0; !iter.Done(); ++i, iter.Next()) {
327       if (i > 0)
328         strm << kStringSeparator;
329       strm << iter.Value();
330     }
331   return strm;
332 }
333 
334 template <typename L, StringType S>
335 inline istream &operator>>(istream &strm, StringWeight<L, S> &w) {
336   string s;
337   strm >> s;
338   if (s == "Infinity") {
339     w = StringWeight<L, S>::Zero();
340   } else if (s == "Epsilon") {
341     w = StringWeight<L, S>::One();
342   } else {
343     w.Clear();
344     char *p = 0;
345     for (const char *cs = s.c_str(); !p || *p != '\0'; cs = p + 1) {
346       int l = strtoll(cs, &p, 10);
347       if (p == cs || *p != 0 && *p != kStringSeparator) {
348         strm.clear(std::ios::badbit);
349         break;
350       }
351       w.PushBack(l);
352     }
353   }
354   return strm;
355 }
356 
357 
358 // Default is for the restricted left and right semirings.  String
359 // equality is required (for non-Zero() input. This restriction
360 // is used in e.g. Determinize to ensure functional input.
361 template <typename L, StringType S>  inline StringWeight<L, S>
Plus(const StringWeight<L,S> & w1,const StringWeight<L,S> & w2)362 Plus(const StringWeight<L, S> &w1,
363      const StringWeight<L, S> &w2) {
364   if (w1 == StringWeight<L, S>::Zero())
365     return w2;
366   if (w2 == StringWeight<L, S>::Zero())
367     return w1;
368 
369   if (w1 != w2)
370     LOG(FATAL) << "StringWeight::Plus: unequal arguments "
371                << "(non-functional FST?)";
372 
373   return w1;
374 }
375 
376 
377 // Longest common prefix for left string semiring.
378 template <typename L>  inline StringWeight<L, STRING_LEFT>
Plus(const StringWeight<L,STRING_LEFT> & w1,const StringWeight<L,STRING_LEFT> & w2)379 Plus(const StringWeight<L, STRING_LEFT> &w1,
380      const StringWeight<L, STRING_LEFT> &w2) {
381   if (w1 == StringWeight<L, STRING_LEFT>::Zero())
382     return w2;
383   if (w2 == StringWeight<L, STRING_LEFT>::Zero())
384     return w1;
385 
386   StringWeight<L, STRING_LEFT> sum;
387   StringWeightIterator<L, STRING_LEFT> iter1(w1);
388   StringWeightIterator<L, STRING_LEFT> iter2(w2);
389   for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
390        iter1.Next(), iter2.Next())
391     sum.PushBack(iter1.Value());
392   return sum;
393 }
394 
395 
396 // Longest common suffix for right string semiring.
397 template <typename L>  inline StringWeight<L, STRING_RIGHT>
Plus(const StringWeight<L,STRING_RIGHT> & w1,const StringWeight<L,STRING_RIGHT> & w2)398 Plus(const StringWeight<L, STRING_RIGHT> &w1,
399      const StringWeight<L, STRING_RIGHT> &w2) {
400   if (w1 == StringWeight<L, STRING_RIGHT>::Zero())
401     return w2;
402   if (w2 == StringWeight<L, STRING_RIGHT>::Zero())
403     return w1;
404 
405   StringWeight<L, STRING_RIGHT> sum;
406   StringWeightReverseIterator<L, STRING_RIGHT> iter1(w1);
407   StringWeightReverseIterator<L, STRING_RIGHT> iter2(w2);
408   for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value();
409        iter1.Next(), iter2.Next())
410     sum.PushFront(iter1.Value());
411   return sum;
412 }
413 
414 
415 template <typename L, StringType S>
Times(const StringWeight<L,S> & w1,const StringWeight<L,S> & w2)416 inline StringWeight<L, S> Times(const StringWeight<L, S> &w1,
417                              const StringWeight<L, S> &w2) {
418   if (w1 == StringWeight<L, S>::Zero() || w2 == StringWeight<L, S>::Zero())
419     return StringWeight<L, S>::Zero();
420 
421   StringWeight<L, S> prod(w1);
422   for (StringWeightIterator<L, S> iter(w2); !iter.Done(); iter.Next())
423     prod.PushBack(iter.Value());
424 
425   return prod;
426 }
427 
428 
429 // Default is for left division in the left string and the
430 // left restricted string semirings.
431 template <typename L, StringType S> inline StringWeight<L, S>
Divide(const StringWeight<L,S> & w1,const StringWeight<L,S> & w2,DivideType typ)432 Divide(const StringWeight<L, S> &w1,
433        const StringWeight<L, S> &w2,
434        DivideType typ) {
435 
436   if (typ != DIVIDE_LEFT)
437     LOG(FATAL) << "StringWeight::Divide: only left division is defined "
438                << "for the " << StringWeight<L, S>::Type() << " semiring";
439 
440   if (w2 == StringWeight<L, S>::Zero())
441     return StringWeight<L, S>(kStringBad);
442   else if (w1 == StringWeight<L, S>::Zero())
443     return StringWeight<L, S>::Zero();
444 
445   StringWeight<L, S> div;
446   StringWeightIterator<L, S> iter(w1);
447   for (int i = 0; !iter.Done(); iter.Next(), ++i) {
448     if (i >= w2.Size())
449       div.PushBack(iter.Value());
450   }
451   return div;
452 }
453 
454 
455 // Right division in the right string semiring.
456 template <typename L> inline StringWeight<L, STRING_RIGHT>
Divide(const StringWeight<L,STRING_RIGHT> & w1,const StringWeight<L,STRING_RIGHT> & w2,DivideType typ)457 Divide(const StringWeight<L, STRING_RIGHT> &w1,
458        const StringWeight<L, STRING_RIGHT> &w2,
459        DivideType typ) {
460 
461   if (typ != DIVIDE_RIGHT)
462     LOG(FATAL) << "StringWeight::Divide: only right division is defined "
463                << "for the right string semiring";
464 
465   if (w2 == StringWeight<L, STRING_RIGHT>::Zero())
466     return StringWeight<L, STRING_RIGHT>(kStringBad);
467   else if (w1 == StringWeight<L, STRING_RIGHT>::Zero())
468     return StringWeight<L, STRING_RIGHT>::Zero();
469 
470   StringWeight<L, STRING_RIGHT> div;
471   StringWeightReverseIterator<L, STRING_RIGHT> iter(w1);
472   for (int i = 0; !iter.Done(); iter.Next(), ++i) {
473     if (i >= w2.Size())
474       div.PushFront(iter.Value());
475   }
476   return div;
477 }
478 
479 
480 // Right division in the right restricted string semiring.
481 template <typename L> inline StringWeight<L, STRING_RIGHT_RESTRICT>
Divide(const StringWeight<L,STRING_RIGHT_RESTRICT> & w1,const StringWeight<L,STRING_RIGHT_RESTRICT> & w2,DivideType typ)482 Divide(const StringWeight<L, STRING_RIGHT_RESTRICT> &w1,
483        const StringWeight<L, STRING_RIGHT_RESTRICT> &w2,
484        DivideType typ) {
485 
486   if (typ != DIVIDE_RIGHT)
487     LOG(FATAL) << "StringWeight::Divide: only right division is defined "
488                << "for the right restricted string semiring";
489 
490   if (w2 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero())
491     return StringWeight<L, STRING_RIGHT_RESTRICT>(kStringBad);
492   else if (w1 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero())
493     return StringWeight<L, STRING_RIGHT_RESTRICT>::Zero();
494 
495   StringWeight<L, STRING_RIGHT_RESTRICT> div;
496   StringWeightReverseIterator<L, STRING_RIGHT_RESTRICT> iter(w1);
497   for (int i = 0; !iter.Done(); iter.Next(), ++i) {
498     if (i >= w2.Size())
499       div.PushFront(iter.Value());
500   }
501   return div;
502 }
503 
504 
505 // Product of string weight and an arbitray weight.
506 template <class L, class W, StringType S = STRING_LEFT>
507 struct GallicWeight : public ProductWeight<StringWeight<L, S>, W> {
508   typedef GallicWeight<L, typename W::ReverseWeight, REVERSE_STRING_TYPE(S)>
509   ReverseWeight;
510 
GallicWeightGallicWeight511   GallicWeight() {}
512 
GallicWeightGallicWeight513   GallicWeight(StringWeight<L, S> w1, W w2)
514       : ProductWeight<StringWeight<L, S>, W>(w1, w2) {}
515 
516   explicit GallicWeight(const string &s, int *nread = 0)
517       : ProductWeight<StringWeight<L, S>, W>(s, nread) {}
518 
GallicWeightGallicWeight519   GallicWeight(const ProductWeight<StringWeight<L, S>, W> &w)
520       : ProductWeight<StringWeight<L, S>, W>(w) {}
521 };
522 
523 }  // namespace fst;
524 
525 #endif  // FST_LIB_STRING_WEIGHT_H__
526