• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // far.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Finite-State Transducer (FST) archive classes.
20 //
21 
22 #ifndef FST_EXTENSIONS_FAR_FAR_H__
23 #define FST_EXTENSIONS_FAR_FAR_H__
24 
25 #include <fst/extensions/far/stlist.h>
26 #include <fst/extensions/far/sttable.h>
27 #include <fst/fst.h>
28 #include <fst/vector-fst.h>
29 
30 namespace fst {
31 
// Kinds of archive entry source.
// NOTE(review): presumably "one entry per line" vs. "one entry per file";
// confirm against the callers that consume this enum.
enum FarEntryType {
  FET_LINE = 0,
  FET_FILE = 1
};
// Kinds of token used when interpreting entries.
// NOTE(review): presumably symbol-table, raw byte and UTF-8 tokens; confirm
// against the callers that consume this enum.
enum FarTokenType {
  FTT_SYMBOL = 0,
  FTT_BYTE = 1,
  FTT_UTF8 = 2
};
34 
35 // FST archive header class
36 class FarHeader {
37  public:
FarType()38   const string &FarType() const { return fartype_; }
ArcType()39   const string &ArcType() const { return arctype_; }
40 
Read(const string & filename)41   bool Read(const string &filename) {
42     FstHeader fsthdr;
43     if (filename.empty()) {  // Header reading unsupported on stdin.
44       return false;
45     } else if (IsSTTable(filename)) {  // Check if STTable
46       ReadSTTableHeader(filename, &fsthdr);
47       fartype_ = "sttable";
48       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
49       return true;
50     } else if (IsSTList(filename)) {  // Check if STList
51       ReadSTListHeader(filename, &fsthdr);
52       fartype_ = "sttable";
53       arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
54       return true;
55     }
56     return false;
57   }
58 
59  private:
60   string fartype_;
61   string arctype_;
62 };
63 
// Archive storage formats.
enum FarType {
  FAR_DEFAULT = 0,  // FarWriter::Create() picks the format from the filename.
  FAR_STTABLE = 1,
  FAR_STLIST = 2,
  FAR_SSTABLE = 3   // Not handled by FarWriter::Create() in this header.
};
66 
67 // This class creates an archive of FSTs.
68 template <class A>
69 class FarWriter {
70  public:
71   typedef A Arc;
72 
73   // Creates a new (empty) FST archive; returns NULL on error.
74   static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
75 
76   // Adds an FST to the end of an archive. Keys must be non-empty and
77   // in lexicographic order. FSTs must have a suitable write method.
78   virtual void Add(const string &key, const Fst<A> &fst) = 0;
79 
80   virtual FarType Type() const = 0;
81 
82   virtual bool Error() const = 0;
83 
~FarWriter()84   virtual ~FarWriter() {}
85 
86  protected:
FarWriter()87   FarWriter() {}
88 
89  private:
90   DISALLOW_COPY_AND_ASSIGN(FarWriter);
91 };
92 
93 
94 // This class iterates through an existing archive of FSTs.
95 template <class A>
96 class FarReader {
97  public:
98  typedef A Arc;
99 
100   // Opens an existing FST archive in a single file; returns NULL on error.
101   // Sets current position to the beginning of the achive.
102   static FarReader *Open(const string &filename);
103 
104   // Opens an existing FST archive in multiple files; returns NULL on error.
105   // Sets current position to the beginning of the achive.
106   static FarReader *Open(const vector<string> &filenames);
107 
108   // Resets current posision to beginning of archive.
109   virtual void Reset() = 0;
110 
111   // Sets current position to first entry >= key.  Returns true if a match.
112   virtual bool Find(const string &key) = 0;
113 
114   // Current position at end of archive?
115   virtual bool Done() const = 0;
116 
117   // Move current position to next FST.
118   virtual void Next() = 0;
119 
120   // Returns key at the current position. This reference is invalidated if
121   // the current position in the archive is changed.
122   virtual const string &GetKey() const = 0;
123 
124   // Returns FST at the current position. This reference is invalidated if
125   // the current position in the archive is changed.
126   virtual const Fst<A> &GetFst() const = 0;
127 
128   virtual FarType Type() const = 0;
129 
130   virtual bool Error() const = 0;
131 
~FarReader()132   virtual ~FarReader() {}
133 
134  protected:
FarReader()135   FarReader() {}
136 
137  private:
138   DISALLOW_COPY_AND_ASSIGN(FarReader);
139 };
140 
141 
142 template <class A>
143 class FstWriter {
144  public:
operator()145   void operator()(ostream &strm, const Fst<A> &fst) const {
146     fst.Write(strm, FstWriteOptions());
147   }
148 };
149 
150 
151 template <class A>
152 class STTableFarWriter : public FarWriter<A> {
153  public:
154   typedef A Arc;
155 
Create(const string filename)156   static STTableFarWriter *Create(const string filename) {
157     STTableWriter<Fst<A>, FstWriter<A> > *writer =
158         STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
159     return new STTableFarWriter(writer);
160   }
161 
Add(const string & key,const Fst<A> & fst)162   void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
163 
Type()164   FarType Type() const { return FAR_STTABLE; }
165 
Error()166   bool Error() const { return writer_->Error(); }
167 
~STTableFarWriter()168   ~STTableFarWriter() { delete writer_; }
169 
170  private:
STTableFarWriter(STTableWriter<Fst<A>,FstWriter<A>> * writer)171   explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
172       : writer_(writer) {}
173 
174  private:
175   STTableWriter<Fst<A>, FstWriter<A> > *writer_;
176 
177   DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
178 };
179 
180 
181 template <class A>
182 class STListFarWriter : public FarWriter<A> {
183  public:
184   typedef A Arc;
185 
Create(const string filename)186   static STListFarWriter *Create(const string filename) {
187     STListWriter<Fst<A>, FstWriter<A> > *writer =
188         STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
189     return new STListFarWriter(writer);
190   }
191 
Add(const string & key,const Fst<A> & fst)192   void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
193 
Type()194   FarType Type() const { return FAR_STLIST; }
195 
Error()196   bool Error() const { return writer_->Error(); }
197 
~STListFarWriter()198   ~STListFarWriter() { delete writer_; }
199 
200  private:
STListFarWriter(STListWriter<Fst<A>,FstWriter<A>> * writer)201   explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
202       : writer_(writer) {}
203 
204  private:
205   STListWriter<Fst<A>, FstWriter<A> > *writer_;
206 
207   DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
208 };
209 
210 
211 template <class A>
Create(const string & filename,FarType type)212 FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
213   switch(type) {
214     case FAR_DEFAULT:
215       if (filename.empty())
216         return STListFarWriter<A>::Create(filename);
217     case FAR_STTABLE:
218       return STTableFarWriter<A>::Create(filename);
219       break;
220     case FAR_STLIST:
221       return STListFarWriter<A>::Create(filename);
222       break;
223     default:
224       LOG(ERROR) << "FarWriter::Create: unknown far type";
225       return 0;
226   }
227 }
228 
229 
230 template <class A>
231 class FstReader {
232  public:
operator()233   Fst<A> *operator()(istream &strm) const {
234     return Fst<A>::Read(strm, FstReadOptions());
235   }
236 };
237 
238 
239 template <class A>
240 class STTableFarReader : public FarReader<A> {
241  public:
242   typedef A Arc;
243 
Open(const string & filename)244   static STTableFarReader *Open(const string &filename) {
245     STTableReader<Fst<A>, FstReader<A> > *reader =
246         STTableReader<Fst<A>, FstReader<A> >::Open(filename);
247     // TODO: error check
248     return new STTableFarReader(reader);
249   }
250 
Open(const vector<string> & filenames)251   static STTableFarReader *Open(const vector<string> &filenames) {
252     STTableReader<Fst<A>, FstReader<A> > *reader =
253         STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
254     // TODO: error check
255     return new STTableFarReader(reader);
256   }
257 
Reset()258   void Reset() { reader_->Reset(); }
259 
Find(const string & key)260   bool Find(const string &key) { return reader_->Find(key); }
261 
Done()262   bool Done() const { return reader_->Done(); }
263 
Next()264   void Next() { return reader_->Next(); }
265 
GetKey()266   const string &GetKey() const { return reader_->GetKey(); }
267 
GetFst()268   const Fst<A> &GetFst() const { return reader_->GetEntry(); }
269 
Type()270   FarType Type() const { return FAR_STTABLE; }
271 
Error()272   bool Error() const { return reader_->Error(); }
273 
~STTableFarReader()274   ~STTableFarReader() { delete reader_; }
275 
276  private:
STTableFarReader(STTableReader<Fst<A>,FstReader<A>> * reader)277   explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
278       : reader_(reader) {}
279 
280  private:
281   STTableReader<Fst<A>, FstReader<A> > *reader_;
282 
283   DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
284 };
285 
286 
287 template <class A>
288 class STListFarReader : public FarReader<A> {
289  public:
290   typedef A Arc;
291 
Open(const string & filename)292   static STListFarReader *Open(const string &filename) {
293     STListReader<Fst<A>, FstReader<A> > *reader =
294         STListReader<Fst<A>, FstReader<A> >::Open(filename);
295     // TODO: error check
296     return new STListFarReader(reader);
297   }
298 
Open(const vector<string> & filenames)299   static STListFarReader *Open(const vector<string> &filenames) {
300     STListReader<Fst<A>, FstReader<A> > *reader =
301         STListReader<Fst<A>, FstReader<A> >::Open(filenames);
302     // TODO: error check
303     return new STListFarReader(reader);
304   }
305 
Reset()306   void Reset() { reader_->Reset(); }
307 
Find(const string & key)308   bool Find(const string &key) { return reader_->Find(key); }
309 
Done()310   bool Done() const { return reader_->Done(); }
311 
Next()312   void Next() { return reader_->Next(); }
313 
GetKey()314   const string &GetKey() const { return reader_->GetKey(); }
315 
GetFst()316   const Fst<A> &GetFst() const { return reader_->GetEntry(); }
317 
Type()318   FarType Type() const { return FAR_STLIST; }
319 
Error()320   bool Error() const { return reader_->Error(); }
321 
~STListFarReader()322   ~STListFarReader() { delete reader_; }
323 
324  private:
STListFarReader(STListReader<Fst<A>,FstReader<A>> * reader)325   explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
326       : reader_(reader) {}
327 
328  private:
329   STListReader<Fst<A>, FstReader<A> > *reader_;
330 
331   DISALLOW_COPY_AND_ASSIGN(STListFarReader);
332 };
333 
334 
335 template <class A>
Open(const string & filename)336 FarReader<A> *FarReader<A>::Open(const string &filename) {
337   if (filename.empty())
338     return STListFarReader<A>::Open(filename);
339   else if (IsSTTable(filename))
340     return STTableFarReader<A>::Open(filename);
341   else if (IsSTList(filename))
342     return STListFarReader<A>::Open(filename);
343   return 0;
344 }
345 
346 
347 template <class A>
Open(const vector<string> & filenames)348 FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
349   if (!filenames.empty() && filenames[0].empty())
350     return STListFarReader<A>::Open(filenames);
351   else if (!filenames.empty() && IsSTTable(filenames[0]))
352     return STTableFarReader<A>::Open(filenames);
353   else if (!filenames.empty() && IsSTList(filenames[0]))
354     return STListFarReader<A>::Open(filenames);
355   return 0;
356 }
357 
358 }  // namespace fst
359 
360 #endif  // FST_EXTENSIONS_FAR_FAR_H__
361