1 // far.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Finite-State Transducer (FST) archive classes.
20 //
21
22 #ifndef FST_EXTENSIONS_FAR_FAR_H__
23 #define FST_EXTENSIONS_FAR_FAR_H__
24
25 #include <fst/extensions/far/stlist.h>
26 #include <fst/extensions/far/sttable.h>
27 #include <fst/fst.h>
28 #include <fst/vector-fst.h>
29
30 namespace fst {
31
// Kinds of FAR entry.
// NOTE(review): presumably distinguishes one-entry-per-line input from
// one-entry-per-file input -- confirm against the code that consumes it.
enum FarEntryType {
  FET_LINE = 0,
  FET_FILE = 1
};

// Kinds of token used when interpreting textual input.
// NOTE(review): presumably symbol-table tokens, raw bytes, or UTF-8 code
// points -- confirm against the code that consumes it.
enum FarTokenType {
  FTT_SYMBOL = 0,
  FTT_BYTE = 1,
  FTT_UTF8 = 2
};
34
35 // FST archive header class
36 class FarHeader {
37 public:
FarType()38 const string &FarType() const { return fartype_; }
ArcType()39 const string &ArcType() const { return arctype_; }
40
Read(const string & filename)41 bool Read(const string &filename) {
42 FstHeader fsthdr;
43 if (filename.empty()) { // Header reading unsupported on stdin.
44 return false;
45 } else if (IsSTTable(filename)) { // Check if STTable
46 ReadSTTableHeader(filename, &fsthdr);
47 fartype_ = "sttable";
48 arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
49 return true;
50 } else if (IsSTList(filename)) { // Check if STList
51 ReadSTListHeader(filename, &fsthdr);
52 fartype_ = "sttable";
53 arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
54 return true;
55 }
56 return false;
57 }
58
59 private:
60 string fartype_;
61 string arctype_;
62 };
63
// Identifies the on-disk container format of an FST archive.
enum FarType {
  FAR_DEFAULT = 0,
  FAR_STTABLE = 1,
  FAR_STLIST = 2,
  FAR_SSTABLE = 3
};
66
67 // This class creates an archive of FSTs.
68 template <class A>
69 class FarWriter {
70 public:
71 typedef A Arc;
72
73 // Creates a new (empty) FST archive; returns NULL on error.
74 static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
75
76 // Adds an FST to the end of an archive. Keys must be non-empty and
77 // in lexicographic order. FSTs must have a suitable write method.
78 virtual void Add(const string &key, const Fst<A> &fst) = 0;
79
80 virtual FarType Type() const = 0;
81
82 virtual bool Error() const = 0;
83
~FarWriter()84 virtual ~FarWriter() {}
85
86 protected:
FarWriter()87 FarWriter() {}
88
89 private:
90 DISALLOW_COPY_AND_ASSIGN(FarWriter);
91 };
92
93
94 // This class iterates through an existing archive of FSTs.
95 template <class A>
96 class FarReader {
97 public:
98 typedef A Arc;
99
100 // Opens an existing FST archive in a single file; returns NULL on error.
101 // Sets current position to the beginning of the achive.
102 static FarReader *Open(const string &filename);
103
104 // Opens an existing FST archive in multiple files; returns NULL on error.
105 // Sets current position to the beginning of the achive.
106 static FarReader *Open(const vector<string> &filenames);
107
108 // Resets current posision to beginning of archive.
109 virtual void Reset() = 0;
110
111 // Sets current position to first entry >= key. Returns true if a match.
112 virtual bool Find(const string &key) = 0;
113
114 // Current position at end of archive?
115 virtual bool Done() const = 0;
116
117 // Move current position to next FST.
118 virtual void Next() = 0;
119
120 // Returns key at the current position. This reference is invalidated if
121 // the current position in the archive is changed.
122 virtual const string &GetKey() const = 0;
123
124 // Returns FST at the current position. This reference is invalidated if
125 // the current position in the archive is changed.
126 virtual const Fst<A> &GetFst() const = 0;
127
128 virtual FarType Type() const = 0;
129
130 virtual bool Error() const = 0;
131
~FarReader()132 virtual ~FarReader() {}
133
134 protected:
FarReader()135 FarReader() {}
136
137 private:
138 DISALLOW_COPY_AND_ASSIGN(FarReader);
139 };
140
141
142 template <class A>
143 class FstWriter {
144 public:
operator()145 void operator()(ostream &strm, const Fst<A> &fst) const {
146 fst.Write(strm, FstWriteOptions());
147 }
148 };
149
150
151 template <class A>
152 class STTableFarWriter : public FarWriter<A> {
153 public:
154 typedef A Arc;
155
Create(const string filename)156 static STTableFarWriter *Create(const string filename) {
157 STTableWriter<Fst<A>, FstWriter<A> > *writer =
158 STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
159 return new STTableFarWriter(writer);
160 }
161
Add(const string & key,const Fst<A> & fst)162 void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
163
Type()164 FarType Type() const { return FAR_STTABLE; }
165
Error()166 bool Error() const { return writer_->Error(); }
167
~STTableFarWriter()168 ~STTableFarWriter() { delete writer_; }
169
170 private:
STTableFarWriter(STTableWriter<Fst<A>,FstWriter<A>> * writer)171 explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
172 : writer_(writer) {}
173
174 private:
175 STTableWriter<Fst<A>, FstWriter<A> > *writer_;
176
177 DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
178 };
179
180
181 template <class A>
182 class STListFarWriter : public FarWriter<A> {
183 public:
184 typedef A Arc;
185
Create(const string filename)186 static STListFarWriter *Create(const string filename) {
187 STListWriter<Fst<A>, FstWriter<A> > *writer =
188 STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
189 return new STListFarWriter(writer);
190 }
191
Add(const string & key,const Fst<A> & fst)192 void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
193
Type()194 FarType Type() const { return FAR_STLIST; }
195
Error()196 bool Error() const { return writer_->Error(); }
197
~STListFarWriter()198 ~STListFarWriter() { delete writer_; }
199
200 private:
STListFarWriter(STListWriter<Fst<A>,FstWriter<A>> * writer)201 explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
202 : writer_(writer) {}
203
204 private:
205 STListWriter<Fst<A>, FstWriter<A> > *writer_;
206
207 DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
208 };
209
210
211 template <class A>
Create(const string & filename,FarType type)212 FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
213 switch(type) {
214 case FAR_DEFAULT:
215 if (filename.empty())
216 return STListFarWriter<A>::Create(filename);
217 case FAR_STTABLE:
218 return STTableFarWriter<A>::Create(filename);
219 break;
220 case FAR_STLIST:
221 return STListFarWriter<A>::Create(filename);
222 break;
223 default:
224 LOG(ERROR) << "FarWriter::Create: unknown far type";
225 return 0;
226 }
227 }
228
229
230 template <class A>
231 class FstReader {
232 public:
operator()233 Fst<A> *operator()(istream &strm) const {
234 return Fst<A>::Read(strm, FstReadOptions());
235 }
236 };
237
238
239 template <class A>
240 class STTableFarReader : public FarReader<A> {
241 public:
242 typedef A Arc;
243
Open(const string & filename)244 static STTableFarReader *Open(const string &filename) {
245 STTableReader<Fst<A>, FstReader<A> > *reader =
246 STTableReader<Fst<A>, FstReader<A> >::Open(filename);
247 // TODO: error check
248 return new STTableFarReader(reader);
249 }
250
Open(const vector<string> & filenames)251 static STTableFarReader *Open(const vector<string> &filenames) {
252 STTableReader<Fst<A>, FstReader<A> > *reader =
253 STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
254 // TODO: error check
255 return new STTableFarReader(reader);
256 }
257
Reset()258 void Reset() { reader_->Reset(); }
259
Find(const string & key)260 bool Find(const string &key) { return reader_->Find(key); }
261
Done()262 bool Done() const { return reader_->Done(); }
263
Next()264 void Next() { return reader_->Next(); }
265
GetKey()266 const string &GetKey() const { return reader_->GetKey(); }
267
GetFst()268 const Fst<A> &GetFst() const { return reader_->GetEntry(); }
269
Type()270 FarType Type() const { return FAR_STTABLE; }
271
Error()272 bool Error() const { return reader_->Error(); }
273
~STTableFarReader()274 ~STTableFarReader() { delete reader_; }
275
276 private:
STTableFarReader(STTableReader<Fst<A>,FstReader<A>> * reader)277 explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
278 : reader_(reader) {}
279
280 private:
281 STTableReader<Fst<A>, FstReader<A> > *reader_;
282
283 DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
284 };
285
286
287 template <class A>
288 class STListFarReader : public FarReader<A> {
289 public:
290 typedef A Arc;
291
Open(const string & filename)292 static STListFarReader *Open(const string &filename) {
293 STListReader<Fst<A>, FstReader<A> > *reader =
294 STListReader<Fst<A>, FstReader<A> >::Open(filename);
295 // TODO: error check
296 return new STListFarReader(reader);
297 }
298
Open(const vector<string> & filenames)299 static STListFarReader *Open(const vector<string> &filenames) {
300 STListReader<Fst<A>, FstReader<A> > *reader =
301 STListReader<Fst<A>, FstReader<A> >::Open(filenames);
302 // TODO: error check
303 return new STListFarReader(reader);
304 }
305
Reset()306 void Reset() { reader_->Reset(); }
307
Find(const string & key)308 bool Find(const string &key) { return reader_->Find(key); }
309
Done()310 bool Done() const { return reader_->Done(); }
311
Next()312 void Next() { return reader_->Next(); }
313
GetKey()314 const string &GetKey() const { return reader_->GetKey(); }
315
GetFst()316 const Fst<A> &GetFst() const { return reader_->GetEntry(); }
317
Type()318 FarType Type() const { return FAR_STLIST; }
319
Error()320 bool Error() const { return reader_->Error(); }
321
~STListFarReader()322 ~STListFarReader() { delete reader_; }
323
324 private:
STListFarReader(STListReader<Fst<A>,FstReader<A>> * reader)325 explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
326 : reader_(reader) {}
327
328 private:
329 STListReader<Fst<A>, FstReader<A> > *reader_;
330
331 DISALLOW_COPY_AND_ASSIGN(STListFarReader);
332 };
333
334
335 template <class A>
Open(const string & filename)336 FarReader<A> *FarReader<A>::Open(const string &filename) {
337 if (filename.empty())
338 return STListFarReader<A>::Open(filename);
339 else if (IsSTTable(filename))
340 return STTableFarReader<A>::Open(filename);
341 else if (IsSTList(filename))
342 return STListFarReader<A>::Open(filename);
343 return 0;
344 }
345
346
347 template <class A>
Open(const vector<string> & filenames)348 FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
349 if (!filenames.empty() && filenames[0].empty())
350 return STListFarReader<A>::Open(filenames);
351 else if (!filenames.empty() && IsSTTable(filenames[0]))
352 return STTableFarReader<A>::Open(filenames);
353 else if (!filenames.empty() && IsSTList(filenames[0]))
354 return STListFarReader<A>::Open(filenames);
355 return 0;
356 }
357
358 } // namespace fst
359
360 #endif // FST_EXTENSIONS_FAR_FAR_H__
361