1 // far.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // Finite-State Transducer (FST) archive classes.
20 //
21
22 #ifndef FST_EXTENSIONS_FAR_FAR_H__
23 #define FST_EXTENSIONS_FAR_FAR_H__
24
25 #include <fst/extensions/far/stlist.h>
26 #include <fst/extensions/far/sttable.h>
27 #include <fst/fst.h>
28 #include <fst/vector-fst.h>
29
30 namespace fst {
31
// Kinds of FAR input entries. NOTE(review): presumably "one entry per line"
// versus "one entry per file" -- confirm against the callers.
enum FarEntryType {
  FET_LINE,
  FET_FILE
};

// Kinds of input tokens. NOTE(review): presumably symbol-table symbols, raw
// bytes, or UTF-8 encoded characters -- confirm against the callers.
enum FarTokenType {
  FTT_SYMBOL,
  FTT_BYTE,
  FTT_UTF8
};
34
IsFst(const string & filename)35 inline bool IsFst(const string &filename) {
36 ifstream strm(filename.c_str());
37 if (!strm)
38 return false;
39 return IsFstHeader(strm, filename);
40 }
41
42 // FST archive header class
43 class FarHeader {
44 public:
FarType()45 const string &FarType() const { return fartype_; }
ArcType()46 const string &ArcType() const { return arctype_; }
47
Read(const string & filename)48 bool Read(const string &filename) {
49 FstHeader fsthdr;
50 if (filename.empty()) {
51 // Header reading unsupported on stdin. Assumes STList and StdArc.
52 fartype_ = "stlist";
53 arctype_ = "standard";
54 return true;
55 } else if (IsSTTable(filename)) { // Check if STTable
56 ReadSTTableHeader(filename, &fsthdr);
57 fartype_ = "sttable";
58 arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
59 return true;
60 } else if (IsSTList(filename)) { // Check if STList
61 ReadSTListHeader(filename, &fsthdr);
62 fartype_ = "sttable";
63 arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
64 return true;
65 } else if (IsFst(filename)) { // Check if Fst
66 ifstream istrm(filename.c_str());
67 fsthdr.Read(istrm, filename);
68 fartype_ = "fst";
69 arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType();
70 return true;
71 }
72 return false;
73 }
74
75 private:
76 string fartype_;
77 string arctype_;
78 };
79
// Archive container formats. FAR_DEFAULT leaves the choice of format to
// FarWriter::Create(). Enumerators after the first take consecutive values.
enum FarType {
  FAR_DEFAULT = 0,
  FAR_STTABLE,  // 1
  FAR_STLIST,   // 2
  FAR_FST       // 3
};
86
87 // This class creates an archive of FSTs.
88 template <class A>
89 class FarWriter {
90 public:
91 typedef A Arc;
92
93 // Creates a new (empty) FST archive; returns NULL on error.
94 static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT);
95
96 // Adds an FST to the end of an archive. Keys must be non-empty and
97 // in lexicographic order. FSTs must have a suitable write method.
98 virtual void Add(const string &key, const Fst<A> &fst) = 0;
99
100 virtual FarType Type() const = 0;
101
102 virtual bool Error() const = 0;
103
~FarWriter()104 virtual ~FarWriter() {}
105
106 protected:
FarWriter()107 FarWriter() {}
108
109 private:
110 DISALLOW_COPY_AND_ASSIGN(FarWriter);
111 };
112
113
114 // This class iterates through an existing archive of FSTs.
115 template <class A>
116 class FarReader {
117 public:
118 typedef A Arc;
119
120 // Opens an existing FST archive in a single file; returns NULL on error.
121 // Sets current position to the beginning of the achive.
122 static FarReader *Open(const string &filename);
123
124 // Opens an existing FST archive in multiple files; returns NULL on error.
125 // Sets current position to the beginning of the achive.
126 static FarReader *Open(const vector<string> &filenames);
127
128 // Resets current posision to beginning of archive.
129 virtual void Reset() = 0;
130
131 // Sets current position to first entry >= key. Returns true if a match.
132 virtual bool Find(const string &key) = 0;
133
134 // Current position at end of archive?
135 virtual bool Done() const = 0;
136
137 // Move current position to next FST.
138 virtual void Next() = 0;
139
140 // Returns key at the current position. This reference is invalidated if
141 // the current position in the archive is changed.
142 virtual const string &GetKey() const = 0;
143
144 // Returns FST at the current position. This reference is invalidated if
145 // the current position in the archive is changed.
146 virtual const Fst<A> &GetFst() const = 0;
147
148 virtual FarType Type() const = 0;
149
150 virtual bool Error() const = 0;
151
~FarReader()152 virtual ~FarReader() {}
153
154 protected:
FarReader()155 FarReader() {}
156
157 private:
158 DISALLOW_COPY_AND_ASSIGN(FarReader);
159 };
160
161
162 template <class A>
163 class FstWriter {
164 public:
operator()165 void operator()(ostream &strm, const Fst<A> &fst) const {
166 fst.Write(strm, FstWriteOptions());
167 }
168 };
169
170
171 template <class A>
172 class STTableFarWriter : public FarWriter<A> {
173 public:
174 typedef A Arc;
175
Create(const string & filename)176 static STTableFarWriter *Create(const string &filename) {
177 STTableWriter<Fst<A>, FstWriter<A> > *writer =
178 STTableWriter<Fst<A>, FstWriter<A> >::Create(filename);
179 return new STTableFarWriter(writer);
180 }
181
Add(const string & key,const Fst<A> & fst)182 void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
183
Type()184 FarType Type() const { return FAR_STTABLE; }
185
Error()186 bool Error() const { return writer_->Error(); }
187
~STTableFarWriter()188 ~STTableFarWriter() { delete writer_; }
189
190 private:
STTableFarWriter(STTableWriter<Fst<A>,FstWriter<A>> * writer)191 explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer)
192 : writer_(writer) {}
193
194 private:
195 STTableWriter<Fst<A>, FstWriter<A> > *writer_;
196
197 DISALLOW_COPY_AND_ASSIGN(STTableFarWriter);
198 };
199
200
201 template <class A>
202 class STListFarWriter : public FarWriter<A> {
203 public:
204 typedef A Arc;
205
Create(const string & filename)206 static STListFarWriter *Create(const string &filename) {
207 STListWriter<Fst<A>, FstWriter<A> > *writer =
208 STListWriter<Fst<A>, FstWriter<A> >::Create(filename);
209 return new STListFarWriter(writer);
210 }
211
Add(const string & key,const Fst<A> & fst)212 void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); }
213
Type()214 FarType Type() const { return FAR_STLIST; }
215
Error()216 bool Error() const { return writer_->Error(); }
217
~STListFarWriter()218 ~STListFarWriter() { delete writer_; }
219
220 private:
STListFarWriter(STListWriter<Fst<A>,FstWriter<A>> * writer)221 explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer)
222 : writer_(writer) {}
223
224 private:
225 STListWriter<Fst<A>, FstWriter<A> > *writer_;
226
227 DISALLOW_COPY_AND_ASSIGN(STListFarWriter);
228 };
229
230
231 template <class A>
232 class FstFarWriter : public FarWriter<A> {
233 public:
234 typedef A Arc;
235
FstFarWriter(const string & filename)236 explicit FstFarWriter(const string &filename)
237 : filename_(filename), error_(false), written_(false) {}
238
Create(const string & filename)239 static FstFarWriter *Create(const string &filename) {
240 return new FstFarWriter(filename);
241 }
242
Add(const string & key,const Fst<A> & fst)243 void Add(const string &key, const Fst<A> &fst) {
244 if (written_) {
245 LOG(WARNING) << "FstFarWriter::Add: only one Fst supported,"
246 << " subsequent entries discarded.";
247 } else {
248 error_ = !fst.Write(filename_);
249 written_ = true;
250 }
251 }
252
Type()253 FarType Type() const { return FAR_FST; }
254
Error()255 bool Error() const { return error_; }
256
~FstFarWriter()257 ~FstFarWriter() {}
258
259 private:
260 string filename_;
261 bool error_;
262 bool written_;
263
264 DISALLOW_COPY_AND_ASSIGN(FstFarWriter);
265 };
266
267
268 template <class A>
Create(const string & filename,FarType type)269 FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) {
270 switch(type) {
271 case FAR_DEFAULT:
272 if (filename.empty())
273 return STListFarWriter<A>::Create(filename);
274 case FAR_STTABLE:
275 return STTableFarWriter<A>::Create(filename);
276 case FAR_STLIST:
277 return STListFarWriter<A>::Create(filename);
278 case FAR_FST:
279 return FstFarWriter<A>::Create(filename);
280 default:
281 LOG(ERROR) << "FarWriter::Create: unknown far type";
282 return 0;
283 }
284 }
285
286
287 template <class A>
288 class FstReader {
289 public:
operator()290 Fst<A> *operator()(istream &strm) const {
291 return Fst<A>::Read(strm, FstReadOptions());
292 }
293 };
294
295
296 template <class A>
297 class STTableFarReader : public FarReader<A> {
298 public:
299 typedef A Arc;
300
Open(const string & filename)301 static STTableFarReader *Open(const string &filename) {
302 STTableReader<Fst<A>, FstReader<A> > *reader =
303 STTableReader<Fst<A>, FstReader<A> >::Open(filename);
304 // TODO: error check
305 return new STTableFarReader(reader);
306 }
307
Open(const vector<string> & filenames)308 static STTableFarReader *Open(const vector<string> &filenames) {
309 STTableReader<Fst<A>, FstReader<A> > *reader =
310 STTableReader<Fst<A>, FstReader<A> >::Open(filenames);
311 // TODO: error check
312 return new STTableFarReader(reader);
313 }
314
Reset()315 void Reset() { reader_->Reset(); }
316
Find(const string & key)317 bool Find(const string &key) { return reader_->Find(key); }
318
Done()319 bool Done() const { return reader_->Done(); }
320
Next()321 void Next() { return reader_->Next(); }
322
GetKey()323 const string &GetKey() const { return reader_->GetKey(); }
324
GetFst()325 const Fst<A> &GetFst() const { return reader_->GetEntry(); }
326
Type()327 FarType Type() const { return FAR_STTABLE; }
328
Error()329 bool Error() const { return reader_->Error(); }
330
~STTableFarReader()331 ~STTableFarReader() { delete reader_; }
332
333 private:
STTableFarReader(STTableReader<Fst<A>,FstReader<A>> * reader)334 explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader)
335 : reader_(reader) {}
336
337 private:
338 STTableReader<Fst<A>, FstReader<A> > *reader_;
339
340 DISALLOW_COPY_AND_ASSIGN(STTableFarReader);
341 };
342
343
344 template <class A>
345 class STListFarReader : public FarReader<A> {
346 public:
347 typedef A Arc;
348
Open(const string & filename)349 static STListFarReader *Open(const string &filename) {
350 STListReader<Fst<A>, FstReader<A> > *reader =
351 STListReader<Fst<A>, FstReader<A> >::Open(filename);
352 // TODO: error check
353 return new STListFarReader(reader);
354 }
355
Open(const vector<string> & filenames)356 static STListFarReader *Open(const vector<string> &filenames) {
357 STListReader<Fst<A>, FstReader<A> > *reader =
358 STListReader<Fst<A>, FstReader<A> >::Open(filenames);
359 // TODO: error check
360 return new STListFarReader(reader);
361 }
362
Reset()363 void Reset() { reader_->Reset(); }
364
Find(const string & key)365 bool Find(const string &key) { return reader_->Find(key); }
366
Done()367 bool Done() const { return reader_->Done(); }
368
Next()369 void Next() { return reader_->Next(); }
370
GetKey()371 const string &GetKey() const { return reader_->GetKey(); }
372
GetFst()373 const Fst<A> &GetFst() const { return reader_->GetEntry(); }
374
Type()375 FarType Type() const { return FAR_STLIST; }
376
Error()377 bool Error() const { return reader_->Error(); }
378
~STListFarReader()379 ~STListFarReader() { delete reader_; }
380
381 private:
STListFarReader(STListReader<Fst<A>,FstReader<A>> * reader)382 explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader)
383 : reader_(reader) {}
384
385 private:
386 STListReader<Fst<A>, FstReader<A> > *reader_;
387
388 DISALLOW_COPY_AND_ASSIGN(STListFarReader);
389 };
390
391 template <class A>
392 class FstFarReader : public FarReader<A> {
393 public:
394 typedef A Arc;
395
Open(const string & filename)396 static FstFarReader *Open(const string &filename) {
397 vector<string> filenames;
398 filenames.push_back(filename);
399 return new FstFarReader<A>(filenames);
400 }
401
Open(const vector<string> & filenames)402 static FstFarReader *Open(const vector<string> &filenames) {
403 return new FstFarReader<A>(filenames);
404 }
405
FstFarReader(const vector<string> & filenames)406 FstFarReader(const vector<string> &filenames)
407 : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) {
408 sort(keys_.begin(), keys_.end());
409 streams_.resize(keys_.size(), 0);
410 for (size_t i = 0; i < keys_.size(); ++i) {
411 if (keys_[i].empty()) {
412 if (!has_stdin_) {
413 streams_[i] = &cin;
414 //sources_[i] = "stdin";
415 has_stdin_ = true;
416 } else {
417 FSTERROR() << "FstFarReader::FstFarReader: stdin should only "
418 << "appear once in the input file list.";
419 error_ = true;
420 return;
421 }
422 } else {
423 streams_[i] = new ifstream(
424 keys_[i].c_str(), ifstream::in | ifstream::binary);
425 }
426 }
427 if (pos_ >= keys_.size()) return;
428 ReadFst();
429 }
430
Reset()431 void Reset() {
432 if (has_stdin_) {
433 FSTERROR() << "FstFarReader::Reset: operation not supported on stdin";
434 error_ = true;
435 return;
436 }
437 pos_ = 0;
438 ReadFst();
439 }
440
Find(const string & key)441 bool Find(const string &key) {
442 if (has_stdin_) {
443 FSTERROR() << "FstFarReader::Find: operation not supported on stdin";
444 error_ = true;
445 return false;
446 }
447 pos_ = 0;//TODO
448 ReadFst();
449 return true;
450 }
451
Done()452 bool Done() const { return error_ || pos_ >= keys_.size(); }
453
Next()454 void Next() {
455 ++pos_;
456 ReadFst();
457 }
458
GetKey()459 const string &GetKey() const {
460 return keys_[pos_];
461 }
462
GetFst()463 const Fst<A> &GetFst() const {
464 return *fst_;
465 }
466
Type()467 FarType Type() const { return FAR_FST; }
468
Error()469 bool Error() const { return error_; }
470
~FstFarReader()471 ~FstFarReader() {
472 if (fst_) delete fst_;
473 for (size_t i = 0; i < keys_.size(); ++i)
474 delete streams_[i];
475 }
476
477 private:
ReadFst()478 void ReadFst() {
479 if (fst_) delete fst_;
480 if (pos_ >= keys_.size()) return;
481 streams_[pos_]->seekg(0);
482 fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions());
483 if (!fst_) {
484 FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_];
485 error_ = true;
486 }
487 }
488
489 private:
490 vector<string> keys_;
491 vector<istream*> streams_;
492 bool has_stdin_;
493 size_t pos_;
494 mutable Fst<A> *fst_;
495 mutable bool error_;
496
497 DISALLOW_COPY_AND_ASSIGN(FstFarReader);
498 };
499
500 template <class A>
Open(const string & filename)501 FarReader<A> *FarReader<A>::Open(const string &filename) {
502 if (filename.empty())
503 return STListFarReader<A>::Open(filename);
504 else if (IsSTTable(filename))
505 return STTableFarReader<A>::Open(filename);
506 else if (IsSTList(filename))
507 return STListFarReader<A>::Open(filename);
508 else if (IsFst(filename))
509 return FstFarReader<A>::Open(filename);
510 return 0;
511 }
512
513
514 template <class A>
Open(const vector<string> & filenames)515 FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) {
516 if (!filenames.empty() && filenames[0].empty())
517 return STListFarReader<A>::Open(filenames);
518 else if (!filenames.empty() && IsSTTable(filenames[0]))
519 return STTableFarReader<A>::Open(filenames);
520 else if (!filenames.empty() && IsSTList(filenames[0]))
521 return STListFarReader<A>::Open(filenames);
522 else if (!filenames.empty() && IsFst(filenames[0]))
523 return FstFarReader<A>::Open(filenames);
524 return 0;
525 }
526
527 } // namespace fst
528
529 #endif // FST_EXTENSIONS_FAR_FAR_H__
530