1 // fst.cc
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // FST definitions.
20
21 #include <fst/fst.h>
22
23 // Include these so they are registered
24 #include <fst/compact-fst.h>
25 #include <fst/const-fst.h>
26 #include <fst/matcher-fst.h>
27 #include <fst/vector-fst.h>
28 #include <fst/edit-fst.h>
29
30 // FST flag definitions
31
32 DEFINE_bool(fst_verify_properties, false,
33 "Verify fst properties queried by TestProperties");
34
35 DEFINE_string(fst_weight_separator, ",",
36 "Character separator between printed composite weights; "
37 "must be a single character");
38
39 DEFINE_string(fst_weight_parentheses, "",
40 "Characters enclosing the first weight of a printed composite "
41 "weight (e.g. pair weight, tuple weight and derived classes) to "
42 "ensure proper I/O of nested composite weights; "
43 "must have size 0 (none) or 2 (open and close parenthesis)");
44
45 DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache");
46
47 DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL,
48 "Cache byte size that triggers garbage collection");
49
50 DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");
51
52 DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file");
53 DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file");
54
55 DEFINE_string(fst_read_mode, "read",
56 "Default file reading mode for mappable files");
57
58 namespace fst {
59
60 // Register VectorFst, ConstFst and EditFst for common arcs types
61 REGISTER_FST(VectorFst, StdArc);
62 REGISTER_FST(VectorFst, LogArc);
63 REGISTER_FST(VectorFst, Log64Arc);
64 REGISTER_FST(ConstFst, StdArc);
65 REGISTER_FST(ConstFst, LogArc);
66 REGISTER_FST(ConstFst, Log64Arc);
67 REGISTER_FST(EditFst, StdArc);
68 REGISTER_FST(EditFst, LogArc);
69 REGISTER_FST(EditFst, Log64Arc);
70
71 // Register CompactFst for common arcs with the default (uint32) size type
72 static FstRegisterer<
73 CompactFst<StdArc, StringCompactor<StdArc> > >
74 CompactFst_StdArc_StringCompactor_registerer;
75 static FstRegisterer<
76 CompactFst<LogArc, StringCompactor<LogArc> > >
77 CompactFst_LogArc_StringCompactor_registerer;
78 static FstRegisterer<
79 CompactFst<StdArc, WeightedStringCompactor<StdArc> > >
80 CompactFst_StdArc_WeightedStringCompactor_registerer;
81 static FstRegisterer<
82 CompactFst<LogArc, WeightedStringCompactor<LogArc> > >
83 CompactFst_LogArc_WeightedStringCompactor_registerer;
84 static FstRegisterer<
85 CompactFst<StdArc, AcceptorCompactor<StdArc> > >
86 CompactFst_StdArc_AcceptorCompactor_registerer;
87 static FstRegisterer<
88 CompactFst<LogArc, AcceptorCompactor<LogArc> > >
89 CompactFst_LogArc_AcceptorCompactor_registerer;
90 static FstRegisterer<
91 CompactFst<StdArc, UnweightedCompactor<StdArc> > >
92 CompactFst_StdArc_UnweightedCompactor_registerer;
93 static FstRegisterer<
94 CompactFst<LogArc, UnweightedCompactor<LogArc> > >
95 CompactFst_LogArc_UnweightedCompactor_registerer;
96 static FstRegisterer<
97 CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > >
98 CompactFst_StdArc_UnweightedAcceptorCompactor_registerer;
99 static FstRegisterer<
100 CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > >
101 CompactFst_LogArc_UnweightedAcceptorCompactor_registerer;
102
// Type-name strings for the lookahead FSTs.
//
// `extern` is spelled out on each definition because a namespace-scope const
// object has internal linkage by default; these arrays are meant to be
// visible to other translation units.
// NOTE(review): the matching declarations are presumably in
// <fst/matcher-fst.h> -- confirm.
extern const char arc_lookahead_fst_type[] = "arc_lookahead";
extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead";
extern const char olabel_lookahead_fst_type[] = "olabel_lookahead";
107
108 // Identifies stream data as an FST (and its endianity)
109 static const int32 kFstMagicNumber = 2125659606;
110
111 // Check for Fst magic number in stream, to indicate
112 // caller function that the stream content is an Fst header;
IsFstHeader(istream & strm,const string & source)113 bool IsFstHeader(istream &strm, const string &source) {
114 int64 pos = strm.tellg();
115 bool match = true;
116 int32 magic_number = 0;
117 ReadType(strm, &magic_number);
118 if (magic_number != kFstMagicNumber
119 ) {
120 match = false;
121 }
122 strm.seekg(pos);
123 return match;
124 }
125
126 // Check Fst magic number and read in Fst header.
127 // If rewind = true, reposition stream to before call (if possible).
Read(istream & strm,const string & source,bool rewind)128 bool FstHeader::Read(istream &strm, const string &source, bool rewind) {
129 int64 pos = 0;
130 if (rewind) pos = strm.tellg();
131 int32 magic_number = 0;
132 ReadType(strm, &magic_number);
133 if (magic_number != kFstMagicNumber
134 ) {
135 LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source;
136 if (rewind) strm.seekg(pos);
137 return false;
138 }
139
140 ReadType(strm, &fsttype_);
141 ReadType(strm, &arctype_);
142 ReadType(strm, &version_);
143 ReadType(strm, &flags_);
144 ReadType(strm, &properties_);
145 ReadType(strm, &start_);
146 ReadType(strm, &numstates_);
147 ReadType(strm, &numarcs_);
148 if (!strm) {
149 LOG(ERROR) << "FstHeader::Read: read failed: " << source;
150 return false;
151 }
152 if (rewind) strm.seekg(pos);
153 return true;
154 }
155
156 // Write Fst magic number and Fst header.
Write(ostream & strm,const string & source) const157 bool FstHeader::Write(ostream &strm, const string &source) const {
158 WriteType(strm, kFstMagicNumber);
159 WriteType(strm, fsttype_);
160 WriteType(strm, arctype_);
161 WriteType(strm, version_);
162 WriteType(strm, flags_);
163 WriteType(strm, properties_);
164 WriteType(strm, start_);
165 WriteType(strm, numstates_);
166 WriteType(strm, numarcs_);
167 return true;
168 }
169
FstReadOptions(const string & src,const FstHeader * hdr,const SymbolTable * isym,const SymbolTable * osym)170 FstReadOptions::FstReadOptions(const string& src, const FstHeader *hdr,
171 const SymbolTable* isym, const SymbolTable* osym)
172 : source(src), header(hdr), isymbols(isym), osymbols(osym) {
173 mode = ReadMode(FLAGS_fst_read_mode);
174 }
175
FstReadOptions(const string & src,const SymbolTable * isym,const SymbolTable * osym)176 FstReadOptions::FstReadOptions(const string& src, const SymbolTable* isym,
177 const SymbolTable* osym)
178 : source(src), header(0), isymbols(isym), osymbols(osym) {
179 mode = ReadMode(FLAGS_fst_read_mode);
180 }
181
ReadMode(const string & mode)182 FstReadOptions::FileReadMode FstReadOptions::ReadMode(const string &mode) {
183 if (mode == "read") {
184 return READ;
185 }
186 if (mode == "map") {
187 return MAP;
188 }
189 LOG(ERROR) << "Unknown file read mode " << mode;
190 return READ;
191 }
192
193 } // namespace fst
194