1 // fst.cc
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // FST definitions.
20
21 #include <fst/fst.h>
22
23 // Include these so they are registered
24 #include <fst/compact-fst.h>
25 #include <fst/const-fst.h>
26 #include <fst/matcher-fst.h>
27 #include <fst/vector-fst.h>
28 #include <fst/edit-fst.h>
29
30 // FST flag definitions
31
32 DEFINE_bool(fst_verify_properties, false,
33 "Verify fst properties queried by TestProperties");
34
35 DEFINE_string(fst_weight_separator, ",",
36 "Character separator between printed composite weights; "
37 "must be a single character");
38
39 DEFINE_string(fst_weight_parentheses, "",
40 "Characters enclosing the first weight of a printed composite "
41 "weight (e.g. pair weight, tuple weight and derived classes) to "
42 "ensure proper I/O of nested composite weights; "
43 "must have size 0 (none) or 2 (open and close parenthesis)");
44
45 DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache");
46
47 DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL,
48 "Cache byte size that triggers garbage collection");
49
50 DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");
51
52 DEFINE_string(save_relabel_ipairs, "", "Save input relabel pairs to file");
53 DEFINE_string(save_relabel_opairs, "", "Save output relabel pairs to file");
54
55 namespace fst {
56
57 // Register VectorFst, ConstFst and EditFst for common arcs types
58 REGISTER_FST(VectorFst, StdArc);
59 REGISTER_FST(VectorFst, LogArc);
60 REGISTER_FST(VectorFst, Log64Arc);
61 REGISTER_FST(ConstFst, StdArc);
62 REGISTER_FST(ConstFst, LogArc);
63 REGISTER_FST(ConstFst, Log64Arc);
64 REGISTER_FST(EditFst, StdArc);
65 REGISTER_FST(EditFst, LogArc);
66 REGISTER_FST(EditFst, Log64Arc);
67
68 // Register CompactFst for common arcs with the default (uint32) size type
69 static FstRegisterer<
70 CompactFst<StdArc, StringCompactor<StdArc> > >
71 CompactFst_StdArc_StringCompactor_registerer;
72 static FstRegisterer<
73 CompactFst<LogArc, StringCompactor<LogArc> > >
74 CompactFst_LogArc_StringCompactor_registerer;
75 static FstRegisterer<
76 CompactFst<StdArc, WeightedStringCompactor<StdArc> > >
77 CompactFst_StdArc_WeightedStringCompactor_registerer;
78 static FstRegisterer<
79 CompactFst<LogArc, WeightedStringCompactor<LogArc> > >
80 CompactFst_LogArc_WeightedStringCompactor_registerer;
81 static FstRegisterer<
82 CompactFst<StdArc, AcceptorCompactor<StdArc> > >
83 CompactFst_StdArc_AcceptorCompactor_registerer;
84 static FstRegisterer<
85 CompactFst<LogArc, AcceptorCompactor<LogArc> > >
86 CompactFst_LogArc_AcceptorCompactor_registerer;
87 static FstRegisterer<
88 CompactFst<StdArc, UnweightedCompactor<StdArc> > >
89 CompactFst_StdArc_UnweightedCompactor_registerer;
90 static FstRegisterer<
91 CompactFst<LogArc, UnweightedCompactor<LogArc> > >
92 CompactFst_LogArc_UnweightedCompactor_registerer;
93 static FstRegisterer<
94 CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > >
95 CompactFst_StdArc_UnweightedAcceptorCompactor_registerer;
96 static FstRegisterer<
97 CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > >
98 CompactFst_LogArc_UnweightedAcceptorCompactor_registerer;
99
// Type-name strings for the lookahead Fsts.
// These are defined with `extern` so the const arrays get external linkage
// (a namespace-scope const object would otherwise be local to this file).
extern const char arc_lookahead_fst_type[] = "arc_lookahead";
extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead";
extern const char olabel_lookahead_fst_type[] = "olabel_lookahead";
104
105 // Identifies stream data as an FST (and its endianity)
106 static const int32 kFstMagicNumber = 2125659606;
107
108 // Check for Fst magic number in stream, to indicate
109 // caller function that the stream content is an Fst header;
IsFstHeader(istream & strm,const string & source)110 bool IsFstHeader(istream &strm, const string &source) {
111 int64 pos = strm.tellg();
112 bool match = true;
113 int32 magic_number = 0;
114 ReadType(strm, &magic_number);
115 if (magic_number != kFstMagicNumber
116 ) {
117 match = false;
118 }
119 strm.seekg(pos);
120 return match;
121 }
122
123 // Check Fst magic number and read in Fst header.
124 // If rewind = true, reposition stream to before call (if possible).
Read(istream & strm,const string & source,bool rewind)125 bool FstHeader::Read(istream &strm, const string &source, bool rewind) {
126 int64 pos = 0;
127 if (rewind) pos = strm.tellg();
128 int32 magic_number = 0;
129 ReadType(strm, &magic_number);
130 if (magic_number != kFstMagicNumber
131 ) {
132 LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source;
133 if (rewind) strm.seekg(pos);
134 return false;
135 }
136
137 ReadType(strm, &fsttype_);
138 ReadType(strm, &arctype_);
139 ReadType(strm, &version_);
140 ReadType(strm, &flags_);
141 ReadType(strm, &properties_);
142 ReadType(strm, &start_);
143 ReadType(strm, &numstates_);
144 ReadType(strm, &numarcs_);
145 if (!strm) {
146 LOG(ERROR) << "FstHeader::Read: read failed: " << source;
147 return false;
148 }
149 if (rewind) strm.seekg(pos);
150 return true;
151 }
152
153 // Write Fst magic number and Fst header.
Write(ostream & strm,const string & source) const154 bool FstHeader::Write(ostream &strm, const string &source) const {
155 WriteType(strm, kFstMagicNumber);
156 WriteType(strm, fsttype_);
157 WriteType(strm, arctype_);
158 WriteType(strm, version_);
159 WriteType(strm, flags_);
160 WriteType(strm, properties_);
161 WriteType(strm, start_);
162 WriteType(strm, numstates_);
163 WriteType(strm, numarcs_);
164 return true;
165 }
166
167 } // namespace fst
168