• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // fst.cc
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: riley@google.com (Michael Riley)
17 //
18 // \file
19 // FST definitions.
20 
21 #include <fst/fst.h>
22 
23 // Include these so they are registered
24 #include <fst/compact-fst.h>
25 #include <fst/const-fst.h>
26 #include <fst/matcher-fst.h>
27 #include <fst/vector-fst.h>
28 #include <fst/edit-fst.h>
29 
30 // FST flag definitions
31 
32 DEFINE_bool(fst_verify_properties, false,
33             "Verify fst properties queried by TestProperties");
34 
35 DEFINE_string(fst_weight_separator, ",",
36               "Character separator between printed composite weights; "
37               "must be a single character");
38 
39 DEFINE_string(fst_weight_parentheses, "",
40               "Characters enclosing the first weight of a printed composite "
41               "weight (e.g. pair weight, tuple weight and derived classes) to "
42               "ensure proper I/O of nested composite weights; "
43               "must have size 0 (none) or 2 (open and close parenthesis)");
44 
45 DEFINE_bool(fst_default_cache_gc, true, "Enable garbage collection of cache");
46 
47 DEFINE_int64(fst_default_cache_gc_limit, 1<<20LL,
48              "Cache byte size that triggers garbage collection");
49 
50 DEFINE_bool(fst_align, false, "Write FST data aligned where appropriate");
51 
52 DEFINE_string(save_relabel_ipairs, "",  "Save input relabel pairs to file");
53 DEFINE_string(save_relabel_opairs, "",  "Save output relabel pairs to file");
54 
55 namespace fst {
56 
// Register VectorFst, ConstFst and EditFst for common arcs types
// (StdArc, LogArc and Log64Arc): one REGISTER_FST line per
// (Fst class, arc type) pair. The macro itself is defined outside this file.
REGISTER_FST(VectorFst, StdArc);
REGISTER_FST(VectorFst, LogArc);
REGISTER_FST(VectorFst, Log64Arc);
REGISTER_FST(ConstFst, StdArc);
REGISTER_FST(ConstFst, LogArc);
REGISTER_FST(ConstFst, Log64Arc);
REGISTER_FST(EditFst, StdArc);
REGISTER_FST(EditFst, LogArc);
REGISTER_FST(EditFst, Log64Arc);
67 
// Register CompactFst for common arcs with the default (uint32) size type
//
// Each file-static FstRegisterer object below names one
// CompactFst<Arc, Compactor> instantiation. StdArc and LogArc are both
// covered for each of the five compactors: String, WeightedString, Acceptor,
// Unweighted and UnweightedAcceptor.
// NOTE(review): the registration itself presumably happens in the
// FstRegisterer constructor during static initialization -- confirm against
// its definition, which is not in this file.
static FstRegisterer<
  CompactFst<StdArc, StringCompactor<StdArc> > >
CompactFst_StdArc_StringCompactor_registerer;
static FstRegisterer<
  CompactFst<LogArc, StringCompactor<LogArc> > >
CompactFst_LogArc_StringCompactor_registerer;
static FstRegisterer<
  CompactFst<StdArc, WeightedStringCompactor<StdArc> > >
CompactFst_StdArc_WeightedStringCompactor_registerer;
static FstRegisterer<
  CompactFst<LogArc, WeightedStringCompactor<LogArc> > >
CompactFst_LogArc_WeightedStringCompactor_registerer;
static FstRegisterer<
  CompactFst<StdArc, AcceptorCompactor<StdArc> > >
CompactFst_StdArc_AcceptorCompactor_registerer;
static FstRegisterer<
  CompactFst<LogArc, AcceptorCompactor<LogArc> > >
CompactFst_LogArc_AcceptorCompactor_registerer;
static FstRegisterer<
  CompactFst<StdArc, UnweightedCompactor<StdArc> > >
CompactFst_StdArc_UnweightedCompactor_registerer;
static FstRegisterer<
  CompactFst<LogArc, UnweightedCompactor<LogArc> > >
CompactFst_LogArc_UnweightedCompactor_registerer;
static FstRegisterer<
  CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > >
CompactFst_StdArc_UnweightedAcceptorCompactor_registerer;
static FstRegisterer<
  CompactFst<LogArc, UnweightedAcceptorCompactor<LogArc> > >
CompactFst_LogArc_UnweightedAcceptorCompactor_registerer;
99 
// Fst type definitions for lookahead Fsts.
// The explicit `extern` gives these const arrays external linkage; a
// namespace-scope const object would otherwise be local to this translation
// unit.
// NOTE(review): presumably declared in <fst/matcher-fst.h> and referenced by
// the lookahead Fst typedefs there -- confirm.
extern const char arc_lookahead_fst_type[] = "arc_lookahead";
extern const char ilabel_lookahead_fst_type[] = "ilabel_lookahead";
extern const char olabel_lookahead_fst_type[] = "olabel_lookahead";
104 
// Identifies stream data as an FST (and its endianity).
// FstHeader::Write emits this value first; IsFstHeader and FstHeader::Read
// compare the first value they read against it.
static const int32 kFstMagicNumber = 2125659606;
107 
108 // Check for Fst magic number in stream, to indicate
109 // caller function that the stream content is an Fst header;
IsFstHeader(istream & strm,const string & source)110 bool IsFstHeader(istream &strm, const string &source) {
111   int64 pos = strm.tellg();
112   bool match = true;
113   int32 magic_number = 0;
114   ReadType(strm, &magic_number);
115   if (magic_number != kFstMagicNumber
116       ) {
117     match = false;
118   }
119   strm.seekg(pos);
120   return match;
121 }
122 
123 // Check Fst magic number and read in Fst header.
124 // If rewind = true, reposition stream to before call (if possible).
Read(istream & strm,const string & source,bool rewind)125 bool FstHeader::Read(istream &strm, const string &source, bool rewind) {
126   int64 pos = 0;
127   if (rewind) pos = strm.tellg();
128   int32 magic_number = 0;
129   ReadType(strm, &magic_number);
130   if (magic_number != kFstMagicNumber
131       ) {
132     LOG(ERROR) << "FstHeader::Read: Bad FST header: " << source;
133     if (rewind) strm.seekg(pos);
134     return false;
135   }
136 
137   ReadType(strm, &fsttype_);
138   ReadType(strm, &arctype_);
139   ReadType(strm, &version_);
140   ReadType(strm, &flags_);
141   ReadType(strm, &properties_);
142   ReadType(strm, &start_);
143   ReadType(strm, &numstates_);
144   ReadType(strm, &numarcs_);
145   if (!strm) {
146     LOG(ERROR) << "FstHeader::Read: read failed: " << source;
147     return false;
148   }
149   if (rewind) strm.seekg(pos);
150   return true;
151 }
152 
153 // Write Fst magic number and Fst header.
Write(ostream & strm,const string & source) const154 bool FstHeader::Write(ostream &strm, const string &source) const {
155   WriteType(strm, kFstMagicNumber);
156   WriteType(strm, fsttype_);
157   WriteType(strm, arctype_);
158   WriteType(strm, version_);
159   WriteType(strm, flags_);
160   WriteType(strm, properties_);
161   WriteType(strm, start_);
162   WriteType(strm, numstates_);
163   WriteType(strm, numarcs_);
164   return true;
165 }
166 
167 }  // namespace fst
168