1 // printstrings-main.h
2
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: allauzen@google.com (Cyril Allauzen)
17 // Modified by: jpr@google.com (Jake Ratkiewicz)
18 //
19 // \file
20 // Output as strings the string FSTs in a finite-state archive.
21
22 #ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
23 #define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
24
25 #include <string>
26 #include <vector>
27 using std::vector;
28
29 #include <fst/extensions/far/far.h>
30 #include <fst/shortest-distance.h>
31 #include <fst/string.h>
32
33 DECLARE_string(far_field_separator);
34
35 namespace fst {
36
37 template <class Arc>
FarPrintStrings(const vector<string> & ifilenames,const FarEntryType entry_type,const FarTokenType far_token_type,const string & begin_key,const string & end_key,const bool print_key,const bool print_weight,const string & symbols_fname,const bool initial_symbols,const int32 generate_filenames,const string & filename_prefix,const string & filename_suffix)38 void FarPrintStrings(
39 const vector<string> &ifilenames, const FarEntryType entry_type,
40 const FarTokenType far_token_type, const string &begin_key,
41 const string &end_key, const bool print_key, const bool print_weight,
42 const string &symbols_fname, const bool initial_symbols,
43 const int32 generate_filenames,
44 const string &filename_prefix, const string &filename_suffix) {
45
46 typename StringPrinter<Arc>::TokenType token_type;
47 if (far_token_type == FTT_SYMBOL) {
48 token_type = StringPrinter<Arc>::SYMBOL;
49 } else if (far_token_type == FTT_BYTE) {
50 token_type = StringPrinter<Arc>::BYTE;
51 } else if (far_token_type == FTT_UTF8) {
52 token_type = StringPrinter<Arc>::UTF8;
53 } else {
54 FSTERROR() << "FarPrintStrings: unknown token type";
55 return;
56 }
57
58 const SymbolTable *syms = 0;
59 if (!symbols_fname.empty()) {
60 // allow negative flag?
61 SymbolTableTextOptions opts;
62 opts.allow_negative = true;
63 syms = SymbolTable::ReadText(symbols_fname, opts);
64 if (!syms) {
65 FSTERROR() << "FarPrintStrings: error reading symbol table: "
66 << symbols_fname;
67 return;
68 }
69 }
70
71 FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
72 if (!far_reader) return;
73
74 if (!begin_key.empty())
75 far_reader->Find(begin_key);
76
77 string okey;
78 int nrep = 0;
79 for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
80 string key = far_reader->GetKey();
81 if (!end_key.empty() && end_key < key)
82 break;
83 if (okey == key)
84 ++nrep;
85 else
86 nrep = 0;
87 okey = key;
88
89 const Fst<Arc> &fst = far_reader->GetFst();
90 if (i == 1 && initial_symbols && syms == 0 && fst.InputSymbols() != 0)
91 syms = fst.InputSymbols()->Copy();
92 string str;
93 VLOG(2) << "Handling key: " << key;
94 StringPrinter<Arc> string_printer(
95 token_type, syms ? syms : fst.InputSymbols());
96 string_printer(fst, &str);
97
98 if (entry_type == FET_LINE) {
99 if (print_key)
100 cout << key << FLAGS_far_field_separator[0];
101 cout << str;
102 if (print_weight)
103 cout << FLAGS_far_field_separator[0] << ShortestDistance(fst);
104 cout << endl;
105 } else if (entry_type == FET_FILE) {
106 stringstream sstrm;
107 if (generate_filenames) {
108 sstrm.fill('0');
109 sstrm << std::right << setw(generate_filenames) << i;
110 } else {
111 sstrm << key;
112 if (nrep > 0)
113 sstrm << "." << nrep;
114 }
115
116 string filename;
117 filename = filename_prefix + sstrm.str() + filename_suffix;
118
119 ofstream ostrm(filename.c_str());
120 if (!ostrm) {
121 FSTERROR() << "FarPrintStrings: Can't open file:" << filename;
122 delete syms;
123 delete far_reader;
124 return;
125 }
126 ostrm << str;
127 if (token_type == StringPrinter<Arc>::SYMBOL)
128 ostrm << "\n";
129 }
130 }
131 delete syms;
132 }
133
134
135
136 } // namespace fst
137
138 #endif // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
139