• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // printstrings-main.h
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: allauzen@google.com (Cyril Allauzen)
17 // Modified by: jpr@google.com (Jake Ratkiewicz)
18 //
19 // \file
20 // Output as strings the string FSTs in a finite-state archive.
21 
22 #ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
23 #define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
24 
25 #include <string>
26 #include <vector>
27 using std::vector;
28 
29 #include <fst/extensions/far/far.h>
30 #include <fst/shortest-distance.h>
31 #include <fst/string.h>
32 
33 DECLARE_string(far_field_separator);
34 
35 namespace fst {
36 
37 template <class Arc>
FarPrintStrings(const vector<string> & ifilenames,const FarEntryType entry_type,const FarTokenType far_token_type,const string & begin_key,const string & end_key,const bool print_key,const bool print_weight,const string & symbols_fname,const bool initial_symbols,const int32 generate_filenames,const string & filename_prefix,const string & filename_suffix)38 void FarPrintStrings(
39     const vector<string> &ifilenames, const FarEntryType entry_type,
40     const FarTokenType far_token_type, const string &begin_key,
41     const string &end_key, const bool print_key, const bool print_weight,
42     const string &symbols_fname, const bool initial_symbols,
43     const int32 generate_filenames,
44     const string &filename_prefix, const string &filename_suffix) {
45 
46   typename StringPrinter<Arc>::TokenType token_type;
47   if (far_token_type == FTT_SYMBOL) {
48     token_type = StringPrinter<Arc>::SYMBOL;
49   } else if (far_token_type == FTT_BYTE) {
50     token_type = StringPrinter<Arc>::BYTE;
51   } else if (far_token_type == FTT_UTF8) {
52     token_type = StringPrinter<Arc>::UTF8;
53   } else {
54     FSTERROR() << "FarPrintStrings: unknown token type";
55     return;
56   }
57 
58   const SymbolTable *syms = 0;
59   if (!symbols_fname.empty()) {
60     // allow negative flag?
61     SymbolTableTextOptions opts;
62     opts.allow_negative = true;
63     syms = SymbolTable::ReadText(symbols_fname, opts);
64     if (!syms) {
65       FSTERROR() << "FarPrintStrings: error reading symbol table: "
66                  << symbols_fname;
67       return;
68     }
69   }
70 
71   FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames);
72   if (!far_reader) return;
73 
74   if (!begin_key.empty())
75     far_reader->Find(begin_key);
76 
77   string okey;
78   int nrep = 0;
79   for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) {
80     string key = far_reader->GetKey();
81     if (!end_key.empty() && end_key < key)
82       break;
83     if (okey == key)
84       ++nrep;
85     else
86       nrep = 0;
87     okey = key;
88 
89     const Fst<Arc> &fst = far_reader->GetFst();
90     if (i == 1 && initial_symbols && syms == 0 && fst.InputSymbols() != 0)
91       syms = fst.InputSymbols()->Copy();
92     string str;
93     VLOG(2) << "Handling key: " << key;
94     StringPrinter<Arc> string_printer(
95         token_type, syms ? syms : fst.InputSymbols());
96     string_printer(fst, &str);
97 
98     if (entry_type == FET_LINE) {
99       if (print_key)
100         cout << key << FLAGS_far_field_separator[0];
101       cout << str;
102       if (print_weight)
103         cout << FLAGS_far_field_separator[0] << ShortestDistance(fst);
104       cout << endl;
105     } else if (entry_type == FET_FILE) {
106       stringstream sstrm;
107       if (generate_filenames) {
108         sstrm.fill('0');
109         sstrm << std::right << setw(generate_filenames) << i;
110       } else {
111         sstrm << key;
112         if (nrep > 0)
113           sstrm << "." << nrep;
114       }
115 
116       string filename;
117       filename = filename_prefix +  sstrm.str() + filename_suffix;
118 
119       ofstream ostrm(filename.c_str());
120       if (!ostrm) {
121         FSTERROR() << "FarPrintStrings: Can't open file:" << filename;
122         delete syms;
123         delete far_reader;
124         return;
125       }
126       ostrm << str;
127       if (token_type == StringPrinter<Arc>::SYMBOL)
128         ostrm << "\n";
129     }
130   }
131   delete syms;
132 }
133 
134 
135 
136 }  // namespace fst
137 
138 #endif  // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__
139