• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // farcompilestrings.cc
2 
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // Copyright 2005-2010 Google, Inc.
16 // Author: allauzen@google.com (Cyril Allauzen)
17 // Modified: jpr@google.com (Jake Ratkiewicz) to use new arc-type dispatching
18 //
19 // \file
20 // Compiles a set of strings as FSTs and stores them in a finite-state
21 // archive.
22 //
23 
24 #include <fst/extensions/far/farscript.h>
25 #include <fst/extensions/far/main.h>
26 #include <iostream>
27 #include <fstream>
28 #include <sstream>
29 
30 DEFINE_string(key_prefix, "", "Prefix to append to keys");
31 DEFINE_string(key_suffix, "", "Suffix to append to keys");
32 DEFINE_int32(generate_keys, 0,
33              "Generate N digit numeric keys (def: use file basenames)");
34 DEFINE_string(far_type, "default",
35               "FAR file format type: one of: \"default\", \"fst\", "
36               "\"stlist\", \"sttable\"");
37 DEFINE_bool(allow_negative_labels, false,
38             "Allow negative labels (not recommended; may cause conflicts)");
39 DEFINE_string(arc_type, "standard", "Output arc type");
40 DEFINE_string(entry_type, "line", "Entry type: one of : "
41               "\"file\" (one FST per file), \"line\" (one FST per line)");
42 DEFINE_string(fst_type, "vector", "Output FST type");
43 DEFINE_string(token_type, "symbol", "Token type: one of : "
44               "\"symbol\", \"byte\", \"utf8\"");
45 DEFINE_string(symbols, "", "Label symbol table");
46 DEFINE_string(unknown_symbol, "", "");
47 DEFINE_bool(file_list_input, false,
48             "Each input files contains a list of files to be processed");
49 DEFINE_bool(keep_symbols, false,
50             "Store symbol table in Far file");
51 DEFINE_bool(initial_symbols, true,
52             "When keep_symbols==true, stores symbol table only for the first"
53             " Fst in archive.");
54 
main(int argc,char ** argv)55 int  main(int argc, char **argv) {
56   namespace s = fst::script;
57 
58   string usage = "Compiles a set of strings as FSTs and stores them in";
59   usage += " a finite-state archive.\n\n Usage:";
60   usage += argv[0];
61   usage += " [in1.txt [[in2.txt ...] out.far]]\n";
62 
63   std::set_new_handler(FailedNewHandler);
64   SET_FLAGS(usage.c_str(), &argc, &argv, true);
65 
66   vector<string> in_fnames;
67   for (unsigned i = 1; i < argc - 1; ++i)
68     in_fnames.push_back(strcmp(argv[i], "") != 0 ? argv[i] : "");
69   if (in_fnames.empty())
70     in_fnames.push_back(argc == 2 && strcmp(argv[1], "-") != 0 ? argv[1] : "");
71 
72   string out_fname =
73       argc > 2 && strcmp(argv[argc - 1], "-") != 0 ? argv[argc - 1] : "";
74 
75   fst::FarEntryType fet = fst::StringToFarEntryType(FLAGS_entry_type);
76   fst::FarTokenType ftt = fst::StringToFarTokenType(FLAGS_token_type);
77   fst::FarType far_type = fst::FarTypeFromString(FLAGS_far_type);
78 
79   s::FarCompileStrings(in_fnames, out_fname, FLAGS_arc_type, FLAGS_fst_type,
80                        far_type, FLAGS_generate_keys, fet, ftt,
81                        FLAGS_symbols, FLAGS_unknown_symbol,
82                        FLAGS_keep_symbols, FLAGS_initial_symbols,
83                        FLAGS_allow_negative_labels,
84                        FLAGS_file_list_input, FLAGS_key_prefix,
85                        FLAGS_key_suffix);
86 
87   return 0;
88 }
89