1 #ifdef _WIN32
2 #include <fcntl.h>
3 #include <io.h>
4 #include <stdio.h>
5 #endif // _WIN32
6
7 #include <cstdlib>
8 #include <iostream>
9 #include <string>
10
11 #include <marisa.h>
12
13 #include "cmdopt.h"
14
15 namespace {
16
// Selects how dictionary files are opened: memory-mapped when true (the
// default, also set by -m), read entirely into memory when false (-r).
bool mmap_flag = true;

// String written after every dumped key; the -d option replaces it.
const char *delimiter = "\n";
19
// Prints the usage line and the list of supported options to standard
// error.
//
// cmd: program name inserted after "Usage: " (main() passes argv[0]).
void print_help(const char *cmd) {
  std::cerr << "Usage: " << cmd << " [OPTION]... DIC...\n\n"
      "Options:\n"
      " -d, --delimiter=[S] specify the delimiter (default: \"\\n\")\n"
      " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary"
      " (default)\n"
      " -r, --read-dictionary read an entire dictionary into memory\n"
      " -h, --help print this help\n"
      << std::endl;
}
30
dump(const marisa::Trie & trie)31 int dump(const marisa::Trie &trie) {
32 std::size_t num_keys = 0;
33 marisa::Agent agent;
34 agent.set_query("");
35 try {
36 while (trie.predictive_search(agent)) {
37 std::cout.write(agent.key().ptr(),
38 static_cast<std::streamsize>(agent.key().length())) << delimiter;
39 if (!std::cout) {
40 std::cerr << "error: failed to write results to standard output"
41 << std::endl;
42 return 20;
43 }
44 ++num_keys;
45 }
46 } catch (const marisa::Exception &ex) {
47 std::cerr << ex.what() << ": predictive_search() failed" << std::endl;
48 return 21;
49 }
50 std::cerr << "#keys: " << num_keys << std::endl;
51 return 0;
52 }
53
dump(const char * filename)54 int dump(const char *filename) {
55 marisa::Trie trie;
56 if (filename != NULL) {
57 std::cerr << "input: " << filename << std::endl;
58 if (mmap_flag) {
59 try {
60 trie.mmap(filename);
61 } catch (const marisa::Exception &ex) {
62 std::cerr << ex.what() << ": failed to mmap a dictionary file: "
63 << filename << std::endl;
64 return 10;
65 }
66 } else {
67 try {
68 trie.load(filename);
69 } catch (const marisa::Exception &ex) {
70 std::cerr << ex.what() << ": failed to load a dictionary file: "
71 << filename << std::endl;
72 return 11;
73 }
74 }
75 } else {
76 std::cerr << "input: <stdin>" << std::endl;
77 #ifdef _WIN32
78 const int stdin_fileno = ::_fileno(stdin);
79 if (stdin_fileno < 0) {
80 std::cerr << "error: failed to get the file descriptor of "
81 "standard input" << std::endl;
82 return 20;
83 }
84 if (::_setmode(stdin_fileno, _O_BINARY) == -1) {
85 std::cerr << "error: failed to set binary mode" << std::endl;
86 return 21;
87 }
88 #endif // _WIN32
89 try {
90 std::cin >> trie;
91 } catch (const marisa::Exception &ex) {
92 std::cerr << ex.what()
93 << ": failed to read a dictionary from standard input" << std::endl;
94 return 22;
95 }
96 }
97 return dump(trie);
98 }
99
dump(const char * const * args,std::size_t num_args)100 int dump(const char * const *args, std::size_t num_args) {
101 if (num_args == 0) {
102 return dump(NULL);
103 }
104 for (std::size_t i = 0; i < num_args; ++i) {
105 const int result = dump(args[i]);
106 if (result != 0) {
107 return result;
108 }
109 }
110 return 0;
111 }
112
113 } // namespace
114
main(int argc,char * argv[])115 int main(int argc, char *argv[]) {
116 std::ios::sync_with_stdio(false);
117
118 ::cmdopt_option long_options[] = {
119 { "delimiter", 1, NULL, 'd' },
120 { "mmap-dictionary", 0, NULL, 'm' },
121 { "read-dictionary", 0, NULL, 'r' },
122 { "help", 0, NULL, 'h' },
123 { NULL, 0, NULL, 0 }
124 };
125 ::cmdopt_t cmdopt;
126 ::cmdopt_init(&cmdopt, argc, argv, "d:mrh", long_options);
127 int label;
128 while ((label = ::cmdopt_get(&cmdopt)) != -1) {
129 switch (label) {
130 case 'd': {
131 delimiter = cmdopt.optarg;
132 break;
133 }
134 case 'm': {
135 mmap_flag = true;
136 break;
137 }
138 case 'r': {
139 mmap_flag = false;
140 break;
141 }
142 case 'h': {
143 print_help(argv[0]);
144 return 0;
145 }
146 default: {
147 return 1;
148 }
149 }
150 }
151 return dump(cmdopt.argv + cmdopt.optind,
152 static_cast<std::size_t>(cmdopt.argc - cmdopt.optind));
153 }
154