1 #include <cstdlib>
2 #include <iostream>
3 #include <limits>
4 #include <string>
5 #include <vector>
6
7 #include <marisa.h>
8
9 #include "./cmdopt.h"
10
11 namespace {
12
// Search mode applied to every input line; chosen with -a / -f / -l.
enum FindMode {
  FIND_ALL = 0,    // -a: "find all prefix keys" (the default)
  FIND_FIRST = 1,  // -f: "find a shortest prefix key"
  FIND_LAST = 2    // -l: "find a longest prefix key"
};
18
19 std::size_t max_num_results = 10;
20 FindMode find_mode = FIND_ALL;
21 bool mmap_flag = true;
22
// Writes the usage line and the list of supported options to standard error.
//
// cmd: program name to show in the usage line (main() passes argv[0]).
void print_help(const char *cmd) {
  std::ostream &out = std::cerr;

  out << "Usage: " << cmd << " [OPTION]... DIC\n\n";
  out << "Options:\n";
  out << " -n, --max-num-results=[N] limits the number of results to N"
         " (default: 10)\n";
  out << " 0: no limit\n";
  out << " -a, --find-all find all prefix keys (default)\n";
  out << " -f, --find-first find a shortest prefix key\n";
  out << " -l, --find-last find a longest prefix key\n";
  out << " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary"
         " (default)\n";
  out << " -r, --read-dictionary read an entire dictionary into memory\n";
  out << " -h, --help print this help\n";
  // Trailing blank line, then flush.
  out << std::endl;
}
38
find_all(const marisa::Trie & trie,const std::string & str)39 void find_all(const marisa::Trie &trie, const std::string &str) {
40 static std::vector<marisa::UInt32> key_ids;
41 static std::vector<std::size_t> lengths;
42 const std::size_t num_keys = trie.find(str, &key_ids, &lengths);
43 if (num_keys != 0) {
44 std::cout << num_keys << " found" << std::endl;
45 for (std::size_t i = 0; (i < num_keys) && (i < max_num_results); ++i) {
46 std::cout << key_ids[i] << '\t';
47 std::cout.write(str.c_str(), lengths[i]) << '\t' << str << '\n';
48 }
49 } else {
50 std::cout << "not found" << std::endl;
51 }
52 key_ids.clear();
53 lengths.clear();
54 }
55
find_first(const marisa::Trie & trie,const std::string & str)56 void find_first(const marisa::Trie &trie, const std::string &str) {
57 std::size_t length = 0;
58 const marisa::UInt32 key_id = trie.find_first(str, &length);
59 if (key_id != trie.notfound()) {
60 std::cout << key_id << '\t';
61 std::cout.write(str.c_str(), length) << '\t' << str << '\n';
62 } else {
63 std::cout << "-1\t" << str << '\n';
64 }
65 }
66
find_last(const marisa::Trie & trie,const std::string & str)67 void find_last(const marisa::Trie &trie, const std::string &str) {
68 std::size_t length = 0;
69 const marisa::UInt32 key_id = trie.find_last(str, &length);
70 if (key_id != trie.notfound()) {
71 std::cout << key_id << '\t';
72 std::cout.write(str.c_str(), length) << '\t' << str << '\n';
73 } else {
74 std::cout << "-1\t" << str << '\n';
75 }
76 }
77
find(const char * const * args,std::size_t num_args)78 int find(const char * const *args, std::size_t num_args) {
79 if (num_args == 0) {
80 std::cerr << "error: a dictionary is not specified" << std::endl;
81 return 10;
82 } else if (num_args > 1) {
83 std::cerr << "error: more than one dictionaries are specified"
84 << std::endl;
85 return 11;
86 }
87
88 marisa::Trie trie;
89 marisa::Mapper mapper;
90 if (mmap_flag) {
91 try {
92 trie.mmap(&mapper, args[0]);
93 } catch (const marisa::Exception &ex) {
94 std::cerr << ex.filename() << ':' << ex.line() << ": " << ex.what()
95 << ": failed to mmap a dictionary file: " << args[0] << std::endl;
96 return 20;
97 }
98 } else {
99 try {
100 trie.load(args[0]);
101 } catch (const marisa::Exception &ex) {
102 std::cerr << ex.filename() << ':' << ex.line() << ": " << ex.what()
103 << ": failed to load a dictionary file: " << args[0] << std::endl;
104 return 21;
105 }
106 }
107
108 std::string str;
109 while (std::getline(std::cin, str)) {
110 try {
111 switch (find_mode) {
112 case FIND_ALL: {
113 find_all(trie, str);
114 break;
115 }
116 case FIND_FIRST: {
117 find_first(trie, str);
118 break;
119 }
120 case FIND_LAST: {
121 find_last(trie, str);
122 break;
123 }
124 }
125 } catch (const marisa::Exception &ex) {
126 std::cerr << ex.filename() << ':' << ex.line() << ": " << ex.what()
127 << ": failed to find keys in: " << str << std::endl;
128 return 30;
129 }
130 if (!std::cout) {
131 std::cerr << "error: failed to write results to standard output"
132 << std::endl;
133 return 31;
134 }
135 }
136
137 return 0;
138 }
139
140 } // namespace
141
main(int argc,char * argv[])142 int main(int argc, char *argv[]) {
143 std::ios::sync_with_stdio(false);
144
145 ::cmdopt_option long_options[] = {
146 { "max-num-results", 1, NULL, 'n' },
147 { "find-all", 0, NULL, 'a' },
148 { "find-first", 0, NULL, 'f' },
149 { "find-last", 0, NULL, 'l' },
150 { "mmap-dictionary", 0, NULL, 'm' },
151 { "read-dictionary", 0, NULL, 'r' },
152 { "help", 0, NULL, 'h' },
153 { NULL, 0, NULL, 0 }
154 };
155 ::cmdopt_t cmdopt;
156 ::cmdopt_init(&cmdopt, argc, argv, "n:aflmrh", long_options);
157 int label;
158 while ((label = ::cmdopt_get(&cmdopt)) != -1) {
159 switch (label) {
160 case 'n': {
161 char *end_of_value;
162 const long value = std::strtol(cmdopt.optarg, &end_of_value, 10);
163 if ((*end_of_value != '\0') || (value < 0)) {
164 std::cerr << "error: option `-n' with an invalid argument: "
165 << cmdopt.optarg << std::endl;
166 }
167 if ((value == 0) || ((unsigned long)value > MARISA_MAX_NUM_KEYS)) {
168 max_num_results = MARISA_MAX_NUM_KEYS;
169 } else {
170 max_num_results = (std::size_t)(value);
171 }
172 break;
173 }
174 case 'a': {
175 find_mode = FIND_ALL;
176 break;
177 }
178 case 'f': {
179 find_mode = FIND_FIRST;
180 break;
181 }
182 case 'l': {
183 find_mode = FIND_LAST;
184 break;
185 }
186 case 'm': {
187 mmap_flag = true;
188 break;
189 }
190 case 'r': {
191 mmap_flag = false;
192 break;
193 }
194 case 'h': {
195 print_help(argv[0]);
196 return 0;
197 }
198 default: {
199 return 1;
200 }
201 }
202 }
203 return find(cmdopt.argv + cmdopt.optind, cmdopt.argc - cmdopt.optind);
204 }
205