#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <string>

#include <marisa.h>

#include "cmdopt.h"
8
9 namespace {
10
// Upper bound on the number of matching keys printed per query. Defaults to
// 10; the `-n` option overrides it, and `-n 0` raises it to MARISA_SIZE_MAX.
std::size_t max_num_results = 10;
// Selects how the dictionary is opened: true uses Trie::mmap(), false uses
// Trie::load(). Set by the `-m` / `-r` options.
bool mmap_flag = true;
13
// Writes the usage summary and the option list to standard error.
//
// cmd: program name inserted after "Usage: " (main passes argv[0]).
//
// NOTE(review): the spacing inside these literals looks like it may have been
// collapsed (the "0: no limit" line is not aligned under its option) --
// confirm the intended column layout before changing the text.
void print_help(const char *cmd) {
  std::cerr << "Usage: " << cmd << " [OPTION]... DIC\n\n"
      "Options:\n"
      " -n, --max-num-results=[N] limit the number of results to N"
      " (default: 10)\n"
      " 0: no limit\n"
      " -m, --mmap-dictionary use memory-mapped I/O to load a dictionary"
      " (default)\n"
      " -r, --read-dictionary read an entire dictionary into memory\n"
      " -h, --help print this help\n"
      << std::endl;
}
26
common_prefix_search(const char * const * args,std::size_t num_args)27 int common_prefix_search(const char * const *args, std::size_t num_args) {
28 if (num_args == 0) {
29 std::cerr << "error: dictionary is not specified" << std::endl;
30 return 10;
31 } else if (num_args > 1) {
32 std::cerr << "error: more than one dictionaries are specified"
33 << std::endl;
34 return 11;
35 }
36
37 marisa::Trie trie;
38 if (mmap_flag) {
39 try {
40 trie.mmap(args[0]);
41 } catch (const marisa::Exception &ex) {
42 std::cerr << ex.what() << ": failed to mmap a dictionary file: "
43 << args[0] << std::endl;
44 return 20;
45 }
46 } else {
47 try {
48 trie.load(args[0]);
49 } catch (const marisa::Exception &ex) {
50 std::cerr << ex.what() << ": failed to load a dictionary file: "
51 << args[0] << std::endl;
52 return 21;
53 }
54 }
55
56 marisa::Agent agent;
57 marisa::Keyset keyset;
58 std::string str;
59 while (std::getline(std::cin, str)) {
60 try {
61 agent.set_query(str.c_str(), str.length());
62 while (trie.common_prefix_search(agent)) {
63 keyset.push_back(agent.key());
64 }
65 if (keyset.empty()) {
66 std::cout << "not found" << std::endl;
67 } else {
68 std::cout << keyset.size() << " found" << std::endl;
69 const std::size_t end = std::min(max_num_results, keyset.size());
70 for (std::size_t i = 0; i < end; ++i) {
71 std::cout << keyset[i].id() << '\t';
72 std::cout.write(keyset[i].ptr(),
73 static_cast<std::streamsize>(keyset[i].length())) << '\t';
74 std::cout << str << '\n';
75 }
76 }
77 keyset.reset();
78 } catch (const marisa::Exception &ex) {
79 std::cerr << ex.what() << ": common_prefix_search() failed: "
80 << str << std::endl;
81 return 30;
82 }
83
84 if (!std::cout) {
85 std::cerr << "error: failed to write results to standard output"
86 << std::endl;
87 return 31;
88 }
89 }
90
91 return 0;
92 }
93
94 } // namespace
95
main(int argc,char * argv[])96 int main(int argc, char *argv[]) {
97 std::ios::sync_with_stdio(false);
98
99 ::cmdopt_option long_options[] = {
100 { "max-num-results", 1, NULL, 'n' },
101 { "mmap-dictionary", 0, NULL, 'm' },
102 { "read-dictionary", 0, NULL, 'r' },
103 { "help", 0, NULL, 'h' },
104 { NULL, 0, NULL, 0 }
105 };
106 ::cmdopt_t cmdopt;
107 ::cmdopt_init(&cmdopt, argc, argv, "n:mrh", long_options);
108 int label;
109 while ((label = ::cmdopt_get(&cmdopt)) != -1) {
110 switch (label) {
111 case 'n': {
112 char *end_of_value;
113 const long value = std::strtol(cmdopt.optarg, &end_of_value, 10);
114 if ((*end_of_value != '\0') || (value < 0)) {
115 std::cerr << "error: option `-n' with an invalid argument: "
116 << cmdopt.optarg << std::endl;
117 }
118 if ((value == 0) || ((unsigned long long)value > MARISA_SIZE_MAX)) {
119 max_num_results = MARISA_SIZE_MAX;
120 } else {
121 max_num_results = (std::size_t)value;
122 }
123 break;
124 }
125 case 'm': {
126 mmap_flag = true;
127 break;
128 }
129 case 'r': {
130 mmap_flag = false;
131 break;
132 }
133 case 'h': {
134 print_help(argv[0]);
135 return 0;
136 }
137 default: {
138 return 1;
139 }
140 }
141 }
142 return common_prefix_search(cmdopt.argv + cmdopt.optind,
143 static_cast<std::size_t>(cmdopt.argc - cmdopt.optind));
144 }
145