1 #ifndef MARISA_ALPHA_BASE_H_
2 #define MARISA_ALPHA_BASE_H_
3
// Visual C++ releases older than Visual Studio 2010 do not provide stdint.h,
// so the header is skipped for every MSVC build.
5 #ifndef _MSC_VER
6 #include <stdint.h>
7 #endif // _MSC_VER
8
9 #ifdef __cplusplus
10 #include <cstddef>
11 #include <new>
12 #else // __cplusplus
13 #include <stddef.h>
14 #endif // __cplusplus
15
16 #ifdef __cplusplus
17 extern "C" {
18 #endif // __cplusplus
19
// Fixed-width unsigned integer types. Compilers other than Visual C++ take
// them from <stdint.h>; Visual C++ builds use the compiler's built-in sized
// integer keywords instead.
#ifndef _MSC_VER
typedef uint8_t  marisa_alpha_uint8;
typedef uint16_t marisa_alpha_uint16;
typedef uint32_t marisa_alpha_uint32;
typedef uint64_t marisa_alpha_uint64;
#else  // _MSC_VER
typedef unsigned __int8  marisa_alpha_uint8;
typedef unsigned __int16 marisa_alpha_uint16;
typedef unsigned __int32 marisa_alpha_uint32;
typedef unsigned __int64 marisa_alpha_uint64;
#endif  // _MSC_VER

// Largest value of each unsigned type. Converting -1 to an unsigned type is
// defined to wrap around, which yields the maximum value of that type.
#define MARISA_ALPHA_UINT8_MAX  ((marisa_alpha_uint8)-1)
#define MARISA_ALPHA_UINT16_MAX ((marisa_alpha_uint16)-1)
#define MARISA_ALPHA_UINT32_MAX ((marisa_alpha_uint32)-1)
#define MARISA_ALPHA_UINT64_MAX ((marisa_alpha_uint64)-1)
#define MARISA_ALPHA_SIZE_MAX   ((size_t)-1)

// Special 32-bit values. All three share the value MARISA_ALPHA_UINT32_MAX.
#define MARISA_ALPHA_ZERO_TERMINATED MARISA_ALPHA_UINT32_MAX
#define MARISA_ALPHA_NOT_FOUND       MARISA_ALPHA_UINT32_MAX
#define MARISA_ALPHA_MISMATCH        MARISA_ALPHA_UINT32_MAX

// Upper limits, both one less than MARISA_ALPHA_UINT32_MAX (presumably so
// that a valid length or key count never equals the special values above).
#define MARISA_ALPHA_MAX_LENGTH   (MARISA_ALPHA_UINT32_MAX - 1)
#define MARISA_ALPHA_MAX_NUM_KEYS (MARISA_ALPHA_UINT32_MAX - 1)
44
// marisa_alpha_status enumerates the error codes of libmarisa. Most functions
// in the library either throw an exception carrying one of these codes or
// return one of them.
typedef enum marisa_alpha_status_ {
  // The requested operation has succeeded.
  MARISA_ALPHA_OK = 0,

  // The given handle is invalid.
  MARISA_ALPHA_HANDLE_ERROR = 1,

  // The object is not ready for the requested operation. For example, an
  // attempt to modify a fixed container throws an exception with this code.
  MARISA_ALPHA_STATE_ERROR = 2,

  // The given argument is invalid. For example, some functions throw an
  // exception with this code when they receive an out-of-range value or a
  // NULL pointer.
  MARISA_ALPHA_PARAM_ERROR = 3,

  // A size exceeds its limit. This code is used when a dictionary being
  // built is too large or when std::length_error is caught.
  MARISA_ALPHA_SIZE_ERROR = 4,

  // A memory allocation has failed.
  MARISA_ALPHA_MEMORY_ERROR = 5,

  // An I/O operation has failed.
  MARISA_ALPHA_IO_ERROR = 6,

  // An unexpected error has occurred.
  MARISA_ALPHA_UNEXPECTED_ERROR = 7
} marisa_alpha_status;

// marisa_alpha_strerror() returns the name of the error code `status'.
const char *marisa_alpha_strerror(marisa_alpha_status status);
81
// Flags and masks for dictionary settings. Each group of settings occupies
// its own range of bits, selected by the matching *_MASK value at the end of
// the enumeration. Please note that unspecified values/flags are replaced
// with the default values/flags.
typedef enum marisa_alpha_flags_ {
  // Number of tries (lowest 8 bits). A dictionary consists of 3 tries by
  // default. To change the number of tries, combine the desired number with
  // the other flags.
  MARISA_ALPHA_MIN_NUM_TRIES     = 0x00001,
  MARISA_ALPHA_MAX_NUM_TRIES     = 0x000FF,
  MARISA_ALPHA_DEFAULT_NUM_TRIES = 0x00003,

  // Trie type. MARISA_ALPHA_PATRICIA_TRIE is usually the better choice;
  // MARISA_ALPHA_PREFIX_TRIE is provided for comparing prefix and patricia
  // tries.
  MARISA_ALPHA_PATRICIA_TRIE = 0x00100,
  MARISA_ALPHA_PREFIX_TRIE   = 0x00200,
  MARISA_ALPHA_DEFAULT_TRIE  = MARISA_ALPHA_PATRICIA_TRIE,

  // TAIL implementation. There are 3 kinds:
  //  - MARISA_ALPHA_WITHOUT_TAIL:
  //    builds a dictionary without a TAIL. Its last trie has only 1-byte
  //    labels.
  //  - MARISA_ALPHA_BINARY_TAIL:
  //    builds a dictionary with a binary-mode TAIL. Its last labels are
  //    stored as binary data.
  //  - MARISA_ALPHA_TEXT_TAIL:
  //    builds a dictionary with a text-mode TAIL if its last labels do not
  //    contain NUL characters; the last labels are then stored as
  //    zero-terminated strings. Otherwise the dictionary is built with a
  //    binary-mode TAIL.
  MARISA_ALPHA_WITHOUT_TAIL = 0x01000,
  MARISA_ALPHA_BINARY_TAIL  = 0x02000,
  MARISA_ALPHA_TEXT_TAIL    = 0x04000,
  MARISA_ALPHA_DEFAULT_TAIL = MARISA_ALPHA_TEXT_TAIL,

  // Node order. libmarisa arranges nodes either in ascending order of their
  // labels (MARISA_ALPHA_LABEL_ORDER) or in descending order of their weights
  // (MARISA_ALPHA_WEIGHT_ORDER). MARISA_ALPHA_WEIGHT_ORDER is generally the
  // better choice because it enables faster lookups, but
  // MARISA_ALPHA_LABEL_ORDER remains useful when an application needs to
  // predict keys in label order.
  MARISA_ALPHA_LABEL_ORDER   = 0x10000,
  MARISA_ALPHA_WEIGHT_ORDER  = 0x20000,
  MARISA_ALPHA_DEFAULT_ORDER = MARISA_ALPHA_WEIGHT_ORDER,

  // The default settings: the bitwise OR of the four defaults above. Passing
  // 0 is equivalent to passing MARISA_ALPHA_DEFAULT_FLAGS.
  MARISA_ALPHA_DEFAULT_FLAGS =
      MARISA_ALPHA_DEFAULT_NUM_TRIES |
      MARISA_ALPHA_DEFAULT_TRIE |
      MARISA_ALPHA_DEFAULT_TAIL |
      MARISA_ALPHA_DEFAULT_ORDER,

  // Bit masks that isolate each group of settings, and the mask covering all
  // of them.
  MARISA_ALPHA_NUM_TRIES_MASK = 0x000FF,
  MARISA_ALPHA_TRIE_MASK      = 0x00F00,
  MARISA_ALPHA_TAIL_MASK      = 0x0F000,
  MARISA_ALPHA_ORDER_MASK     = 0xF0000,
  MARISA_ALPHA_FLAGS_MASK     = 0xFFFFF
} marisa_alpha_flags;
133
134 #ifdef __cplusplus
135 } // extern "C"
136 #endif // __cplusplus
137
138 #ifdef __cplusplus
139 namespace marisa_alpha {
140
141 typedef ::marisa_alpha_uint8 UInt8;
142 typedef ::marisa_alpha_uint16 UInt16;
143 typedef ::marisa_alpha_uint32 UInt32;
144 typedef ::marisa_alpha_uint64 UInt64;
145
146 typedef ::marisa_alpha_status Status;
147
148 // An exception object stores a filename, a line number and an error code.
149 class Exception {
150 public:
Exception(const char * filename,int line,Status status)151 Exception(const char *filename, int line, Status status)
152 : filename_(filename), line_(line), status_(status) {}
Exception(const Exception & ex)153 Exception(const Exception &ex)
154 : filename_(ex.filename_), line_(ex.line_), status_(ex.status_) {}
155
156 Exception &operator=(const Exception &rhs) {
157 filename_ = rhs.filename_;
158 line_ = rhs.line_;
159 status_ = rhs.status_;
160 return *this;
161 }
162
filename()163 const char *filename() const {
164 return filename_;
165 }
line()166 int line() const {
167 return line_;
168 }
status()169 Status status() const {
170 return status_;
171 }
172
173 // Same as std::exception, what() returns an error message.
what()174 const char *what() const {
175 return ::marisa_alpha_strerror(status_);
176 }
177
178 private:
179 const char *filename_;
180 int line_;
181 Status status_;
182 };
183
// MARISA_ALPHA_THROW throws an Exception that records the source location of
// the macro invocation (__FILE__ and __LINE__) together with `status'.
// The exception class is spelled with its full qualification because a macro
// is expanded textually at the point of use: an unqualified `Exception' would
// be looked up in the scope of the caller and would therefore only work
// inside namespace marisa_alpha.
#define MARISA_ALPHA_THROW(status) \
  (throw ::marisa_alpha::Exception(__FILE__, __LINE__, status))

// MARISA_ALPHA_THROW_IF throws an exception with `status' if `cond' is true.
// It is written as a single expression: when `cond' is false the left operand
// of `||' is true and the throw is skipped. The `, 0' gives the throw
// expression (whose type is void) a value usable as an operand of `||', and
// the leading (void) discards the overall result.
#define MARISA_ALPHA_THROW_IF(cond, status) \
  (void)((!(cond)) || (MARISA_ALPHA_THROW(status), 0))

// MARISA_ALPHA_DEBUG_IF is used for debugging. When _DEBUG is defined it
// behaves exactly like MARISA_ALPHA_THROW_IF; otherwise it expands to nothing,
// so neither `cond' nor `status' is evaluated. The original note names
// out-of-range access checks in marisa::Vector, marisa::IntVector, etc. as
// typical uses (those classes are not defined in this header).
#ifdef _DEBUG
#define MARISA_ALPHA_DEBUG_IF(cond, status) \
  MARISA_ALPHA_THROW_IF(cond, status)
#else
#define MARISA_ALPHA_DEBUG_IF(cond, status)
#endif
201
202 // To not include <algorithm> only for std::swap().
203 template <typename T>
Swap(T * lhs,T * rhs)204 void Swap(T *lhs, T *rhs) {
205 MARISA_ALPHA_THROW_IF((lhs == NULL) || (rhs == NULL),
206 MARISA_ALPHA_PARAM_ERROR);
207 T temp = *lhs;
208 *lhs = *rhs;
209 *rhs = temp;
210 }
211
212 } // namespace marisa_alpha
213 #endif // __cplusplus
214
215 #endif // MARISA_ALPHA_BASE_H_
216