1 #ifndef MARISA_BASE_H_
2 #define MARISA_BASE_H_
3
4 // Visual C++ does not provide stdint.h.
5 #ifndef _MSC_VER
6 #include <stdint.h>
7 #endif // _MSC_VER
8
9 #ifdef __cplusplus
10 #include <cstddef>
11 #include <new>
12 #else // __cplusplus
13 #include <stddef.h>
14 #endif // __cplusplus
15
16 #if defined(__ANDROID__)
17 #include <android/log.h>
18 #include <stdio.h>
19 #endif // __ANDROID__
20
21 #ifdef __cplusplus
22 extern "C" {
23 #endif // __cplusplus
24
// Fixed-width unsigned integer types. Visual C++ builds spell them with the
// compiler's own __intN keywords; all other builds use the <stdint.h> types.
#ifdef _MSC_VER
typedef unsigned __int8 marisa_uint8;
typedef unsigned __int16 marisa_uint16;
typedef unsigned __int32 marisa_uint32;
typedef unsigned __int64 marisa_uint64;
#else  // _MSC_VER
typedef uint8_t marisa_uint8;
typedef uint16_t marisa_uint16;
typedef uint32_t marisa_uint32;
typedef uint64_t marisa_uint64;
#endif  // _MSC_VER

// Largest value of each unsigned type: converting -1 to an unsigned type
// yields that type's maximum value.
#define MARISA_UINT8_MAX ((marisa_uint8)-1)
#define MARISA_UINT16_MAX ((marisa_uint16)-1)
#define MARISA_UINT32_MAX ((marisa_uint32)-1)
#define MARISA_UINT64_MAX ((marisa_uint64)-1)
#define MARISA_SIZE_MAX ((size_t)-1)

// Special values. All three share the largest 32-bit value.
#define MARISA_ZERO_TERMINATED MARISA_UINT32_MAX
#define MARISA_NOT_FOUND MARISA_UINT32_MAX
#define MARISA_MISMATCH MARISA_UINT32_MAX

// Upper limits. Both are one less than MARISA_UINT32_MAX, so they never
// collide with the special values above.
#define MARISA_MAX_LENGTH (MARISA_UINT32_MAX - 1)
#define MARISA_MAX_NUM_KEYS (MARISA_UINT32_MAX - 1)
49
// marisa_status provides a list of error codes. Most functions in libmarisa
// throw or return an error code.
typedef enum marisa_status_ {
  // MARISA_OK means that a requested operation has succeeded.
  MARISA_OK = 0,

  // MARISA_HANDLE_ERROR means that a given handle is invalid.
  MARISA_HANDLE_ERROR = 1,

  // MARISA_STATE_ERROR means that an object is not ready for a requested
  // operation. For example, an operation to modify a fixed container throws
  // an exception with this error code.
  MARISA_STATE_ERROR = 2,

  // MARISA_PARAM_ERROR means that a given argument is invalid. For example,
  // some functions throw an exception with this error code when an
  // out-of-range value or a NULL pointer is given.
  MARISA_PARAM_ERROR = 3,

  // MARISA_SIZE_ERROR means that a size exceeds its limit. This error code
  // is used when a dictionary being built is too large or when
  // std::length_error is caught.
  MARISA_SIZE_ERROR = 4,

  // MARISA_MEMORY_ERROR means that a memory allocation has failed.
  MARISA_MEMORY_ERROR = 5,

  // MARISA_IO_ERROR means that an I/O operation has failed.
  MARISA_IO_ERROR = 6,

  // MARISA_UNEXPECTED_ERROR means that an unexpected error has occurred.
  MARISA_UNEXPECTED_ERROR = 7
} marisa_status;
83
84 // marisa_strerror() returns a name of an error code.
85 const char *marisa_strerror(marisa_status status);
86
// Flags and masks for dictionary settings are defined as follows. Please note
// that unspecified values/flags will be replaced with the default
// values/flags.
typedef enum marisa_flags_ {
  // A dictionary consists of 3 tries by default. If you want to change the
  // number of tries, please give it together with the other flags.
  MARISA_MIN_NUM_TRIES = 0x00001,
  MARISA_MAX_NUM_TRIES = 0x000FF,
  MARISA_DEFAULT_NUM_TRIES = 0x00003,

  // MARISA_PATRICIA_TRIE is usually a better choice. MARISA_PREFIX_TRIE is
  // provided for comparing prefix/patricia tries.
  MARISA_PATRICIA_TRIE = 0x00100,
  MARISA_PREFIX_TRIE = 0x00200,
  MARISA_DEFAULT_TRIE = MARISA_PATRICIA_TRIE,

  // There are 3 kinds of TAIL implementations.
  // - MARISA_WITHOUT_TAIL:
  //   builds a dictionary without a TAIL. Its last trie has only 1-byte
  //   labels.
  // - MARISA_BINARY_TAIL:
  //   builds a dictionary with a binary-mode TAIL. Its last labels are
  //   stored as binary data.
  // - MARISA_TEXT_TAIL:
  //   builds a dictionary with a text-mode TAIL if its last labels do not
  //   contain NUL characters. The last labels are stored as zero-terminated
  //   strings. Otherwise, a dictionary is built with a binary-mode TAIL.
  MARISA_WITHOUT_TAIL = 0x01000,
  MARISA_BINARY_TAIL = 0x02000,
  MARISA_TEXT_TAIL = 0x04000,
  MARISA_DEFAULT_TAIL = MARISA_TEXT_TAIL,

  // libmarisa arranges nodes in ascending order of their labels
  // (MARISA_LABEL_ORDER) or in descending order of their weights
  // (MARISA_WEIGHT_ORDER). MARISA_WEIGHT_ORDER is generally a better choice
  // because it enables faster lookups, but MARISA_LABEL_ORDER is still useful
  // if an application needs to predict keys in label order.
  MARISA_LABEL_ORDER = 0x10000,
  MARISA_WEIGHT_ORDER = 0x20000,
  MARISA_DEFAULT_ORDER = MARISA_WEIGHT_ORDER,

  // The default settings. 0 is equivalent to MARISA_DEFAULT_FLAGS.
  MARISA_DEFAULT_FLAGS = MARISA_DEFAULT_NUM_TRIES
      | MARISA_DEFAULT_TRIE | MARISA_DEFAULT_TAIL | MARISA_DEFAULT_ORDER,

  // One mask per setting group (each group occupies its own hex digits),
  // followed by the union of all groups.
  MARISA_NUM_TRIES_MASK = 0x000FF,
  MARISA_TRIE_MASK = 0x00F00,
  MARISA_TAIL_MASK = 0x0F000,
  MARISA_ORDER_MASK = 0xF0000,
  MARISA_FLAGS_MASK = 0xFFFFF
} marisa_flags;
137
138 #ifdef __cplusplus
139 } // extern "C"
140 #endif // __cplusplus
141
142 //#include <cstddef>
143
144 #ifdef __cplusplus
145 namespace marisa {
146
147 typedef ::marisa_uint8 UInt8;
148 typedef ::marisa_uint16 UInt16;
149 typedef ::marisa_uint32 UInt32;
150 typedef ::marisa_uint64 UInt64;
151
152 typedef ::marisa_status Status;
153
154 // An exception object stores a filename, a line number and an error code.
155 class Exception {
156 public:
Exception(const char * filename,int line,Status status)157 Exception(const char *filename, int line, Status status)
158 : filename_(filename), line_(line), status_(status) {}
Exception(const Exception & ex)159 Exception(const Exception &ex)
160 : filename_(ex.filename_), line_(ex.line_), status_(ex.status_) {}
161
162 Exception &operator=(const Exception &rhs) {
163 filename_ = rhs.filename_;
164 line_ = rhs.line_;
165 status_ = rhs.status_;
166 return *this;
167 }
168
filename()169 const char *filename() const {
170 return filename_;
171 }
line()172 int line() const {
173 return line_;
174 }
status()175 Status status() const {
176 return status_;
177 }
178
179 // Same as std::exception, what() returns an error message.
what()180 const char *what() const {
181 return ::marisa_strerror(status_);
182 }
183
184 private:
185 const char *filename_;
186 int line_;
187 Status status_;
188 };
189
// MARISA_THROW reports the error `status'.
//  - On every platform except Android it throws a marisa::Exception that
//    carries the filename and line number of the macro invocation.
//  - On Android it only writes the numeric status to the system log and
//    evaluates to 0. Nothing is thrown, so execution continues after the
//    macro.
// The names used by the macro are fully qualified so that it expands
// correctly even when it is used outside of namespace marisa.
#if !defined(__ANDROID__)
#define MARISA_THROW(status) \
  (throw ::marisa::Exception(__FILE__, __LINE__, status))
#else  // !defined(__ANDROID__)

// Formats "marisa exception: <status>" into a local buffer, writes it to
// the Android log with tag "marisa-trie" at ANDROID_LOG_ERROR priority, and
// returns 0.
inline int android_log_exception(int status) {
  char tmpbuf[100];
  snprintf(tmpbuf, sizeof(tmpbuf), "marisa exception: %d", status);
  __android_log_write(ANDROID_LOG_ERROR, "marisa-trie", tmpbuf);
  return 0;
}

#define MARISA_THROW(status) \
  (::marisa::android_log_exception(status))

#endif  // !defined(__ANDROID__)
207
// MARISA_THROW_IF invokes MARISA_THROW(status) if `cond' is true. `cond' is
// evaluated exactly once, and `status' is used only when `cond' holds
// because `||' short-circuits. The `, 0' gives the right-hand operand of
// `||' a non-void value even when MARISA_THROW expands to a throw
// expression, and the leading (void) discards the result.
#define MARISA_THROW_IF(cond, status) \
  (void)((!(cond)) || (MARISA_THROW(status), 0))
211
// MARISA_DEBUG_IF is used for debugging. For example, MARISA_DEBUG_IF is used
// to find out-of-range accesses in marisa::Vector, marisa::IntVector, etc.
// When _DEBUG is defined it behaves exactly like MARISA_THROW_IF. Otherwise
// it expands to nothing, so `cond' and `status' are not evaluated at all.
#ifdef _DEBUG
#define MARISA_DEBUG_IF(cond, status) \
  MARISA_THROW_IF(cond, status)
#else  // _DEBUG
#define MARISA_DEBUG_IF(cond, status)
#endif  // _DEBUG
220
221 // To not include <algorithm> only for std::swap().
222 template <typename T>
Swap(T * lhs,T * rhs)223 void Swap(T *lhs, T *rhs) {
224 MARISA_THROW_IF((lhs == NULL) || (rhs == NULL), MARISA_PARAM_ERROR);
225 T temp = *lhs;
226 *lhs = *rhs;
227 *rhs = temp;
228 }
229
230 } // namespace marisa
231 #endif // __cplusplus
232
233 #endif // MARISA_BASE_H_
234