1 #ifndef MARISA_BASE_H_ 2 #define MARISA_BASE_H_ 3 4 // Old Visual C++ does not provide stdint.h. 5 #ifndef _MSC_VER 6 #include <stdint.h> 7 #endif // _MSC_VER 8 9 #ifdef __cplusplus 10 #include <cstddef> 11 #else // __cplusplus 12 #include <stddef.h> 13 #endif // __cplusplus 14 15 #ifdef __cplusplus 16 extern "C" { 17 #endif // __cplusplus 18 19 #ifdef _MSC_VER 20 typedef unsigned __int8 marisa_uint8; 21 typedef unsigned __int16 marisa_uint16; 22 typedef unsigned __int32 marisa_uint32; 23 typedef unsigned __int64 marisa_uint64; 24 #else // _MSC_VER 25 typedef uint8_t marisa_uint8; 26 typedef uint16_t marisa_uint16; 27 typedef uint32_t marisa_uint32; 28 typedef uint64_t marisa_uint64; 29 #endif // _MSC_VER 30 31 #if defined(_WIN64) || defined(__amd64__) || defined(__x86_64__) || \ 32 defined(__ia64__) || defined(__ppc64__) || defined(__powerpc64__) || \ 33 defined(__sparc64__) || defined(__mips64__) || defined(__aarch64__) || \ 34 defined(__s390x__) 35 #define MARISA_WORD_SIZE 64 36 #else // defined(_WIN64), etc. 37 #define MARISA_WORD_SIZE 32 38 #endif // defined(_WIN64), etc. 39 40 //#define MARISA_WORD_SIZE (sizeof(void *) * 8) 41 42 #define MARISA_UINT8_MAX ((marisa_uint8)~(marisa_uint8)0) 43 #define MARISA_UINT16_MAX ((marisa_uint16)~(marisa_uint16)0) 44 #define MARISA_UINT32_MAX ((marisa_uint32)~(marisa_uint32)0) 45 #define MARISA_UINT64_MAX ((marisa_uint64)~(marisa_uint64)0) 46 #define MARISA_SIZE_MAX ((size_t)~(size_t)0) 47 48 #define MARISA_INVALID_LINK_ID MARISA_UINT32_MAX 49 #define MARISA_INVALID_KEY_ID MARISA_UINT32_MAX 50 #define MARISA_INVALID_EXTRA (MARISA_UINT32_MAX >> 8) 51 52 // Error codes are defined as members of marisa_error_code. This library throws 53 // an exception with one of the error codes when an error occurs. 54 typedef enum marisa_error_code_ { 55 // MARISA_OK means that a requested operation has succeeded. In practice, an 56 // exception never has MARISA_OK because it is not an error. 57 MARISA_OK = 0, 58 59 // MARISA_STATE_ERROR means that an object was not ready for a requested 60 // operation. For example, an operation to modify a fixed vector throws an 61 // exception with MARISA_STATE_ERROR. 62 MARISA_STATE_ERROR = 1, 63 64 // MARISA_NULL_ERROR means that an invalid NULL pointer has been given. 65 MARISA_NULL_ERROR = 2, 66 67 // MARISA_BOUND_ERROR means that an operation has tried to access an out of 68 // range address. 69 MARISA_BOUND_ERROR = 3, 70 71 // MARISA_RANGE_ERROR means that an out of range value has appeared in 72 // operation. 73 MARISA_RANGE_ERROR = 4, 74 75 // MARISA_CODE_ERROR means that an undefined code has appeared in operation. 76 MARISA_CODE_ERROR = 5, 77 78 // MARISA_RESET_ERROR means that a smart pointer has tried to reset itself. 79 MARISA_RESET_ERROR = 6, 80 81 // MARISA_SIZE_ERROR means that a size has exceeded a library limitation. 82 MARISA_SIZE_ERROR = 7, 83 84 // MARISA_MEMORY_ERROR means that a memory allocation has failed. 85 MARISA_MEMORY_ERROR = 8, 86 87 // MARISA_IO_ERROR means that an I/O operation has failed. 88 MARISA_IO_ERROR = 9, 89 90 // MARISA_FORMAT_ERROR means that input was in invalid format. 91 MARISA_FORMAT_ERROR = 10, 92 } marisa_error_code; 93 94 // Min/max values, flags and masks for dictionary settings are defined below. 95 // Please note that unspecified settings will be replaced with the default 96 // settings. For example, 0 is equivalent to (MARISA_DEFAULT_NUM_TRIES | 97 // MARISA_DEFAULT_TRIE | MARISA_DEFAULT_TAIL | MARISA_DEFAULT_ORDER). 98 99 // A dictionary consists of 3 tries in default. Usually more tries make a 100 // dictionary space-efficient but time-inefficient. 101 typedef enum marisa_num_tries_ { 102 MARISA_MIN_NUM_TRIES = 0x00001, 103 MARISA_MAX_NUM_TRIES = 0x0007F, 104 MARISA_DEFAULT_NUM_TRIES = 0x00003, 105 } marisa_num_tries; 106 107 // This library uses a cache technique to accelerate search functions. The 108 // following enumerated type marisa_cache_level gives a list of available cache 109 // size options. A larger cache enables faster search but takes a more space. 110 typedef enum marisa_cache_level_ { 111 MARISA_HUGE_CACHE = 0x00080, 112 MARISA_LARGE_CACHE = 0x00100, 113 MARISA_NORMAL_CACHE = 0x00200, 114 MARISA_SMALL_CACHE = 0x00400, 115 MARISA_TINY_CACHE = 0x00800, 116 MARISA_DEFAULT_CACHE = MARISA_NORMAL_CACHE 117 } marisa_cache_level; 118 119 // This library provides 2 kinds of TAIL implementations. 120 typedef enum marisa_tail_mode_ { 121 // MARISA_TEXT_TAIL merges last labels as zero-terminated strings. So, it is 122 // available if and only if the last labels do not contain a NULL character. 123 // If MARISA_TEXT_TAIL is specified and a NULL character exists in the last 124 // labels, the setting is automatically switched to MARISA_BINARY_TAIL. 125 MARISA_TEXT_TAIL = 0x01000, 126 127 // MARISA_BINARY_TAIL also merges last labels but as byte sequences. It uses 128 // a bit vector to detect the end of a sequence, instead of NULL characters. 129 // So, MARISA_BINARY_TAIL requires a larger space if the average length of 130 // labels is greater than 8. 131 MARISA_BINARY_TAIL = 0x02000, 132 133 MARISA_DEFAULT_TAIL = MARISA_TEXT_TAIL, 134 } marisa_tail_mode; 135 136 // The arrangement of nodes affects the time cost of matching and the order of 137 // predictive search. 138 typedef enum marisa_node_order_ { 139 // MARISA_LABEL_ORDER arranges nodes in ascending label order. 140 // MARISA_LABEL_ORDER is useful if an application needs to predict keys in 141 // label order. 142 MARISA_LABEL_ORDER = 0x10000, 143 144 // MARISA_WEIGHT_ORDER arranges nodes in descending weight order. 145 // MARISA_WEIGHT_ORDER is generally a better choice because it enables faster 146 // matching. 147 MARISA_WEIGHT_ORDER = 0x20000, 148 149 MARISA_DEFAULT_ORDER = MARISA_WEIGHT_ORDER, 150 } marisa_node_order; 151 152 typedef enum marisa_config_mask_ { 153 MARISA_NUM_TRIES_MASK = 0x0007F, 154 MARISA_CACHE_LEVEL_MASK = 0x00F80, 155 MARISA_TAIL_MODE_MASK = 0x0F000, 156 MARISA_NODE_ORDER_MASK = 0xF0000, 157 MARISA_CONFIG_MASK = 0xFFFFF 158 } marisa_config_mask; 159 160 #ifdef __cplusplus 161 } // extern "C" 162 #endif // __cplusplus 163 164 #ifdef __cplusplus 165 166 // `std::swap` is in <utility> since C++ 11 but in <algorithm> in C++ 98: 167 #if __cplusplus >= 201103L 168 #include <utility> 169 #else 170 #include <algorithm> 171 #endif 172 namespace marisa { 173 174 typedef ::marisa_uint8 UInt8; 175 typedef ::marisa_uint16 UInt16; 176 typedef ::marisa_uint32 UInt32; 177 typedef ::marisa_uint64 UInt64; 178 179 typedef ::marisa_error_code ErrorCode; 180 181 typedef ::marisa_cache_level CacheLevel; 182 typedef ::marisa_tail_mode TailMode; 183 typedef ::marisa_node_order NodeOrder; 184 185 using std::swap; 186 187 } // namespace marisa 188 #endif // __cplusplus 189 190 #ifdef __cplusplus 191 #include "marisa/exception.h" 192 #include "marisa/scoped-ptr.h" 193 #include "marisa/scoped-array.h" 194 #endif // __cplusplus 195 196 #endif // MARISA_BASE_H_ 197