/**
 *******************************************************************************
 * Copyright (C) 2006, International Business Machines Corporation and others. *
 * All Rights Reserved.                                                        *
 *******************************************************************************
 */

#ifndef TRIEDICT_H
#define TRIEDICT_H

#include "unicode/utypes.h"
#include "unicode/uobject.h"
#include "unicode/utext.h"

/* Forward declarations: these types are only used through pointers below. */
struct UEnumeration;
struct UDataSwapper;
struct UDataMemory;

/**
 * <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
 *
 * @param ds            Pointer to UDataSwapper containing global data about the
 *                      transformation and function pointers for handling primitive
 *                      types.
 * @param inData        Pointer to the input data to be transformed or examined.
 * @param length        Length of the data, counting bytes. May be -1 for preflighting.
 *                      If length>=0, then transform the data.
 *                      If length==-1, then only determine the length of the data.
 *                      The length cannot be determined from the data itself for all
 *                      types of data (e.g., not for simple arrays of integers).
 * @param outData       Pointer to the output data buffer.
 *                      If length>=0 (transformation), then the output buffer must
 *                      have a capacity of at least length.
 *                      If length==-1, then outData will not be used and can be NULL.
 * @param pErrorCode    ICU UErrorCode parameter, must not be NULL and must
 *                      fulfill U_SUCCESS on input.
 * @return              The actual length of the data.
 *
 * @see UDataSwapper
 */

U_CAPI int32_t U_EXPORT2
triedict_swap(const UDataSwapper *ds,
            const void *inData, int32_t length, void *outData,
            UErrorCode *pErrorCode);

U_NAMESPACE_BEGIN

class StringEnumeration;
struct CompactTrieHeader;

/*******************************************************************
 * TrieWordDictionary
 */

/**
 * <p>TrieWordDictionary is an abstract class that represents a word
 * dictionary based on a trie. The base protocol is read-only.
 * Subclasses may allow writing.</p>
 */
class U_COMMON_API TrieWordDictionary : public UMemory {
 public:

  /**
   * <p>Default constructor.</p>
   *
   */
  TrieWordDictionary();

  /**
   * <p>Virtual destructor.</p>
   */
  virtual ~TrieWordDictionary();

  /**
   * <p>Find dictionary words that match the text.</p>
   *
   * NOTE(review): this comment used to document a <code>start</code> parameter
   * ("The current position in text"), but the signature has no such parameter.
   * Presumably matching begins at the UText's current iteration position --
   * confirm against the implementations.
   *
   * @param text A UText representing the text. The
   * iterator is left after the longest prefix match in the dictionary.
   * @param maxLength The maximum number of code units to match.
   * @param lengths An array that is filled with the lengths of words that matched.
   * @param count Filled with the number of elements output in lengths.
   * @param limit The size of the lengths array; this limits the number of words output.
   * @return The number of characters in text that were matched.
   */
  virtual int32_t matches( UText *text,
                              int32_t maxLength,
                              int32_t *lengths,
                              int &count,
                              int limit ) const = 0;

  /**
   * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
   *
   * @param status A status code recording the success of the call.
   * @return A StringEnumeration that will iterate through the whole dictionary.
   * The caller is responsible for closing it. The order is unspecified.
   */
  virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;

};

/*******************************************************************
 * MutableTrieDictionary
 */

struct TernaryNode;             // Forward declaration

/**
 * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
 * added.</p>
 */
class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
 private:
    /**
     * The root node of the trie
     * @internal
     */

  TernaryNode               *fTrie;

    /**
     * A UText for internal use
     * @internal
     */

  UText    *fIter;

  friend class CompactTrieDictionary;   // For fast conversion

 public:

 /**
  * <p>Constructor.</p>
  *
  * @param median A UChar around which to balance the trie. Ideally, it should
  * begin at least one word that is near the median of the set in the dictionary
  * @param status A status code recording the success of the call.
  */
  MutableTrieDictionary( UChar median, UErrorCode &status );

  /**
   * <p>Virtual destructor.</p>
   */
  virtual ~MutableTrieDictionary();

  /**
   * <p>Find dictionary words that match the text.</p>
   *
   * @param text A UText representing the text. The
   * iterator is left after the longest prefix match in the dictionary.
   * @param maxLength The maximum number of code units to match.
   * @param lengths An array that is filled with the lengths of words that matched.
   * @param count Filled with the number of elements output in lengths.
   * @param limit The size of the lengths array; this limits the number of words output.
   * @return The number of characters in text that were matched.
   */
  virtual int32_t matches( UText *text,
                              int32_t maxLength,
                              int32_t *lengths,
                              int &count,
                              int limit ) const;

  /**
   * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
   *
   * @param status A status code recording the success of the call.
   * @return A StringEnumeration that will iterate through the whole dictionary.
   * The caller is responsible for closing it. The order is unspecified.
   */
  virtual StringEnumeration *openWords( UErrorCode &status ) const;

 /**
  * <p>Add one word to the dictionary.</p>
  *
  * @param word A UChar buffer containing the word.
  * @param length The length of the word.
  * @param status The resultant status
  */
  virtual void addWord( const UChar *word,
                        int32_t length,
                        UErrorCode &status);

#if 0
 /**
  * <p>Add all strings from a UEnumeration to the dictionary.</p>
  *
  * @param words A UEnumeration that will return the desired words.
  * @param status The resultant status
  */
  virtual void addWords( UEnumeration *words, UErrorCode &status );
#endif

 protected:
  /**
   * <p>Search the dictionary for matches.</p>
   *
   * @param text A UText representing the text. The
   * iterator is left after the longest prefix match in the dictionary.
   * @param maxLength The maximum number of code units to match.
   * @param lengths An array that is filled with the lengths of words that matched.
   * @param count Filled with the number of elements output in lengths.
   * @param limit The size of the lengths array; this limits the number of words output.
   * @param parent The parent of the current node
   * @param pMatched The returned parent node matched the input
   * @return The number of characters in text that were matched.
   */
  virtual int32_t search( UText *text,
                              int32_t maxLength,
                              int32_t *lengths,
                              int &count,
                              int limit,
                              TernaryNode *&parent,
                              UBool &pMatched ) const;

 private:
 /**
  * <p>Private constructor. The root node is not allocated.</p>
  *
  * @param status A status code recording the success of the call.
  */
  MutableTrieDictionary( UErrorCode &status );
};

/*******************************************************************
 * CompactTrieDictionary
 */

/**
 * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
 * to save space.</p>
 */
class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
 private:
    /**
     * Pointer to the compact trie data, viewed through its CompactTrieHeader
     */

  const CompactTrieHeader   *fData;

    /**
     * A UBool indicating whether or not we own the fData.
     */

  UBool                     fOwnData;

    /**
     * NOTE(review): presumably the UDataMemory adopted by the
     * UDataMemory-based constructor -- confirm in the implementation.
     */
    UDataMemory              *fUData;
 public:
  /**
   * <p>Construct a dictionary from a UDataMemory.</p>
   *
   * @param dataObj A pointer to a UDataMemory, which is adopted
   * @param status A status code giving the result of the constructor
   */
  CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);

  /**
   * <p>Construct a dictionary from raw saved data.</p>
   *
   * @param dataObj A pointer to the raw data, which is still owned by the caller
   * @param status A status code giving the result of the constructor
   */
  CompactTrieDictionary(const void *dataObj, UErrorCode &status);

  /**
   * <p>Construct a dictionary from a MutableTrieDictionary.</p>
   *
   * @param dict The dictionary to use as input.
   * @param status A status code recording the success of the call.
   */
  CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );

  /**
   * <p>Virtual destructor.</p>
   */
  virtual ~CompactTrieDictionary();

  /**
   * <p>Find dictionary words that match the text.</p>
   *
   * @param text A UText representing the text. The
   * iterator is left after the longest prefix match in the dictionary.
   * @param rangeEnd The maximum number of code units to match
   * (this parameter is named maxLength in the TrieWordDictionary declaration).
   * @param lengths An array that is filled with the lengths of words that matched.
   * @param count Filled with the number of elements output in lengths.
   * @param limit The size of the lengths array; this limits the number of words output.
   * @return The number of characters in text that were matched.
   */
  virtual int32_t matches( UText *text,
                              int32_t rangeEnd,
                              int32_t *lengths,
                              int &count,
                              int limit ) const;

  /**
   * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
   *
   * @param status A status code recording the success of the call.
   * @return A StringEnumeration that will iterate through the whole dictionary.
   * The caller is responsible for closing it. The order is unspecified.
   */
  virtual StringEnumeration *openWords( UErrorCode &status ) const;

 /**
  * <p>Return the size of the compact data.</p>
  *
  * @return The size of the dictionary's compact data.
  */
  virtual uint32_t dataSize() const;

 /**
  * <p>Return a void * pointer to the compact data, platform-endian.</p>
  *
  * @return The data for the compact dictionary, suitable for passing to the
  * constructor.
  */
  virtual const void *data() const;

 /**
  * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
  *
  * @param status A status code recording the success of the call.
  * @return A MutableTrieDictionary with the same data as this dictionary
  */
  virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;

 private:

 /**
  * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
  *
  * @param dict The dictionary to convert.
  * @param status A status code recording the success of the call.
  * @return A single data blob starting with a CompactTrieHeader.
  */
  static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
                                                        UErrorCode &status );

};

U_NAMESPACE_END

/* TRIEDICT_H */
#endif