• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  *******************************************************************************
3  * Copyright (C) 2006, International Business Machines Corporation and others. *
4  * All Rights Reserved.                                                        *
5  *******************************************************************************
6  */
7 
8 #ifndef TRIEDICT_H
9 #define TRIEDICT_H
10 
11 #include "unicode/utypes.h"
12 #include "unicode/uobject.h"
13 #include "unicode/utext.h"
14 
15 struct UEnumeration;
16 struct UDataSwapper;
17 struct UDataMemory;
18 
19  /**
20   * <p>UDataSwapFn function for use in swapping a compact dictionary.</p>
21   *
22   * @param ds Pointer to UDataSwapper containing global data about the
23   *           transformation and function pointers for handling primitive
24   *           types.
25   * @param inData Pointer to the input data to be transformed or examined.
26   * @param length Length of the data, counting bytes. May be -1 for preflighting.
27   *               If length>=0, then transform the data.
28   *               If length==-1, then only determine the length of the data.
29   *               The length cannot be determined from the data itself for all
30   *               types of data (e.g., not for simple arrays of integers).
31   * @param outData Pointer to the output data buffer.
32   *                If length>=0 (transformation), then the output buffer must
33   *                have a capacity of at least length.
34   *                If length==-1, then outData will not be used and can be NULL.
35   * @param pErrorCode ICU UErrorCode parameter, must not be NULL and must
36   *                   fulfill U_SUCCESS on input.
37   * @return The actual length of the data.
38   *
39   * @see UDataSwapper
40   */
41 
42 U_CAPI int32_t U_EXPORT2
43 triedict_swap(const UDataSwapper *ds,
44             const void *inData, int32_t length, void *outData,
45             UErrorCode *pErrorCode);
46 
47 U_NAMESPACE_BEGIN
48 
49 class StringEnumeration;
50 struct CompactTrieHeader;
51 
52 /*******************************************************************
53  * TrieWordDictionary
54  */
55 
56 /**
57  * <p>TrieWordDictionary is an abstract class that represents a word
58  * dictionary based on a trie. The base protocol is read-only.
59  * Subclasses may allow writing.</p>
60  */
61 class U_COMMON_API TrieWordDictionary : public UMemory {
62  public:
63 
64   /**
65    * <p>Default constructor.</p>
66    *
67    */
68   TrieWordDictionary();
69 
70   /**
71    * <p>Virtual destructor.</p>
72    */
73   virtual ~TrieWordDictionary();
74 
75  /**
76   * <p>Find dictionary words that match the text.</p>
77   *
78   * @param text A UText representing the text. The
79   * iterator is left after the longest prefix match in the dictionary.
80   * @param start The current position in text.
81   * @param maxLength The maximum number of code units to match.
82   * @param lengths An array that is filled with the lengths of words that matched.
83   * @param count Filled with the number of elements output in lengths.
84   * @param limit The size of the lengths array; this limits the number of words output.
85   * @return The number of characters in text that were matched.
86   */
87   virtual int32_t matches( UText *text,
88                               int32_t maxLength,
89                               int32_t *lengths,
90                               int &count,
91                               int limit ) const = 0;
92 
93   /**
94    * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
95    *
96    * @param status A status code recording the success of the call.
97    * @return A StringEnumeration that will iterate through the whole dictionary.
98    * The caller is responsible for closing it. The order is unspecified.
99    */
100   virtual StringEnumeration *openWords( UErrorCode &status ) const = 0;
101 
102 };
103 
104 /*******************************************************************
105  * MutableTrieDictionary
106  */
107 
108 /**
109  * <p>MutableTrieDictionary is a TrieWordDictionary that allows words to be
110  * added.</p>
111  */
112 
113 struct TernaryNode;             // Forwards declaration
114 
115 class U_COMMON_API MutableTrieDictionary : public TrieWordDictionary {
116  private:
117     /**
118      * The root node of the trie
119      * @internal
120      */
121 
122   TernaryNode               *fTrie;
123 
124     /**
125      * A UText for internal use
126      * @internal
127      */
128 
129   UText    *fIter;
130 
131   friend class CompactTrieDictionary;   // For fast conversion
132 
133  public:
134 
135  /**
136   * <p>Constructor.</p>
137   *
138   * @param median A UChar around which to balance the trie. Ideally, it should
139   * begin at least one word that is near the median of the set in the dictionary
140   * @param status A status code recording the success of the call.
141   */
142   MutableTrieDictionary( UChar median, UErrorCode &status );
143 
144   /**
145    * <p>Virtual destructor.</p>
146    */
147   virtual ~MutableTrieDictionary();
148 
149  /**
150   * <p>Find dictionary words that match the text.</p>
151   *
152   * @param text A UText representing the text. The
153   * iterator is left after the longest prefix match in the dictionary.
154   * @param maxLength The maximum number of code units to match.
155   * @param lengths An array that is filled with the lengths of words that matched.
156   * @param count Filled with the number of elements output in lengths.
157   * @param limit The size of the lengths array; this limits the number of words output.
158   * @return The number of characters in text that were matched.
159   */
160   virtual int32_t matches( UText *text,
161                               int32_t maxLength,
162                               int32_t *lengths,
163                               int &count,
164                               int limit ) const;
165 
166   /**
167    * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
168    *
169    * @param status A status code recording the success of the call.
170    * @return A StringEnumeration that will iterate through the whole dictionary.
171    * The caller is responsible for closing it. The order is unspecified.
172    */
173   virtual StringEnumeration *openWords( UErrorCode &status ) const;
174 
175  /**
176   * <p>Add one word to the dictionary.</p>
177   *
178   * @param word A UChar buffer containing the word.
179   * @param length The length of the word.
180   * @param status The resultant status
181   */
182   virtual void addWord( const UChar *word,
183                         int32_t length,
184                         UErrorCode &status);
185 
186 #if 0
187  /**
188   * <p>Add all strings from a UEnumeration to the dictionary.</p>
189   *
190   * @param words A UEnumeration that will return the desired words.
191   * @param status The resultant status
192   */
193   virtual void addWords( UEnumeration *words, UErrorCode &status );
194 #endif
195 
196 protected:
197  /**
198   * <p>Search the dictionary for matches.</p>
199   *
200   * @param text A UText representing the text. The
201   * iterator is left after the longest prefix match in the dictionary.
202   * @param maxLength The maximum number of code units to match.
203   * @param lengths An array that is filled with the lengths of words that matched.
204   * @param count Filled with the number of elements output in lengths.
205   * @param limit The size of the lengths array; this limits the number of words output.
206   * @param parent The parent of the current node
207   * @param pMatched The returned parent node matched the input
208   * @return The number of characters in text that were matched.
209   */
210   virtual int32_t search( UText *text,
211                               int32_t maxLength,
212                               int32_t *lengths,
213                               int &count,
214                               int limit,
215                               TernaryNode *&parent,
216                               UBool &pMatched ) const;
217 
218 private:
219  /**
220   * <p>Private constructor. The root node it not allocated.</p>
221   *
222   * @param status A status code recording the success of the call.
223   */
224   MutableTrieDictionary( UErrorCode &status );
225 };
226 
227 /*******************************************************************
228  * CompactTrieDictionary
229  */
230 
231 /**
232  * <p>CompactTrieDictionary is a TrieWordDictionary that has been compacted
233  * to save space.</p>
234  */
235 class U_COMMON_API CompactTrieDictionary : public TrieWordDictionary {
236  private:
237     /**
238      * The root node of the trie
239      */
240 
241   const CompactTrieHeader   *fData;
242 
243     /**
244      * A UBool indicating whether or not we own the fData.
245      */
246 
247   UBool                     fOwnData;
248 
249     UDataMemory              *fUData;
250  public:
251   /**
252    * <p>Construct a dictionary from a UDataMemory.</p>
253    *
254    * @param data A pointer to a UDataMemory, which is adopted
255    * @param status A status code giving the result of the constructor
256    */
257   CompactTrieDictionary(UDataMemory *dataObj, UErrorCode &status);
258 
259   /**
260    * <p>Construct a dictionary from raw saved data.</p>
261    *
262    * @param data A pointer to the raw data, which is still owned by the caller
263    * @param status A status code giving the result of the constructor
264    */
265   CompactTrieDictionary(const void *dataObj, UErrorCode &status);
266 
267   /**
268    * <p>Construct a dictionary from a MutableTrieDictionary.</p>
269    *
270    * @param dict The dictionary to use as input.
271    * @param status A status code recording the success of the call.
272    */
273   CompactTrieDictionary( const MutableTrieDictionary &dict, UErrorCode &status );
274 
275   /**
276    * <p>Virtual destructor.</p>
277    */
278   virtual ~CompactTrieDictionary();
279 
280  /**
281   * <p>Find dictionary words that match the text.</p>
282   *
283   * @param text A UText representing the text. The
284   * iterator is left after the longest prefix match in the dictionary.
285   * @param maxLength The maximum number of code units to match.
286   * @param lengths An array that is filled with the lengths of words that matched.
287   * @param count Filled with the number of elements output in lengths.
288   * @param limit The size of the lengths array; this limits the number of words output.
289   * @return The number of characters in text that were matched.
290   */
291   virtual int32_t matches( UText *text,
292                               int32_t rangeEnd,
293                               int32_t *lengths,
294                               int &count,
295                               int limit ) const;
296 
297   /**
298    * <p>Return a StringEnumeration for iterating all the words in the dictionary.</p>
299    *
300    * @param status A status code recording the success of the call.
301    * @return A StringEnumeration that will iterate through the whole dictionary.
302    * The caller is responsible for closing it. The order is unspecified.
303    */
304   virtual StringEnumeration *openWords( UErrorCode &status ) const;
305 
306  /**
307   * <p>Return the size of the compact data.</p>
308   *
309   * @return The size of the dictionary's compact data.
310   */
311   virtual uint32_t dataSize() const;
312 
313  /**
314   * <p>Return a void * pointer to the compact data, platform-endian.</p>
315   *
316   * @return The data for the compact dictionary, suitable for passing to the
317   * constructor.
318   */
319   virtual const void *data() const;
320 
321  /**
322   * <p>Return a MutableTrieDictionary clone of this dictionary.</p>
323   *
324   * @param status A status code recording the success of the call.
325   * @return A MutableTrieDictionary with the same data as this dictionary
326   */
327   virtual MutableTrieDictionary *cloneMutable( UErrorCode &status ) const;
328 
329  private:
330 
331   /**
332    * <p>Convert a MutableTrieDictionary into a compact data blob.</p>
333    *
334    * @param dict The dictionary to convert.
335    * @param status A status code recording the success of the call.
336    * @return A single data blob starting with a CompactTrieHeader.
337    */
338   static CompactTrieHeader *compactMutableTrieDictionary( const MutableTrieDictionary &dict,
339                                                         UErrorCode &status );
340 
341 };
342 
343 U_NAMESPACE_END
344 
345     /* TRIEDICT_H */
346 #endif
347