• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  *   Copyright (C) 1996-2009, International Business Machines                 *
4  *   Corporation and others.  All Rights Reserved.                            *
5  ******************************************************************************
6  */
7 
8 /**
9  * \file
10  * \brief C++ API: Collation data used to compute minLengthInChars.
11  * \internal
12  */
13 
14 #ifndef COLL_DATA_H
15 #define COLL_DATA_H
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_COLLATION
20 
21 #include "unicode/uobject.h"
22 #include "unicode/ucol.h"
23 
24 U_NAMESPACE_BEGIN
25 
26 /**
27  * The size, in <code>char</code>s, of the internal <code>keyBuffer</code> in a <code>CollData</code> object; it is the buffer for the Collator's short description string.
28  */
29 #define KEY_BUFFER_SIZE 64
30 
31  /**
32   * The number of CEs (<code>uint32_t</code> elements) in the internal <code>ceBuffer</code> of a <code>CEList</code> object.
33   */
34 #define CELIST_BUFFER_SIZE 4
35 
36 /**
37  * Define this to enable the <code>CEList</code> objects to collect statistics;
38  * doing so adds the static <code>_active</code> and <code>_histogram</code> members to the class.
39  */
40 //#define INSTRUMENT_CELIST
41 
42  /**
43   * The size of the initial list in a <code>StringList</code> object (presumably a count of strings; it is not used in this header - confirm in the implementation).
44   */
45 #define STRING_LIST_BUFFER_SIZE 16
46 
47 /**
48  * Define this to enable the <code>StringList</code> objects to collect statistics;
49  * doing so adds the static <code>_lists</code>, <code>_strings</code> and <code>_histogram</code> members to the class.
50  */
51 //#define INSTRUMENT_STRING_LIST
52 
53  /**
54   * This object holds a list of CEs generated from a particular <code>UnicodeString</code>.
55   *
56   * NOTE(review): the class holds a raw <code>ces</code> pointer and declares a destructor but no copy constructor or assignment operator - presumably instances must not be copied; confirm.
57   * @internal ICU 4.0.1 technology preview
58   */
59 class U_I18N_API CEList : public UObject
60 {
61 public:
62     /**
63      * Construct a <code>CEList</code> object holding the CEs of a string.
64      *
65      * @param coll - the Collator used to collect the CEs.
66      * @param string - the string for which to collect the CEs.
67      * @param status - will be set if any errors occur.
68      *
69      * Note: if, on return, <code>status</code> is set to an error code,
70      * the only safe thing to do with this object is to call
71      * the destructor.
72      *
73      * @internal ICU 4.0.1 technology preview
74      */
75     CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
76 
77     /**
78      * The destructor.
79      */
80     ~CEList();
81 
82     /**
83      * Return the number of CEs in the list.
84      *
85      * @return the number of CEs in the list.
86      *
87      * @internal ICU 4.0.1 technology preview
88      */
89     int32_t size() const;
90 
91     /**
92      * Get a particular CE from the list.
93      *
94      * @param index - the index of the CE to return
95      *
96      * @return the CE, or <code>0</code> if <code>index</code> is out of range
97      *
98      * @internal ICU 4.0.1 technology preview
99      */
100     uint32_t get(int32_t index) const;
101 
102     /**
103      * Check if the CEs in another <code>CEList</code> match the
104      * suffix of this list starting at a given offset.
105      *
106      * @param offset - the offset of the suffix
107      * @param other - the other <code>CEList</code>
108      *
109      * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
110      *
111      * @internal ICU 4.0.1 technology preview
112      */
113     UBool matchesAt(int32_t offset, const CEList *other) const;
114 
115     /**
116      * The index operator. It is a const method but returns a non-const reference to the stored CE.
117      * Unlike <code>get</code>, no out-of-range behavior is documented for this operator.
118      * @param index - the index
119      *
120      * @return a reference to the given CE in the list
121      *
122      * @internal ICU 4.0.1 technology preview
123      */
124     uint32_t &operator[](int32_t index) const;
125 
126     /**
127      * UObject glue: the virtual class ID accessor.
128      */
129     virtual UClassID getDynamicClassID() const;
130     /**
131      * UObject glue: the static class ID accessor.
132      */
133     static UClassID getStaticClassID();
134 
135 private:
136     void add(uint32_t ce, UErrorCode &status); // presumably appends one CE, reporting failures through status - confirm in the implementation
137 
138     uint32_t ceBuffer[CELIST_BUFFER_SIZE]; // inline storage for CELIST_BUFFER_SIZE CEs
139     uint32_t *ces;                         // presumably points at ceBuffer or at a larger allocated array - TODO confirm
140     int32_t listMax;                       // presumably the capacity of ces - TODO confirm
141     int32_t listSize;                      // presumably the number of CEs currently stored - TODO confirm
142 
143 #ifdef INSTRUMENT_CELIST
144     static int32_t _active;
145     static int32_t _histogram[10];
146 #endif
147 };
148 
149 /**
150  * StringList
151  *
152  * This object holds a list of <code>UnicodeString</code> objects.
153  * NOTE(review): it holds a raw <code>strings</code> pointer and declares a destructor but no copy constructor or assignment operator - presumably instances must not be copied; confirm.
154  * @internal ICU 4.0.1 technology preview
155  */
156 class U_I18N_API StringList : public UObject
157 {
158 public:
159     /**
160      * Construct an empty <code>StringList</code>.
161      *
162      * @param status - will be set if any errors occur.
163      *
164      * Note: if, on return, <code>status</code> is set to an error code,
165      * the only safe thing to do with this object is to call
166      * the destructor.
167      *
168      * @internal ICU 4.0.1 technology preview
169      */
170     StringList(UErrorCode &status);
171 
172     /**
173      * The destructor.
174      *
175      * @internal ICU 4.0.1 technology preview
176      */
177     ~StringList();
178 
179     /**
180      * Add a string to the list.
181      *
182      * @param string - the string to add
183      * @param status - will be set if any errors occur.
184      *
185      * @internal ICU 4.0.1 technology preview
186      */
187     void add(const UnicodeString *string, UErrorCode &status);
188 
189     /**
190      * Add a string, given as an array of <code>UChar</code> values, to the list.
191      *
192      * @param chars - the address of the <code>UChar</code> array
193      * @param count - the number of <code>UChar</code>s in the array
194      * @param status - will be set if any errors occur.
195      *
196      * @internal ICU 4.0.1 technology preview
197      */
198     void add(const UChar *chars, int32_t count, UErrorCode &status);
199 
200     /**
201      * Get a particular string from the list.
202      *
203      * @param index - the index of the string
204      *
205      * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
206      *         if <code>index</code> is out of bounds.
207      *
208      * @internal ICU 4.0.1 technology preview
209      */
210     const UnicodeString *get(int32_t index) const;
211 
212     /**
213      * Get the number of strings in the list.
214      *
215      * @return the number of strings in the list.
216      *
217      * @internal ICU 4.0.1 technology preview
218      */
219     int32_t size() const;
220 
221     /**
222      * UObject glue: the virtual class ID accessor.
223      */
224     virtual UClassID getDynamicClassID() const;
225     /**
226      * UObject glue: the static class ID accessor.
227      */
228     static UClassID getStaticClassID();
229 
230 private:
231     UnicodeString *strings; // presumably the array holding the listed strings - TODO confirm in the implementation
232     int32_t listMax;        // presumably the capacity of strings - TODO confirm
233     int32_t listSize;       // presumably the number of strings currently stored - TODO confirm
234 
235 #ifdef INSTRUMENT_STRING_LIST
236     static int32_t _lists;
237     static int32_t _strings;
238     static int32_t _histogram[101];
239 #endif
240 };
241 
242 /*
243  * Forward declarations of internal classes; this header only uses them through pointers and a friend declaration.
244  */
245 class StringToCEsMap;
246 class CEToStringsMap;
247 class CollDataCache;
248 
249 /**
250  * CollData
251  *
252  * This class holds the Collator-specific data needed to
253  * compute the length of the shortest string that can
254  * generate a particular list of CEs.
255  *
256  * <code>CollData</code> objects are quite expensive to compute. Because
257  * of this, they are cached. When you call <code>CollData::open</code> it
258  * returns a reference counted cached object. When you call <code>CollData::close</code>
259  * the reference count on the object is decremented but the object is not deleted.
260  *
261  * If you do not need to reuse any unreferenced objects in the cache, you can call
262  * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
263  * objects, you can call <code>CollData::freeCollDataCache</code>.
264  *
265  * @internal ICU 4.0.1 technology preview
266  */
267 class U_I18N_API CollData : public UObject
268 {
269 public:
270     /**
271      * Obtain a <code>CollData</code> object for a collator.
272      *
273      * @param collator - the collator
274      * @param status - will be set if any errors occur.
275      *
276      * @return the <code>CollData</code> object. You must call
277      *         <code>close</code> when you are done using the object.
278      *
279      * Note: if, on return, <code>status</code> is set to an error code,
280      * the only safe thing to do with this object is to call
281      * <code>CollData::close</code>.
282      *
283      * @internal ICU 4.0.1 technology preview
284      */
285     static CollData *open(UCollator *collator, UErrorCode &status);
286 
287     /**
288      * Release a <code>CollData</code> object obtained from <code>open</code>.
289      *
290      * @param collData - the object
291      *
292      * @internal ICU 4.0.1 technology preview
293      */
294     static void close(CollData *collData);
295 
296     /**
297      * Get the <code>UCollator</code> object used to create this object.
298      * @return the collator; it may not be the exact object that was used to
299      *         create this object, but it will have the same behavior.
300      */
301     UCollator *getCollator() const;
302 
303     /**
304      * Get a list of all the strings which generate a list
305      * of CEs starting with a given CE.
306      *
307      * @param ce - the CE
308      *
309      * @return a <code>StringList</code> object containing all
310      *        the strings, or <code>NULL</code> if there are
311      *        no such strings.
312      *
313      * @internal ICU 4.0.1 technology preview.
314      */
315     const StringList *getStringList(int32_t ce) const;
316 
317     /**
318      * Get a list of the CEs generated by a particular string.
319      *
320      * @param string - the string
321      *
322      * @return a <code>CEList</code> object containing the CEs. You
323      *         must call <code>freeCEList</code> when you are finished
324      *         using the <code>CEList</code>.
325      *
326      * @internal ICU 4.0.1 technology preview.
327      */
328     const CEList *getCEList(const UnicodeString *string) const;
329 
330     /**
331      * Release a <code>CEList</code> returned by <code>getCEList</code>.
332      *
333      * @param list - the <code>CEList</code> to free.
334      *
335      * @internal ICU 4.0.1 technology preview
336      */
337     void freeCEList(const CEList *list);
338 
339     /**
340      * Return the length of the shortest string that will generate
341      * the given list of CEs.
342      *
343      * @param ces - the CEs
344      * @param offset - the offset of the first CE in the list to use.
345      *
346      * @return the length of the shortest string.
347      *
348      * @internal ICU 4.0.1 technology preview
349      */
350     int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
351 
352 
353     /**
354      * Return the length of the shortest string that will generate
355      * the given list of CEs.
356      *
357      * Note: the algorithm used to do this computation is recursive. To
358      * limit the amount of recursion, a "history" list is used to record
359      * the best answer starting at a particular offset in the list of CEs.
360      * If the same offset is visited again during the recursion, the answer
361      * in the history list is used.
362      *
363      * @param ces - the CEs
364      * @param offset - the offset of the first CE in the list to use.
365      * @param history - the history list. Must be at least as long as
366      *                 the number of CEs in the <code>CEList</code>
367      *
368      * @return the length of the shortest string.
369      *
370      * @internal ICU 4.0.1 technology preview
371      */
372    int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
373 
374    /**
375     * UObject glue: the virtual class ID accessor.
376     */
377     virtual UClassID getDynamicClassID() const;
378    /**
379     * UObject glue: the static class ID accessor.
380     */
381     static UClassID getStaticClassID();
382 
383     /**
384      * <code>CollData</code> objects are expensive to compute, and so
385      * may be cached. This routine will free the cached objects and delete
386      * the cache.
387      *
388      * WARNING: Don't call this until you have called <code>close</code>
389      * for each <code>CollData</code> object that you have used. Also,
390      * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
391      * at the same time.
392      *
393      * @internal 4.0.1 technology preview
394      */
395     static void freeCollDataCache();
396 
397     /**
398      * <code>CollData</code> objects are expensive to compute, and so
399      * may be cached. This routine will remove any unused <code>CollData</code>
400      * objects from the cache.
401      *
402      * @internal 4.0.1 technology preview
403      */
404     static void flushCollDataCache();
405 
406 private:
407     friend class CollDataCache;      // friends may use the private constructors and destructor declared below
408     friend class CollDataCacheEntry;
409 
410     CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status);
411     ~CollData(); // private: outside code releases objects through CollData::close rather than deleting them
412 
413     CollData(); // private default constructor - presumably to prevent default construction; confirm
414 
415     static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength);
416 
417     static CollDataCache *getCollDataCache();
418 
419     UCollator      *coll;
420     StringToCEsMap *charsToCEList;
421     CEToStringsMap *ceToCharsStartingWith;
422 
423     char keyBuffer[KEY_BUFFER_SIZE]; // inline storage of KEY_BUFFER_SIZE chars
424     char *key;                       // presumably points at keyBuffer or at a larger allocation - TODO confirm
425 
426     static CollDataCache *collDataCache;
427 
428     uint32_t minHan;
429     uint32_t maxHan;
430 
431     uint32_t jamoLimits[4];
432 };
433 
434 U_NAMESPACE_END
435 
436 #endif // #if !UCONFIG_NO_COLLATION
437 #endif // #ifndef COLL_DATA_H
438