• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  *   Copyright (C) 1996-2010, International Business Machines                 *
4  *   Corporation and others.  All Rights Reserved.                            *
5  ******************************************************************************
6  */
7 
8 /**
9  * \file
10  * \brief C++ API: Collation data used to compute minLengthInChars.
11  * \internal
12  */
13 
14 #ifndef COLL_DATA_H
15 #define COLL_DATA_H
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_COLLATION
20 
21 #include "unicode/uobject.h"
22 #include "unicode/ucol.h"
23 
24 U_NAMESPACE_BEGIN
25 
26 /**
27  * The size, in chars, of the internal buffer for the Collator's short description string (it dimensions CollData::keyBuffer).
28  * @internal ICU 4.0.1 technology preview
29  */
30 #define KEY_BUFFER_SIZE 64
31 
32  /**
33   * The size, in uint32_t CEs, of the internal CE buffer in a <code>CEList</code> object (it dimensions CEList::ceBuffer).
34   * @internal ICU 4.0.1 technology preview
35   */
36 #define CELIST_BUFFER_SIZE 4
37 
38 /**
39  * \def INSTRUMENT_CELIST
40  * Define this to enable the <code>CEList</code> objects to collect
41  * statistics.
42  * @internal ICU 4.0.1 technology preview
43  */
44 //#define INSTRUMENT_CELIST
45 
46  /**
47   * The size of the initial list in a <code>StringList</code> object. NOTE(review): presumably counted in strings - confirm in the implementation.
48   * @internal ICU 4.0.1 technology preview
49   */
50 #define STRING_LIST_BUFFER_SIZE 16
51 
52 /**
53  * \def INSTRUMENT_STRING_LIST
54  * Define this to enable the <code>StringList</code> objects to
55  * collect statistics.
56  * @internal ICU 4.0.1 technology preview
57  */
58 //#define INSTRUMENT_STRING_LIST
59 
60  /**
61   * This object holds a list of CEs generated from a particular
62   * <code>UnicodeString</code>
63   *
64   * @internal ICU 4.0.1 technology preview
65   */
66 class U_I18N_API CEList : public UObject
67 {
68 public:
69     /**
70      * Construct a <code>CEList</code> object.
71      *
72      * @param coll - the Collator used to collect the CEs.
73      * @param string - the string for which to collect the CEs.
74      * @param status - will be set if any errors occur.
75      *
76      * Note: if on return, status is set to an error code,
77      * the only safe thing to do with this object is to call
78      * the destructor.
79      *
80      * @internal ICU 4.0.1 technology preview
81      */
82     CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
83 
84     /**
85      * The destructor.
86      * @internal ICU 4.0.1 technology preview
87      */
88     ~CEList();
89 
90     /**
91      * Return the number of CEs in the list.
92      *
93      * @return the number of CEs in the list.
94      *
95      * @internal ICU 4.0.1 technology preview
96      */
97     int32_t size() const;
98 
99     /**
100      * Get a particular CE from the list.
101      *
102      * @param index - the index of the CE to return
103      *
104      * @return the CE, or <code>0</code> if <code>index</code> is out of range
105      *
106      * @internal ICU 4.0.1 technology preview
107      */
108     uint32_t get(int32_t index) const;
109 
110     /**
111      * Check if the CEs in another <code>CEList</code> match the
112      * suffix of this list starting at a given offset.
113      *
114      * @param offset - the offset of the suffix
115      * @param other - the other <code>CEList</code>
116      *
117      * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
118      *
119      * @internal ICU 4.0.1 technology preview
120      */
121     UBool matchesAt(int32_t offset, const CEList *other) const;
122 
123     /**
124      * The index operator. Note that this is a const method that returns a non-const reference.
125      *
126      * @param index - the index
127      *
128      * @return a reference to the given CE in the list
129      *
130      * @internal ICU 4.0.1 technology preview
131      */
132     uint32_t &operator[](int32_t index) const;
133 
134     /**
135      * UObject glue...
136      * @internal ICU 4.0.1 technology preview
137      */
138     virtual UClassID getDynamicClassID() const;
139     /**
140      * UObject glue...
141      * @internal ICU 4.0.1 technology preview
142      */
143     static UClassID getStaticClassID();
144 
145 private:
146     void add(uint32_t ce, UErrorCode &status); // NOTE(review): presumably appends one CE to the list and reports failure via status - confirm in the implementation
147 
148     uint32_t ceBuffer[CELIST_BUFFER_SIZE]; // the internal CE buffer, CELIST_BUFFER_SIZE entries long
149     uint32_t *ces;      // NOTE(review): presumably points at the storage currently holding the CEs - confirm
150     int32_t listMax;    // NOTE(review): presumably the capacity of the storage behind ces - confirm
151     int32_t listSize;   // NOTE(review): presumably the number of CEs stored, as reported by size() - confirm
152 
153 #ifdef INSTRUMENT_CELIST
154     static int32_t _active;        // statistics, only present when INSTRUMENT_CELIST is defined
155     static int32_t _histogram[10]; // statistics, only present when INSTRUMENT_CELIST is defined
156 #endif
157 };
158 
159 /**
160  * StringList
161  *
162  * This object holds a list of <code>UnicodeString</code> objects.
163  *
164  * @internal ICU 4.0.1 technology preview
165  */
166 class U_I18N_API StringList : public UObject
167 {
168 public:
169     /**
170      * Construct an empty <code>StringList</code>
171      *
172      * @param status - will be set if any errors occur.
173      *
174      * Note: if on return, status is set to an error code,
175      * the only safe thing to do with this object is to call
176      * the destructor.
177      *
178      * @internal ICU 4.0.1 technology preview
179      */
180     StringList(UErrorCode &status);
181 
182     /**
183      * The destructor.
184      *
185      * @internal ICU 4.0.1 technology preview
186      */
187     ~StringList();
188 
189     /**
190      * Add a string to the list.
191      *
192      * @param string - the string to add
193      * @param status - will be set if any errors occur.
194      *
195      * @internal ICU 4.0.1 technology preview
196      */
197     void add(const UnicodeString *string, UErrorCode &status);
198 
199     /**
200      * Add an array of Unicode code points to the list.
201      *
202      * @param chars - the address of the array of code points
203      * @param count - the number of code points in the array
204      * @param status - will be set if any errors occur.
205      *
206      * @internal ICU 4.0.1 technology preview
207      */
208     void add(const UChar *chars, int32_t count, UErrorCode &status);
209 
210     /**
211      * Get a particular string from the list.
212      *
213      * @param index - the index of the string
214      *
215      * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
216      *         if <code>index</code> is out of bounds.
217      *
218      * @internal ICU 4.0.1 technology preview
219      */
220     const UnicodeString *get(int32_t index) const;
221 
222     /**
223      * Get the number of strings in the list.
224      *
225      * @return the number of strings in the list.
226      *
227      * @internal ICU 4.0.1 technology preview
228      */
229     int32_t size() const;
230 
231     /**
232      * the UObject glue...
233      * @internal ICU 4.0.1 technology preview
234      */
235     virtual UClassID getDynamicClassID() const;
236     /**
237      * the UObject glue...
238      * @internal ICU 4.0.1 technology preview
239      */
240     static UClassID getStaticClassID();
241 
242 private:
243     UnicodeString *strings; // NOTE(review): presumably the array holding the listed strings - confirm in the implementation
244     int32_t listMax;        // NOTE(review): presumably the capacity of the strings array - confirm
245     int32_t listSize;       // NOTE(review): presumably the number of strings stored, as reported by size() - confirm
246 
247 #ifdef INSTRUMENT_STRING_LIST
248     static int32_t _lists;          // statistics, only present when INSTRUMENT_STRING_LIST is defined
249     static int32_t _strings;        // statistics, only present when INSTRUMENT_STRING_LIST is defined
250     static int32_t _histogram[101]; // statistics, only present when INSTRUMENT_STRING_LIST is defined
251 #endif
252 };
253 
254 /*
255  * Forward references to internal classes.
256  */
257 class StringToCEsMap;
258 class CEToStringsMap;
259 class CollDataCache;
260 
261 /**
262  * CollData
263  *
264  * This class holds the Collator-specific data needed to
265  * compute the length of the shortest string that can
266  * generate a particular list of CEs.
267  *
268  * <code>CollData</code> objects are quite expensive to compute. Because
269  * of this, they are cached. When you call <code>CollData::open</code> it
270  * returns a reference counted cached object. When you call <code>CollData::close</code>
271  * the reference count on the object is decremented but the object is not deleted.
272  *
273  * If you do not need to reuse any unreferenced objects in the cache, you can call
274  * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
275  * objects, you can call <code>CollData::freeCollDataCache</code>
276  *
277  * @internal ICU 4.0.1 technology preview
278  */
279 class U_I18N_API CollData : public UObject
280 {
281 public:
282     /**
283      * Construct a <code>CollData</code> object.
284      *
285      * @param collator - the collator
286      * @param status - will be set if any errors occur.
287      *
288      * @return the <code>CollData</code> object. You must call
289      *         <code>close</code> when you are done using the object.
290      *
291      * Note: if on return, status is set to an error code,
292      * the only safe thing to do with this object is to call
293      * <code>CollData::close</code>.
294      *
295      * @internal ICU 4.0.1 technology preview
296      */
297     static CollData *open(UCollator *collator, UErrorCode &status);
298 
299     /**
300      * Release a <code>CollData</code> object.
301      *
302      * @param collData - the object
303      *
304      * @internal ICU 4.0.1 technology preview
305      */
306     static void close(CollData *collData);
307 
308     /**
309      * Get the <code>UCollator</code> object used to create this object.
310      * The object returned may not be the exact object that was used to
311      * create this object, but it will have the same behavior.
312      * @internal ICU 4.0.1 technology preview
313      */
314     UCollator *getCollator() const;
315 
316     /**
317      * Get a list of all the strings which generate a list
318      * of CEs starting with a given CE.
319      *
320      * @param ce - the CE
321      *
322      * @return a <code>StringList</code> object containing all
323      *        the strings, or <code>NULL</code> if there are
324      *        no such strings.
325      *
326      * @internal ICU 4.0.1 technology preview.
327      */
328     const StringList *getStringList(int32_t ce) const;
329 
330     /**
331      * Get a list of the CEs generated by a particular string.
332      *
333      * @param string - the string
334      *
335      * @return a <code>CEList</code> object containing the CEs. You
336      *         must call <code>freeCEList</code> when you are finished
337      *         using the <code>CEList</code>.
338      *
339      * @internal ICU 4.0.1 technology preview.
340      */
341     const CEList *getCEList(const UnicodeString *string) const;
342 
343     /**
344      * Release a <code>CEList</code> returned by <code>getCEList</code>.
345      *
346      * @param list - the <code>CEList</code> to free.
347      *
348      * @internal ICU 4.0.1 technology preview
349      */
350     void freeCEList(const CEList *list);
351 
352     /**
353      * Return the length of the shortest string that will generate
354      * the given list of CEs.
355      *
356      * @param ces - the CEs
357      * @param offset - the offset of the first CE in the list to use.
358      *
359      * @return the length of the shortest string.
360      *
361      * @internal ICU 4.0.1 technology preview
362      */
363     int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
364 
365 
366     /**
367      * Return the length of the shortest string that will generate
368      * the given list of CEs.
369      *
370      * Note: the algorithm used to do this computation is recursive. To
371      * limit the amount of recursion, a "history" list is used to record
372      * the best answer starting at a particular offset in the list of CEs.
373      * If the same offset is visited again during the recursion, the answer
374      * in the history list is used.
375      *
376      * @param ces - the CEs
377      * @param offset - the offset of the first CE in the list to use.
378      * @param history - the history list. Must be at least as long as
379      *                 the number of CEs in the <code>CEList</code>
380      *
381      * @return the length of the shortest string.
382      *
383      * @internal ICU 4.0.1 technology preview
384      */
385    int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
386 
387    /**
388     * UObject glue...
389     * @internal ICU 4.0.1 technology preview
390     */
391     virtual UClassID getDynamicClassID() const;
392    /**
393     * UObject glue...
394     * @internal ICU 4.0.1 technology preview
395     */
396     static UClassID getStaticClassID();
397 
398     /**
399      * <code>CollData</code> objects are expensive to compute, and so
400      * may be cached. This routine will free the cached objects and delete
401      * the cache.
402      *
403      * WARNING: Don't call this until you have called <code>close</code>
404      * for each <code>CollData</code> object that you have used. Also,
405      * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
406      * at the same time.
407      *
408      * @internal 4.0.1 technology preview
409      */
410     static void freeCollDataCache();
411 
412     /**
413      * <code>CollData</code> objects are expensive to compute, and so
414      * may be cached. This routine will remove any unused <code>CollData</code>
415      * objects from the cache.
416      *
417      * @internal 4.0.1 technology preview
418      */
419     static void flushCollDataCache();
420 
421 private:
422     friend class CollDataCache;
423     friend class CollDataCacheEntry;
424 
425     CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status); // private: instances are obtained through open()
426     ~CollData(); // private: instances are released through close()
427 
428     CollData(); // NOTE(review): presumably declared private to prevent default construction - confirm
429 
430     static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength); // NOTE(review): presumably produces the cache key for a collator - confirm in the implementation
431 
432     static CollDataCache *getCollDataCache(); // NOTE(review): presumably returns (and may create) the shared cache - confirm
433 
434     UCollator      *coll;                  // NOTE(review): presumably the collator returned by getCollator() - confirm
435     StringToCEsMap *charsToCEList;         // NOTE(review): presumably backs getCEList() - confirm
436     CEToStringsMap *ceToCharsStartingWith; // NOTE(review): presumably backs getStringList() - confirm
437 
438     char keyBuffer[KEY_BUFFER_SIZE]; // internal buffer of KEY_BUFFER_SIZE chars for the Collator's short description string
439     char *key;                       // NOTE(review): presumably points at the cache key, possibly inside keyBuffer - confirm
440 
441     static CollDataCache *collDataCache; // the cache shared by all CollData objects (static member)
442 
443     uint32_t minHan; // NOTE(review): presumably the lower bound of the Han CE range - confirm
444     uint32_t maxHan; // NOTE(review): presumably the upper bound of the Han CE range - confirm
445 
446     uint32_t jamoLimits[4]; // NOTE(review): presumably boundary values for Jamo CEs - confirm
447 };
448 
449 U_NAMESPACE_END
450 
451 #endif // #if !UCONFIG_NO_COLLATION
452 #endif // #ifndef COLL_DATA_H
453