• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  ******************************************************************************
3  *   Copyright (C) 1996-2012, International Business Machines                 *
4  *   Corporation and others.  All Rights Reserved.                            *
5  ******************************************************************************
6  */
7 
8 /**
9  * \file
10  * \brief C++ API: Collation data used to compute minLengthInChars.
11  * \internal
12  */
13 
14 #ifndef COLL_DATA_H
15 #define COLL_DATA_H
16 
17 #include "unicode/utypes.h"
18 
19 #if !UCONFIG_NO_COLLATION
20 
21 #include "unicode/uobject.h"
22 #include "unicode/ucol.h"
23 
24 U_NAMESPACE_BEGIN
25 
26 #ifndef U_HIDE_INTERNAL_API
27 /**
28  * The size of the internal buffer for the Collator's short description string.
29  * @internal ICU 4.0.1 technology preview
30  */
31 #define KEY_BUFFER_SIZE 64
32 
33  /**
34   * The size of the internal CE buffer in a <code>CEList</code> object
35   * @internal ICU 4.0.1 technology preview
36   */
37 #define CELIST_BUFFER_SIZE 4
38 
39 /**
40  * \def INSTRUMENT_CELIST
41  * Define this to enable the <code>CEList</code> objects to collect
42  * statistics.
43  * @internal ICU 4.0.1 technology preview
44  */
45 //#define INSTRUMENT_CELIST
46 
47  /**
48   * The size of the initial list in a <code>StringList</code> object.
49   * @internal ICU 4.0.1 technology preview
50   */
51 #define STRING_LIST_BUFFER_SIZE 16
52 
53 /**
54  * \def INSTRUMENT_STRING_LIST
55  * Define this to enable the <code>StringList</code> objects to
56  * collect statistics.
57  * @internal ICU 4.0.1 technology preview
58  */
59 //#define INSTRUMENT_STRING_LIST
60 
61  /**
62   * This object holds a list of CEs generated from a particular
63   * <code>UnicodeString</code>
64   *
65   * @internal ICU 4.0.1 technology preview
66   */
67 class U_I18N_API CEList : public UObject
68 {
69 public:
70     /**
71      * Construct a <code>CEList</code> object.
72      *
73      * @param coll - the Collator used to collect the CEs.
74      * @param string - the string for which to collect the CEs.
75      * @param status - will be set if any errors occur.
76      *
77      * Note: if on return, status is set to an error code,
78      * the only safe thing to do with this object is to call
79      * the destructor.
80      *
81      * @internal ICU 4.0.1 technology preview
82      */
83     CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);
84 
85     /**
86      * The destructor.
87      * @internal ICU 4.0.1 technology preview
88      */
89     ~CEList();
90 
91     /**
92      * Return the number of CEs in the list.
93      *
94      * @return the number of CEs in the list.
95      *
96      * @internal ICU 4.0.1 technology preview
97      */
98     int32_t size() const;
99 
100     /**
101      * Get a particular CE from the list.
102      *
103      * @param index - the index of the CE to return
104      *
105      * @return the CE, or <code>0</code> if <code>index</code> is out of range
106      *
107      * @internal ICU 4.0.1 technology preview
108      */
109     uint32_t get(int32_t index) const;
110 
111     /**
112      * Check if the CEs in another <code>CEList</code> match the
113      * suffix of this list starting at a given offset.
114      *
115      * @param offset - the offset of the suffix
116      * @param other - the other <code>CEList</code>
117      *
118      * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
119      *
120      * @internal ICU 4.0.1 technology preview
121      */
122     UBool matchesAt(int32_t offset, const CEList *other) const;
123 
124     /**
125      * The index operator.
126      *
127      * @param index - the index
128      *
129      * @return a reference to the given CE in the list (note: the reference is mutable even though the operator is const)
130      *
131      * @internal ICU 4.0.1 technology preview
132      */
133     uint32_t &operator[](int32_t index) const;
134 
135     /**
136      * UObject glue...
137      * @internal ICU 4.0.1 technology preview
138      */
139     virtual UClassID getDynamicClassID() const;
140     /**
141      * UObject glue...
142      * @internal ICU 4.0.1 technology preview
143      */
144     static UClassID getStaticClassID();
145 
146 private:
147     void add(uint32_t ce, UErrorCode &status); // NOTE(review): presumably appends one CE to the list; defined outside this header - confirm
148 
149     uint32_t ceBuffer[CELIST_BUFFER_SIZE]; // internal CE buffer, sized by CELIST_BUFFER_SIZE
150     uint32_t *ces; // NOTE(review): presumably the active CE storage (ceBuffer or a larger allocation) - confirm in the implementation
151     int32_t listMax; // NOTE(review): presumably the current capacity of ces - confirm
152     int32_t listSize; // NOTE(review): presumably the number of CEs currently stored - confirm
153 
154 #ifdef INSTRUMENT_CELIST
155     static int32_t _active; // statistics; only declared when INSTRUMENT_CELIST is defined
156     static int32_t _histogram[10]; // statistics; only declared when INSTRUMENT_CELIST is defined
157 #endif
158 };
159 
160 /**
161  * StringList
162  *
163  * This object holds a list of <code>UnicodeString</code> objects.
164  *
165  * @internal ICU 4.0.1 technology preview
166  */
167 class U_I18N_API StringList : public UObject
168 {
169 public:
170     /**
171      * Construct an empty <code>StringList</code>
172      *
173      * @param status - will be set if any errors occur.
174      *
175      * Note: if on return, status is set to an error code,
176      * the only safe thing to do with this object is to call
177      * the destructor.
178      *
179      * @internal ICU 4.0.1 technology preview
180      */
181     StringList(UErrorCode &status);
182 
183     /**
184      * The destructor.
185      *
186      * @internal ICU 4.0.1 technology preview
187      */
188     ~StringList();
189 
190     /**
191      * Add a string to the list.
192      *
193      * @param string - the string to add
194      * @param status - will be set if any errors occur.
195      *
196      * @internal ICU 4.0.1 technology preview
197      */
198     void add(const UnicodeString *string, UErrorCode &status);
199 
200     /**
201      * Add an array of Unicode code points to the list.
202      *
203      * @param chars - the address of the array of code points
204      * @param count - the number of code points in the array
205      * @param status - will be set if any errors occur.
206      *
207      * @internal ICU 4.0.1 technology preview
208      */
209     void add(const UChar *chars, int32_t count, UErrorCode &status);
210 
211     /**
212      * Get a particular string from the list.
213      *
214      * @param index - the index of the string
215      *
216      * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
217      *         if <code>index</code> is out of bounds.
218      *
219      * @internal ICU 4.0.1 technology preview
220      */
221     const UnicodeString *get(int32_t index) const;
222 
223     /**
224      * Get the number of strings in the list.
225      *
226      * @return the number of strings in the list.
227      *
228      * @internal ICU 4.0.1 technology preview
229      */
230     int32_t size() const;
231 
232     /**
233      * the UObject glue...
234      * @internal ICU 4.0.1 technology preview
235      */
236     virtual UClassID getDynamicClassID() const;
237     /**
238      * the UObject glue...
239      * @internal ICU 4.0.1 technology preview
240      */
241     static UClassID getStaticClassID();
242 
243 private:
244     UnicodeString *strings; // NOTE(review): presumably the array holding the stored strings - confirm in the implementation
245     int32_t listMax; // NOTE(review): presumably the current capacity of strings (initially STRING_LIST_BUFFER_SIZE?) - confirm
246     int32_t listSize; // NOTE(review): presumably the number of strings currently stored - confirm
247 
248 #ifdef INSTRUMENT_STRING_LIST
249     static int32_t _lists; // statistics; only declared when INSTRUMENT_STRING_LIST is defined
250     static int32_t _strings; // statistics; only declared when INSTRUMENT_STRING_LIST is defined
251     static int32_t _histogram[101]; // statistics; only declared when INSTRUMENT_STRING_LIST is defined
252 #endif
253 };
254 #endif  /* U_HIDE_INTERNAL_API */
255 
256 /*
257  * Forward references to internal classes.
258  */
259 class StringToCEsMap;
260 class CEToStringsMap;
261 class CollDataCache;
262 
263 #ifndef U_HIDE_INTERNAL_API
264 /**
265  * CollData
266  *
267  * This class holds the Collator-specific data needed to
268  * compute the length of the shortest string that can
269  * generate a particular list of CEs.
270  *
271  * <code>CollData</code> objects are quite expensive to compute. Because
272  * of this, they are cached. When you call <code>CollData::open</code> it
273  * returns a reference counted cached object. When you call <code>CollData::close</code>
274  * the reference count on the object is decremented but the object is not deleted.
275  *
276  * If you do not need to reuse any unreferenced objects in the cache, you can call
277  * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
278  * objects, you can call <code>CollData::freeCollDataCache</code>
279  *
280  * @internal ICU 4.0.1 technology preview
281  */
282 class U_I18N_API CollData : public UObject
283 {
284 public:
285     /**
286      * Construct a <code>CollData</code> object.
287      *
288      * @param collator - the collator
289      * @param status - will be set if any errors occur.
290      *
291      * @return the <code>CollData</code> object. You must call
292      *         <code>close</code> when you are done using the object.
293      *
294      * Note: if on return, status is set to an error code,
295      * the only safe thing to do with this object is to call
296      * <code>CollData::close</code>.
297      *
298      * @internal ICU 4.0.1 technology preview
299      */
300     static CollData *open(UCollator *collator, UErrorCode &status);
301 
302     /**
303      * Release a <code>CollData</code> object.
304      *
305      * @param collData - the object
306      *
307      * @internal ICU 4.0.1 technology preview
308      */
309     static void close(CollData *collData);
310 
311     /**
312      * Get the <code>UCollator</code> object used to create this object.
313      * The object returned may not be the exact object that was used to
314      * create this object, but it will have the same behavior.
315      * @internal ICU 4.0.1 technology preview
316      */
317     UCollator *getCollator() const;
318 
319     /**
320      * Get a list of all the strings which generate a list
321      * of CEs starting with a given CE.
322      *
323      * @param ce - the CE (note: declared as int32_t here, while CEList exposes CEs as uint32_t)
324      *
325      * @return a <code>StringList</code> object containing all
326      *        the strings, or <code>NULL</code> if there are
327      *        no such strings.
328      *
329      * @internal ICU 4.0.1 technology preview.
330      */
331     const StringList *getStringList(int32_t ce) const;
332 
333     /**
334      * Get a list of the CEs generated by a particular string.
335      *
336      * @param string - the string
337      *
338      * @return a <code>CEList</code> object containing the CEs. You
339      *         must call <code>freeCEList</code> when you are finished
340      *         using the <code>CEList</code>.
341      *
342      * @internal ICU 4.0.1 technology preview.
343      */
344     const CEList *getCEList(const UnicodeString *string) const;
345 
346     /**
347      * Release a <code>CEList</code> returned by <code>getCEList</code>.
348      *
349      * @param list - the <code>CEList</code> to free.
350      *
351      * @internal ICU 4.0.1 technology preview
352      */
353     void freeCEList(const CEList *list);
354 
355     /**
356      * Return the length of the shortest string that will generate
357      * the given list of CEs.
358      *
359      * @param ces - the CEs
360      * @param offset - the offset of the first CE in the list to use.
361      *
362      * @return the length of the shortest string.
363      *
364      * @internal ICU 4.0.1 technology preview
365      */
366     int32_t minLengthInChars(const CEList *ces, int32_t offset) const;
367 
368 
369     /**
370      * Return the length of the shortest string that will generate
371      * the given list of CEs.
372      *
373      * Note: the algorithm used to do this computation is recursive. To
374      * limit the amount of recursion, a "history" list is used to record
375      * the best answer starting at a particular offset in the list of CEs.
376      * If the same offset is visited again during the recursion, the answer
377      * in the history list is used.
378      *
379      * @param ces - the CEs
380      * @param offset - the offset of the first CE in the list to use.
381      * @param history - the history list. Must be at least as long as
382      *                 the number of CEs in the <code>CEList</code>
383      *
384      * @return the length of the shortest string.
385      *
386      * @internal ICU 4.0.1 technology preview
387      */
388    int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;
389 
390    /**
391     * UObject glue...
392     * @internal ICU 4.0.1 technology preview
393     */
394     virtual UClassID getDynamicClassID() const;
395    /**
396     * UObject glue...
397     * @internal ICU 4.0.1 technology preview
398     */
399     static UClassID getStaticClassID();
400 
401     /**
402      * <code>CollData</code> objects are expensive to compute, and so
403      * may be cached. This routine will free the cached objects and delete
404      * the cache.
405      *
406      * WARNING: Don't call this until you have called <code>close</code>
407      * for each <code>CollData</code> object that you have used. Also,
408      * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
409      * at the same time.
410      *
411      * @internal ICU 4.0.1 technology preview
412      */
413     static void freeCollDataCache();
414 
415     /**
416      * <code>CollData</code> objects are expensive to compute, and so
417      * may be cached. This routine will remove any unused <code>CollData</code>
418      * objects from the cache.
419      *
420      * @internal ICU 4.0.1 technology preview
421      */
422     static void flushCollDataCache();
423 
424 private:
425     friend class CollDataCache;
426     friend class CollDataCacheEntry;
427 
428     CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status); // private: public callers obtain instances through open()
429     ~CollData(); // private: public callers release instances through close()
430 
431     CollData();
432 
433     static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength); // NOTE(review): presumably builds the cache key for a collator into buffer - confirm
434 
435     static CollDataCache *getCollDataCache();
436 
437     UCollator      *coll;
438     StringToCEsMap *charsToCEList;
439     CEToStringsMap *ceToCharsStartingWith;
440 
441     char keyBuffer[KEY_BUFFER_SIZE]; // internal buffer for the Collator's short description string, sized by KEY_BUFFER_SIZE
442     char *key; // NOTE(review): presumably the cache key (keyBuffer or a larger allocation) - confirm in the implementation
443 
444     static CollDataCache *collDataCache; // NOTE(review): presumably the shared cache handed out by getCollDataCache() - confirm
445 
446     uint32_t minHan; // NOTE(review): presumably the lower bound of the CE range for Han characters - confirm
447     uint32_t maxHan; // NOTE(review): presumably the upper bound of the CE range for Han characters - confirm
448 
449     uint32_t jamoLimits[4]; // NOTE(review): presumably CE boundaries for Hangul Jamo - confirm
450 };
451 #endif  /* U_HIDE_INTERNAL_API */
452 
453 U_NAMESPACE_END
454 
455 #endif // #if !UCONFIG_NO_COLLATION
456 #endif // #ifndef COLL_DATA_H
457