1 /* 2 ****************************************************************************** 3 * Copyright (C) 1996-2009, International Business Machines * 4 * Corporation and others. All Rights Reserved. * 5 ****************************************************************************** 6 */ 7 8 /** 9 * \file 10 * \brief C++ API: Collation data used to compute minLengthInChars. 11 * \internal 12 */ 13 14 #ifndef COLL_DATA_H 15 #define COLL_DATA_H 16 17 #include "unicode/utypes.h" 18 19 #if !UCONFIG_NO_COLLATION 20 21 #include "unicode/uobject.h" 22 #include "unicode/ucol.h" 23 24 U_NAMESPACE_BEGIN 25 26 /** 27 * The size of the internal buffer for the Collator's short description string. 28 */ 29 #define KEY_BUFFER_SIZE 64 30 31 /** 32 * The size of the internal CE buffer in a <code>CEList</code> object 33 */ 34 #define CELIST_BUFFER_SIZE 4 35 36 /** 37 * Define this to enable the <code>CEList</code> objects to collect 38 * statistics. 39 */ 40 //#define INSTRUMENT_CELIST 41 42 /** 43 * The size of the initial list in a <code>StringList</code> object. 44 */ 45 #define STRING_LIST_BUFFER_SIZE 16 46 47 /** 48 * Define this to enable the <code>StringList</code> objects to 49 * collect statistics. 50 */ 51 //#define INSTRUMENT_STRING_LIST 52 53 /** 54 * This object holds a list of CEs generated from a particular 55 * <code>UnicodeString</code> 56 * 57 * @internal ICU 4.0.1 technology preview 58 */ 59 class U_I18N_API CEList : public UObject 60 { 61 public: 62 /** 63 * Construct a <code>CEList</code> object. 64 * 65 * @param coll - the Collator used to collect the CEs. 66 * @param string - the string for which to collect the CEs. 67 * @param status - will be set if any errors occur. 68 * 69 * Note: if on return, status is set to an error code, 70 * the only safe thing to do with this object is to call 71 * the destructor. 72 * 73 * @internal ICU 4.0.1 technology preview 74 */ 75 CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status); 76 77 /** 78 * The destructor. 79 */ 80 ~CEList(); 81 82 /** 83 * Return the number of CEs in the list. 84 * 85 * @return the number of CEs in the list. 86 * 87 * @internal ICU 4.0.1 technology preview 88 */ 89 int32_t size() const; 90 91 /** 92 * Get a particular CE from the list. 93 * 94 * @param index - the index of the CE to return 95 * 96 * @return the CE, or <code>0</code> if <code>index</code> is out of range 97 * 98 * @internal ICU 4.0.1 technology preview 99 */ 100 uint32_t get(int32_t index) const; 101 102 /** 103 * Check if the CEs in another <code>CEList</code> match the 104 * suffix of this list starting at a give offset. 105 * 106 * @param offset - the offset of the suffix 107 * @param other - the other <code>CEList</code> 108 * 109 * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise. 110 * 111 * @internal ICU 4.0.1 technology preview 112 */ 113 UBool matchesAt(int32_t offset, const CEList *other) const; 114 115 /** 116 * The index operator. 117 * 118 * @param index - the index 119 * 120 * @return a reference to the given CE in the list 121 * 122 * @internal ICU 4.0.1 technology preview 123 */ 124 uint32_t &operator[](int32_t index) const; 125 126 /** 127 * UObject glue... 128 */ 129 virtual UClassID getDynamicClassID() const; 130 /** 131 * UObject glue... 132 */ 133 static UClassID getStaticClassID(); 134 135 private: 136 void add(uint32_t ce, UErrorCode &status); 137 138 uint32_t ceBuffer[CELIST_BUFFER_SIZE]; 139 uint32_t *ces; 140 int32_t listMax; 141 int32_t listSize; 142 143 #ifdef INSTRUMENT_CELIST 144 static int32_t _active; 145 static int32_t _histogram[10]; 146 #endif 147 }; 148 149 /** 150 * StringList 151 * 152 * This object holds a list of <code>UnicodeString</code> objects. 153 * 154 * @internal ICU 4.0.1 technology preview 155 */ 156 class U_I18N_API StringList : public UObject 157 { 158 public: 159 /** 160 * Construct an empty <code>StringList</code> 161 * 162 * @param status - will be set if any errors occur. 163 * 164 * Note: if on return, status is set to an error code, 165 * the only safe thing to do with this object is to call 166 * the destructor. 167 * 168 * @internal ICU 4.0.1 technology preview 169 */ 170 StringList(UErrorCode &status); 171 172 /** 173 * The destructor. 174 * 175 * @internal ICU 4.0.1 technology preview 176 */ 177 ~StringList(); 178 179 /** 180 * Add a string to the list. 181 * 182 * @param string - the string to add 183 * @param status - will be set if any errors occur. 184 * 185 * @internal ICU 4.0.1 technology preview 186 */ 187 void add(const UnicodeString *string, UErrorCode &status); 188 189 /** 190 * Add an array of Unicode code points to the list. 191 * 192 * @param chars - the address of the array of code points 193 * @param count - the number of code points in the array 194 * @param status - will be set if any errors occur. 195 * 196 * @internal ICU 4.0.1 technology preview 197 */ 198 void add(const UChar *chars, int32_t count, UErrorCode &status); 199 200 /** 201 * Get a particular string from the list. 202 * 203 * @param index - the index of the string 204 * 205 * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code> 206 * if <code>index</code> is out of bounds. 207 * 208 * @internal ICU 4.0.1 technology preview 209 */ 210 const UnicodeString *get(int32_t index) const; 211 212 /** 213 * Get the number of stings in the list. 214 * 215 * @return the number of strings in the list. 216 * 217 * @internal ICU 4.0.1 technology preview 218 */ 219 int32_t size() const; 220 221 /** 222 * the UObject glue... 223 */ 224 virtual UClassID getDynamicClassID() const; 225 /** 226 * the UObject glue... 227 */ 228 static UClassID getStaticClassID(); 229 230 private: 231 UnicodeString *strings; 232 int32_t listMax; 233 int32_t listSize; 234 235 #ifdef INSTRUMENT_STRING_LIST 236 static int32_t _lists; 237 static int32_t _strings; 238 static int32_t _histogram[101]; 239 #endif 240 }; 241 242 /* 243 * Forward references to internal classes. 244 */ 245 class StringToCEsMap; 246 class CEToStringsMap; 247 class CollDataCache; 248 249 /** 250 * CollData 251 * 252 * This class holds the Collator-specific data needed to 253 * compute the length of the shortest string that can 254 * generate a partcular list of CEs. 255 * 256 * <code>CollData</code> objects are quite expensive to compute. Because 257 * of this, they are cached. When you call <code>CollData::open</code> it 258 * returns a reference counted cached object. When you call <code>CollData::close</code> 259 * the reference count on the object is decremented but the object is not deleted. 260 * 261 * If you do not need to reuse any unreferenced objects in the cache, you can call 262 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code> 263 * objects, you can call <code>CollData::freeCollDataCache</code> 264 * 265 * @internal ICU 4.0.1 technology preview 266 */ 267 class U_I18N_API CollData : public UObject 268 { 269 public: 270 /** 271 * Construct a <code>CollData</code> object. 272 * 273 * @param collator - the collator 274 * @param status - will be set if any errors occur. 275 * 276 * @return the <code>CollData</code> object. You must call 277 * <code>close</code> when you are done using the object. 278 * 279 * Note: if on return, status is set to an error code, 280 * the only safe thing to do with this object is to call 281 * <code>CollData::close</code>. 282 * 283 * @internal ICU 4.0.1 technology preview 284 */ 285 static CollData *open(UCollator *collator, UErrorCode &status); 286 287 /** 288 * Release a <code>CollData</code> object. 289 * 290 * @param collData - the object 291 * 292 * @internal ICU 4.0.1 technology preview 293 */ 294 static void close(CollData *collData); 295 296 /** 297 * Get the <code>UCollator</code> object used to create this object. 298 * The object returned may not be the exact object that was used to 299 * create this object, but it will have the same behavior. 300 */ 301 UCollator *getCollator() const; 302 303 /** 304 * Get a list of all the strings which generate a list 305 * of CEs starting with a given CE. 306 * 307 * @param ce - the CE 308 * 309 * return a <code>StringList</code> object containing all 310 * the stirngs, or <code>NULL</code> if there are 311 * no such strings. 312 * 313 * @internal ICU 4.0.1 technology preview. 314 */ 315 const StringList *getStringList(int32_t ce) const; 316 317 /** 318 * Get a list of the CEs generated by a partcular stirng. 319 * 320 * @param string - the string 321 * 322 * @return a <code>CEList</code> object containt the CEs. You 323 * must call <code>freeCEList</code> when you are finished 324 * using the <code>CEList</code>/ 325 * 326 * @internal ICU 4.0.1 technology preview. 327 */ 328 const CEList *getCEList(const UnicodeString *string) const; 329 330 /** 331 * Release a <code>CEList</code> returned by <code>getCEList</code>. 332 * 333 * @param list - the <code>CEList</code> to free. 334 * 335 * @internal ICU 4.0.1 technology preview 336 */ 337 void freeCEList(const CEList *list); 338 339 /** 340 * Return the length of the shortest string that will generate 341 * the given list of CEs. 342 * 343 * @param ces - the CEs 344 * @param offset - the offset of the first CE in the list to use. 345 * 346 * @return the length of the shortest string. 347 * 348 * @internal ICU 4.0.1 technology preview 349 */ 350 int32_t minLengthInChars(const CEList *ces, int32_t offset) const; 351 352 353 /** 354 * Return the length of the shortest string that will generate 355 * the given list of CEs. 356 * 357 * Note: the algorithm used to do this computation is recursive. To 358 * limit the amount of recursion, a "history" list is used to record 359 * the best answer starting at a particular offset in the list of CEs. 360 * If the same offset is visited again during the recursion, the answer 361 * in the history list is used. 362 * 363 * @param ces - the CEs 364 * @param offset - the offset of the first CE in the list to use. 365 * @param history - the history list. Must be at least as long as 366 * the number of cEs in the <code>CEList</code> 367 * 368 * @return the length of the shortest string. 369 * 370 * @internal ICU 4.0.1 technology preview 371 */ 372 int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const; 373 374 /** 375 * UObject glue... 376 */ 377 virtual UClassID getDynamicClassID() const; 378 /** 379 * UObject glue... 380 */ 381 static UClassID getStaticClassID(); 382 383 /** 384 * <code>CollData</code> objects are expensive to compute, and so 385 * may be cached. This routine will free the cached objects and delete 386 * the cache. 387 * 388 * WARNING: Don't call this until you are have called <code>close</code> 389 * for each <code>CollData</code> object that you have used. also, 390 * DO NOT call this if another thread may be calling <code>flushCollDataCache</code> 391 * at the same time. 392 * 393 * @internal 4.0.1 technology preview 394 */ 395 static void freeCollDataCache(); 396 397 /** 398 * <code>CollData</code> objects are expensive to compute, and so 399 * may be cached. This routine will remove any unused <code>CollData</code> 400 * objects from the cache. 401 * 402 * @internal 4.0.1 technology preview 403 */ 404 static void flushCollDataCache(); 405 406 private: 407 friend class CollDataCache; 408 friend class CollDataCacheEntry; 409 410 CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status); 411 ~CollData(); 412 413 CollData(); 414 415 static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength); 416 417 static CollDataCache *getCollDataCache(); 418 419 UCollator *coll; 420 StringToCEsMap *charsToCEList; 421 CEToStringsMap *ceToCharsStartingWith; 422 423 char keyBuffer[KEY_BUFFER_SIZE]; 424 char *key; 425 426 static CollDataCache *collDataCache; 427 428 uint32_t minHan; 429 uint32_t maxHan; 430 431 uint32_t jamoLimits[4]; 432 }; 433 434 U_NAMESPACE_END 435 436 #endif // #if !UCONFIG_NO_COLLATION 437 #endif // #ifndef COLL_DATA_H 438