/*
 ******************************************************************************
 *   Copyright (C) 1996-2012, International Business Machines                 *
 *   Corporation and others.  All Rights Reserved.                            *
 ******************************************************************************
 */

/**
 * \file
 * \brief C++ API: Collation data used to compute minLengthInChars.
 * \internal
 */

#ifndef COLL_DATA_H
#define COLL_DATA_H

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "unicode/uobject.h"
#include "unicode/ucol.h"

U_NAMESPACE_BEGIN

#ifndef U_HIDE_INTERNAL_API
/**
 * The size of the internal buffer for the Collator's short description string.
 * Used below to size <code>CollData::keyBuffer</code>.
 * @internal ICU 4.0.1 technology preview
 */
#define KEY_BUFFER_SIZE 64

/**
 * The size of the internal CE buffer in a <code>CEList</code> object.
 * Used below to size <code>CEList::ceBuffer</code>.
 * @internal ICU 4.0.1 technology preview
 */
#define CELIST_BUFFER_SIZE 4

/**
 * \def INSTRUMENT_CELIST
 * Define this to enable the <code>CEList</code> objects to collect
 * statistics. When defined, <code>CEList</code> gains the extra static
 * members declared under <code>\#ifdef INSTRUMENT_CELIST</code>.
 * @internal ICU 4.0.1 technology preview
 */
//#define INSTRUMENT_CELIST

/**
 * The size of the initial list in a <code>StringList</code> object.
 * @internal ICU 4.0.1 technology preview
 */
#define STRING_LIST_BUFFER_SIZE 16

/**
 * \def INSTRUMENT_STRING_LIST
 * Define this to enable the <code>StringList</code> objects to
 * collect statistics. When defined, <code>StringList</code> gains the extra
 * static members declared under <code>\#ifdef INSTRUMENT_STRING_LIST</code>.
 * @internal ICU 4.0.1 technology preview
 */
//#define INSTRUMENT_STRING_LIST

/**
 * This object holds a list of CEs generated from a particular
 * <code>UnicodeString</code>.
 *
 * @internal ICU 4.0.1 technology preview
 */
class U_I18N_API CEList : public UObject
{
public:
    /**
     * Construct a <code>CEList</code> object.
     *
     * @param coll - the Collator used to collect the CEs.
     * @param string - the string for which to collect the CEs.
     * @param status - will be set if any errors occur.
     *
     * Note: if on return, status is set to an error code,
     * the only safe thing to do with this object is to call
     * the destructor.
     *
     * @internal ICU 4.0.1 technology preview
     */
    CEList(UCollator *coll, const UnicodeString &string, UErrorCode &status);

    /**
     * The destructor.
     * @internal ICU 4.0.1 technology preview
     */
    ~CEList();

    /**
     * Return the number of CEs in the list.
     *
     * @return the number of CEs in the list.
     *
     * @internal ICU 4.0.1 technology preview
     */
    int32_t size() const;

    /**
     * Get a particular CE from the list.
     *
     * @param index - the index of the CE to return
     *
     * @return the CE, or <code>0</code> if <code>index</code> is out of range
     *
     * @internal ICU 4.0.1 technology preview
     */
    uint32_t get(int32_t index) const;

    /**
     * Check if the CEs in another <code>CEList</code> match the
     * suffix of this list starting at a given offset.
     *
     * @param offset - the offset of the suffix
     * @param other - the other <code>CEList</code>
     *
     * @return <code>TRUE</code> if the CEs match, <code>FALSE</code> otherwise.
     *
     * @internal ICU 4.0.1 technology preview
     */
    UBool matchesAt(int32_t offset, const CEList *other) const;

    /**
     * The index operator.
     *
     * Note that this is a <code>const</code> member that returns a
     * non-<code>const</code> reference, so the referenced CE can be
     * modified through it.
     *
     * @param index - the index
     *
     * @return a reference to the given CE in the list
     *
     * @internal ICU 4.0.1 technology preview
     */
    uint32_t &operator[](int32_t index) const;

    /**
     * UObject glue...
     * @internal ICU 4.0.1 technology preview
     */
    virtual UClassID getDynamicClassID() const;
    /**
     * UObject glue...
     * @internal ICU 4.0.1 technology preview
     */
    static UClassID getStaticClassID();

private:
    // NOTE(review): presumably appends ce to the list, reporting failures
    // through status -- confirm against the implementation.
    void add(uint32_t ce, UErrorCode &status);

    // The internal CE buffer, CELIST_BUFFER_SIZE entries long.
    uint32_t ceBuffer[CELIST_BUFFER_SIZE];
    // NOTE(review): presumably points at the current CE storage, with listMax
    // as its capacity and listSize as the number of CEs in use -- confirm
    // against the implementation.
    uint32_t *ces;
    int32_t listMax;
    int32_t listSize;

    // Statistics members; only present when INSTRUMENT_CELIST is defined.
#ifdef INSTRUMENT_CELIST
    static int32_t _active;
    static int32_t _histogram[10];
#endif
};

/**
 * StringList
 *
 * This object holds a list of <code>UnicodeString</code> objects.
 *
 * @internal ICU 4.0.1 technology preview
 */
class U_I18N_API StringList : public UObject
{
public:
    /**
     * Construct an empty <code>StringList</code>
     *
     * @param status - will be set if any errors occur.
     *
     * Note: if on return, status is set to an error code,
     * the only safe thing to do with this object is to call
     * the destructor.
     *
     * @internal ICU 4.0.1 technology preview
     */
    StringList(UErrorCode &status);

    /**
     * The destructor.
     *
     * @internal ICU 4.0.1 technology preview
     */
    ~StringList();

    /**
     * Add a string to the list.
     *
     * @param string - the string to add
     * @param status - will be set if any errors occur.
     *
     * @internal ICU 4.0.1 technology preview
     */
    void add(const UnicodeString *string, UErrorCode &status);

    /**
     * Add an array of Unicode code points to the list.
     *
     * @param chars - the address of the array of code points
     * @param count - the number of code points in the array
     * @param status - will be set if any errors occur.
     *
     * @internal ICU 4.0.1 technology preview
     */
    void add(const UChar *chars, int32_t count, UErrorCode &status);

    /**
     * Get a particular string from the list.
     *
     * @param index - the index of the string
     *
     * @return a pointer to the <code>UnicodeString</code> or <code>NULL</code>
     *         if <code>index</code> is out of bounds.
     *
     * @internal ICU 4.0.1 technology preview
     */
    const UnicodeString *get(int32_t index) const;

    /**
     * Get the number of strings in the list.
     *
     * @return the number of strings in the list.
     *
     * @internal ICU 4.0.1 technology preview
     */
    int32_t size() const;

    /**
     * the UObject glue...
     * @internal ICU 4.0.1 technology preview
     */
    virtual UClassID getDynamicClassID() const;
    /**
     * the UObject glue...
     * @internal ICU 4.0.1 technology preview
     */
    static UClassID getStaticClassID();

private:
    // NOTE(review): presumably the string storage, with listMax as its
    // capacity and listSize as the number of strings in use -- confirm
    // against the implementation.
    UnicodeString *strings;
    int32_t listMax;
    int32_t listSize;

    // Statistics members; only present when INSTRUMENT_STRING_LIST is defined.
#ifdef INSTRUMENT_STRING_LIST
    static int32_t _lists;
    static int32_t _strings;
    static int32_t _histogram[101];
#endif
};
#endif  /* U_HIDE_INTERNAL_API */

/*
 * Forward references to internal classes.
 */
class StringToCEsMap;
class CEToStringsMap;
class CollDataCache;

#ifndef U_HIDE_INTERNAL_API
/**
 * CollData
 *
 * This class holds the Collator-specific data needed to
 * compute the length of the shortest string that can
 * generate a particular list of CEs.
 *
 * <code>CollData</code> objects are quite expensive to compute. Because
 * of this, they are cached. When you call <code>CollData::open</code> it
 * returns a reference counted cached object. When you call <code>CollData::close</code>
 * the reference count on the object is decremented but the object is not deleted.
 *
 * If you do not need to reuse any unreferenced objects in the cache, you can call
 * <code>CollData::flushCollDataCache</code>. If you no longer need any <code>CollData</code>
 * objects, you can call <code>CollData::freeCollDataCache</code>.
 *
 * @internal ICU 4.0.1 technology preview
 */
class U_I18N_API CollData : public UObject
{
public:
    /**
     * Construct a <code>CollData</code> object.
     *
     * @param collator - the collator
     * @param status - will be set if any errors occur.
     *
     * @return the <code>CollData</code> object. You must call
     *         <code>close</code> when you are done using the object.
     *
     * Note: if on return, status is set to an error code,
     * the only safe thing to do with this object is to call
     * <code>CollData::close</code>.
     *
     * @internal ICU 4.0.1 technology preview
     */
    static CollData *open(UCollator *collator, UErrorCode &status);

    /**
     * Release a <code>CollData</code> object.
     *
     * @param collData - the object
     *
     * @internal ICU 4.0.1 technology preview
     */
    static void close(CollData *collData);

    /**
     * Get the <code>UCollator</code> object used to create this object.
     * The object returned may not be the exact object that was used to
     * create this object, but it will have the same behavior.
     *
     * @return the <code>UCollator</code> object.
     *
     * @internal ICU 4.0.1 technology preview
     */
    UCollator *getCollator() const;

    /**
     * Get a list of all the strings which generate a list
     * of CEs starting with a given CE.
     *
     * @param ce - the CE
     *
     * @return a <code>StringList</code> object containing all
     *         the strings, or <code>NULL</code> if there are
     *         no such strings.
     *
     * @internal ICU 4.0.1 technology preview.
     */
    const StringList *getStringList(int32_t ce) const;

    /**
     * Get a list of the CEs generated by a particular string.
     *
     * @param string - the string
     *
     * @return a <code>CEList</code> object containing the CEs. You
     *         must call <code>freeCEList</code> when you are finished
     *         using the <code>CEList</code>.
     *
     * @internal ICU 4.0.1 technology preview.
     */
    const CEList *getCEList(const UnicodeString *string) const;

    /**
     * Release a <code>CEList</code> returned by <code>getCEList</code>.
     *
     * @param list - the <code>CEList</code> to free.
     *
     * @internal ICU 4.0.1 technology preview
     */
    void freeCEList(const CEList *list);

    /**
     * Return the length of the shortest string that will generate
     * the given list of CEs.
     *
     * @param ces - the CEs
     * @param offset - the offset of the first CE in the list to use.
     *
     * @return the length of the shortest string.
     *
     * @internal ICU 4.0.1 technology preview
     */
    int32_t minLengthInChars(const CEList *ces, int32_t offset) const;


    /**
     * Return the length of the shortest string that will generate
     * the given list of CEs.
     *
     * Note: the algorithm used to do this computation is recursive. To
     * limit the amount of recursion, a "history" list is used to record
     * the best answer starting at a particular offset in the list of CEs.
     * If the same offset is visited again during the recursion, the answer
     * in the history list is used.
     *
     * @param ces - the CEs
     * @param offset - the offset of the first CE in the list to use.
     * @param history - the history list. Must be at least as long as
     *                  the number of CEs in the <code>CEList</code>
     *
     * @return the length of the shortest string.
     *
     * @internal ICU 4.0.1 technology preview
     */
    int32_t minLengthInChars(const CEList *ces, int32_t offset, int32_t *history) const;

    /**
     * UObject glue...
     * @internal ICU 4.0.1 technology preview
     */
    virtual UClassID getDynamicClassID() const;
    /**
     * UObject glue...
     * @internal ICU 4.0.1 technology preview
     */
    static UClassID getStaticClassID();

    /**
     * <code>CollData</code> objects are expensive to compute, and so
     * may be cached. This routine will free the cached objects and delete
     * the cache.
     *
     * WARNING: Don't call this until you have called <code>close</code>
     * for each <code>CollData</code> object that you have used. Also,
     * DO NOT call this if another thread may be calling <code>flushCollDataCache</code>
     * at the same time.
     *
     * @internal ICU 4.0.1 technology preview
     */
    static void freeCollDataCache();

    /**
     * <code>CollData</code> objects are expensive to compute, and so
     * may be cached. This routine will remove any unused <code>CollData</code>
     * objects from the cache.
     *
     * @internal ICU 4.0.1 technology preview
     */
    static void flushCollDataCache();

private:
    // The cache classes are friends, which gives them access to the private
    // constructors and destructor below.
    friend class CollDataCache;
    friend class CollDataCacheEntry;

    // Construction and destruction are private; clients obtain and release
    // instances through open() and close().
    CollData(UCollator *collator, char *cacheKey, int32_t cachekeyLength, UErrorCode &status);
    ~CollData();

    CollData();

    // NOTE(review): presumably writes the collator's cache key into buffer
    // (or returns other storage if it does not fit) -- confirm ownership of
    // the returned pointer against the implementation.
    static char *getCollatorKey(UCollator *collator, char *buffer, int32_t bufferLength);

    static CollDataCache *getCollDataCache();

    UCollator      *coll;
    StringToCEsMap *charsToCEList;
    CEToStringsMap *ceToCharsStartingWith;

    // Internal buffer for the Collator's short description string
    // (see KEY_BUFFER_SIZE).
    char keyBuffer[KEY_BUFFER_SIZE];
    // NOTE(review): presumably points at keyBuffer or at separately allocated
    // storage when the key is too long -- confirm against the implementation.
    char *key;

    static CollDataCache *collDataCache;

    // NOTE(review): presumably CE range limits for Han and Jamo characters
    // used by minLengthInChars -- confirm against the implementation.
    uint32_t minHan;
    uint32_t maxHan;

    uint32_t jamoLimits[4];
};
#endif  /* U_HIDE_INTERNAL_API */

U_NAMESPACE_END

#endif // #if !UCONFIG_NO_COLLATION
#endif // #ifndef COLL_DATA_H