1 /* 2 ******************************************************************************* 3 * Copyright (c) 1996-2011, International Business Machines Corporation and others. 4 * All Rights Reserved. 5 ******************************************************************************* 6 */ 7 8 #ifndef UCOL_H 9 #define UCOL_H 10 11 #include "unicode/utypes.h" 12 13 #if !UCONFIG_NO_COLLATION 14 15 #include "unicode/unorm.h" 16 #include "unicode/localpointer.h" 17 #include "unicode/parseerr.h" 18 #include "unicode/uloc.h" 19 #include "unicode/uset.h" 20 #include "unicode/uscript.h" 21 22 /** 23 * \file 24 * \brief C API: Collator 25 * 26 * <h2> Collator C API </h2> 27 * 28 * The C API for Collator performs locale-sensitive 29 * string comparison. You use this service to build 30 * searching and sorting routines for natural language text. 31 * <em>Important: </em>The ICU collation service has been reimplemented 32 * in order to achieve better performance and UCA compliance. 33 * For details, see the 34 * <a href="http://source.icu-project.org/repos/icu/icuhtml/trunk/design/collation/ICU_collation_design.htm"> 35 * collation design document</a>. 36 * <p> 37 * For more information about the collation service see 38 * <a href="http://icu-project.org/userguide/Collate_Intro.html">the users guide</a>. 39 * <p> 40 * Collation service provides correct sorting orders for most locales supported in ICU. 41 * If specific data for a locale is not available, the orders eventually falls back 42 * to the <a href="http://www.unicode.org/unicode/reports/tr10/">UCA sort order</a>. 43 * <p> 44 * Sort ordering may be customized by providing your own set of rules. For more on 45 * this subject see the 46 * <a href="http://icu-project.org/userguide/Collate_Customization.html"> 47 * Collation customization</a> section of the users guide. 48 * <p> 49 * @see UCollationResult 50 * @see UNormalizationMode 51 * @see UCollationStrength 52 * @see UCollationElements 53 */ 54 55 /** A collator. 
56 * For usage in C programs. 57 */ 58 struct UCollator; 59 /** structure representing a collator object instance 60 * @stable ICU 2.0 61 */ 62 typedef struct UCollator UCollator; 63 64 65 /** 66 * UCOL_LESS is returned if source string is compared to be less than target 67 * string in the u_strcoll() method. 68 * UCOL_EQUAL is returned if source string is compared to be equal to target 69 * string in the u_strcoll() method. 70 * UCOL_GREATER is returned if source string is compared to be greater than 71 * target string in the u_strcoll() method. 72 * @see u_strcoll() 73 * <p> 74 * Possible values for a comparison result 75 * @stable ICU 2.0 76 */ 77 typedef enum { 78 /** string a == string b */ 79 UCOL_EQUAL = 0, 80 /** string a > string b */ 81 UCOL_GREATER = 1, 82 /** string a < string b */ 83 UCOL_LESS = -1 84 } UCollationResult ; 85 86 87 /** Enum containing attribute values for controling collation behavior. 88 * Here are all the allowable values. Not every attribute can take every value. 
The only 89 * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined 90 * value for that locale 91 * @stable ICU 2.0 92 */ 93 typedef enum { 94 /** accepted by most attributes */ 95 UCOL_DEFAULT = -1, 96 97 /** Primary collation strength */ 98 UCOL_PRIMARY = 0, 99 /** Secondary collation strength */ 100 UCOL_SECONDARY = 1, 101 /** Tertiary collation strength */ 102 UCOL_TERTIARY = 2, 103 /** Default collation strength */ 104 UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, 105 UCOL_CE_STRENGTH_LIMIT, 106 /** Quaternary collation strength */ 107 UCOL_QUATERNARY=3, 108 /** Identical collation strength */ 109 UCOL_IDENTICAL=15, 110 UCOL_STRENGTH_LIMIT, 111 112 /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 113 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 114 & UCOL_DECOMPOSITION_MODE*/ 115 UCOL_OFF = 16, 116 /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 117 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 118 & UCOL_DECOMPOSITION_MODE*/ 119 UCOL_ON = 17, 120 121 /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */ 122 UCOL_SHIFTED = 20, 123 /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */ 124 UCOL_NON_IGNORABLE = 21, 125 126 /** Valid for UCOL_CASE_FIRST - 127 lower case sorts before upper case */ 128 UCOL_LOWER_FIRST = 24, 129 /** upper case sorts before lower case */ 130 UCOL_UPPER_FIRST = 25, 131 132 UCOL_ATTRIBUTE_VALUE_COUNT 133 134 } UColAttributeValue; 135 136 /** 137 * Enum containing the codes for reordering segments of the collation table that are not script 138 * codes. These reordering codes are to be used in conjunction with the script codes. 139 * @see ucol_getReorderCodes 140 * @see ucol_setReorderCodes 141 * @see ucol_getEquivalentReorderCodes 142 * @draft ICU 4.8 143 */ 144 typedef enum { 145 /** 146 * A special reordering code that is used to specify the default 147 * reordering codes for a locale. 
148 * @draft ICU 4.8 149 */ 150 UCOL_REORDER_CODE_DEFAULT = -1, 151 /** 152 * A special reordering code that is used to specify no reordering codes. 153 * @draft ICU 4.8 154 */ 155 UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN, 156 /** 157 * A special reordering code that is used to specify all other codes used for 158 * reordering except for the codes lised as UColReorderCode values and those 159 * listed explicitly in a reordering. 160 * @draft ICU 4.8 161 */ 162 UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN, 163 /** 164 * Characters with the space property. 165 * @draft ICU 4.8 166 */ 167 UCOL_REORDER_CODE_SPACE = 0x1000, 168 /** 169 * The first entry in the enumeration of reordering groups. 170 * @draft ICU 4.8 171 */ 172 UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE, 173 /** 174 * Characters with the punctuation property. 175 * @draft ICU 4.8 176 */ 177 UCOL_REORDER_CODE_PUNCTUATION = 0x1001, 178 /** 179 * Characters with the symbol property. 180 * @draft ICU 4.8 181 */ 182 UCOL_REORDER_CODE_SYMBOL = 0x1002, 183 /** 184 * Characters with the currency property. 185 * @draft ICU 4.8 186 */ 187 UCOL_REORDER_CODE_CURRENCY = 0x1003, 188 /** 189 * Characters with the digit property. 190 * @draft ICU 4.8 191 */ 192 UCOL_REORDER_CODE_DIGIT = 0x1004, 193 /** 194 * The limit of the reorder codes. 195 * @draft ICU 4.8 196 */ 197 UCOL_REORDER_CODE_LIMIT = 0x1005 198 } UColReorderCode; 199 200 /** 201 * Base letter represents a primary difference. Set comparison 202 * level to UCOL_PRIMARY to ignore secondary and tertiary differences. 203 * Use this to set the strength of a Collator object. 204 * Example of primary difference, "abc" < "abd" 205 * 206 * Diacritical differences on the same base letter represent a secondary 207 * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary 208 * differences. Use this to set the strength of a Collator object. 209 * Example of secondary difference, "ä" >> "a". 
210 * 211 * Uppercase and lowercase versions of the same character represents a 212 * tertiary difference. Set comparison level to UCOL_TERTIARY to include 213 * all comparison differences. Use this to set the strength of a Collator 214 * object. 215 * Example of tertiary difference, "abc" <<< "ABC". 216 * 217 * Two characters are considered "identical" when they have the same 218 * unicode spellings. UCOL_IDENTICAL. 219 * For example, "ä" == "ä". 220 * 221 * UCollationStrength is also used to determine the strength of sort keys 222 * generated from UCollator objects 223 * These values can be now found in the UColAttributeValue enum. 224 * @stable ICU 2.0 225 **/ 226 typedef UColAttributeValue UCollationStrength; 227 228 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT 229 * value, as well as the values specific to each one. 230 * @stable ICU 2.0 231 */ 232 typedef enum { 233 /** Attribute for direction of secondary weights - used in French. 234 * Acceptable values are UCOL_ON, which results in secondary weights 235 * being considered backwards and UCOL_OFF which treats secondary 236 * weights in the order they appear.*/ 237 UCOL_FRENCH_COLLATION, 238 /** Attribute for handling variable elements. 239 * Acceptable values are UCOL_NON_IGNORABLE (default) 240 * which treats all the codepoints with non-ignorable 241 * primary weights in the same way, 242 * and UCOL_SHIFTED which causes codepoints with primary 243 * weights that are equal or below the variable top value 244 * to be ignored on primary level and moved to the quaternary 245 * level.*/ 246 UCOL_ALTERNATE_HANDLING, 247 /** Controls the ordering of upper and lower case letters. 248 * Acceptable values are UCOL_OFF (default), which orders 249 * upper and lower case letters in accordance to their tertiary 250 * weights, UCOL_UPPER_FIRST which forces upper case letters to 251 * sort before lower case letters, and UCOL_LOWER_FIRST which does 252 * the opposite. 
*/ 253 UCOL_CASE_FIRST, 254 /** Controls whether an extra case level (positioned before the third 255 * level) is generated or not. Acceptable values are UCOL_OFF (default), 256 * when case level is not generated, and UCOL_ON which causes the case 257 * level to be generated. Contents of the case level are affected by 258 * the value of UCOL_CASE_FIRST attribute. A simple way to ignore 259 * accent differences in a string is to set the strength to UCOL_PRIMARY 260 * and enable case level. */ 261 UCOL_CASE_LEVEL, 262 /** Controls whether the normalization check and necessary normalizations 263 * are performed. When set to UCOL_OFF (default) no normalization check 264 * is performed. The correctness of the result is guaranteed only if the 265 * input data is in so-called FCD form (see users manual for more info). 266 * When set to UCOL_ON, an incremental check is performed to see whether 267 * the input data is in the FCD form. If the data is not in the FCD form, 268 * incremental NFD normalization is performed. */ 269 UCOL_NORMALIZATION_MODE, 270 /** An alias for UCOL_NORMALIZATION_MODE attribute */ 271 UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, 272 /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, 273 * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength 274 * for most locales (except Japanese) is tertiary. Quaternary strength 275 * is useful when combined with shifted setting for alternate handling 276 * attribute and for JIS x 4061 collation, when it is used to distinguish 277 * between Katakana and Hiragana (this is achieved by setting the 278 * UCOL_HIRAGANA_QUATERNARY mode to on. Otherwise, quaternary level 279 * is affected only by the number of non ignorable code points in 280 * the string. Identical strength is rarely useful, as it amounts 281 * to codepoints of the NFD form of the string. 
*/ 282 UCOL_STRENGTH, 283 /** When turned on, this attribute positions Hiragana before all 284 * non-ignorables on quaternary level This is a sneaky way to produce JIS 285 * sort order */ 286 UCOL_HIRAGANA_QUATERNARY_MODE, 287 /** When turned on, this attribute generates a collation key 288 * for the numeric value of substrings of digits. 289 * This is a way to get '100' to sort AFTER '2'. Note that the longest 290 * digit substring that can be treated as a single collation element is 291 * 254 digits (not counting leading zeros). If a digit substring is 292 * longer than that, the digits beyond the limit will be treated as a 293 * separate digit substring associated with a separate collation element. */ 294 UCOL_NUMERIC_COLLATION, 295 UCOL_ATTRIBUTE_COUNT 296 } UColAttribute; 297 298 /** Options for retrieving the rule string 299 * @stable ICU 2.0 300 */ 301 typedef enum { 302 /** Retrieve tailoring only */ 303 UCOL_TAILORING_ONLY, 304 /** Retrieve UCA rules and tailoring */ 305 UCOL_FULL_RULES 306 } UColRuleOption ; 307 308 /** 309 * Open a UCollator for comparing strings. 310 * The UCollator pointer is used in all the calls to the Collation 311 * service. After finished, collator must be disposed of by calling 312 * {@link #ucol_close }. 313 * @param loc The locale containing the required collation rules. 314 * Special values for locales can be passed in - 315 * if NULL is passed for the locale, the default locale 316 * collation rules will be used. If empty string ("") or 317 * "root" are passed, UCA rules will be used. 318 * @param status A pointer to an UErrorCode to receive any errors 319 * @return A pointer to a UCollator, or 0 if an error occurred. 320 * @see ucol_openRules 321 * @see ucol_safeClone 322 * @see ucol_close 323 * @stable ICU 2.0 324 */ 325 U_STABLE UCollator* U_EXPORT2 326 ucol_open(const char *loc, UErrorCode *status); 327 328 /** 329 * Produce an UCollator instance according to the rules supplied. 
330 * The rules are used to change the default ordering, defined in the 331 * UCA in a process called tailoring. The resulting UCollator pointer 332 * can be used in the same way as the one obtained by {@link #ucol_strcoll }. 333 * @param rules A string describing the collation rules. For the syntax 334 * of the rules please see users guide. 335 * @param rulesLength The length of rules, or -1 if null-terminated. 336 * @param normalizationMode The normalization mode: One of 337 * UCOL_OFF (expect the text to not need normalization), 338 * UCOL_ON (normalize), or 339 * UCOL_DEFAULT (set the mode according to the rules) 340 * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 341 * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules. 342 * @param parseError A pointer to UParseError to recieve information about errors 343 * occurred during parsing. This argument can currently be set 344 * to NULL, but at users own risk. Please provide a real structure. 345 * @param status A pointer to an UErrorCode to receive any errors 346 * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case 347 * of error - please use status argument to check for errors. 348 * @see ucol_open 349 * @see ucol_safeClone 350 * @see ucol_close 351 * @stable ICU 2.0 352 */ 353 U_STABLE UCollator* U_EXPORT2 354 ucol_openRules( const UChar *rules, 355 int32_t rulesLength, 356 UColAttributeValue normalizationMode, 357 UCollationStrength strength, 358 UParseError *parseError, 359 UErrorCode *status); 360 361 /** 362 * Open a collator defined by a short form string. 363 * The structure and the syntax of the string is defined in the "Naming collators" 364 * section of the users guide: 365 * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators 366 * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final 367 * strength will be 3. 
3066bis locale overrides individual locale parts. 368 * The call to this function is equivalent to a call to ucol_open, followed by a 369 * series of calls to ucol_setAttribute and ucol_setVariableTop. 370 * @param definition A short string containing a locale and a set of attributes. 371 * Attributes not explicitly mentioned are left at the default 372 * state for a locale. 373 * @param parseError if not NULL, structure that will get filled with error's pre 374 * and post context in case of error. 375 * @param forceDefaults if FALSE, the settings that are the same as the collator 376 * default settings will not be applied (for example, setting 377 * French secondary on a French collator would not be executed). 378 * If TRUE, all the settings will be applied regardless of the 379 * collator default value. If the definition 380 * strings are to be cached, should be set to FALSE. 381 * @param status Error code. Apart from regular error conditions connected to 382 * instantiating collators (like out of memory or similar), this 383 * API will return an error if an invalid attribute or attribute/value 384 * combination is specified. 385 * @return A pointer to a UCollator or 0 if an error occured (including an 386 * invalid attribute). 387 * @see ucol_open 388 * @see ucol_setAttribute 389 * @see ucol_setVariableTop 390 * @see ucol_getShortDefinitionString 391 * @see ucol_normalizeShortDefinitionString 392 * @stable ICU 3.0 393 * 394 */ 395 U_STABLE UCollator* U_EXPORT2 396 ucol_openFromShortString( const char *definition, 397 UBool forceDefaults, 398 UParseError *parseError, 399 UErrorCode *status); 400 401 /** 402 * Get a set containing the contractions defined by the collator. The set includes 403 * both the UCA contractions and the contractions defined by the collator. This set 404 * will contain only strings. If a tailoring explicitly suppresses contractions from 405 * the UCA (like Russian), removed contractions will not be in the resulting set. 
406 * @param coll collator 407 * @param conts the set to hold the result. It gets emptied before 408 * contractions are added. 409 * @param status to hold the error code 410 * @return the size of the contraction set 411 * 412 * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead 413 */ 414 U_DEPRECATED int32_t U_EXPORT2 415 ucol_getContractions( const UCollator *coll, 416 USet *conts, 417 UErrorCode *status); 418 419 /** 420 * Get a set containing the expansions defined by the collator. The set includes 421 * both the UCA expansions and the expansions defined by the tailoring 422 * @param coll collator 423 * @param contractions if not NULL, the set to hold the contractions 424 * @param expansions if not NULL, the set to hold the expansions 425 * @param addPrefixes add the prefix contextual elements to contractions 426 * @param status to hold the error code 427 * 428 * @stable ICU 3.4 429 */ 430 U_STABLE void U_EXPORT2 431 ucol_getContractionsAndExpansions( const UCollator *coll, 432 USet *contractions, USet *expansions, 433 UBool addPrefixes, UErrorCode *status); 434 435 /** 436 * Close a UCollator. 437 * Once closed, a UCollator should not be used. Every open collator should 438 * be closed. Otherwise, a memory leak will result. 439 * @param coll The UCollator to close. 440 * @see ucol_open 441 * @see ucol_openRules 442 * @see ucol_safeClone 443 * @stable ICU 2.0 444 */ 445 U_STABLE void U_EXPORT2 446 ucol_close(UCollator *coll); 447 448 #if U_SHOW_CPLUSPLUS_API 449 450 U_NAMESPACE_BEGIN 451 452 /** 453 * \class LocalUCollatorPointer 454 * "Smart pointer" class, closes a UCollator via ucol_close(). 455 * For most methods see the LocalPointerBase base class. 456 * 457 * @see LocalPointerBase 458 * @see LocalPointer 459 * @stable ICU 4.4 460 */ 461 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close); 462 463 U_NAMESPACE_END 464 465 #endif 466 467 /** 468 * Compare two strings. 
469 * The strings will be compared using the options already specified. 470 * @param coll The UCollator containing the comparison rules. 471 * @param source The source string. 472 * @param sourceLength The length of source, or -1 if null-terminated. 473 * @param target The target string. 474 * @param targetLength The length of target, or -1 if null-terminated. 475 * @return The result of comparing the strings; one of UCOL_EQUAL, 476 * UCOL_GREATER, UCOL_LESS 477 * @see ucol_greater 478 * @see ucol_greaterOrEqual 479 * @see ucol_equal 480 * @stable ICU 2.0 481 */ 482 U_STABLE UCollationResult U_EXPORT2 483 ucol_strcoll( const UCollator *coll, 484 const UChar *source, 485 int32_t sourceLength, 486 const UChar *target, 487 int32_t targetLength); 488 489 /** 490 * Determine if one string is greater than another. 491 * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER 492 * @param coll The UCollator containing the comparison rules. 493 * @param source The source string. 494 * @param sourceLength The length of source, or -1 if null-terminated. 495 * @param target The target string. 496 * @param targetLength The length of target, or -1 if null-terminated. 497 * @return TRUE if source is greater than target, FALSE otherwise. 498 * @see ucol_strcoll 499 * @see ucol_greaterOrEqual 500 * @see ucol_equal 501 * @stable ICU 2.0 502 */ 503 U_STABLE UBool U_EXPORT2 504 ucol_greater(const UCollator *coll, 505 const UChar *source, int32_t sourceLength, 506 const UChar *target, int32_t targetLength); 507 508 /** 509 * Determine if one string is greater than or equal to another. 510 * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS 511 * @param coll The UCollator containing the comparison rules. 512 * @param source The source string. 513 * @param sourceLength The length of source, or -1 if null-terminated. 514 * @param target The target string. 515 * @param targetLength The length of target, or -1 if null-terminated. 
516 * @return TRUE if source is greater than or equal to target, FALSE otherwise. 517 * @see ucol_strcoll 518 * @see ucol_greater 519 * @see ucol_equal 520 * @stable ICU 2.0 521 */ 522 U_STABLE UBool U_EXPORT2 523 ucol_greaterOrEqual(const UCollator *coll, 524 const UChar *source, int32_t sourceLength, 525 const UChar *target, int32_t targetLength); 526 527 /** 528 * Compare two strings for equality. 529 * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL 530 * @param coll The UCollator containing the comparison rules. 531 * @param source The source string. 532 * @param sourceLength The length of source, or -1 if null-terminated. 533 * @param target The target string. 534 * @param targetLength The length of target, or -1 if null-terminated. 535 * @return TRUE if source is equal to target, FALSE otherwise 536 * @see ucol_strcoll 537 * @see ucol_greater 538 * @see ucol_greaterOrEqual 539 * @stable ICU 2.0 540 */ 541 U_STABLE UBool U_EXPORT2 542 ucol_equal(const UCollator *coll, 543 const UChar *source, int32_t sourceLength, 544 const UChar *target, int32_t targetLength); 545 546 /** 547 * Compare two UTF-8 encoded trings. 548 * The strings will be compared using the options already specified. 549 * @param coll The UCollator containing the comparison rules. 550 * @param sIter The source string iterator. 551 * @param tIter The target string iterator. 552 * @return The result of comparing the strings; one of UCOL_EQUAL, 553 * UCOL_GREATER, UCOL_LESS 554 * @param status A pointer to an UErrorCode to receive any errors 555 * @see ucol_strcoll 556 * @stable ICU 2.6 557 */ 558 U_STABLE UCollationResult U_EXPORT2 559 ucol_strcollIter( const UCollator *coll, 560 UCharIterator *sIter, 561 UCharIterator *tIter, 562 UErrorCode *status); 563 564 /** 565 * Get the collation strength used in a UCollator. 566 * The strength influences how strings are compared. 567 * @param coll The UCollator to query. 
568 * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 569 * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL 570 * @see ucol_setStrength 571 * @stable ICU 2.0 572 */ 573 U_STABLE UCollationStrength U_EXPORT2 574 ucol_getStrength(const UCollator *coll); 575 576 /** 577 * Set the collation strength used in a UCollator. 578 * The strength influences how strings are compared. 579 * @param coll The UCollator to set. 580 * @param strength The desired collation strength; one of UCOL_PRIMARY, 581 * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT 582 * @see ucol_getStrength 583 * @stable ICU 2.0 584 */ 585 U_STABLE void U_EXPORT2 586 ucol_setStrength(UCollator *coll, 587 UCollationStrength strength); 588 589 /** 590 * Retrieves the reordering codes for this collator. 591 * These reordering codes are a combination of UScript codes and UColReorderCode entries. 592 * @param coll The UCollator to query. 593 * @param dest The array to fill with the script ordering. 594 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 595 * will only return the length of the result without writing any of the result string (pre-flighting). 596 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 597 * failure before the function call. 598 * @return The number of reordering codes written to the dest array. 599 * @see ucol_setReorderCodes 600 * @see ucol_getEquivalentReorderCodes 601 * @draft ICU 4.8 602 */ 603 U_DRAFT int32_t U_EXPORT2 604 ucol_getReorderCodes(const UCollator* coll, 605 int32_t* dest, 606 int32_t destCapacity, 607 UErrorCode *pErrorCode); 608 /** 609 * Sets the reordering codes for this collator. 610 * Collation reordering allows scripts and some other defined blocks of characters 611 * to be moved relative to each other as a block. This reordering is done on top of 612 * the DUCET/CLDR standard collation order. 
* Reordering can specify groups to be placed
 * at the start and/or the end of the collation order. These groups are specified using
 * UScript codes and UColReorderCode entries.
 * <p>By default, reordering codes specified for the start of the order are placed in the
 * order given after a group of "special" non-script blocks. These special groups of characters
 * are space, punctuation, symbol, currency, and digit. These special groups are represented with
 * UColReorderCode entries. Script groups can be intermingled with
 * these special non-script blocks if those special blocks are explicitly specified in the reordering.
 * <p>The special code OTHERS stands for any script that is not explicitly
 * mentioned in the list of reordering codes given. Anything that is after OTHERS
 * will go at the very end of the reordering in the order given.
 * <p>The special reorder code DEFAULT will reset the reordering for this collator
 * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that
 * was specified when this collator was created from resource data or from rules. The
 * DEFAULT code <b>must</b> be the sole code supplied when it is used. If not
 * that will result in an U_ILLEGAL_ARGUMENT_ERROR being set.
 * <p>The special reorder code NONE will remove any reordering for this collator.
 * The result of setting no reordering will be to have the DUCET/CLDR reordering used. The
 * NONE code <b>must</b> be the sole code supplied when it is used.
 * @param coll The UCollator to set.
 * @param reorderCodes An array of script codes in the new order. This can be NULL if the
 * length is also set to 0. An empty array will clear any reordering codes on the collator.
 * @param reorderCodesLength The length of reorderCodes.
 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a
 * failure before the function call.
637 * @see ucol_getReorderCodes 638 * @see ucol_getEquivalentReorderCodes 639 * @draft ICU 4.8 640 */ 641 U_DRAFT void U_EXPORT2 642 ucol_setReorderCodes(UCollator* coll, 643 const int32_t* reorderCodes, 644 int32_t reorderCodesLength, 645 UErrorCode *pErrorCode); 646 647 /** 648 * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder 649 * codes will be grouped and must reorder together. 650 * @param reorderCode The reorder code to determine equivalence for. 651 * @param dest The array to fill with the script ordering. 652 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 653 * will only return the length of the result without writing any of the result string (pre-flighting). 654 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate 655 * a failure before the function call. 656 * @return The number of reordering codes written to the dest array. 657 * @see ucol_setReorderCodes 658 * @see ucol_getReorderCodes 659 * @draft ICU 4.8 660 */ 661 U_DRAFT int32_t U_EXPORT2 662 ucol_getEquivalentReorderCodes(int32_t reorderCode, 663 int32_t* dest, 664 int32_t destCapacity, 665 UErrorCode *pErrorCode); 666 667 /** 668 * Get the display name for a UCollator. 669 * The display name is suitable for presentation to a user. 670 * @param objLoc The locale of the collator in question. 671 * @param dispLoc The locale for display. 672 * @param result A pointer to a buffer to receive the attribute. 673 * @param resultLength The maximum size of result. 674 * @param status A pointer to an UErrorCode to receive any errors 675 * @return The total buffer size needed; if greater than resultLength, 676 * the output was truncated. 
677 * @stable ICU 2.0 678 */ 679 U_STABLE int32_t U_EXPORT2 680 ucol_getDisplayName( const char *objLoc, 681 const char *dispLoc, 682 UChar *result, 683 int32_t resultLength, 684 UErrorCode *status); 685 686 /** 687 * Get a locale for which collation rules are available. 688 * A UCollator in a locale returned by this function will perform the correct 689 * collation for the locale. 690 * @param localeIndex The index of the desired locale. 691 * @return A locale for which collation rules are available, or 0 if none. 692 * @see ucol_countAvailable 693 * @stable ICU 2.0 694 */ 695 U_STABLE const char* U_EXPORT2 696 ucol_getAvailable(int32_t localeIndex); 697 698 /** 699 * Determine how many locales have collation rules available. 700 * This function is most useful as determining the loop ending condition for 701 * calls to {@link #ucol_getAvailable }. 702 * @return The number of locales for which collation rules are available. 703 * @see ucol_getAvailable 704 * @stable ICU 2.0 705 */ 706 U_STABLE int32_t U_EXPORT2 707 ucol_countAvailable(void); 708 709 #if !UCONFIG_NO_SERVICE 710 /** 711 * Create a string enumerator of all locales for which a valid 712 * collator may be opened. 713 * @param status input-output error code 714 * @return a string enumeration over locale strings. The caller is 715 * responsible for closing the result. 716 * @stable ICU 3.0 717 */ 718 U_STABLE UEnumeration* U_EXPORT2 719 ucol_openAvailableLocales(UErrorCode *status); 720 #endif 721 722 /** 723 * Create a string enumerator of all possible keywords that are relevant to 724 * collation. At this point, the only recognized keyword for this 725 * service is "collation". 726 * @param status input-output error code 727 * @return a string enumeration over locale strings. The caller is 728 * responsible for closing the result. 
729 * @stable ICU 3.0 730 */ 731 U_STABLE UEnumeration* U_EXPORT2 732 ucol_getKeywords(UErrorCode *status); 733 734 /** 735 * Given a keyword, create a string enumeration of all values 736 * for that keyword that are currently in use. 737 * @param keyword a particular keyword as enumerated by 738 * ucol_getKeywords. If any other keyword is passed in, *status is set 739 * to U_ILLEGAL_ARGUMENT_ERROR. 740 * @param status input-output error code 741 * @return a string enumeration over collation keyword values, or NULL 742 * upon error. The caller is responsible for closing the result. 743 * @stable ICU 3.0 744 */ 745 U_STABLE UEnumeration* U_EXPORT2 746 ucol_getKeywordValues(const char *keyword, UErrorCode *status); 747 748 /** 749 * Given a key and a locale, returns an array of string values in a preferred 750 * order that would make a difference. These are all and only those values where 751 * the open (creation) of the service with the locale formed from the input locale 752 * plus input keyword and that value has different behavior than creation with the 753 * input locale alone. 754 * @param key one of the keys supported by this service. For now, only 755 * "collation" is supported. 756 * @param locale the locale 757 * @param commonlyUsed if set to true it will return only commonly used values 758 * with the given locale in preferred order. Otherwise, 759 * it will return all the available values for the locale. 760 * @param status error status 761 * @return a string enumeration over keyword values for the given key and the locale. 762 * @stable ICU 4.2 763 */ 764 U_STABLE UEnumeration* U_EXPORT2 765 ucol_getKeywordValuesForLocale(const char* key, 766 const char* locale, 767 UBool commonlyUsed, 768 UErrorCode* status); 769 770 /** 771 * Return the functionally equivalent locale for the given 772 * requested locale, with respect to given keyword, for the 773 * collation service. 
If two locales return the same result, then 774 * collators instantiated for these locales will behave 775 * equivalently. The converse is not always true; two collators 776 * may in fact be equivalent, but return different results, due to 777 * internal details. The return result has no other meaning than 778 * that stated above, and implies nothing as to the relationship 779 * between the two locales. This is intended for use by 780 * applications who wish to cache collators, or otherwise reuse 781 * collators when possible. The functional equivalent may change 782 * over time. For more information, please see the <a 783 * href="http://icu-project.org/userguide/locale.html#services"> 784 * Locales and Services</a> section of the ICU User Guide. 785 * @param result fillin for the functionally equivalent locale 786 * @param resultCapacity capacity of the fillin buffer 787 * @param keyword a particular keyword as enumerated by 788 * ucol_getKeywords. 789 * @param locale the requested locale 790 * @param isAvailable if non-NULL, pointer to a fillin parameter that 791 * indicates whether the requested locale was 'available' to the 792 * collation service. A locale is defined as 'available' if it 793 * physically exists within the collation locale data. 794 * @param status pointer to input-output error code 795 * @return the actual buffer size needed for the locale. If greater 796 * than resultCapacity, the returned full name will be truncated and 797 * an error code will be returned. 798 * @stable ICU 3.0 799 */ 800 U_STABLE int32_t U_EXPORT2 801 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 802 const char* keyword, const char* locale, 803 UBool* isAvailable, UErrorCode* status); 804 805 /** 806 * Get the collation rules from a UCollator. 807 * The rules will follow the rule syntax. 808 * @param coll The UCollator to query. 809 * @param length 810 * @return The collation rules. 
811 * @stable ICU 2.0 812 */ 813 U_STABLE const UChar* U_EXPORT2 814 ucol_getRules( const UCollator *coll, 815 int32_t *length); 816 817 /** Get the short definition string for a collator. This API harvests the collator's 818 * locale and the attribute set and produces a string that can be used for opening 819 * a collator with the same properties using the ucol_openFromShortString API. 820 * This string will be normalized. 821 * The structure and the syntax of the string are defined in the "Naming collators" 822 * section of the users guide: 823 * http://icu-project.org/userguide/Collate_Concepts.html#Naming_Collators 824 * This API supports preflighting. 825 * @param coll a collator 826 * @param locale a locale that will appear as the collator's locale in the resulting 827 * short string definition. If NULL, the locale will be harvested 828 * from the collator. 829 * @param buffer space to hold the resulting string 830 * @param capacity capacity of the buffer 831 * @param status for returning errors. All the preflighting errors are featured 832 * @return length of the resulting string 833 * @see ucol_openFromShortString 834 * @see ucol_normalizeShortDefinitionString 835 * @stable ICU 3.0 836 */ 837 U_STABLE int32_t U_EXPORT2 838 ucol_getShortDefinitionString(const UCollator *coll, 839 const char *locale, 840 char *buffer, 841 int32_t capacity, 842 UErrorCode *status); 843 844 /** Verifies and normalizes a short definition string. 845 * A normalized short definition string has all the options sorted by the argument name, 846 * so that equivalent definition strings are the same. 847 * This API supports preflighting. 848 * @param source definition string 849 * @param destination space to hold the resulting string 850 * @param capacity capacity of the buffer 851 * @param parseError if not NULL, structure that will get filled with error's pre 852 * and post context in case of error. 853 * @param status Error code.
This API will return an error if an invalid attribute 854 * or attribute/value combination is specified. All the preflighting 855 * errors are also featured 856 * @return length of the resulting normalized string. 857 * 858 * @see ucol_openFromShortString 859 * @see ucol_getShortDefinitionString 860 * 861 * @stable ICU 3.0 862 */ 863 864 U_STABLE int32_t U_EXPORT2 865 ucol_normalizeShortDefinitionString(const char *source, 866 char *destination, 867 int32_t capacity, 868 UParseError *parseError, 869 UErrorCode *status); 870 871 872 /** 873 * Get a sort key for a string from a UCollator. 874 * Sort keys may be compared using <TT>strcmp</TT>. 875 * 876 * Like ICU functions that write to an output buffer, the buffer contents 877 * are undefined if the buffer capacity (resultLength parameter) is too small. 878 * Unlike ICU functions that write a string to an output buffer, 879 * the terminating zero byte is counted in the sort key length. 880 * @param coll The UCollator containing the collation rules. 881 * @param source The string to transform. 882 * @param sourceLength The length of source, or -1 if null-terminated. 883 * @param result A pointer to a buffer to receive the sort key. 884 * @param resultLength The maximum size of result. 885 * @return The size needed to fully store the sort key. 886 * If there was an internal error generating the sort key, 887 * a zero value is returned. 888 * @see ucol_keyHashCode 889 * @stable ICU 2.0 890 */ 891 U_STABLE int32_t U_EXPORT2 892 ucol_getSortKey(const UCollator *coll, 893 const UChar *source, 894 int32_t sourceLength, 895 uint8_t *result, 896 int32_t resultLength); 897 898 899 /** Gets the next count bytes of a sort key. Caller needs 900 * to preserve state array between calls and to provide 901 * the same type of UCharIterator set with the same string. 902 * The destination buffer provided must be big enough to store 903 * the number of requested bytes.
Generated sortkey is not 904 * compatible with sortkeys generated using ucol_getSortKey 905 * API, since we don't do any compression. If uncompressed 906 * sortkeys are required, this API can be used. 907 * @param coll The UCollator containing the collation rules. 908 * @param iter UCharIterator containing the string we need 909 * the sort key to be calculated for. 910 * @param state Opaque state of sortkey iteration. 911 * @param dest Buffer to hold the resulting sortkey part 912 * @param count number of sort key bytes required. 913 * @param status error code indicator. 914 * @return the actual number of bytes of a sortkey. It can be 915 * smaller than count if we have reached the end of 916 * the sort key. 917 * @stable ICU 2.6 918 */ 919 U_STABLE int32_t U_EXPORT2 920 ucol_nextSortKeyPart(const UCollator *coll, 921 UCharIterator *iter, 922 uint32_t state[2], 923 uint8_t *dest, int32_t count, 924 UErrorCode *status); 925 926 /** Enum that is taken by the ucol_getBound API. 927 * See below for explanation. 928 * Do not change the values assigned to the 929 * members of this enum. Underlying code 930 * depends on them having these numbers. 931 * @stable ICU 2.0 932 */ 933 typedef enum { 934 /** lower bound */ 935 UCOL_BOUND_LOWER = 0, 936 /** upper bound that will match strings of exact size */ 937 UCOL_BOUND_UPPER = 1, 938 /** upper bound that will match all the strings that have the same initial substring as the given string */ 939 UCOL_BOUND_UPPER_LONG = 2, 940 /** number of bound modes defined above; it follows the last mode, so its value is 3 */ UCOL_BOUND_VALUE_COUNT 941 } UColBoundMode; 942 943 /** 944 * Produce a bound for a given sortkey and a number of levels. 945 * Return value is always the number of bytes needed, regardless of 946 * whether the result buffer was big enough or even valid.<br> 947 * Resulting bounds can be used to produce a range of strings that are 948 * between upper and lower bounds.
For example, if bounds are produced 949 * for a sortkey of string "smith", strings between upper and lower 950 * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> 951 * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER 952 * is produced, strings matched would be as above. However, if bound 953 * produced using UCOL_BOUND_UPPER_LONG is used, the above example will 954 * also match "Smithsonian" and similar.<br> 955 * For more on usage, see example in cintltst/capitst.c in procedure 956 * TestBounds. 957 * Sort keys may be compared using <TT>strcmp</TT>. 958 * @param source The source sortkey. 959 * @param sourceLength The length of source, or -1 if null-terminated. 960 * (If an unmodified sortkey is passed, it is always null 961 * terminated). 962 * @param boundType Type of bound required. It can be UCOL_BOUND_LOWER, which 963 * produces a lower inclusive bound, UCOL_BOUND_UPPER, that 964 * produces upper bound that matches strings of the same length 965 * or UCOL_BOUND_UPPER_LONG that matches strings that have the 966 * same starting substring as the source string. 967 * @param noOfLevels Number of levels required in the resulting bound (for most 968 * uses, the recommended value is 1). See users guide for 969 * explanation on number of levels a sortkey can have. 970 * @param result A pointer to a buffer to receive the resulting sortkey. 971 * @param resultLength The maximum size of result. 972 * @param status Used for returning error code if something went wrong. If the 973 * number of levels requested is higher than the number of levels 974 * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 975 * issued. 976 * @return The size needed to fully store the bound. 
977 * @see ucol_keyHashCode 978 * @stable ICU 2.1 979 */ 980 U_STABLE int32_t U_EXPORT2 981 ucol_getBound(const uint8_t *source, 982 int32_t sourceLength, 983 UColBoundMode boundType, 984 uint32_t noOfLevels, 985 uint8_t *result, 986 int32_t resultLength, 987 UErrorCode *status); 988 989 /** 990 * Gets the version information for a Collator. Version is currently 991 * an opaque 32-bit number which depends, among other things, on major 992 * versions of the collator tailoring and UCA. 993 * @param coll The UCollator to query. 994 * @param info fill-in parameter that receives the version information 995 * @stable ICU 2.0 996 */ 997 U_STABLE void U_EXPORT2 998 ucol_getVersion(const UCollator* coll, UVersionInfo info); 999 1000 /** 1001 * Gets the UCA version information for a Collator. Version is the 1002 * UCA version number (e.g. 3.1.1, 4.0). 1003 * @param coll The UCollator to query. 1004 * @param info fill-in parameter that receives the version information 1005 * @stable ICU 2.8 1006 */ 1007 U_STABLE void U_EXPORT2 1008 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); 1009 1010 /** 1011 * Merge two sort keys. The levels are merged with their corresponding counterparts 1012 * (primaries with primaries, secondaries with secondaries etc.). Between the values 1013 * from the same level a separator is inserted. 1014 * example (uncompressed): 1015 * 191B1D 01 050505 01 910505 00 and 1F2123 01 050505 01 910505 00 1016 * will be merged as 1017 * 191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00 1018 * This allows for concatenating first and last names for sorting, among other things. 1019 * If the destination buffer is not big enough, the results are undefined. 1020 * If any of the source lengths are zero or any of the source pointers are NULL/undefined, 1021 * result is of size zero.
1022 * @param src1 pointer to the first sortkey 1023 * @param src1Length length of the first sortkey 1024 * @param src2 pointer to the second sortkey 1025 * @param src2Length length of the second sortkey 1026 * @param dest buffer to hold the result 1027 * @param destCapacity size of the buffer for the result 1028 * @return size of the result. If the buffer is big enough size is always 1029 * src1Length+src2Length-1 1030 * @stable ICU 2.0 1031 */ 1032 U_STABLE int32_t U_EXPORT2 1033 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, 1034 const uint8_t *src2, int32_t src2Length, 1035 uint8_t *dest, int32_t destCapacity); 1036 1037 /** 1038 * Universal attribute setter 1039 * @param coll collator whose attributes are to be changed 1040 * @param attr attribute type 1041 * @param value attribute value 1042 * @param status to indicate whether the operation went on smoothly or there were errors 1043 * @see UColAttribute 1044 * @see UColAttributeValue 1045 * @see ucol_getAttribute 1046 * @stable ICU 2.0 1047 */ 1048 U_STABLE void U_EXPORT2 1049 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); 1050 1051 /** 1052 * Universal attribute getter 1053 * @param coll collator whose attributes are to be queried 1054 * @param attr attribute type 1055 * @param status to indicate whether the operation went on smoothly or there were errors 1056 * @return attribute value 1057 * @see UColAttribute 1058 * @see UColAttributeValue 1059 * @see ucol_setAttribute 1060 * @stable ICU 2.0 1061 */ 1062 U_STABLE UColAttributeValue U_EXPORT2 1063 ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status); 1064 1065 /** Variable top 1066 * is a two byte primary value which causes all the codepoints with primary values that 1067 * are less than or equal to the variable top to be shifted when alternate handling is set 1068 * to UCOL_SHIFTED. 1069 * Sets the variable top to a collation element value of a string supplied.
1070 * @param coll collator whose variable top needs to be changed 1071 * @param varTop one or more (if contraction) UChars to which the variable top should be set 1072 * @param len length of variable top string. If -1 it is considered to be zero terminated. 1073 * @param status error code. If error code is set, the return value is undefined. 1074 * Errors set by this function are: <br> 1075 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such 1076 * contraction<br> 1077 * U_PRIMARY_TOO_LONG_ERROR if the primary for the variable top has more than two bytes 1078 * @return a 32 bit value containing the value of the variable top in upper 16 bits. 1079 * Lower 16 bits are undefined 1080 * @see ucol_getVariableTop 1081 * @see ucol_restoreVariableTop 1082 * @stable ICU 2.0 1083 */ 1084 U_STABLE uint32_t U_EXPORT2 1085 ucol_setVariableTop(UCollator *coll, 1086 const UChar *varTop, int32_t len, 1087 UErrorCode *status); 1088 1089 /** 1090 * Gets the variable top value of a Collator. 1091 * Lower 16 bits are undefined and should be ignored. 1092 * @param coll collator whose variable top needs to be retrieved 1093 * @param status error code (not changed by function). If error code is set, 1094 * the return value is undefined. 1095 * @return the variable top value of a Collator. 1096 * @see ucol_setVariableTop 1097 * @see ucol_restoreVariableTop 1098 * @stable ICU 2.0 1099 */ 1100 U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); 1101 1102 /** 1103 * Sets the variable top to a collation element value supplied. Variable top is 1104 * set to the upper 16 bits. 1105 * Lower 16 bits are ignored.
1106 * @param coll collator whose variable top needs to be changed 1107 * @param varTop CE value, as returned by ucol_setVariableTop or ucol_getVariableTop 1108 * @param status error code (not changed by function) 1109 * @see ucol_getVariableTop 1110 * @see ucol_setVariableTop 1111 * @stable ICU 2.0 1112 */ 1113 U_STABLE void U_EXPORT2 1114 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); 1115 1116 /** 1117 * Thread safe cloning operation. The result is a clone of a given collator. 1118 * @param coll collator to be cloned 1119 * @param stackBuffer user allocated space for the new clone. 1120 * If NULL new memory will be allocated. 1121 * If buffer is not large enough, new memory will be allocated. 1122 * Clients can use the U_COL_SAFECLONE_BUFFERSIZE. 1123 * This will probably be enough to avoid memory allocations. 1124 * @param pBufferSize pointer to size of allocated space. 1125 * If *pBufferSize == 0, a sufficient size for use in cloning will 1126 * be returned ('pre-flighting'). 1127 * If *pBufferSize is not enough for a stack-based safe clone, 1128 * new memory will be allocated. 1129 * @param status to indicate whether the operation went on smoothly or there were errors 1130 * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any 1131 * allocations were necessary. 1132 * @return pointer to the new clone 1133 * @see ucol_open 1134 * @see ucol_openRules 1135 * @see ucol_close 1136 * @stable ICU 2.0 1137 */ 1138 U_STABLE UCollator* U_EXPORT2 1139 ucol_safeClone(const UCollator *coll, 1140 void *stackBuffer, 1141 int32_t *pBufferSize, 1142 UErrorCode *status); 1143 1144 /** default memory size for the new clone. It needs to be this large for os/400 large pointers 1145 * @stable ICU 2.0 1146 */ 1147 #define U_COL_SAFECLONE_BUFFERSIZE 512 1148 1149 /** 1150 * Returns current rules. Delta defines whether full rules are returned or just the tailoring. 1151 * Returns number of UChars needed to store rules.
If buffer is NULL or bufferLen is not enough 1152 * to store rules, will store up to available space. 1153 * @param coll collator to get the rules from 1154 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 1155 * @param buffer buffer to store the result in. If NULL, you'll get no rules. 1156 * @param bufferLen length of buffer to store rules in. If less than needed you'll get only the part that fits in. 1157 * @return number of UChars needed to store the rules 1158 * @stable ICU 2.0 1159 */ 1160 U_STABLE int32_t U_EXPORT2 1161 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen); 1162 1163 /** 1164 * Gets the locale name of the collator. If the collator 1165 * is instantiated from the rules, then this function returns 1166 * NULL. 1167 * @param coll The UCollator for which the locale is needed 1168 * @param type You can choose between requested, valid and actual 1169 * locale. For description see the definition of 1170 * ULocDataLocaleType in uloc.h 1171 * @param status error code of the operation 1172 * @return real locale name from which the collation data comes. 1173 * If the collator was instantiated from rules, returns 1174 * NULL. 1175 * @deprecated ICU 2.8 Use ucol_getLocaleByType instead 1176 */ 1177 U_DEPRECATED const char * U_EXPORT2 1178 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1179 1180 1181 /** 1182 * Gets the locale name of the collator. If the collator 1183 * is instantiated from the rules, then this function returns 1184 * NULL. 1185 * @param coll The UCollator for which the locale is needed 1186 * @param type You can choose between requested, valid and actual 1187 * locale. For description see the definition of 1188 * ULocDataLocaleType in uloc.h 1189 * @param status error code of the operation 1190 * @return real locale name from which the collation data comes. 1191 * If the collator was instantiated from rules, returns 1192 * NULL.
1193 * @stable ICU 2.8 1194 */ 1195 U_STABLE const char * U_EXPORT2 1196 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1197 1198 /** 1199 * Get a Unicode set that contains all the characters and sequences tailored in 1200 * this collator. The result must be disposed of by using uset_close. 1201 * @param coll The UCollator for which we want to get tailored chars 1202 * @param status error code of the operation 1203 * @return a pointer to a newly created USet. Must be disposed of by using uset_close 1204 * @see ucol_openRules 1205 * @see uset_close 1206 * @stable ICU 2.4 1207 */ 1208 U_STABLE USet * U_EXPORT2 1209 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); 1210 1211 /** 1212 * Universal attribute getter that returns UCOL_DEFAULT if the value is default 1213 * @param coll collator whose attributes are to be queried 1214 * @param attr attribute type 1215 * @param status to indicate whether the operation went on smoothly or there were errors 1216 * @return attribute value or UCOL_DEFAULT if the value is default 1217 * @see UColAttribute 1218 * @see UColAttributeValue 1219 * @see ucol_setAttribute 1220 * @internal ICU 3.0 1221 */ 1222 U_INTERNAL UColAttributeValue U_EXPORT2 1223 ucol_getAttributeOrDefault(const UCollator *coll, UColAttribute attr, UErrorCode *status); 1224 1225 /** Check whether two collators are equal. Collators are considered equal if they 1226 * will sort strings the same. This means that both the current attributes and the 1227 * rules must be equivalent. Currently used for RuleBasedCollator::operator==. 1228 * @param source first collator 1229 * @param target second collator 1230 * @return TRUE if the two collators are equal, FALSE otherwise 1231 * @internal ICU 3.0 1232 */ 1233 U_INTERNAL UBool U_EXPORT2 1234 ucol_equals(const UCollator *source, const UCollator *target); 1235 1236 /** Calculates the set of unsafe code points, given a collator.
1237 * A character is unsafe if you could append any character and cause the ordering to alter significantly. 1238 * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. 1239 * Thus if you have a character like a_umlaut, and you add a lower_dot to it, 1240 * then it normalizes to a_lower_dot + umlaut, and sorts differently. 1241 * @param coll Collator 1242 * @param unsafe a fill-in set to receive the unsafe points 1243 * @param status for catching errors 1244 * @return number of elements in the set 1245 * @internal ICU 3.0 1246 */ 1247 U_INTERNAL int32_t U_EXPORT2 1248 ucol_getUnsafeSet( const UCollator *coll, 1249 USet *unsafe, 1250 UErrorCode *status); 1251 1252 /** Reset UCA's static pointers. You don't want to use this, unless your static memory can go away. 1253 * @internal ICU 3.2.1 1254 */ 1255 U_INTERNAL void U_EXPORT2 1256 ucol_forgetUCA(void); 1257 1258 /** Touches all resources needed for instantiating a collator from a short string definition, 1259 * thus filling up the cache. 1260 * @param definition A short string containing a locale and a set of attributes. 1261 * Attributes not explicitly mentioned are left at the default 1262 * state for a locale. 1263 * @param forceDefaults if FALSE, the settings that are the same as the collator 1264 * default settings will not be applied (for example, setting 1265 * French secondary on a French collator would not be executed). 1266 * If TRUE, all the settings will be applied regardless of the 1267 * collator default value. If the definition 1268 * strings are to be cached, should be set to FALSE. 1269 * @param parseError if not NULL, structure that will get filled with error's pre 1270 * and post context in case of error. 1271 * @param status Error code.
Apart from regular error conditions connected to 1272 * instantiating collators (like out of memory or similar), this 1273 * API will return an error if an invalid attribute or attribute/value 1274 * combination is specified. 1275 * @see ucol_openFromShortString 1276 * @internal ICU 3.2.1 1277 */ 1278 U_INTERNAL void U_EXPORT2 1279 ucol_prepareShortStringOpen( const char *definition, 1280 UBool forceDefaults, 1281 UParseError *parseError, 1282 UErrorCode *status); 1283 1284 /** Creates a binary image of a collator. This binary image can be stored and 1285 * later used to instantiate a collator using ucol_openBinary. 1286 * This API supports preflighting. 1287 * @param coll Collator 1288 * @param buffer a fill-in buffer to receive the binary image 1289 * @param capacity capacity of the destination buffer 1290 * @param status for catching errors 1291 * @return size of the image 1292 * @see ucol_openBinary 1293 * @stable ICU 3.2 1294 */ 1295 U_STABLE int32_t U_EXPORT2 1296 ucol_cloneBinary(const UCollator *coll, 1297 uint8_t *buffer, int32_t capacity, 1298 UErrorCode *status); 1299 1300 /** Opens a collator from a collator binary image created using 1301 * ucol_cloneBinary. Binary image used in instantiation of the 1302 * collator remains owned by the user and should stay around for 1303 * the lifetime of the collator. The API also takes a base collator 1304 * which usually should be UCA. 1305 * @param bin binary image owned by the user and required through the 1306 * lifetime of the collator 1307 * @param length size of the image. If negative, the API will try to 1308 * figure out the length of the image 1309 * @param base fallback collator, usually UCA. Base is required to be 1310 * present through the lifetime of the collator. Currently 1311 * it cannot be NULL.
1312 * @param status for catching errors 1313 * @return newly created collator 1314 * @see ucol_cloneBinary 1315 * @stable ICU 3.2 1316 */ 1317 U_STABLE UCollator* U_EXPORT2 1318 ucol_openBinary(const uint8_t *bin, int32_t length, 1319 const UCollator *base, 1320 UErrorCode *status); 1321 1322 1323 #endif /* #if !UCONFIG_NO_COLLATION */ 1324 1325 #endif /* #ifndef UCOL_H */ 1326