1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * Copyright (c) 1996-2015, International Business Machines Corporation and others. 6 * All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 #ifndef UCOL_H 11 #define UCOL_H 12 13 #include "unicode/utypes.h" 14 15 #if !UCONFIG_NO_COLLATION 16 17 #include "unicode/unorm.h" 18 #include "unicode/localpointer.h" 19 #include "unicode/parseerr.h" 20 #include "unicode/uloc.h" 21 #include "unicode/uset.h" 22 #include "unicode/uscript.h" 23 24 /** 25 * \file 26 * \brief C API: Collator 27 * 28 * <h2> Collator C API </h2> 29 * 30 * The C API for Collator performs locale-sensitive 31 * string comparison. You use this service to build 32 * searching and sorting routines for natural language text. 33 * <p> 34 * For more information about the collation service see 35 * <a href="http://userguide.icu-project.org/collation">the User Guide</a>. 36 * <p> 37 * Collation service provides correct sorting orders for most locales supported in ICU. 38 * If specific data for a locale is not available, the orders eventually falls back 39 * to the <a href="http://www.unicode.org/reports/tr35/tr35-collation.html#Root_Collation">CLDR root sort order</a>. 40 * <p> 41 * Sort ordering may be customized by providing your own set of rules. For more on 42 * this subject see the <a href="http://userguide.icu-project.org/collation/customization"> 43 * Collation Customization</a> section of the User Guide. 44 * <p> 45 * @see UCollationResult 46 * @see UNormalizationMode 47 * @see UCollationStrength 48 * @see UCollationElements 49 */ 50 51 /** A collator. 52 * For usage in C programs. 
53 */ 54 struct UCollator; 55 /** structure representing a collator object instance 56 * @stable ICU 2.0 57 */ 58 typedef struct UCollator UCollator; 59 60 61 /** 62 * UCOL_LESS is returned if source string is compared to be less than target 63 * string in the ucol_strcoll() method. 64 * UCOL_EQUAL is returned if source string is compared to be equal to target 65 * string in the ucol_strcoll() method. 66 * UCOL_GREATER is returned if source string is compared to be greater than 67 * target string in the ucol_strcoll() method. 68 * @see ucol_strcoll() 69 * <p> 70 * Possible values for a comparison result 71 * @stable ICU 2.0 72 */ 73 typedef enum { 74 /** string a == string b */ 75 UCOL_EQUAL = 0, 76 /** string a > string b */ 77 UCOL_GREATER = 1, 78 /** string a < string b */ 79 UCOL_LESS = -1 80 } UCollationResult ; 81 82 83 /** Enum containing attribute values for controling collation behavior. 84 * Here are all the allowable values. Not every attribute can take every value. The only 85 * universal value is UCOL_DEFAULT, which resets the attribute value to the predefined 86 * value for that locale 87 * @stable ICU 2.0 88 */ 89 typedef enum { 90 /** accepted by most attributes */ 91 UCOL_DEFAULT = -1, 92 93 /** Primary collation strength */ 94 UCOL_PRIMARY = 0, 95 /** Secondary collation strength */ 96 UCOL_SECONDARY = 1, 97 /** Tertiary collation strength */ 98 UCOL_TERTIARY = 2, 99 /** Default collation strength */ 100 UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, 101 UCOL_CE_STRENGTH_LIMIT, 102 /** Quaternary collation strength */ 103 UCOL_QUATERNARY=3, 104 /** Identical collation strength */ 105 UCOL_IDENTICAL=15, 106 UCOL_STRENGTH_LIMIT, 107 108 /** Turn the feature off - works for UCOL_FRENCH_COLLATION, 109 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 110 & UCOL_DECOMPOSITION_MODE*/ 111 UCOL_OFF = 16, 112 /** Turn the feature on - works for UCOL_FRENCH_COLLATION, 113 UCOL_CASE_LEVEL, UCOL_HIRAGANA_QUATERNARY_MODE 114 & UCOL_DECOMPOSITION_MODE*/ 115 UCOL_ON = 17, 
116 117 /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be shifted */ 118 UCOL_SHIFTED = 20, 119 /** Valid for UCOL_ALTERNATE_HANDLING. Alternate handling will be non ignorable */ 120 UCOL_NON_IGNORABLE = 21, 121 122 /** Valid for UCOL_CASE_FIRST - 123 lower case sorts before upper case */ 124 UCOL_LOWER_FIRST = 24, 125 /** upper case sorts before lower case */ 126 UCOL_UPPER_FIRST = 25, 127 128 #ifndef U_HIDE_DEPRECATED_API 129 /** 130 * One more than the highest normal UColAttributeValue value. 131 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 132 */ 133 UCOL_ATTRIBUTE_VALUE_COUNT 134 #endif /* U_HIDE_DEPRECATED_API */ 135 } UColAttributeValue; 136 137 /** 138 * Enum containing the codes for reordering segments of the collation table that are not script 139 * codes. These reordering codes are to be used in conjunction with the script codes. 140 * @see ucol_getReorderCodes 141 * @see ucol_setReorderCodes 142 * @see ucol_getEquivalentReorderCodes 143 * @see UScriptCode 144 * @stable ICU 4.8 145 */ 146 typedef enum { 147 /** 148 * A special reordering code that is used to specify the default 149 * reordering codes for a locale. 150 * @stable ICU 4.8 151 */ 152 UCOL_REORDER_CODE_DEFAULT = -1, 153 /** 154 * A special reordering code that is used to specify no reordering codes. 155 * @stable ICU 4.8 156 */ 157 UCOL_REORDER_CODE_NONE = USCRIPT_UNKNOWN, 158 /** 159 * A special reordering code that is used to specify all other codes used for 160 * reordering except for the codes lised as UColReorderCode values and those 161 * listed explicitly in a reordering. 162 * @stable ICU 4.8 163 */ 164 UCOL_REORDER_CODE_OTHERS = USCRIPT_UNKNOWN, 165 /** 166 * Characters with the space property. 167 * This is equivalent to the rule value "space". 168 * @stable ICU 4.8 169 */ 170 UCOL_REORDER_CODE_SPACE = 0x1000, 171 /** 172 * The first entry in the enumeration of reordering groups. 
This is intended for use in 173 * range checking and enumeration of the reorder codes. 174 * @stable ICU 4.8 175 */ 176 UCOL_REORDER_CODE_FIRST = UCOL_REORDER_CODE_SPACE, 177 /** 178 * Characters with the punctuation property. 179 * This is equivalent to the rule value "punct". 180 * @stable ICU 4.8 181 */ 182 UCOL_REORDER_CODE_PUNCTUATION = 0x1001, 183 /** 184 * Characters with the symbol property. 185 * This is equivalent to the rule value "symbol". 186 * @stable ICU 4.8 187 */ 188 UCOL_REORDER_CODE_SYMBOL = 0x1002, 189 /** 190 * Characters with the currency property. 191 * This is equivalent to the rule value "currency". 192 * @stable ICU 4.8 193 */ 194 UCOL_REORDER_CODE_CURRENCY = 0x1003, 195 /** 196 * Characters with the digit property. 197 * This is equivalent to the rule value "digit". 198 * @stable ICU 4.8 199 */ 200 UCOL_REORDER_CODE_DIGIT = 0x1004, 201 #ifndef U_HIDE_DEPRECATED_API 202 /** 203 * One more than the highest normal UColReorderCode value. 204 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 205 */ 206 UCOL_REORDER_CODE_LIMIT = 0x1005 207 #endif /* U_HIDE_DEPRECATED_API */ 208 } UColReorderCode; 209 210 /** 211 * Base letter represents a primary difference. Set comparison 212 * level to UCOL_PRIMARY to ignore secondary and tertiary differences. 213 * Use this to set the strength of a Collator object. 214 * Example of primary difference, "abc" < "abd" 215 * 216 * Diacritical differences on the same base letter represent a secondary 217 * difference. Set comparison level to UCOL_SECONDARY to ignore tertiary 218 * differences. Use this to set the strength of a Collator object. 219 * Example of secondary difference, "ä" >> "a". 220 * 221 * Uppercase and lowercase versions of the same character represents a 222 * tertiary difference. Set comparison level to UCOL_TERTIARY to include 223 * all comparison differences. Use this to set the strength of a Collator 224 * object. 
225 * Example of tertiary difference, "abc" <<< "ABC". 226 * 227 * Two characters are considered "identical" when they have the same 228 * unicode spellings. UCOL_IDENTICAL. 229 * For example, "ä" == "ä". 230 * 231 * UCollationStrength is also used to determine the strength of sort keys 232 * generated from UCollator objects 233 * These values can be now found in the UColAttributeValue enum. 234 * @stable ICU 2.0 235 **/ 236 typedef UColAttributeValue UCollationStrength; 237 238 /** Attributes that collation service understands. All the attributes can take UCOL_DEFAULT 239 * value, as well as the values specific to each one. 240 * @stable ICU 2.0 241 */ 242 typedef enum { 243 /** Attribute for direction of secondary weights - used in Canadian French. 244 * Acceptable values are UCOL_ON, which results in secondary weights 245 * being considered backwards and UCOL_OFF which treats secondary 246 * weights in the order they appear. 247 * @stable ICU 2.0 248 */ 249 UCOL_FRENCH_COLLATION, 250 /** Attribute for handling variable elements. 251 * Acceptable values are UCOL_NON_IGNORABLE (default) 252 * which treats all the codepoints with non-ignorable 253 * primary weights in the same way, 254 * and UCOL_SHIFTED which causes codepoints with primary 255 * weights that are equal or below the variable top value 256 * to be ignored on primary level and moved to the quaternary 257 * level. 258 * @stable ICU 2.0 259 */ 260 UCOL_ALTERNATE_HANDLING, 261 /** Controls the ordering of upper and lower case letters. 262 * Acceptable values are UCOL_OFF (default), which orders 263 * upper and lower case letters in accordance to their tertiary 264 * weights, UCOL_UPPER_FIRST which forces upper case letters to 265 * sort before lower case letters, and UCOL_LOWER_FIRST which does 266 * the opposite. 267 * @stable ICU 2.0 268 */ 269 UCOL_CASE_FIRST, 270 /** Controls whether an extra case level (positioned before the third 271 * level) is generated or not. 
Acceptable values are UCOL_OFF (default), 272 * when case level is not generated, and UCOL_ON which causes the case 273 * level to be generated. Contents of the case level are affected by 274 * the value of UCOL_CASE_FIRST attribute. A simple way to ignore 275 * accent differences in a string is to set the strength to UCOL_PRIMARY 276 * and enable case level. 277 * @stable ICU 2.0 278 */ 279 UCOL_CASE_LEVEL, 280 /** Controls whether the normalization check and necessary normalizations 281 * are performed. When set to UCOL_OFF (default) no normalization check 282 * is performed. The correctness of the result is guaranteed only if the 283 * input data is in so-called FCD form (see users manual for more info). 284 * When set to UCOL_ON, an incremental check is performed to see whether 285 * the input data is in the FCD form. If the data is not in the FCD form, 286 * incremental NFD normalization is performed. 287 * @stable ICU 2.0 288 */ 289 UCOL_NORMALIZATION_MODE, 290 /** An alias for UCOL_NORMALIZATION_MODE attribute. 291 * @stable ICU 2.0 292 */ 293 UCOL_DECOMPOSITION_MODE = UCOL_NORMALIZATION_MODE, 294 /** The strength attribute. Can be either UCOL_PRIMARY, UCOL_SECONDARY, 295 * UCOL_TERTIARY, UCOL_QUATERNARY or UCOL_IDENTICAL. The usual strength 296 * for most locales (except Japanese) is tertiary. 297 * 298 * Quaternary strength 299 * is useful when combined with shifted setting for alternate handling 300 * attribute and for JIS X 4061 collation, when it is used to distinguish 301 * between Katakana and Hiragana. 302 * Otherwise, quaternary level 303 * is affected only by the number of non-ignorable code points in 304 * the string. 305 * 306 * Identical strength is rarely useful, as it amounts 307 * to codepoints of the NFD form of the string. 
308 * @stable ICU 2.0 309 */ 310 UCOL_STRENGTH, 311 #ifndef U_HIDE_DEPRECATED_API 312 /** When turned on, this attribute positions Hiragana before all 313 * non-ignorables on quaternary level This is a sneaky way to produce JIS 314 * sort order. 315 * 316 * This attribute was an implementation detail of the CLDR Japanese tailoring. 317 * Since ICU 50, this attribute is not settable any more via API functions. 318 * Since CLDR 25/ICU 53, explicit quaternary relations are used 319 * to achieve the same Japanese sort order. 320 * 321 * @deprecated ICU 50 Implementation detail, cannot be set via API, was removed from implementation. 322 */ 323 UCOL_HIRAGANA_QUATERNARY_MODE = UCOL_STRENGTH + 1, 324 #endif /* U_HIDE_DEPRECATED_API */ 325 /** 326 * When turned on, this attribute makes 327 * substrings of digits sort according to their numeric values. 328 * 329 * This is a way to get '100' to sort AFTER '2'. Note that the longest 330 * digit substring that can be treated as a single unit is 331 * 254 digits (not counting leading zeros). If a digit substring is 332 * longer than that, the digits beyond the limit will be treated as a 333 * separate digit substring. 334 * 335 * A "digit" in this sense is a code point with General_Category=Nd, 336 * which does not include circled numbers, roman numerals, etc. 337 * Only a contiguous digit substring is considered, that is, 338 * non-negative integers without separators. 339 * There is no support for plus/minus signs, decimals, exponents, etc. 340 * 341 * @stable ICU 2.8 342 */ 343 UCOL_NUMERIC_COLLATION = UCOL_STRENGTH + 2, 344 345 /* Do not conditionalize the following with #ifndef U_HIDE_DEPRECATED_API, 346 * it is needed for layout of RuleBasedCollator object. */ 347 #ifndef U_FORCE_HIDE_DEPRECATED_API 348 /** 349 * One more than the highest normal UColAttribute value. 350 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
351 */ 352 UCOL_ATTRIBUTE_COUNT 353 #endif // U_FORCE_HIDE_DEPRECATED_API 354 } UColAttribute; 355 356 /** Options for retrieving the rule string 357 * @stable ICU 2.0 358 */ 359 typedef enum { 360 /** 361 * Retrieves the tailoring rules only. 362 * Same as calling the version of getRules() without UColRuleOption. 363 * @stable ICU 2.0 364 */ 365 UCOL_TAILORING_ONLY, 366 /** 367 * Retrieves the "UCA rules" concatenated with the tailoring rules. 368 * The "UCA rules" are an <i>approximation</i> of the root collator's sort order. 369 * They are almost never used or useful at runtime and can be removed from the data. 370 * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales 371 * @stable ICU 2.0 372 */ 373 UCOL_FULL_RULES 374 } UColRuleOption ; 375 376 /** 377 * Open a UCollator for comparing strings. 378 * 379 * For some languages, multiple collation types are available; 380 * for example, "de@collation=phonebook". 381 * Starting with ICU 54, collation attributes can be specified via locale keywords as well, 382 * in the old locale extension syntax ("el@colCaseFirst=upper") 383 * or in language tag syntax ("el-u-kf-upper"). 384 * See <a href="http://userguide.icu-project.org/collation/api">User Guide: Collation API</a>. 385 * 386 * The UCollator pointer is used in all the calls to the Collation 387 * service. After finished, collator must be disposed of by calling 388 * {@link #ucol_close }. 389 * @param loc The locale containing the required collation rules. 390 * Special values for locales can be passed in - 391 * if NULL is passed for the locale, the default locale 392 * collation rules will be used. If empty string ("") or 393 * "root" are passed, the root collator will be returned. 394 * @param status A pointer to a UErrorCode to receive any errors 395 * @return A pointer to a UCollator, or 0 if an error occurred. 
396 * @see ucol_openRules 397 * @see ucol_safeClone 398 * @see ucol_close 399 * @stable ICU 2.0 400 */ 401 U_STABLE UCollator* U_EXPORT2 402 ucol_open(const char *loc, UErrorCode *status); 403 404 /** 405 * Produce a UCollator instance according to the rules supplied. 406 * The rules are used to change the default ordering, defined in the 407 * UCA in a process called tailoring. The resulting UCollator pointer 408 * can be used in the same way as the one obtained by {@link #ucol_strcoll }. 409 * @param rules A string describing the collation rules. For the syntax 410 * of the rules please see users guide. 411 * @param rulesLength The length of rules, or -1 if null-terminated. 412 * @param normalizationMode The normalization mode: One of 413 * UCOL_OFF (expect the text to not need normalization), 414 * UCOL_ON (normalize), or 415 * UCOL_DEFAULT (set the mode according to the rules) 416 * @param strength The default collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 417 * UCOL_TERTIARY, UCOL_IDENTICAL,UCOL_DEFAULT_STRENGTH - can be also set in the rules. 418 * @param parseError A pointer to UParseError to recieve information about errors 419 * occurred during parsing. This argument can currently be set 420 * to NULL, but at users own risk. Please provide a real structure. 421 * @param status A pointer to a UErrorCode to receive any errors 422 * @return A pointer to a UCollator. It is not guaranteed that NULL be returned in case 423 * of error - please use status argument to check for errors. 424 * @see ucol_open 425 * @see ucol_safeClone 426 * @see ucol_close 427 * @stable ICU 2.0 428 */ 429 U_STABLE UCollator* U_EXPORT2 430 ucol_openRules( const UChar *rules, 431 int32_t rulesLength, 432 UColAttributeValue normalizationMode, 433 UCollationStrength strength, 434 UParseError *parseError, 435 UErrorCode *status); 436 437 #ifndef U_HIDE_DEPRECATED_API 438 /** 439 * Open a collator defined by a short form string. 
440 * The structure and the syntax of the string is defined in the "Naming collators" 441 * section of the users guide: 442 * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme 443 * Attributes are overriden by the subsequent attributes. So, for "S2_S3", final 444 * strength will be 3. 3066bis locale overrides individual locale parts. 445 * The call to this function is equivalent to a call to ucol_open, followed by a 446 * series of calls to ucol_setAttribute and ucol_setVariableTop. 447 * @param definition A short string containing a locale and a set of attributes. 448 * Attributes not explicitly mentioned are left at the default 449 * state for a locale. 450 * @param parseError if not NULL, structure that will get filled with error's pre 451 * and post context in case of error. 452 * @param forceDefaults if FALSE, the settings that are the same as the collator 453 * default settings will not be applied (for example, setting 454 * French secondary on a French collator would not be executed). 455 * If TRUE, all the settings will be applied regardless of the 456 * collator default value. If the definition 457 * strings are to be cached, should be set to FALSE. 458 * @param status Error code. Apart from regular error conditions connected to 459 * instantiating collators (like out of memory or similar), this 460 * API will return an error if an invalid attribute or attribute/value 461 * combination is specified. 462 * @return A pointer to a UCollator or 0 if an error occured (including an 463 * invalid attribute). 464 * @see ucol_open 465 * @see ucol_setAttribute 466 * @see ucol_setVariableTop 467 * @see ucol_getShortDefinitionString 468 * @see ucol_normalizeShortDefinitionString 469 * @deprecated ICU 54 Use ucol_open() with language tag collation keywords instead. 
470 */ 471 U_DEPRECATED UCollator* U_EXPORT2 472 ucol_openFromShortString( const char *definition, 473 UBool forceDefaults, 474 UParseError *parseError, 475 UErrorCode *status); 476 #endif /* U_HIDE_DEPRECATED_API */ 477 478 #ifndef U_HIDE_DEPRECATED_API 479 /** 480 * Get a set containing the contractions defined by the collator. The set includes 481 * both the root collator's contractions and the contractions defined by the collator. This set 482 * will contain only strings. If a tailoring explicitly suppresses contractions from 483 * the root collator (like Russian), removed contractions will not be in the resulting set. 484 * @param coll collator 485 * @param conts the set to hold the result. It gets emptied before 486 * contractions are added. 487 * @param status to hold the error code 488 * @return the size of the contraction set 489 * 490 * @deprecated ICU 3.4, use ucol_getContractionsAndExpansions instead 491 */ 492 U_DEPRECATED int32_t U_EXPORT2 493 ucol_getContractions( const UCollator *coll, 494 USet *conts, 495 UErrorCode *status); 496 #endif /* U_HIDE_DEPRECATED_API */ 497 498 /** 499 * Get a set containing the expansions defined by the collator. The set includes 500 * both the root collator's expansions and the expansions defined by the tailoring 501 * @param coll collator 502 * @param contractions if not NULL, the set to hold the contractions 503 * @param expansions if not NULL, the set to hold the expansions 504 * @param addPrefixes add the prefix contextual elements to contractions 505 * @param status to hold the error code 506 * 507 * @stable ICU 3.4 508 */ 509 U_STABLE void U_EXPORT2 510 ucol_getContractionsAndExpansions( const UCollator *coll, 511 USet *contractions, USet *expansions, 512 UBool addPrefixes, UErrorCode *status); 513 514 /** 515 * Close a UCollator. 516 * Once closed, a UCollator should not be used. Every open collator should 517 * be closed. Otherwise, a memory leak will result. 518 * @param coll The UCollator to close. 
519 * @see ucol_open 520 * @see ucol_openRules 521 * @see ucol_safeClone 522 * @stable ICU 2.0 523 */ 524 U_STABLE void U_EXPORT2 525 ucol_close(UCollator *coll); 526 527 #if U_SHOW_CPLUSPLUS_API 528 529 U_NAMESPACE_BEGIN 530 531 /** 532 * \class LocalUCollatorPointer 533 * "Smart pointer" class, closes a UCollator via ucol_close(). 534 * For most methods see the LocalPointerBase base class. 535 * 536 * @see LocalPointerBase 537 * @see LocalPointer 538 * @stable ICU 4.4 539 */ 540 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCollatorPointer, UCollator, ucol_close); 541 542 U_NAMESPACE_END 543 544 #endif 545 546 /** 547 * Compare two strings. 548 * The strings will be compared using the options already specified. 549 * @param coll The UCollator containing the comparison rules. 550 * @param source The source string. 551 * @param sourceLength The length of source, or -1 if null-terminated. 552 * @param target The target string. 553 * @param targetLength The length of target, or -1 if null-terminated. 554 * @return The result of comparing the strings; one of UCOL_EQUAL, 555 * UCOL_GREATER, UCOL_LESS 556 * @see ucol_greater 557 * @see ucol_greaterOrEqual 558 * @see ucol_equal 559 * @stable ICU 2.0 560 */ 561 U_STABLE UCollationResult U_EXPORT2 562 ucol_strcoll( const UCollator *coll, 563 const UChar *source, 564 int32_t sourceLength, 565 const UChar *target, 566 int32_t targetLength); 567 568 /** 569 * Compare two strings in UTF-8. 570 * The strings will be compared using the options already specified. 571 * Note: When input string contains malformed a UTF-8 byte sequence, 572 * this function treats these bytes as REPLACEMENT CHARACTER (U+FFFD). 573 * @param coll The UCollator containing the comparison rules. 574 * @param source The source UTF-8 string. 575 * @param sourceLength The length of source, or -1 if null-terminated. 576 * @param target The target UTF-8 string. 577 * @param targetLength The length of target, or -1 if null-terminated. 
578 * @param status A pointer to a UErrorCode to receive any errors 579 * @return The result of comparing the strings; one of UCOL_EQUAL, 580 * UCOL_GREATER, UCOL_LESS 581 * @see ucol_greater 582 * @see ucol_greaterOrEqual 583 * @see ucol_equal 584 * @stable ICU 50 585 */ 586 U_STABLE UCollationResult U_EXPORT2 587 ucol_strcollUTF8( 588 const UCollator *coll, 589 const char *source, 590 int32_t sourceLength, 591 const char *target, 592 int32_t targetLength, 593 UErrorCode *status); 594 595 /** 596 * Determine if one string is greater than another. 597 * This function is equivalent to {@link #ucol_strcoll } == UCOL_GREATER 598 * @param coll The UCollator containing the comparison rules. 599 * @param source The source string. 600 * @param sourceLength The length of source, or -1 if null-terminated. 601 * @param target The target string. 602 * @param targetLength The length of target, or -1 if null-terminated. 603 * @return TRUE if source is greater than target, FALSE otherwise. 604 * @see ucol_strcoll 605 * @see ucol_greaterOrEqual 606 * @see ucol_equal 607 * @stable ICU 2.0 608 */ 609 U_STABLE UBool U_EXPORT2 610 ucol_greater(const UCollator *coll, 611 const UChar *source, int32_t sourceLength, 612 const UChar *target, int32_t targetLength); 613 614 /** 615 * Determine if one string is greater than or equal to another. 616 * This function is equivalent to {@link #ucol_strcoll } != UCOL_LESS 617 * @param coll The UCollator containing the comparison rules. 618 * @param source The source string. 619 * @param sourceLength The length of source, or -1 if null-terminated. 620 * @param target The target string. 621 * @param targetLength The length of target, or -1 if null-terminated. 622 * @return TRUE if source is greater than or equal to target, FALSE otherwise. 
623 * @see ucol_strcoll 624 * @see ucol_greater 625 * @see ucol_equal 626 * @stable ICU 2.0 627 */ 628 U_STABLE UBool U_EXPORT2 629 ucol_greaterOrEqual(const UCollator *coll, 630 const UChar *source, int32_t sourceLength, 631 const UChar *target, int32_t targetLength); 632 633 /** 634 * Compare two strings for equality. 635 * This function is equivalent to {@link #ucol_strcoll } == UCOL_EQUAL 636 * @param coll The UCollator containing the comparison rules. 637 * @param source The source string. 638 * @param sourceLength The length of source, or -1 if null-terminated. 639 * @param target The target string. 640 * @param targetLength The length of target, or -1 if null-terminated. 641 * @return TRUE if source is equal to target, FALSE otherwise 642 * @see ucol_strcoll 643 * @see ucol_greater 644 * @see ucol_greaterOrEqual 645 * @stable ICU 2.0 646 */ 647 U_STABLE UBool U_EXPORT2 648 ucol_equal(const UCollator *coll, 649 const UChar *source, int32_t sourceLength, 650 const UChar *target, int32_t targetLength); 651 652 /** 653 * Compare two UTF-8 encoded trings. 654 * The strings will be compared using the options already specified. 655 * @param coll The UCollator containing the comparison rules. 656 * @param sIter The source string iterator. 657 * @param tIter The target string iterator. 658 * @return The result of comparing the strings; one of UCOL_EQUAL, 659 * UCOL_GREATER, UCOL_LESS 660 * @param status A pointer to a UErrorCode to receive any errors 661 * @see ucol_strcoll 662 * @stable ICU 2.6 663 */ 664 U_STABLE UCollationResult U_EXPORT2 665 ucol_strcollIter( const UCollator *coll, 666 UCharIterator *sIter, 667 UCharIterator *tIter, 668 UErrorCode *status); 669 670 /** 671 * Get the collation strength used in a UCollator. 672 * The strength influences how strings are compared. 673 * @param coll The UCollator to query. 
674 * @return The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, 675 * UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL 676 * @see ucol_setStrength 677 * @stable ICU 2.0 678 */ 679 U_STABLE UCollationStrength U_EXPORT2 680 ucol_getStrength(const UCollator *coll); 681 682 /** 683 * Set the collation strength used in a UCollator. 684 * The strength influences how strings are compared. 685 * @param coll The UCollator to set. 686 * @param strength The desired collation strength; one of UCOL_PRIMARY, 687 * UCOL_SECONDARY, UCOL_TERTIARY, UCOL_QUATERNARY, UCOL_IDENTICAL, UCOL_DEFAULT 688 * @see ucol_getStrength 689 * @stable ICU 2.0 690 */ 691 U_STABLE void U_EXPORT2 692 ucol_setStrength(UCollator *coll, 693 UCollationStrength strength); 694 695 /** 696 * Retrieves the reordering codes for this collator. 697 * These reordering codes are a combination of UScript codes and UColReorderCode entries. 698 * @param coll The UCollator to query. 699 * @param dest The array to fill with the script ordering. 700 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 701 * will only return the length of the result without writing any codes (pre-flighting). 702 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 703 * failure before the function call. 704 * @return The number of reordering codes written to the dest array. 705 * @see ucol_setReorderCodes 706 * @see ucol_getEquivalentReorderCodes 707 * @see UScriptCode 708 * @see UColReorderCode 709 * @stable ICU 4.8 710 */ 711 U_STABLE int32_t U_EXPORT2 712 ucol_getReorderCodes(const UCollator* coll, 713 int32_t* dest, 714 int32_t destCapacity, 715 UErrorCode *pErrorCode); 716 /** 717 * Sets the reordering codes for this collator. 718 * Collation reordering allows scripts and some other groups of characters 719 * to be moved relative to each other. This reordering is done on top of 720 * the DUCET/CLDR standard collation order. 
Reordering can specify groups to be placed 721 * at the start and/or the end of the collation order. These groups are specified using 722 * UScript codes and UColReorderCode entries. 723 * 724 * <p>By default, reordering codes specified for the start of the order are placed in the 725 * order given after several special non-script blocks. These special groups of characters 726 * are space, punctuation, symbol, currency, and digit. These special groups are represented with 727 * UColReorderCode entries. Script groups can be intermingled with 728 * these special non-script groups if those special groups are explicitly specified in the reordering. 729 * 730 * <p>The special code OTHERS stands for any script that is not explicitly 731 * mentioned in the list of reordering codes given. Anything that is after OTHERS 732 * will go at the very end of the reordering in the order given. 733 * 734 * <p>The special reorder code DEFAULT will reset the reordering for this collator 735 * to the default for this collator. The default reordering may be the DUCET/CLDR order or may be a reordering that 736 * was specified when this collator was created from resource data or from rules. The 737 * DEFAULT code <b>must</b> be the sole code supplied when it is used. 738 * If not, then U_ILLEGAL_ARGUMENT_ERROR will be set. 739 * 740 * <p>The special reorder code NONE will remove any reordering for this collator. 741 * The result of setting no reordering will be to have the DUCET/CLDR ordering used. The 742 * NONE code <b>must</b> be the sole code supplied when it is used. 743 * 744 * @param coll The UCollator to set. 745 * @param reorderCodes An array of script codes in the new order. This can be NULL if the 746 * length is also set to 0. An empty array will clear any reordering codes on the collator. 747 * @param reorderCodesLength The length of reorderCodes. 
748 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate a 749 * failure before the function call. 750 * @see ucol_getReorderCodes 751 * @see ucol_getEquivalentReorderCodes 752 * @see UScriptCode 753 * @see UColReorderCode 754 * @stable ICU 4.8 755 */ 756 U_STABLE void U_EXPORT2 757 ucol_setReorderCodes(UCollator* coll, 758 const int32_t* reorderCodes, 759 int32_t reorderCodesLength, 760 UErrorCode *pErrorCode); 761 762 /** 763 * Retrieves the reorder codes that are grouped with the given reorder code. Some reorder 764 * codes will be grouped and must reorder together. 765 * Beginning with ICU 55, scripts only reorder together if they are primary-equal, 766 * for example Hiragana and Katakana. 767 * 768 * @param reorderCode The reorder code to determine equivalence for. 769 * @param dest The array to fill with the script ordering. 770 * @param destCapacity The length of dest. If it is 0, then dest may be NULL and the function 771 * will only return the length of the result without writing any codes (pre-flighting). 772 * @param pErrorCode Must be a valid pointer to an error code value, which must not indicate 773 * a failure before the function call. 774 * @return The number of reordering codes written to the dest array. 775 * @see ucol_setReorderCodes 776 * @see ucol_getReorderCodes 777 * @see UScriptCode 778 * @see UColReorderCode 779 * @stable ICU 4.8 780 */ 781 U_STABLE int32_t U_EXPORT2 782 ucol_getEquivalentReorderCodes(int32_t reorderCode, 783 int32_t* dest, 784 int32_t destCapacity, 785 UErrorCode *pErrorCode); 786 787 /** 788 * Get the display name for a UCollator. 789 * The display name is suitable for presentation to a user. 790 * @param objLoc The locale of the collator in question. 791 * @param dispLoc The locale for display. 792 * @param result A pointer to a buffer to receive the attribute. 793 * @param resultLength The maximum size of result. 
794 * @param status A pointer to a UErrorCode to receive any errors 795 * @return The total buffer size needed; if greater than resultLength, 796 * the output was truncated. 797 * @stable ICU 2.0 798 */ 799 U_STABLE int32_t U_EXPORT2 800 ucol_getDisplayName( const char *objLoc, 801 const char *dispLoc, 802 UChar *result, 803 int32_t resultLength, 804 UErrorCode *status); 805 806 /** 807 * Get a locale for which collation rules are available. 808 * A UCollator in a locale returned by this function will perform the correct 809 * collation for the locale. 810 * @param localeIndex The index of the desired locale; see ucol_countAvailable for the loop bound. 811 * @return A locale for which collation rules are available, or NULL if none. 812 * @see ucol_countAvailable 813 * @stable ICU 2.0 814 */ 815 U_STABLE const char* U_EXPORT2 816 ucol_getAvailable(int32_t localeIndex); 817 818 /** 819 * Determine how many locales have collation rules available. 820 * This function is most useful for determining the loop ending condition for 821 * calls to {@link #ucol_getAvailable }. 822 * @return The number of locales for which collation rules are available. 823 * @see ucol_getAvailable 824 * @stable ICU 2.0 825 */ 826 U_STABLE int32_t U_EXPORT2 827 ucol_countAvailable(void); 828 829 #if !UCONFIG_NO_SERVICE 830 /** 831 * Create a string enumerator of all locales for which a valid 832 * collator may be opened. 833 * @param status input-output error code 834 * @return a string enumeration over locale strings. The caller is 835 * responsible for closing the result. 836 * @stable ICU 3.0 837 */ 838 U_STABLE UEnumeration* U_EXPORT2 839 ucol_openAvailableLocales(UErrorCode *status); 840 #endif /* !UCONFIG_NO_SERVICE */ 841 842 /** 843 * Create a string enumerator of all possible keywords that are relevant to 844 * collation. At this point, the only recognized keyword for this 845 * service is "collation". 846 * @param status input-output error code 847 * @return a string enumeration over keyword strings. The caller is 848 * responsible for closing the result.
849 * @stable ICU 3.0 850 */ 851 U_STABLE UEnumeration* U_EXPORT2 852 ucol_getKeywords(UErrorCode *status); 853 854 /** 855 * Given a keyword, create a string enumeration of all values 856 * for that keyword that are currently in use. 857 * @param keyword a particular keyword as enumerated by 858 * ucol_getKeywords. If any other keyword is passed in, *status is set 859 * to U_ILLEGAL_ARGUMENT_ERROR. 860 * @param status input-output error code 861 * @return a string enumeration over collation keyword values, or NULL 862 * upon error. The caller is responsible for closing the result. 863 * @stable ICU 3.0 864 */ 865 U_STABLE UEnumeration* U_EXPORT2 866 ucol_getKeywordValues(const char *keyword, UErrorCode *status); 867 868 /** 869 * Given a key and a locale, returns an enumeration of string values in a preferred 870 * order that would make a difference. These are all and only those values where 871 * the open (creation) of the service with the locale formed from the input locale 872 * plus input keyword and that value has different behavior than creation with the 873 * input locale alone. 874 * @param key one of the keys supported by this service. For now, only 875 * "collation" is supported. 876 * @param locale the input locale 877 * @param commonlyUsed if set to true it will return only commonly used values 878 * with the given locale in preferred order. Otherwise, 879 * it will return all the available values for the locale. 880 * @param status error status 881 * @return a string enumeration over keyword values for the given key and the locale. NOTE(review): presumably the caller is responsible for closing the result, as for ucol_getKeywordValues - confirm. 882 * @stable ICU 4.2 883 */ 884 U_STABLE UEnumeration* U_EXPORT2 885 ucol_getKeywordValuesForLocale(const char* key, 886 const char* locale, 887 UBool commonlyUsed, 888 UErrorCode* status); 889 890 /** 891 * Return the functionally equivalent locale for the specified 892 * input locale, with respect to the given keyword, for the 893 * collation service.
If two different input locale + keyword 894 * combinations produce the same result locale, then collators 895 * instantiated for these two different input locales will behave 896 * equivalently. The converse is not always true; two collators 897 * may in fact be equivalent, yet this function may return different result locales for them, due to 898 * internal details. The return result has no other meaning than 899 * that stated above, and implies nothing as to the relationship 900 * between the two locales. This is intended for use by 901 * applications that wish to cache collators, or otherwise reuse 902 * collators when possible. The functional equivalent may change 903 * over time. For more information, please see the <a 904 * href="http://userguide.icu-project.org/locale#TOC-Locales-and-Services"> 905 * Locales and Services</a> section of the ICU User Guide. 906 * @param result fill-in buffer for the functionally equivalent result locale 907 * @param resultCapacity capacity of the fill-in buffer 908 * @param keyword a particular keyword as enumerated by 909 * ucol_getKeywords. 910 * @param locale the specified input locale 911 * @param isAvailable if non-NULL, pointer to a fill-in parameter that 912 * on return indicates whether the specified input locale was 'available' 913 * to the collation service. A locale is defined as 'available' if it 914 * physically exists within the collation locale data. 915 * @param status pointer to input-output error code 916 * @return the actual buffer size needed for the locale. If greater 917 * than resultCapacity, the returned full name will be truncated and 918 * an error code will be returned. 919 * @stable ICU 3.0 920 */ 921 U_STABLE int32_t U_EXPORT2 922 ucol_getFunctionalEquivalent(char* result, int32_t resultCapacity, 923 const char* keyword, const char* locale, 924 UBool* isAvailable, UErrorCode* status); 925 926 /** 927 * Get the collation tailoring rules from a UCollator. 928 * The rules will follow the rule syntax. 929 * @param coll The UCollator to query.
930 * @param length Pointer to an int32_t output value. NOTE(review): presumably receives the length of the returned rules, in UChars - confirm against the implementation. 931 * @return The collation tailoring rules. 932 * @stable ICU 2.0 933 */ 934 U_STABLE const UChar* U_EXPORT2 935 ucol_getRules( const UCollator *coll, 936 int32_t *length); 937 938 #ifndef U_HIDE_DEPRECATED_API 939 /** Get the short definition string for a collator. This API harvests the collator's 940 * locale and the attribute set and produces a string that can be used for opening 941 * a collator with the same attributes using the ucol_openFromShortString API. 942 * This string will be normalized. 943 * The structure and the syntax of the string are defined in the "Naming collators" 944 * section of the User Guide: 945 * http://userguide.icu-project.org/collation/concepts#TOC-Collator-naming-scheme 946 * This API supports preflighting. 947 * @param coll a collator 948 * @param locale a locale that will appear as the collator's locale in the resulting 949 * short string definition. If NULL, the locale will be harvested 950 * from the collator. 951 * @param buffer space to hold the resulting string 952 * @param capacity capacity of the buffer 953 * @param status for returning errors. All the preflighting errors are featured 954 * @return length of the resulting string 955 * @see ucol_openFromShortString 956 * @see ucol_normalizeShortDefinitionString 957 * @deprecated ICU 54 958 */ 959 U_DEPRECATED int32_t U_EXPORT2 960 ucol_getShortDefinitionString(const UCollator *coll, 961 const char *locale, 962 char *buffer, 963 int32_t capacity, 964 UErrorCode *status); 965 966 /** Verifies and normalizes a short definition string. 967 * A normalized short definition string has all the options sorted by the argument name, 968 * so that equivalent definition strings are the same. 969 * This API supports preflighting.
970 * @param source definition string 971 * @param destination space to hold the resulting string 972 * @param capacity capacity of the destination buffer 973 * @param parseError if not NULL, structure that will get filled with error's pre 974 * and post context in case of error. 975 * @param status Error code. This API will return an error if an invalid attribute 976 * or attribute/value combination is specified. All the preflighting 977 * errors are also featured 978 * @return length of the resulting normalized string. 979 * 980 * @see ucol_openFromShortString 981 * @see ucol_getShortDefinitionString 982 * 983 * @deprecated ICU 54 984 */ 985 986 U_DEPRECATED int32_t U_EXPORT2 987 ucol_normalizeShortDefinitionString(const char *source, 988 char *destination, 989 int32_t capacity, 990 UParseError *parseError, 991 UErrorCode *status); 992 #endif /* U_HIDE_DEPRECATED_API */ 993 994 995 /** 996 * Get a sort key for a string from a UCollator. 997 * Sort keys may be compared using <TT>strcmp</TT>. 998 * 999 * Note that sort keys are often less efficient than simply doing comparison. 1000 * For more details, see the ICU User Guide. 1001 * 1002 * Like ICU functions that write to an output buffer, the buffer contents 1003 * are undefined if the buffer capacity (resultLength parameter) is too small. 1004 * Unlike ICU functions that write a string to an output buffer, 1005 * the terminating zero byte is counted in the sort key length. 1006 * @param coll The UCollator containing the collation rules. 1007 * @param source The string to transform. 1008 * @param sourceLength The length of source, or -1 if null-terminated. 1009 * @param result A pointer to a buffer to receive the sort key. 1010 * @param resultLength The maximum size of result (the buffer capacity). 1011 * @return The size needed to fully store the sort key. 1012 * If there was an internal error generating the sort key, 1013 * a zero value is returned.
1014 * @see ucol_keyHashCode 1015 * @stable ICU 2.0 1016 */ 1017 U_STABLE int32_t U_EXPORT2 1018 ucol_getSortKey(const UCollator *coll, 1019 const UChar *source, 1020 int32_t sourceLength, 1021 uint8_t *result, 1022 int32_t resultLength); 1023 1024 1025 /** Gets the next count bytes of a sort key. The caller needs 1026 * to preserve the state array between calls and to provide 1027 * the same type of UCharIterator set with the same string. 1028 * The destination buffer provided must be big enough to store 1029 * the number of requested bytes. 1030 * 1031 * The generated sort key may or may not be compatible with 1032 * sort keys generated using ucol_getSortKey(). 1033 * @param coll The UCollator containing the collation rules. 1034 * @param iter UCharIterator containing the string we need 1035 * the sort key to be calculated for. 1036 * @param state Opaque state of sortkey iteration: an array of two uint32_t values that the caller preserves between calls. NOTE(review): presumably must be zeroed before the first call - confirm. 1037 * @param dest Buffer to hold the resulting sortkey part; must have room for at least count bytes. 1038 * @param count number of sort key bytes required. 1039 * @param status error code indicator. 1040 * @return the actual number of bytes of a sortkey. It can be 1041 * smaller than count if we have reached the end of 1042 * the sort key. 1043 * @stable ICU 2.6 1044 */ 1045 U_STABLE int32_t U_EXPORT2 1046 ucol_nextSortKeyPart(const UCollator *coll, 1047 UCharIterator *iter, 1048 uint32_t state[2], 1049 uint8_t *dest, int32_t count, 1050 UErrorCode *status); 1051 1052 /** Enum that is taken by the ucol_getBound API. 1053 * See below for explanation. 1054 * Do not change the values assigned to the 1055 * members of this enum.
The underlying code 1056 * depends on them having these numbers. 1057 * @stable ICU 2.0 1058 */ 1059 typedef enum { 1060 /** lower bound (inclusive) */ 1061 UCOL_BOUND_LOWER = 0, 1062 /** upper bound that will match strings of the same length */ 1063 UCOL_BOUND_UPPER = 1, 1064 /** upper bound that will match all the strings that have the same initial substring as the given string */ 1065 UCOL_BOUND_UPPER_LONG = 2, 1066 #ifndef U_HIDE_DEPRECATED_API 1067 /** 1068 * One more than the highest normal UColBoundMode value. 1069 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 1070 */ 1071 UCOL_BOUND_VALUE_COUNT 1072 #endif /* U_HIDE_DEPRECATED_API */ 1073 } UColBoundMode; 1074 1075 /** 1076 * Produce a bound for a given sortkey and a number of levels. 1077 * The return value is always the number of bytes needed, regardless of 1078 * whether the result buffer was big enough or even valid.<br> 1079 * Resulting bounds can be used to produce a range of strings that are 1080 * between upper and lower bounds. For example, if bounds are produced 1081 * for a sortkey of string "smith", strings between upper and lower 1082 * bounds with one level would include "Smith", "SMITH", "sMiTh".<br> 1083 * There are two upper bounds that can be produced. If UCOL_BOUND_UPPER 1084 * is produced, strings matched would be as above. However, if a bound 1085 * produced using UCOL_BOUND_UPPER_LONG is used, the above example will 1086 * also match "Smithsonian" and similar.<br> 1087 * For more on usage, see the example in cintltst/capitst.c in procedure 1088 * TestBounds. 1089 * Sort keys may be compared using <TT>strcmp</TT>. 1090 * @param source The source sortkey. 1091 * @param sourceLength The length of source, or -1 if null-terminated. 1092 * (If an unmodified sortkey is passed, it is always null 1093 * terminated). 1094 * @param boundType Type of bound required.
It can be UCOL_BOUND_LOWER, which 1095 * produces a lower inclusive bound; UCOL_BOUND_UPPER, which 1096 * produces an upper bound that matches strings of the same length; 1097 * or UCOL_BOUND_UPPER_LONG, which matches strings that have the 1098 * same starting substring as the source string. 1099 * @param noOfLevels Number of levels required in the resulting bound (for most 1100 * uses, the recommended value is 1). See the User Guide for an 1101 * explanation of the number of levels a sortkey can have. 1102 * @param result A pointer to a buffer to receive the resulting sortkey. 1103 * @param resultLength The maximum size of result. 1104 * @param status Used for returning an error code if something went wrong. If the 1105 * number of levels requested is higher than the number of levels 1106 * in the source key, a warning (U_SORT_KEY_TOO_SHORT_WARNING) is 1107 * issued. 1108 * @return The size needed to fully store the bound. 1109 * @see ucol_keyHashCode 1110 * @stable ICU 2.1 1111 */ 1112 U_STABLE int32_t U_EXPORT2 1113 ucol_getBound(const uint8_t *source, 1114 int32_t sourceLength, 1115 UColBoundMode boundType, 1116 uint32_t noOfLevels, 1117 uint8_t *result, 1118 int32_t resultLength, 1119 UErrorCode *status); 1120 1121 /** 1122 * Gets the version information for a Collator. Version is currently 1123 * an opaque 32-bit number which depends, among other things, on major 1124 * versions of the collator tailoring and UCA. 1125 * @param coll The UCollator to query. 1126 * @param info receives the version information; filled in by this function 1127 * @stable ICU 2.0 1128 */ 1129 U_STABLE void U_EXPORT2 1130 ucol_getVersion(const UCollator* coll, UVersionInfo info); 1131 1132 /** 1133 * Gets the UCA version information for a Collator. Version is the 1134 * UCA version number (3.1.1, 4.0). 1135 * @param coll The UCollator to query.
1136 * @param info receives the UCA version information; filled in by this function 1137 * @stable ICU 2.8 1138 */ 1139 U_STABLE void U_EXPORT2 1140 ucol_getUCAVersion(const UCollator* coll, UVersionInfo info); 1141 1142 /** 1143 * Merges two sort keys. The levels are merged with their corresponding counterparts 1144 * (primaries with primaries, secondaries with secondaries, etc.). Between the values 1145 * from the same level a separator is inserted. 1146 * 1147 * This is useful, for example, for combining sort keys from first and last names 1148 * to sort such pairs. 1149 * See http://www.unicode.org/reports/tr10/#Merging_Sort_Keys 1150 * 1151 * The recommended way to achieve "merged" sorting is by 1152 * concatenating strings with U+FFFE between them. 1153 * The concatenation has the same sort order as the merged sort keys, 1154 * but merge(getSortKey(str1), getSortKey(str2)) may differ from getSortKey(str1 + '\\uFFFE' + str2). 1155 * Using strings with U+FFFE may yield shorter sort keys. 1156 * 1157 * For details about Sort Key Features see 1158 * http://userguide.icu-project.org/collation/api#TOC-Sort-Key-Features 1159 * 1160 * It is possible to merge multiple sort keys by consecutively merging 1161 * another one with the intermediate result. 1162 * 1163 * The length of the merge result is the sum of the lengths of the input sort keys. 1164 * 1165 * Example (uncompressed): 1166 * <pre>191B1D 01 050505 01 910505 00 1167 * 1F2123 01 050505 01 910505 00</pre> 1168 * will be merged as 1169 * <pre>191B1D 02 1F2123 01 050505 02 050505 01 910505 02 910505 00</pre> 1170 * 1171 * If the destination buffer is not big enough, then its contents are undefined. 1172 * If any of the source lengths is zero or any of the source pointers is NULL/undefined, 1173 * the result is of size zero.
1174 * 1175 * @param src1 the first sort key 1176 * @param src1Length the length of the first sort key, including the zero byte at the end; 1177 * can be -1 if the function is to find the length 1178 * @param src2 the second sort key 1179 * @param src2Length the length of the second sort key, including the zero byte at the end; 1180 * can be -1 if the function is to find the length 1181 * @param dest the buffer where the merged sort key is written, 1182 * can be NULL if destCapacity==0 1183 * @param destCapacity the number of bytes in the dest buffer 1184 * @return the length of the merged sort key, src1Length+src2Length; 1185 * can be larger than destCapacity, or 0 if an error occurs (only for illegal arguments), 1186 * in which cases the contents of dest are undefined 1187 * @stable ICU 2.0 1188 */ 1189 U_STABLE int32_t U_EXPORT2 1190 ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, 1191 const uint8_t *src2, int32_t src2Length, 1192 uint8_t *dest, int32_t destCapacity); 1193 1194 /** 1195 * Universal attribute setter. 1196 * @param coll collator whose attributes are to be changed 1197 * @param attr attribute type 1198 * @param value attribute value 1199 * @param status to indicate whether the operation went on smoothly or there were errors 1200 * @see UColAttribute 1201 * @see UColAttributeValue 1202 * @see ucol_getAttribute 1203 * @stable ICU 2.0 1204 */ 1205 U_STABLE void U_EXPORT2 1206 ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status); 1207 1208 /** 1209 * Universal attribute getter. 1210 * @param coll collator whose attributes are to be queried 1211 * @param attr attribute type 1212 * @param status to indicate whether the operation went on smoothly or there were errors 1213 * @return attribute value 1214 * @see UColAttribute 1215 * @see UColAttributeValue 1216 * @see ucol_setAttribute 1217 * @stable ICU 2.0 1218 */ 1219 U_STABLE UColAttributeValue U_EXPORT2 1220 ucol_getAttribute(const UCollator
*coll, UColAttribute attr, UErrorCode *status); 1221 1222 /** 1223 * Sets the variable top to the top of the specified reordering group. 1224 * The variable top determines the highest-sorting character 1225 * which is affected by UCOL_ALTERNATE_HANDLING. 1226 * If that attribute is set to UCOL_NON_IGNORABLE, then the variable top has no effect. 1227 * @param coll the collator to modify 1228 * @param group one of UCOL_REORDER_CODE_SPACE, UCOL_REORDER_CODE_PUNCTUATION, 1229 * UCOL_REORDER_CODE_SYMBOL, UCOL_REORDER_CODE_CURRENCY; 1230 * or UCOL_REORDER_CODE_DEFAULT to restore the default max variable group 1231 * @param pErrorCode Standard ICU error code. Its input value must 1232 * pass the U_SUCCESS() test, or else the function returns 1233 * immediately. Check for U_FAILURE() on output or use with 1234 * function chaining. (See User Guide for details.) 1235 * @see ucol_getMaxVariable 1236 * @stable ICU 53 1237 */ 1238 U_STABLE void U_EXPORT2 1239 ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode); 1240 1241 /** 1242 * Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING. 1243 * @param coll the collator to query 1244 * @return the maximum variable reordering group. 1245 * @see ucol_setMaxVariable 1246 * @stable ICU 53 1247 */ 1248 U_STABLE UColReorderCode U_EXPORT2 1249 ucol_getMaxVariable(const UCollator *coll); 1250 1251 #ifndef U_HIDE_DEPRECATED_API 1252 /** 1253 * Sets the variable top to the primary weight of the specified string. 1254 * 1255 * Beginning with ICU 53, the variable top is pinned to 1256 * the top of one of the supported reordering groups, 1257 * and it must not be beyond the last of those groups. 1258 * See ucol_setMaxVariable(). 1259 * @param coll the collator 1260 * @param varTop one or more (if contraction) UChars to which the variable top should be set 1261 * @param len length of the variable top string. If -1, the string is considered to be zero-terminated. 1262 * @param status error code.
If the error code is set, the return value is undefined. 1263 * Errors set by this function are:<br> 1264 * U_CE_NOT_FOUND_ERROR if more than one character was passed and there is no such contraction<br> 1265 * U_ILLEGAL_ARGUMENT_ERROR if the variable top is beyond 1266 * the last reordering group supported by ucol_setMaxVariable() 1267 * @return the variable top primary weight 1268 * @see ucol_getVariableTop 1269 * @see ucol_restoreVariableTop 1270 * @deprecated ICU 53 Call ucol_setMaxVariable() instead. 1271 */ 1272 U_DEPRECATED uint32_t U_EXPORT2 1273 ucol_setVariableTop(UCollator *coll, 1274 const UChar *varTop, int32_t len, 1275 UErrorCode *status); 1276 #endif /* U_HIDE_DEPRECATED_API */ 1277 1278 /** 1279 * Gets the variable top value of a Collator. 1280 * @param coll collator whose variable top is to be retrieved 1281 * @param status error code (not changed by function). If the error code is set, 1282 * the return value is undefined. 1283 * @return the variable top primary weight 1284 * @see ucol_getMaxVariable 1285 * @see ucol_setVariableTop 1286 * @see ucol_restoreVariableTop 1287 * @stable ICU 2.0 1288 */ 1289 U_STABLE uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status); 1290 1291 #ifndef U_HIDE_DEPRECATED_API 1292 /** 1293 * Sets the variable top to the specified primary weight. 1294 * 1295 * Beginning with ICU 53, the variable top is pinned to 1296 * the top of one of the supported reordering groups, 1297 * and it must not be beyond the last of those groups. 1298 * See ucol_setMaxVariable(). 1299 * @param coll collator whose variable top is to be set 1300 * @param varTop primary weight, as returned by ucol_setVariableTop or ucol_getVariableTop 1301 * @param status error code 1302 * @see ucol_getVariableTop 1303 * @see ucol_setVariableTop 1304 * @deprecated ICU 53 Call ucol_setMaxVariable() instead.
1305 */ 1306 U_DEPRECATED void U_EXPORT2 1307 ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status); 1308 #endif /* U_HIDE_DEPRECATED_API */ 1309 1310 /** 1311 * Thread-safe cloning operation. The result is a clone of a given collator. 1312 * @param coll collator to be cloned 1313 * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br> 1314 * User-allocated space for the new clone. 1315 * If NULL, new memory will be allocated. 1316 * If the buffer is not large enough, new memory will be allocated. 1317 * Clients can use the U_COL_SAFECLONE_BUFFERSIZE. 1318 * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br> 1319 * Pointer to the size of the allocated space. 1320 * If *pBufferSize == 0, a sufficient size for use in cloning will 1321 * be returned ('pre-flighting'). 1322 * If *pBufferSize is not enough for a stack-based safe clone, 1323 * new memory will be allocated. 1324 * @param status to indicate whether the operation went on smoothly or there were errors. 1325 * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any 1326 * allocations were necessary. NOTE(review): this appears to correspond to U_SAFECLONE_ALLOCATED_WARNING in utypes.h - confirm which name applies. 1327 * @return pointer to the new clone. NOTE(review): presumably released with ucol_close() - confirm. 1328 * @see ucol_open 1329 * @see ucol_openRules 1330 * @see ucol_close 1331 * @stable ICU 2.0 1332 */ 1333 U_STABLE UCollator* U_EXPORT2 1334 ucol_safeClone(const UCollator *coll, 1335 void *stackBuffer, 1336 int32_t *pBufferSize, 1337 UErrorCode *status); 1338 1339 #ifndef U_HIDE_DEPRECATED_API 1340 1341 /** Default memory size for the new clone. 1342 * @deprecated ICU 52. Do not rely on ucol_safeClone() cloning into any provided buffer. 1343 */ 1344 #define U_COL_SAFECLONE_BUFFERSIZE 1 1345 1346 #endif /* U_HIDE_DEPRECATED_API */ 1347 1348 /** 1349 * Returns the current rules. The delta parameter defines whether the full rules are returned or just the tailoring. 1350 * Returns the number of UChars needed to store the rules.
If buffer is NULL or bufferLen is not enough 1351 * to store the rules, this function will store only as much as fits in the available space. 1352 * 1353 * ucol_getRules() should normally be used instead. 1354 * See http://userguide.icu-project.org/collation/customization#TOC-Building-on-Existing-Locales 1355 * @param coll collator to get the rules from 1356 * @param delta one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES. 1357 * @param buffer buffer to store the result in. If NULL, you'll get no rules. 1358 * @param bufferLen length of buffer to store rules in. If less than needed, you'll get only the part that fits in. 1359 * @return the number of UChars needed to store the rules 1360 * @stable ICU 2.0 1361 * @see UCOL_FULL_RULES 1362 */ 1363 U_STABLE int32_t U_EXPORT2 1364 ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen); 1365 1366 #ifndef U_HIDE_DEPRECATED_API 1367 /** 1368 * Gets the locale name of the collator. If the collator 1369 * is instantiated from the rules, then this function returns 1370 * NULL. 1371 * @param coll The UCollator for which the locale is needed 1372 * @param type You can choose between requested, valid and actual 1373 * locale. For a description, see the definition of 1374 * ULocDataLocaleType in uloc.h 1375 * @param status error code of the operation 1376 * @return real locale name from which the collation data comes. 1377 * If the collator was instantiated from rules, returns 1378 * NULL. 1379 * @deprecated ICU 2.8 Use ucol_getLocaleByType instead 1380 */ 1381 U_DEPRECATED const char * U_EXPORT2 1382 ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1383 #endif /* U_HIDE_DEPRECATED_API */ 1384 1385 /** 1386 * Gets the locale name of the collator. If the collator 1387 * is instantiated from the rules, then this function returns 1388 * NULL. 1389 * @param coll The UCollator for which the locale is needed 1390 * @param type You can choose between requested, valid and actual 1391 * locale.
For a description, see the definition of 1392 * ULocDataLocaleType in uloc.h 1393 * @param status error code of the operation 1394 * @return real locale name from which the collation data comes. 1395 * If the collator was instantiated from rules, returns 1396 * NULL. 1397 * @stable ICU 2.8 1398 */ 1399 U_STABLE const char * U_EXPORT2 1400 ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status); 1401 1402 /** 1403 * Get a Unicode set that contains all the characters and sequences tailored in 1404 * this collator. The result must be disposed of by using uset_close. 1405 * @param coll The UCollator for which we want to get tailored chars 1406 * @param status error code of the operation 1407 * @return a pointer to a newly created USet. Must be disposed of by using uset_close. 1408 * @see ucol_openRules 1409 * @see uset_close 1410 * @stable ICU 2.4 1411 */ 1412 U_STABLE USet * U_EXPORT2 1413 ucol_getTailoredSet(const UCollator *coll, UErrorCode *status); 1414 1415 #ifndef U_HIDE_INTERNAL_API 1416 /** Calculates the set of unsafe code points, given a collator. 1417 * A character is unsafe if you could append any character and cause the ordering to alter significantly. 1418 * Collation sorts in normalized order, so anything that rearranges in normalization can cause this. 1419 * Thus if you have a character like a_umlaut, and you add a lower_dot to it, 1420 * then it normalizes to a_lower_dot + umlaut, and sorts differently. 1421 * @param coll the collator to examine 1422 * @param unsafe a fill-in set to receive the unsafe points 1423 * @param status for catching errors 1424 * @return number of elements in the set 1425 * @internal ICU 3.0 1426 */ 1427 U_INTERNAL int32_t U_EXPORT2 1428 ucol_getUnsafeSet( const UCollator *coll, 1429 USet *unsafe, 1430 UErrorCode *status); 1431 1432 /** Touches all resources needed for instantiating a collator from a short string definition, 1433 * thus filling up the cache.
1434 * @param definition A short string containing a locale and a set of attributes. 1435 * Attributes not explicitly mentioned are left at the default 1436 * state for a locale. 1437 * @param forceDefaults if FALSE, the settings that are the same as the collator 1438 * default settings will not be applied (for example, setting 1439 * French secondary on a French collator would not be executed). 1440 * If TRUE, all the settings will be applied regardless of the 1441 * collator default value. If the definition 1442 * strings are to be cached, this should be set to FALSE. 1443 * @param parseError if not NULL, structure that will get filled with error's pre 1444 * and post context in case of error. 1445 * @param status Error code. Apart from regular error conditions connected to 1446 * instantiating collators (like out of memory or similar), this 1447 * API will return an error if an invalid attribute or attribute/value 1448 * combination is specified. 1449 * @see ucol_openFromShortString 1450 * @internal ICU 3.2.1 1451 */ 1452 U_INTERNAL void U_EXPORT2 1453 ucol_prepareShortStringOpen( const char *definition, 1454 UBool forceDefaults, 1455 UParseError *parseError, 1456 UErrorCode *status); 1457 #endif /* U_HIDE_INTERNAL_API */ 1458 1459 /** Creates a binary image of a collator. This binary image can be stored and 1460 * later used to instantiate a collator using ucol_openBinary. 1461 * This API supports preflighting. 1462 * @param coll the collator to create a binary image of 1463 * @param buffer a fill-in buffer to receive the binary image 1464 * @param capacity capacity of the destination buffer 1465 * @param status for catching errors 1466 * @return size of the image 1467 * @see ucol_openBinary 1468 * @stable ICU 3.2 1469 */ 1470 U_STABLE int32_t U_EXPORT2 1471 ucol_cloneBinary(const UCollator *coll, 1472 uint8_t *buffer, int32_t capacity, 1473 UErrorCode *status); 1474 1475 /** Opens a collator from a collator binary image created using 1476 * ucol_cloneBinary.
The binary image used in instantiation of the 1477 * collator remains owned by the user and should stay around for 1478 * the lifetime of the collator. The API also takes a base collator 1479 * which must be the root collator. 1480 * @param bin binary image owned by the user and required throughout the 1481 * lifetime of the collator 1482 * @param length size of the image. If negative, the API will try to 1483 * figure out the length of the image 1484 * @param base Base collator, for lookup of untailored characters. 1485 * Must be the root collator, must not be NULL. 1486 * The base is required to be present throughout the lifetime of the collator. 1487 * @param status for catching errors 1488 * @return newly created collator 1489 * @see ucol_cloneBinary 1490 * @stable ICU 3.2 1491 */ 1492 U_STABLE UCollator* U_EXPORT2 1493 ucol_openBinary(const uint8_t *bin, int32_t length, 1494 const UCollator *base, 1495 UErrorCode *status); 1496 1497 1498 #endif /* #if !UCONFIG_NO_COLLATION */ 1499 1500 #endif /* UCOL_H */ 1501