1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2009-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: unorm2.h 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2009dec15 16 * created by: Markus W. Scherer 17 */ 18 19 #ifndef __UNORM2_H__ 20 #define __UNORM2_H__ 21 22 /** 23 * \file 24 * \brief C API: New API for Unicode Normalization. 25 * 26 * Unicode normalization functionality for standard Unicode normalization or 27 * for using custom mapping tables. 28 * All instances of UNormalizer2 are unmodifiable/immutable. 29 * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller. 30 * For more details see the Normalizer2 C++ class. 31 */ 32 33 #include "unicode/utypes.h" 34 #include "unicode/localpointer.h" 35 #include "unicode/stringoptions.h" 36 #include "unicode/uset.h" 37 38 /** 39 * Constants for normalization modes. 40 * For details about standard Unicode normalization forms 41 * and about the algorithms which are also used with custom mapping tables 42 * see http://www.unicode.org/unicode/reports/tr15/ 43 * @stable ICU 4.4 44 */ 45 typedef enum { 46 /** 47 * Decomposition followed by composition. 48 * Same as standard NFC when using an "nfc" instance. 49 * Same as standard NFKC when using an "nfkc" instance. 50 * For details about standard Unicode normalization forms 51 * see http://www.unicode.org/unicode/reports/tr15/ 52 * @stable ICU 4.4 53 */ 54 UNORM2_COMPOSE, 55 /** 56 * Map, and reorder canonically. 57 * Same as standard NFD when using an "nfc" instance. 58 * Same as standard NFKD when using an "nfkc" instance. 59 * For details about standard Unicode normalization forms 60 * see http://www.unicode.org/unicode/reports/tr15/ 61 * @stable ICU 4.4 62 */ 63 UNORM2_DECOMPOSE, 64 /** 65 * "Fast C or D" form. 66 * If a string is in this form, then further decomposition <i>without reordering</i> 67 * would yield the same form as DECOMPOSE. 68 * Text in "Fast C or D" form can be processed efficiently with data tables 69 * that are "canonically closed", that is, that provide equivalent data for 70 * equivalent text, without having to be fully normalized. 71 * Not a standard Unicode normalization form. 72 * Not a unique form: Different FCD strings can be canonically equivalent. 73 * For details see http://www.unicode.org/notes/tn5/#FCD 74 * @stable ICU 4.4 75 */ 76 UNORM2_FCD, 77 /** 78 * Compose only contiguously. 79 * Also known as "FCC" or "Fast C Contiguous". 80 * The result will often but not always be in NFC. 81 * The result will conform to FCD which is useful for processing. 82 * Not a standard Unicode normalization form. 83 * For details see http://www.unicode.org/notes/tn5/#FCC 84 * @stable ICU 4.4 85 */ 86 UNORM2_COMPOSE_CONTIGUOUS 87 } UNormalization2Mode; 88 89 /** 90 * Result values for normalization quick check functions. 91 * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms 92 * @stable ICU 2.0 93 */ 94 typedef enum UNormalizationCheckResult { 95 /** 96 * The input string is not in the normalization form. 97 * @stable ICU 2.0 98 */ 99 UNORM_NO, 100 /** 101 * The input string is in the normalization form. 102 * @stable ICU 2.0 103 */ 104 UNORM_YES, 105 /** 106 * The input string may or may not be in the normalization form. 107 * This value is only returned for composition forms like NFC and FCC, 108 * when a backward-combining character is found for which the surrounding text 109 * would have to be analyzed further. 110 * @stable ICU 2.0 111 */ 112 UNORM_MAYBE 113 } UNormalizationCheckResult; 114 115 /** 116 * Opaque C service object type for the new normalization API. 117 * @stable ICU 4.4 118 */ 119 struct UNormalizer2; 120 typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */ 121 122 #if !UCONFIG_NO_NORMALIZATION 123 124 /** 125 * Returns a UNormalizer2 instance for Unicode NFC normalization. 126 * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). 127 * Returns an unmodifiable singleton instance. Do not delete it. 128 * @param pErrorCode Standard ICU error code. Its input value must 129 * pass the U_SUCCESS() test, or else the function returns 130 * immediately. Check for U_FAILURE() on output or use with 131 * function chaining. (See User Guide for details.) 132 * @return the requested Normalizer2, if successful 133 * @stable ICU 49 134 */ 135 U_STABLE const UNormalizer2 * U_EXPORT2 136 unorm2_getNFCInstance(UErrorCode *pErrorCode); 137 138 /** 139 * Returns a UNormalizer2 instance for Unicode NFD normalization. 140 * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). 141 * Returns an unmodifiable singleton instance. Do not delete it. 142 * @param pErrorCode Standard ICU error code. Its input value must 143 * pass the U_SUCCESS() test, or else the function returns 144 * immediately. Check for U_FAILURE() on output or use with 145 * function chaining. (See User Guide for details.) 146 * @return the requested Normalizer2, if successful 147 * @stable ICU 49 148 */ 149 U_STABLE const UNormalizer2 * U_EXPORT2 150 unorm2_getNFDInstance(UErrorCode *pErrorCode); 151 152 /** 153 * Returns a UNormalizer2 instance for Unicode NFKC normalization. 154 * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). 155 * Returns an unmodifiable singleton instance. Do not delete it. 156 * @param pErrorCode Standard ICU error code. Its input value must 157 * pass the U_SUCCESS() test, or else the function returns 158 * immediately. Check for U_FAILURE() on output or use with 159 * function chaining. (See User Guide for details.) 160 * @return the requested Normalizer2, if successful 161 * @stable ICU 49 162 */ 163 U_STABLE const UNormalizer2 * U_EXPORT2 164 unorm2_getNFKCInstance(UErrorCode *pErrorCode); 165 166 /** 167 * Returns a UNormalizer2 instance for Unicode NFKD normalization. 168 * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). 169 * Returns an unmodifiable singleton instance. Do not delete it. 170 * @param pErrorCode Standard ICU error code. Its input value must 171 * pass the U_SUCCESS() test, or else the function returns 172 * immediately. Check for U_FAILURE() on output or use with 173 * function chaining. (See User Guide for details.) 174 * @return the requested Normalizer2, if successful 175 * @stable ICU 49 176 */ 177 U_STABLE const UNormalizer2 * U_EXPORT2 178 unorm2_getNFKDInstance(UErrorCode *pErrorCode); 179 180 /** 181 * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization. 182 * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). 183 * Returns an unmodifiable singleton instance. Do not delete it. 184 * @param pErrorCode Standard ICU error code. Its input value must 185 * pass the U_SUCCESS() test, or else the function returns 186 * immediately. Check for U_FAILURE() on output or use with 187 * function chaining. (See User Guide for details.) 188 * @return the requested Normalizer2, if successful 189 * @stable ICU 49 190 */ 191 U_STABLE const UNormalizer2 * U_EXPORT2 192 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); 193 194 /** 195 * Returns a UNormalizer2 instance which uses the specified data file 196 * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 197 * and which composes or decomposes text according to the specified mode. 198 * Returns an unmodifiable singleton instance. Do not delete it. 199 * 200 * Use packageName=NULL for data files that are part of ICU's own data. 201 * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 202 * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 203 * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 204 * 205 * @param packageName NULL for ICU built-in data, otherwise application data package name 206 * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file 207 * @param mode normalization mode (compose or decompose etc.) 208 * @param pErrorCode Standard ICU error code. Its input value must 209 * pass the U_SUCCESS() test, or else the function returns 210 * immediately. Check for U_FAILURE() on output or use with 211 * function chaining. (See User Guide for details.) 212 * @return the requested UNormalizer2, if successful 213 * @stable ICU 4.4 214 */ 215 U_STABLE const UNormalizer2 * U_EXPORT2 216 unorm2_getInstance(const char *packageName, 217 const char *name, 218 UNormalization2Mode mode, 219 UErrorCode *pErrorCode); 220 221 /** 222 * Constructs a filtered normalizer wrapping any UNormalizer2 instance 223 * and a filter set. 224 * Both are aliased and must not be modified or deleted while this object 225 * is used. 226 * The filter set should be frozen; otherwise the performance will suffer greatly. 227 * @param norm2 wrapped UNormalizer2 instance 228 * @param filterSet USet which determines the characters to be normalized 229 * @param pErrorCode Standard ICU error code. Its input value must 230 * pass the U_SUCCESS() test, or else the function returns 231 * immediately. Check for U_FAILURE() on output or use with 232 * function chaining. (See User Guide for details.) 233 * @return the requested UNormalizer2, if successful 234 * @stable ICU 4.4 235 */ 236 U_STABLE UNormalizer2 * U_EXPORT2 237 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode); 238 239 /** 240 * Closes a UNormalizer2 instance from unorm2_openFiltered(). 241 * Do not close instances from unorm2_getInstance()! 242 * @param norm2 UNormalizer2 instance to be closed 243 * @stable ICU 4.4 244 */ 245 U_STABLE void U_EXPORT2 246 unorm2_close(UNormalizer2 *norm2); 247 248 #if U_SHOW_CPLUSPLUS_API 249 250 U_NAMESPACE_BEGIN 251 252 /** 253 * \class LocalUNormalizer2Pointer 254 * "Smart pointer" class, closes a UNormalizer2 via unorm2_close(). 255 * For most methods see the LocalPointerBase base class. 256 * 257 * @see LocalPointerBase 258 * @see LocalPointer 259 * @stable ICU 4.4 260 */ 261 U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close); 262 263 U_NAMESPACE_END 264 265 #endif 266 267 /** 268 * Writes the normalized form of the source string to the destination string 269 * (replacing its contents) and returns the length of the destination string. 270 * The source and destination strings must be different buffers. 271 * @param norm2 UNormalizer2 instance 272 * @param src source string 273 * @param length length of the source string, or -1 if NUL-terminated 274 * @param dest destination string; its contents is replaced with normalized src 275 * @param capacity number of UChars that can be written to dest 276 * @param pErrorCode Standard ICU error code. Its input value must 277 * pass the U_SUCCESS() test, or else the function returns 278 * immediately. Check for U_FAILURE() on output or use with 279 * function chaining. (See User Guide for details.) 280 * @return dest 281 * @stable ICU 4.4 282 */ 283 U_STABLE int32_t U_EXPORT2 284 unorm2_normalize(const UNormalizer2 *norm2, 285 const UChar *src, int32_t length, 286 UChar *dest, int32_t capacity, 287 UErrorCode *pErrorCode); 288 /** 289 * Appends the normalized form of the second string to the first string 290 * (merging them at the boundary) and returns the length of the first string. 291 * The result is normalized if the first string was normalized. 292 * The first and second strings must be different buffers. 293 * @param norm2 UNormalizer2 instance 294 * @param first string, should be normalized 295 * @param firstLength length of the first string, or -1 if NUL-terminated 296 * @param firstCapacity number of UChars that can be written to first 297 * @param second string, will be normalized 298 * @param secondLength length of the source string, or -1 if NUL-terminated 299 * @param pErrorCode Standard ICU error code. Its input value must 300 * pass the U_SUCCESS() test, or else the function returns 301 * immediately. Check for U_FAILURE() on output or use with 302 * function chaining. (See User Guide for details.) 303 * @return first 304 * @stable ICU 4.4 305 */ 306 U_STABLE int32_t U_EXPORT2 307 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 308 UChar *first, int32_t firstLength, int32_t firstCapacity, 309 const UChar *second, int32_t secondLength, 310 UErrorCode *pErrorCode); 311 /** 312 * Appends the second string to the first string 313 * (merging them at the boundary) and returns the length of the first string. 314 * The result is normalized if both the strings were normalized. 315 * The first and second strings must be different buffers. 316 * @param norm2 UNormalizer2 instance 317 * @param first string, should be normalized 318 * @param firstLength length of the first string, or -1 if NUL-terminated 319 * @param firstCapacity number of UChars that can be written to first 320 * @param second string, should be normalized 321 * @param secondLength length of the source string, or -1 if NUL-terminated 322 * @param pErrorCode Standard ICU error code. Its input value must 323 * pass the U_SUCCESS() test, or else the function returns 324 * immediately. Check for U_FAILURE() on output or use with 325 * function chaining. (See User Guide for details.) 326 * @return first 327 * @stable ICU 4.4 328 */ 329 U_STABLE int32_t U_EXPORT2 330 unorm2_append(const UNormalizer2 *norm2, 331 UChar *first, int32_t firstLength, int32_t firstCapacity, 332 const UChar *second, int32_t secondLength, 333 UErrorCode *pErrorCode); 334 335 /** 336 * Gets the decomposition mapping of c. 337 * Roughly equivalent to normalizing the String form of c 338 * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function 339 * returns a negative value and does not write a string 340 * if c does not have a decomposition mapping in this instance's data. 341 * This function is independent of the mode of the UNormalizer2. 342 * @param norm2 UNormalizer2 instance 343 * @param c code point 344 * @param decomposition String buffer which will be set to c's 345 * decomposition mapping, if there is one. 346 * @param capacity number of UChars that can be written to decomposition 347 * @param pErrorCode Standard ICU error code. Its input value must 348 * pass the U_SUCCESS() test, or else the function returns 349 * immediately. Check for U_FAILURE() on output or use with 350 * function chaining. (See User Guide for details.) 351 * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value 352 * @stable ICU 4.6 353 */ 354 U_STABLE int32_t U_EXPORT2 355 unorm2_getDecomposition(const UNormalizer2 *norm2, 356 UChar32 c, UChar *decomposition, int32_t capacity, 357 UErrorCode *pErrorCode); 358 359 /** 360 * Gets the raw decomposition mapping of c. 361 * 362 * This is similar to the unorm2_getDecomposition() function but returns the 363 * raw decomposition mapping as specified in UnicodeData.txt or 364 * (for custom data) in the mapping files processed by the gennorm2 tool. 365 * By contrast, unorm2_getDecomposition() returns the processed, 366 * recursively-decomposed version of this mapping. 367 * 368 * When used on a standard NFKC Normalizer2 instance, 369 * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. 370 * 371 * When used on a standard NFC Normalizer2 instance, 372 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); 373 * in this case, the result contains either one or two code points (=1..4 UChars). 374 * 375 * This function is independent of the mode of the UNormalizer2. 376 * @param norm2 UNormalizer2 instance 377 * @param c code point 378 * @param decomposition String buffer which will be set to c's 379 * raw decomposition mapping, if there is one. 380 * @param capacity number of UChars that can be written to decomposition 381 * @param pErrorCode Standard ICU error code. Its input value must 382 * pass the U_SUCCESS() test, or else the function returns 383 * immediately. Check for U_FAILURE() on output or use with 384 * function chaining. (See User Guide for details.) 385 * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value 386 * @stable ICU 49 387 */ 388 U_STABLE int32_t U_EXPORT2 389 unorm2_getRawDecomposition(const UNormalizer2 *norm2, 390 UChar32 c, UChar *decomposition, int32_t capacity, 391 UErrorCode *pErrorCode); 392 393 /** 394 * Performs pairwise composition of a & b and returns the composite if there is one. 395 * 396 * Returns a composite code point c only if c has a two-way mapping to a+b. 397 * In standard Unicode normalization, this means that 398 * c has a canonical decomposition to a+b 399 * and c does not have the Full_Composition_Exclusion property. 400 * 401 * This function is independent of the mode of the UNormalizer2. 402 * @param norm2 UNormalizer2 instance 403 * @param a A (normalization starter) code point. 404 * @param b Another code point. 405 * @return The non-negative composite code point if there is one; otherwise a negative value. 406 * @stable ICU 49 407 */ 408 U_STABLE UChar32 U_EXPORT2 409 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b); 410 411 /** 412 * Gets the combining class of c. 413 * The default implementation returns 0 414 * but all standard implementations return the Unicode Canonical_Combining_Class value. 415 * @param norm2 UNormalizer2 instance 416 * @param c code point 417 * @return c's combining class 418 * @stable ICU 49 419 */ 420 U_STABLE uint8_t U_EXPORT2 421 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c); 422 423 /** 424 * Tests if the string is normalized. 425 * Internally, in cases where the quickCheck() method would return "maybe" 426 * (which is only possible for the two COMPOSE modes) this method 427 * resolves to "yes" or "no" to provide a definitive result, 428 * at the cost of doing more work in those cases. 429 * @param norm2 UNormalizer2 instance 430 * @param s input string 431 * @param length length of the string, or -1 if NUL-terminated 432 * @param pErrorCode Standard ICU error code. Its input value must 433 * pass the U_SUCCESS() test, or else the function returns 434 * immediately. Check for U_FAILURE() on output or use with 435 * function chaining. (See User Guide for details.) 436 * @return TRUE if s is normalized 437 * @stable ICU 4.4 438 */ 439 U_STABLE UBool U_EXPORT2 440 unorm2_isNormalized(const UNormalizer2 *norm2, 441 const UChar *s, int32_t length, 442 UErrorCode *pErrorCode); 443 444 /** 445 * Tests if the string is normalized. 446 * For the two COMPOSE modes, the result could be "maybe" in cases that 447 * would take a little more work to resolve definitively. 448 * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster 449 * combination of quick check + normalization, to avoid 450 * re-checking the "yes" prefix. 451 * @param norm2 UNormalizer2 instance 452 * @param s input string 453 * @param length length of the string, or -1 if NUL-terminated 454 * @param pErrorCode Standard ICU error code. Its input value must 455 * pass the U_SUCCESS() test, or else the function returns 456 * immediately. Check for U_FAILURE() on output or use with 457 * function chaining. (See User Guide for details.) 458 * @return UNormalizationCheckResult 459 * @stable ICU 4.4 460 */ 461 U_STABLE UNormalizationCheckResult U_EXPORT2 462 unorm2_quickCheck(const UNormalizer2 *norm2, 463 const UChar *s, int32_t length, 464 UErrorCode *pErrorCode); 465 466 /** 467 * Returns the end of the normalized substring of the input string. 468 * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> 469 * the substring <code>UnicodeString(s, 0, end)</code> 470 * will pass the quick check with a "yes" result. 471 * 472 * The returned end index is usually one or more characters before the 473 * "no" or "maybe" character: The end index is at a normalization boundary. 474 * (See the class documentation for more about normalization boundaries.) 475 * 476 * When the goal is a normalized string and most input strings are expected 477 * to be normalized already, then call this method, 478 * and if it returns a prefix shorter than the input string, 479 * copy that prefix and use normalizeSecondAndAppend() for the remainder. 480 * @param norm2 UNormalizer2 instance 481 * @param s input string 482 * @param length length of the string, or -1 if NUL-terminated 483 * @param pErrorCode Standard ICU error code. Its input value must 484 * pass the U_SUCCESS() test, or else the function returns 485 * immediately. Check for U_FAILURE() on output or use with 486 * function chaining. (See User Guide for details.) 487 * @return "yes" span end index 488 * @stable ICU 4.4 489 */ 490 U_STABLE int32_t U_EXPORT2 491 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 492 const UChar *s, int32_t length, 493 UErrorCode *pErrorCode); 494 495 /** 496 * Tests if the character always has a normalization boundary before it, 497 * regardless of context. 498 * For details see the Normalizer2 base class documentation. 499 * @param norm2 UNormalizer2 instance 500 * @param c character to test 501 * @return TRUE if c has a normalization boundary before it 502 * @stable ICU 4.4 503 */ 504 U_STABLE UBool U_EXPORT2 505 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); 506 507 /** 508 * Tests if the character always has a normalization boundary after it, 509 * regardless of context. 510 * For details see the Normalizer2 base class documentation. 511 * @param norm2 UNormalizer2 instance 512 * @param c character to test 513 * @return TRUE if c has a normalization boundary after it 514 * @stable ICU 4.4 515 */ 516 U_STABLE UBool U_EXPORT2 517 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); 518 519 /** 520 * Tests if the character is normalization-inert. 521 * For details see the Normalizer2 base class documentation. 522 * @param norm2 UNormalizer2 instance 523 * @param c character to test 524 * @return TRUE if c is normalization-inert 525 * @stable ICU 4.4 526 */ 527 U_STABLE UBool U_EXPORT2 528 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); 529 530 /** 531 * Compares two strings for canonical equivalence. 532 * Further options include case-insensitive comparison and 533 * code point order (as opposed to code unit order). 534 * 535 * Canonical equivalence between two strings is defined as their normalized 536 * forms (NFD or NFC) being identical. 537 * This function compares strings incrementally instead of normalizing 538 * (and optionally case-folding) both strings entirely, 539 * improving performance significantly. 540 * 541 * Bulk normalization is only necessary if the strings do not fulfill the FCD 542 * conditions. Only in this case, and only if the strings are relatively long, 543 * is memory allocated temporarily. 544 * For FCD strings and short non-FCD strings there is no memory allocation. 545 * 546 * Semantically, this is equivalent to 547 * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2)))) 548 * where code point order and foldCase are all optional. 549 * 550 * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match 551 * the case folding must be performed first, then the normalization. 552 * 553 * @param s1 First source string. 554 * @param length1 Length of first source string, or -1 if NUL-terminated. 555 * 556 * @param s2 Second source string. 557 * @param length2 Length of second source string, or -1 if NUL-terminated. 558 * 559 * @param options A bit set of options: 560 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 561 * Case-sensitive comparison in code unit order, and the input strings 562 * are quick-checked for FCD. 563 * 564 * - UNORM_INPUT_IS_FCD 565 * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. 566 * If not set, the function will quickCheck for FCD 567 * and normalize if necessary. 568 * 569 * - U_COMPARE_CODE_POINT_ORDER 570 * Set to choose code point order instead of code unit order 571 * (see u_strCompare for details). 572 * 573 * - U_COMPARE_IGNORE_CASE 574 * Set to compare strings case-insensitively using case folding, 575 * instead of case-sensitively. 576 * If set, then the following case folding options are used. 577 * 578 * - Options as used with case-insensitive comparisons, currently: 579 * 580 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 581 * (see u_strCaseCompare for details) 582 * 583 * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT 584 * 585 * @param pErrorCode ICU error code in/out parameter. 586 * Must fulfill U_SUCCESS before the function call. 587 * @return <0 or 0 or >0 as usual for string comparisons 588 * 589 * @see unorm_normalize 590 * @see UNORM_FCD 591 * @see u_strCompare 592 * @see u_strCaseCompare 593 * 594 * @stable ICU 2.2 595 */ 596 U_STABLE int32_t U_EXPORT2 597 unorm_compare(const UChar *s1, int32_t length1, 598 const UChar *s2, int32_t length2, 599 uint32_t options, 600 UErrorCode *pErrorCode); 601 602 #endif /* !UCONFIG_NO_NORMALIZATION */ 603 #endif /* __UNORM2_H__ */ 604