1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2009-2015, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: unorm2.h 11 * encoding: US-ASCII 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2009dec15 16 * created by: Markus W. Scherer 17 */ 18 19 #ifndef __UNORM2_H__ 20 #define __UNORM2_H__ 21 22 /** 23 * \file 24 * \brief C API: New API for Unicode Normalization. 25 * 26 * Unicode normalization functionality for standard Unicode normalization or 27 * for using custom mapping tables. 28 * All instances of UNormalizer2 are unmodifiable/immutable. 29 * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller. 30 * For more details see the Normalizer2 C++ class. 31 */ 32 33 #include "unicode/utypes.h" 34 #include "unicode/localpointer.h" 35 #include "unicode/uset.h" 36 37 /** 38 * Constants for normalization modes. 39 * For details about standard Unicode normalization forms 40 * and about the algorithms which are also used with custom mapping tables 41 * see http://www.unicode.org/unicode/reports/tr15/ 42 * @stable ICU 4.4 43 */ 44 typedef enum { 45 /** 46 * Decomposition followed by composition. 47 * Same as standard NFC when using an "nfc" instance. 48 * Same as standard NFKC when using an "nfkc" instance. 49 * For details about standard Unicode normalization forms 50 * see http://www.unicode.org/unicode/reports/tr15/ 51 * @stable ICU 4.4 52 */ 53 UNORM2_COMPOSE, 54 /** 55 * Map, and reorder canonically. 56 * Same as standard NFD when using an "nfc" instance. 57 * Same as standard NFKD when using an "nfkc" instance. 58 * For details about standard Unicode normalization forms 59 * see http://www.unicode.org/unicode/reports/tr15/ 60 * @stable ICU 4.4 61 */ 62 UNORM2_DECOMPOSE, 63 /** 64 * "Fast C or D" form. 65 * If a string is in this form, then further decomposition <i>without reordering</i> 66 * would yield the same form as DECOMPOSE. 67 * Text in "Fast C or D" form can be processed efficiently with data tables 68 * that are "canonically closed", that is, that provide equivalent data for 69 * equivalent text, without having to be fully normalized. 70 * Not a standard Unicode normalization form. 71 * Not a unique form: Different FCD strings can be canonically equivalent. 72 * For details see http://www.unicode.org/notes/tn5/#FCD 73 * @stable ICU 4.4 74 */ 75 UNORM2_FCD, 76 /** 77 * Compose only contiguously. 78 * Also known as "FCC" or "Fast C Contiguous". 79 * The result will often but not always be in NFC. 80 * The result will conform to FCD which is useful for processing. 81 * Not a standard Unicode normalization form. 82 * For details see http://www.unicode.org/notes/tn5/#FCC 83 * @stable ICU 4.4 84 */ 85 UNORM2_COMPOSE_CONTIGUOUS 86 } UNormalization2Mode; 87 88 /** 89 * Result values for normalization quick check functions. 90 * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms 91 * @stable ICU 2.0 92 */ 93 typedef enum UNormalizationCheckResult { 94 /** 95 * The input string is not in the normalization form. 96 * @stable ICU 2.0 97 */ 98 UNORM_NO, 99 /** 100 * The input string is in the normalization form. 101 * @stable ICU 2.0 102 */ 103 UNORM_YES, 104 /** 105 * The input string may or may not be in the normalization form. 106 * This value is only returned for composition forms like NFC and FCC, 107 * when a backward-combining character is found for which the surrounding text 108 * would have to be analyzed further. 109 * @stable ICU 2.0 110 */ 111 UNORM_MAYBE 112 } UNormalizationCheckResult; 113 114 /** 115 * Opaque C service object type for the new normalization API. 116 * @stable ICU 4.4 117 */ 118 struct UNormalizer2; 119 typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */ 120 121 #if !UCONFIG_NO_NORMALIZATION 122 123 /** 124 * Returns a UNormalizer2 instance for Unicode NFC normalization. 125 * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode). 126 * Returns an unmodifiable singleton instance. Do not delete it. 127 * @param pErrorCode Standard ICU error code. Its input value must 128 * pass the U_SUCCESS() test, or else the function returns 129 * immediately. Check for U_FAILURE() on output or use with 130 * function chaining. (See User Guide for details.) 131 * @return the requested Normalizer2, if successful 132 * @stable ICU 49 133 */ 134 U_STABLE const UNormalizer2 * U_EXPORT2 135 unorm2_getNFCInstance(UErrorCode *pErrorCode); 136 137 /** 138 * Returns a UNormalizer2 instance for Unicode NFD normalization. 139 * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode). 140 * Returns an unmodifiable singleton instance. Do not delete it. 141 * @param pErrorCode Standard ICU error code. Its input value must 142 * pass the U_SUCCESS() test, or else the function returns 143 * immediately. Check for U_FAILURE() on output or use with 144 * function chaining. (See User Guide for details.) 145 * @return the requested Normalizer2, if successful 146 * @stable ICU 49 147 */ 148 U_STABLE const UNormalizer2 * U_EXPORT2 149 unorm2_getNFDInstance(UErrorCode *pErrorCode); 150 151 /** 152 * Returns a UNormalizer2 instance for Unicode NFKC normalization. 153 * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode). 154 * Returns an unmodifiable singleton instance. Do not delete it. 155 * @param pErrorCode Standard ICU error code. Its input value must 156 * pass the U_SUCCESS() test, or else the function returns 157 * immediately. Check for U_FAILURE() on output or use with 158 * function chaining. (See User Guide for details.) 159 * @return the requested Normalizer2, if successful 160 * @stable ICU 49 161 */ 162 U_STABLE const UNormalizer2 * U_EXPORT2 163 unorm2_getNFKCInstance(UErrorCode *pErrorCode); 164 165 /** 166 * Returns a UNormalizer2 instance for Unicode NFKD normalization. 167 * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode). 168 * Returns an unmodifiable singleton instance. Do not delete it. 169 * @param pErrorCode Standard ICU error code. Its input value must 170 * pass the U_SUCCESS() test, or else the function returns 171 * immediately. Check for U_FAILURE() on output or use with 172 * function chaining. (See User Guide for details.) 173 * @return the requested Normalizer2, if successful 174 * @stable ICU 49 175 */ 176 U_STABLE const UNormalizer2 * U_EXPORT2 177 unorm2_getNFKDInstance(UErrorCode *pErrorCode); 178 179 /** 180 * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization. 181 * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode). 182 * Returns an unmodifiable singleton instance. Do not delete it. 183 * @param pErrorCode Standard ICU error code. Its input value must 184 * pass the U_SUCCESS() test, or else the function returns 185 * immediately. Check for U_FAILURE() on output or use with 186 * function chaining. (See User Guide for details.) 187 * @return the requested Normalizer2, if successful 188 * @stable ICU 49 189 */ 190 U_STABLE const UNormalizer2 * U_EXPORT2 191 unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode); 192 193 /** 194 * Returns a UNormalizer2 instance which uses the specified data file 195 * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 196 * and which composes or decomposes text according to the specified mode. 197 * Returns an unmodifiable singleton instance. Do not delete it. 198 * 199 * Use packageName=NULL for data files that are part of ICU's own data. 200 * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 201 * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 202 * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 203 * 204 * @param packageName NULL for ICU built-in data, otherwise application data package name 205 * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file 206 * @param mode normalization mode (compose or decompose etc.) 207 * @param pErrorCode Standard ICU error code. Its input value must 208 * pass the U_SUCCESS() test, or else the function returns 209 * immediately. Check for U_FAILURE() on output or use with 210 * function chaining. (See User Guide for details.) 211 * @return the requested UNormalizer2, if successful 212 * @stable ICU 4.4 213 */ 214 U_STABLE const UNormalizer2 * U_EXPORT2 215 unorm2_getInstance(const char *packageName, 216 const char *name, 217 UNormalization2Mode mode, 218 UErrorCode *pErrorCode); 219 220 /** 221 * Constructs a filtered normalizer wrapping any UNormalizer2 instance 222 * and a filter set. 223 * Both are aliased and must not be modified or deleted while this object 224 * is used. 225 * The filter set should be frozen; otherwise the performance will suffer greatly. 226 * @param norm2 wrapped UNormalizer2 instance 227 * @param filterSet USet which determines the characters to be normalized 228 * @param pErrorCode Standard ICU error code. Its input value must 229 * pass the U_SUCCESS() test, or else the function returns 230 * immediately. Check for U_FAILURE() on output or use with 231 * function chaining. (See User Guide for details.) 232 * @return the requested UNormalizer2, if successful 233 * @stable ICU 4.4 234 */ 235 U_STABLE UNormalizer2 * U_EXPORT2 236 unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode); 237 238 /** 239 * Closes a UNormalizer2 instance from unorm2_openFiltered(). 240 * Do not close instances from unorm2_getInstance()! 241 * @param norm2 UNormalizer2 instance to be closed 242 * @stable ICU 4.4 243 */ 244 U_STABLE void U_EXPORT2 245 unorm2_close(UNormalizer2 *norm2); 246 247 #if U_SHOW_CPLUSPLUS_API 248 249 U_NAMESPACE_BEGIN 250 251 /** 252 * \class LocalUNormalizer2Pointer 253 * "Smart pointer" class, closes a UNormalizer2 via unorm2_close(). 254 * For most methods see the LocalPointerBase base class. 255 * 256 * @see LocalPointerBase 257 * @see LocalPointer 258 * @stable ICU 4.4 259 */ 260 U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close); 261 262 U_NAMESPACE_END 263 264 #endif 265 266 /** 267 * Writes the normalized form of the source string to the destination string 268 * (replacing its contents) and returns the length of the destination string. 269 * The source and destination strings must be different buffers. 270 * @param norm2 UNormalizer2 instance 271 * @param src source string 272 * @param length length of the source string, or -1 if NUL-terminated 273 * @param dest destination string; its contents is replaced with normalized src 274 * @param capacity number of UChars that can be written to dest 275 * @param pErrorCode Standard ICU error code. Its input value must 276 * pass the U_SUCCESS() test, or else the function returns 277 * immediately. Check for U_FAILURE() on output or use with 278 * function chaining. (See User Guide for details.) 279 * @return dest 280 * @stable ICU 4.4 281 */ 282 U_STABLE int32_t U_EXPORT2 283 unorm2_normalize(const UNormalizer2 *norm2, 284 const UChar *src, int32_t length, 285 UChar *dest, int32_t capacity, 286 UErrorCode *pErrorCode); 287 /** 288 * Appends the normalized form of the second string to the first string 289 * (merging them at the boundary) and returns the length of the first string. 290 * The result is normalized if the first string was normalized. 291 * The first and second strings must be different buffers. 292 * @param norm2 UNormalizer2 instance 293 * @param first string, should be normalized 294 * @param firstLength length of the first string, or -1 if NUL-terminated 295 * @param firstCapacity number of UChars that can be written to first 296 * @param second string, will be normalized 297 * @param secondLength length of the source string, or -1 if NUL-terminated 298 * @param pErrorCode Standard ICU error code. Its input value must 299 * pass the U_SUCCESS() test, or else the function returns 300 * immediately. Check for U_FAILURE() on output or use with 301 * function chaining. (See User Guide for details.) 302 * @return first 303 * @stable ICU 4.4 304 */ 305 U_STABLE int32_t U_EXPORT2 306 unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, 307 UChar *first, int32_t firstLength, int32_t firstCapacity, 308 const UChar *second, int32_t secondLength, 309 UErrorCode *pErrorCode); 310 /** 311 * Appends the second string to the first string 312 * (merging them at the boundary) and returns the length of the first string. 313 * The result is normalized if both the strings were normalized. 314 * The first and second strings must be different buffers. 315 * @param norm2 UNormalizer2 instance 316 * @param first string, should be normalized 317 * @param firstLength length of the first string, or -1 if NUL-terminated 318 * @param firstCapacity number of UChars that can be written to first 319 * @param second string, should be normalized 320 * @param secondLength length of the source string, or -1 if NUL-terminated 321 * @param pErrorCode Standard ICU error code. Its input value must 322 * pass the U_SUCCESS() test, or else the function returns 323 * immediately. Check for U_FAILURE() on output or use with 324 * function chaining. (See User Guide for details.) 325 * @return first 326 * @stable ICU 4.4 327 */ 328 U_STABLE int32_t U_EXPORT2 329 unorm2_append(const UNormalizer2 *norm2, 330 UChar *first, int32_t firstLength, int32_t firstCapacity, 331 const UChar *second, int32_t secondLength, 332 UErrorCode *pErrorCode); 333 334 /** 335 * Gets the decomposition mapping of c. 336 * Roughly equivalent to normalizing the String form of c 337 * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function 338 * returns a negative value and does not write a string 339 * if c does not have a decomposition mapping in this instance's data. 340 * This function is independent of the mode of the UNormalizer2. 341 * @param norm2 UNormalizer2 instance 342 * @param c code point 343 * @param decomposition String buffer which will be set to c's 344 * decomposition mapping, if there is one. 345 * @param capacity number of UChars that can be written to decomposition 346 * @param pErrorCode Standard ICU error code. Its input value must 347 * pass the U_SUCCESS() test, or else the function returns 348 * immediately. Check for U_FAILURE() on output or use with 349 * function chaining. (See User Guide for details.) 350 * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value 351 * @stable ICU 4.6 352 */ 353 U_STABLE int32_t U_EXPORT2 354 unorm2_getDecomposition(const UNormalizer2 *norm2, 355 UChar32 c, UChar *decomposition, int32_t capacity, 356 UErrorCode *pErrorCode); 357 358 /** 359 * Gets the raw decomposition mapping of c. 360 * 361 * This is similar to the unorm2_getDecomposition() function but returns the 362 * raw decomposition mapping as specified in UnicodeData.txt or 363 * (for custom data) in the mapping files processed by the gennorm2 tool. 364 * By contrast, unorm2_getDecomposition() returns the processed, 365 * recursively-decomposed version of this mapping. 366 * 367 * When used on a standard NFKC Normalizer2 instance, 368 * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. 369 * 370 * When used on a standard NFC Normalizer2 instance, 371 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); 372 * in this case, the result contains either one or two code points (=1..4 UChars). 373 * 374 * This function is independent of the mode of the UNormalizer2. 375 * @param norm2 UNormalizer2 instance 376 * @param c code point 377 * @param decomposition String buffer which will be set to c's 378 * raw decomposition mapping, if there is one. 379 * @param capacity number of UChars that can be written to decomposition 380 * @param pErrorCode Standard ICU error code. Its input value must 381 * pass the U_SUCCESS() test, or else the function returns 382 * immediately. Check for U_FAILURE() on output or use with 383 * function chaining. (See User Guide for details.) 384 * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value 385 * @stable ICU 49 386 */ 387 U_STABLE int32_t U_EXPORT2 388 unorm2_getRawDecomposition(const UNormalizer2 *norm2, 389 UChar32 c, UChar *decomposition, int32_t capacity, 390 UErrorCode *pErrorCode); 391 392 /** 393 * Performs pairwise composition of a & b and returns the composite if there is one. 394 * 395 * Returns a composite code point c only if c has a two-way mapping to a+b. 396 * In standard Unicode normalization, this means that 397 * c has a canonical decomposition to a+b 398 * and c does not have the Full_Composition_Exclusion property. 399 * 400 * This function is independent of the mode of the UNormalizer2. 401 * @param norm2 UNormalizer2 instance 402 * @param a A (normalization starter) code point. 403 * @param b Another code point. 404 * @return The non-negative composite code point if there is one; otherwise a negative value. 405 * @stable ICU 49 406 */ 407 U_STABLE UChar32 U_EXPORT2 408 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b); 409 410 /** 411 * Gets the combining class of c. 412 * The default implementation returns 0 413 * but all standard implementations return the Unicode Canonical_Combining_Class value. 414 * @param norm2 UNormalizer2 instance 415 * @param c code point 416 * @return c's combining class 417 * @stable ICU 49 418 */ 419 U_STABLE uint8_t U_EXPORT2 420 unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c); 421 422 /** 423 * Tests if the string is normalized. 424 * Internally, in cases where the quickCheck() method would return "maybe" 425 * (which is only possible for the two COMPOSE modes) this method 426 * resolves to "yes" or "no" to provide a definitive result, 427 * at the cost of doing more work in those cases. 428 * @param norm2 UNormalizer2 instance 429 * @param s input string 430 * @param length length of the string, or -1 if NUL-terminated 431 * @param pErrorCode Standard ICU error code. Its input value must 432 * pass the U_SUCCESS() test, or else the function returns 433 * immediately. Check for U_FAILURE() on output or use with 434 * function chaining. (See User Guide for details.) 435 * @return TRUE if s is normalized 436 * @stable ICU 4.4 437 */ 438 U_STABLE UBool U_EXPORT2 439 unorm2_isNormalized(const UNormalizer2 *norm2, 440 const UChar *s, int32_t length, 441 UErrorCode *pErrorCode); 442 443 /** 444 * Tests if the string is normalized. 445 * For the two COMPOSE modes, the result could be "maybe" in cases that 446 * would take a little more work to resolve definitively. 447 * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster 448 * combination of quick check + normalization, to avoid 449 * re-checking the "yes" prefix. 450 * @param norm2 UNormalizer2 instance 451 * @param s input string 452 * @param length length of the string, or -1 if NUL-terminated 453 * @param pErrorCode Standard ICU error code. Its input value must 454 * pass the U_SUCCESS() test, or else the function returns 455 * immediately. Check for U_FAILURE() on output or use with 456 * function chaining. (See User Guide for details.) 457 * @return UNormalizationCheckResult 458 * @stable ICU 4.4 459 */ 460 U_STABLE UNormalizationCheckResult U_EXPORT2 461 unorm2_quickCheck(const UNormalizer2 *norm2, 462 const UChar *s, int32_t length, 463 UErrorCode *pErrorCode); 464 465 /** 466 * Returns the end of the normalized substring of the input string. 467 * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> 468 * the substring <code>UnicodeString(s, 0, end)</code> 469 * will pass the quick check with a "yes" result. 470 * 471 * The returned end index is usually one or more characters before the 472 * "no" or "maybe" character: The end index is at a normalization boundary. 473 * (See the class documentation for more about normalization boundaries.) 474 * 475 * When the goal is a normalized string and most input strings are expected 476 * to be normalized already, then call this method, 477 * and if it returns a prefix shorter than the input string, 478 * copy that prefix and use normalizeSecondAndAppend() for the remainder. 479 * @param norm2 UNormalizer2 instance 480 * @param s input string 481 * @param length length of the string, or -1 if NUL-terminated 482 * @param pErrorCode Standard ICU error code. Its input value must 483 * pass the U_SUCCESS() test, or else the function returns 484 * immediately. Check for U_FAILURE() on output or use with 485 * function chaining. (See User Guide for details.) 486 * @return "yes" span end index 487 * @stable ICU 4.4 488 */ 489 U_STABLE int32_t U_EXPORT2 490 unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, 491 const UChar *s, int32_t length, 492 UErrorCode *pErrorCode); 493 494 /** 495 * Tests if the character always has a normalization boundary before it, 496 * regardless of context. 497 * For details see the Normalizer2 base class documentation. 498 * @param norm2 UNormalizer2 instance 499 * @param c character to test 500 * @return TRUE if c has a normalization boundary before it 501 * @stable ICU 4.4 502 */ 503 U_STABLE UBool U_EXPORT2 504 unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c); 505 506 /** 507 * Tests if the character always has a normalization boundary after it, 508 * regardless of context. 509 * For details see the Normalizer2 base class documentation. 510 * @param norm2 UNormalizer2 instance 511 * @param c character to test 512 * @return TRUE if c has a normalization boundary after it 513 * @stable ICU 4.4 514 */ 515 U_STABLE UBool U_EXPORT2 516 unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c); 517 518 /** 519 * Tests if the character is normalization-inert. 520 * For details see the Normalizer2 base class documentation. 521 * @param norm2 UNormalizer2 instance 522 * @param c character to test 523 * @return TRUE if c is normalization-inert 524 * @stable ICU 4.4 525 */ 526 U_STABLE UBool U_EXPORT2 527 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); 528 529 /** 530 * Option bit for unorm_compare: 531 * Both input strings are assumed to fulfill FCD conditions. 532 * @stable ICU 2.2 533 */ 534 #define UNORM_INPUT_IS_FCD 0x20000 535 536 /** 537 * Option bit for unorm_compare: 538 * Perform case-insensitive comparison. 539 * @stable ICU 2.2 540 */ 541 #define U_COMPARE_IGNORE_CASE 0x10000 542 543 #ifndef U_COMPARE_CODE_POINT_ORDER 544 /* see also unistr.h and ustring.h */ 545 /** 546 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: 547 * Compare strings in code point order instead of code unit order. 548 * @stable ICU 2.2 549 */ 550 #define U_COMPARE_CODE_POINT_ORDER 0x8000 551 #endif 552 553 /** 554 * Compares two strings for canonical equivalence. 555 * Further options include case-insensitive comparison and 556 * code point order (as opposed to code unit order). 557 * 558 * Canonical equivalence between two strings is defined as their normalized 559 * forms (NFD or NFC) being identical. 560 * This function compares strings incrementally instead of normalizing 561 * (and optionally case-folding) both strings entirely, 562 * improving performance significantly. 563 * 564 * Bulk normalization is only necessary if the strings do not fulfill the FCD 565 * conditions. Only in this case, and only if the strings are relatively long, 566 * is memory allocated temporarily. 567 * For FCD strings and short non-FCD strings there is no memory allocation. 568 * 569 * Semantically, this is equivalent to 570 * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2)))) 571 * where code point order and foldCase are all optional. 572 * 573 * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match 574 * the case folding must be performed first, then the normalization. 575 * 576 * @param s1 First source string. 577 * @param length1 Length of first source string, or -1 if NUL-terminated. 578 * 579 * @param s2 Second source string. 580 * @param length2 Length of second source string, or -1 if NUL-terminated. 581 * 582 * @param options A bit set of options: 583 * - U_FOLD_CASE_DEFAULT or 0 is used for default options: 584 * Case-sensitive comparison in code unit order, and the input strings 585 * are quick-checked for FCD. 586 * 587 * - UNORM_INPUT_IS_FCD 588 * Set if the caller knows that both s1 and s2 fulfill the FCD conditions. 589 * If not set, the function will quickCheck for FCD 590 * and normalize if necessary. 591 * 592 * - U_COMPARE_CODE_POINT_ORDER 593 * Set to choose code point order instead of code unit order 594 * (see u_strCompare for details). 595 * 596 * - U_COMPARE_IGNORE_CASE 597 * Set to compare strings case-insensitively using case folding, 598 * instead of case-sensitively. 599 * If set, then the following case folding options are used. 600 * 601 * - Options as used with case-insensitive comparisons, currently: 602 * 603 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I 604 * (see u_strCaseCompare for details) 605 * 606 * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT 607 * 608 * @param pErrorCode ICU error code in/out parameter. 609 * Must fulfill U_SUCCESS before the function call. 610 * @return <0 or 0 or >0 as usual for string comparisons 611 * 612 * @see unorm_normalize 613 * @see UNORM_FCD 614 * @see u_strCompare 615 * @see u_strCaseCompare 616 * 617 * @stable ICU 2.2 618 */ 619 U_STABLE int32_t U_EXPORT2 620 unorm_compare(const UChar *s1, int32_t length1, 621 const UChar *s2, int32_t length2, 622 uint32_t options, 623 UErrorCode *pErrorCode); 624 625 #endif /* !UCONFIG_NO_NORMALIZATION */ 626 #endif /* __UNORM2_H__ */ 627