1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2009-2012, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: normalizer2.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2009nov22 14 * created by: Markus W. Scherer 15 */ 16 17 #ifndef __NORMALIZER2_H__ 18 #define __NORMALIZER2_H__ 19 20 /** 21 * \file 22 * \brief C++ API: New API for Unicode Normalization. 23 */ 24 25 #include "unicode/utypes.h" 26 27 #if !UCONFIG_NO_NORMALIZATION 28 29 #include "unicode/uniset.h" 30 #include "unicode/unistr.h" 31 #include "unicode/unorm2.h" 32 33 U_NAMESPACE_BEGIN 34 35 /** 36 * Unicode normalization functionality for standard Unicode normalization or 37 * for using custom mapping tables. 38 * All instances of this class are unmodifiable/immutable. 39 * Instances returned by getInstance() are singletons that must not be deleted by the caller. 40 * The Normalizer2 class is not intended for public subclassing. 41 * 42 * The primary functions are to produce a normalized string and to detect whether 43 * a string is already normalized. 44 * The most commonly used normalization forms are those defined in 45 * http://www.unicode.org/unicode/reports/tr15/ 46 * However, this API supports additional normalization forms for specialized purposes. 47 * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE) 48 * and can be used in implementations of UTS #46. 49 * 50 * Not only are the standard compose and decompose modes supplied, 51 * but additional modes are provided as documented in the Mode enum. 52 * 53 * Some of the functions in this class identify normalization boundaries. 54 * At a normalization boundary, the portions of the string 55 * before it and starting from it do not interact and can be handled independently. 56 * 57 * The spanQuickCheckYes() stops at a normalization boundary. 58 * When the goal is a normalized string, then the text before the boundary 59 * can be copied, and the remainder can be processed with normalizeSecondAndAppend(). 60 * 61 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether 62 * a character is guaranteed to be at a normalization boundary, 63 * regardless of context. 64 * This is used for moving from one normalization boundary to the next 65 * or preceding boundary, and for performing iterative normalization. 66 * 67 * Iterative normalization is useful when only a small portion of a 68 * longer string needs to be processed. 69 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator 70 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart() 71 * (to process only the substring for which sort key bytes are computed). 72 * 73 * The set of normalization boundaries returned by these functions may not be 74 * complete: There may be more boundaries that could be returned. 75 * Different functions may return different boundaries. 76 * @stable ICU 4.4 77 */ 78 class U_COMMON_API Normalizer2 : public UObject { 79 public: 80 /** 81 * Destructor. 82 * @stable ICU 4.4 83 */ 84 ~Normalizer2(); 85 86 #ifndef U_HIDE_DRAFT_API 87 /** 88 * Returns a Normalizer2 instance for Unicode NFC normalization. 89 * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode). 90 * Returns an unmodifiable singleton instance. Do not delete it. 91 * @param errorCode Standard ICU error code. Its input value must 92 * pass the U_SUCCESS() test, or else the function returns 93 * immediately. Check for U_FAILURE() on output or use with 94 * function chaining. (See User Guide for details.) 95 * @return the requested Normalizer2, if successful 96 * @draft ICU 49 97 */ 98 static const Normalizer2 * 99 getNFCInstance(UErrorCode &errorCode); 100 101 /** 102 * Returns a Normalizer2 instance for Unicode NFD normalization. 103 * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode). 104 * Returns an unmodifiable singleton instance. Do not delete it. 105 * @param errorCode Standard ICU error code. Its input value must 106 * pass the U_SUCCESS() test, or else the function returns 107 * immediately. Check for U_FAILURE() on output or use with 108 * function chaining. (See User Guide for details.) 109 * @return the requested Normalizer2, if successful 110 * @draft ICU 49 111 */ 112 static const Normalizer2 * 113 getNFDInstance(UErrorCode &errorCode); 114 115 /** 116 * Returns a Normalizer2 instance for Unicode NFKC normalization. 117 * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode). 118 * Returns an unmodifiable singleton instance. Do not delete it. 119 * @param errorCode Standard ICU error code. Its input value must 120 * pass the U_SUCCESS() test, or else the function returns 121 * immediately. Check for U_FAILURE() on output or use with 122 * function chaining. (See User Guide for details.) 123 * @return the requested Normalizer2, if successful 124 * @draft ICU 49 125 */ 126 static const Normalizer2 * 127 getNFKCInstance(UErrorCode &errorCode); 128 129 /** 130 * Returns a Normalizer2 instance for Unicode NFKD normalization. 131 * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode). 132 * Returns an unmodifiable singleton instance. Do not delete it. 133 * @param errorCode Standard ICU error code. Its input value must 134 * pass the U_SUCCESS() test, or else the function returns 135 * immediately. Check for U_FAILURE() on output or use with 136 * function chaining. (See User Guide for details.) 137 * @return the requested Normalizer2, if successful 138 * @draft ICU 49 139 */ 140 static const Normalizer2 * 141 getNFKDInstance(UErrorCode &errorCode); 142 143 /** 144 * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization. 145 * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode). 146 * Returns an unmodifiable singleton instance. Do not delete it. 147 * @param errorCode Standard ICU error code. Its input value must 148 * pass the U_SUCCESS() test, or else the function returns 149 * immediately. Check for U_FAILURE() on output or use with 150 * function chaining. (See User Guide for details.) 151 * @return the requested Normalizer2, if successful 152 * @draft ICU 49 153 */ 154 static const Normalizer2 * 155 getNFKCCasefoldInstance(UErrorCode &errorCode); 156 #endif /* U_HIDE_DRAFT_API */ 157 158 /** 159 * Returns a Normalizer2 instance which uses the specified data file 160 * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) 161 * and which composes or decomposes text according to the specified mode. 162 * Returns an unmodifiable singleton instance. Do not delete it. 163 * 164 * Use packageName=NULL for data files that are part of ICU's own data. 165 * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD. 166 * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD. 167 * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold. 168 * 169 * @param packageName NULL for ICU built-in data, otherwise application data package name 170 * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file 171 * @param mode normalization mode (compose or decompose etc.) 172 * @param errorCode Standard ICU error code. Its input value must 173 * pass the U_SUCCESS() test, or else the function returns 174 * immediately. Check for U_FAILURE() on output or use with 175 * function chaining. (See User Guide for details.) 176 * @return the requested Normalizer2, if successful 177 * @stable ICU 4.4 178 */ 179 static const Normalizer2 * 180 getInstance(const char *packageName, 181 const char *name, 182 UNormalization2Mode mode, 183 UErrorCode &errorCode); 184 185 /** 186 * Returns the normalized form of the source string. 187 * @param src source string 188 * @param errorCode Standard ICU error code. Its input value must 189 * pass the U_SUCCESS() test, or else the function returns 190 * immediately. Check for U_FAILURE() on output or use with 191 * function chaining. (See User Guide for details.) 192 * @return normalized src 193 * @stable ICU 4.4 194 */ 195 UnicodeString normalize(const UnicodeString & src,UErrorCode & errorCode)196 normalize(const UnicodeString &src, UErrorCode &errorCode) const { 197 UnicodeString result; 198 normalize(src, result, errorCode); 199 return result; 200 } 201 /** 202 * Writes the normalized form of the source string to the destination string 203 * (replacing its contents) and returns the destination string. 204 * The source and destination strings must be different objects. 205 * @param src source string 206 * @param dest destination string; its contents is replaced with normalized src 207 * @param errorCode Standard ICU error code. Its input value must 208 * pass the U_SUCCESS() test, or else the function returns 209 * immediately. Check for U_FAILURE() on output or use with 210 * function chaining. (See User Guide for details.) 211 * @return dest 212 * @stable ICU 4.4 213 */ 214 virtual UnicodeString & 215 normalize(const UnicodeString &src, 216 UnicodeString &dest, 217 UErrorCode &errorCode) const = 0; 218 /** 219 * Appends the normalized form of the second string to the first string 220 * (merging them at the boundary) and returns the first string. 221 * The result is normalized if the first string was normalized. 222 * The first and second strings must be different objects. 223 * @param first string, should be normalized 224 * @param second string, will be normalized 225 * @param errorCode Standard ICU error code. Its input value must 226 * pass the U_SUCCESS() test, or else the function returns 227 * immediately. Check for U_FAILURE() on output or use with 228 * function chaining. (See User Guide for details.) 229 * @return first 230 * @stable ICU 4.4 231 */ 232 virtual UnicodeString & 233 normalizeSecondAndAppend(UnicodeString &first, 234 const UnicodeString &second, 235 UErrorCode &errorCode) const = 0; 236 /** 237 * Appends the second string to the first string 238 * (merging them at the boundary) and returns the first string. 239 * The result is normalized if both the strings were normalized. 240 * The first and second strings must be different objects. 241 * @param first string, should be normalized 242 * @param second string, should be normalized 243 * @param errorCode Standard ICU error code. Its input value must 244 * pass the U_SUCCESS() test, or else the function returns 245 * immediately. Check for U_FAILURE() on output or use with 246 * function chaining. (See User Guide for details.) 247 * @return first 248 * @stable ICU 4.4 249 */ 250 virtual UnicodeString & 251 append(UnicodeString &first, 252 const UnicodeString &second, 253 UErrorCode &errorCode) const = 0; 254 255 /** 256 * Gets the decomposition mapping of c. 257 * Roughly equivalent to normalizing the String form of c 258 * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function 259 * returns FALSE and does not write a string 260 * if c does not have a decomposition mapping in this instance's data. 261 * This function is independent of the mode of the Normalizer2. 262 * @param c code point 263 * @param decomposition String object which will be set to c's 264 * decomposition mapping, if there is one. 265 * @return TRUE if c has a decomposition, otherwise FALSE 266 * @stable ICU 4.6 267 */ 268 virtual UBool 269 getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0; 270 271 /** 272 * Gets the raw decomposition mapping of c. 273 * 274 * This is similar to the getDecomposition() method but returns the 275 * raw decomposition mapping as specified in UnicodeData.txt or 276 * (for custom data) in the mapping files processed by the gennorm2 tool. 277 * By contrast, getDecomposition() returns the processed, 278 * recursively-decomposed version of this mapping. 279 * 280 * When used on a standard NFKC Normalizer2 instance, 281 * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property. 282 * 283 * When used on a standard NFC Normalizer2 instance, 284 * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can); 285 * in this case, the result contains either one or two code points (=1..4 UChars). 286 * 287 * This function is independent of the mode of the Normalizer2. 288 * The default implementation returns FALSE. 289 * @param c code point 290 * @param decomposition String object which will be set to c's 291 * raw decomposition mapping, if there is one. 292 * @return TRUE if c has a decomposition, otherwise FALSE 293 * @draft ICU 49 294 */ 295 virtual UBool 296 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; 297 298 /** 299 * Performs pairwise composition of a & b and returns the composite if there is one. 300 * 301 * Returns a composite code point c only if c has a two-way mapping to a+b. 302 * In standard Unicode normalization, this means that 303 * c has a canonical decomposition to a+b 304 * and c does not have the Full_Composition_Exclusion property. 305 * 306 * This function is independent of the mode of the Normalizer2. 307 * The default implementation returns a negative value. 308 * @param a A (normalization starter) code point. 309 * @param b Another code point. 310 * @return The non-negative composite code point if there is one; otherwise a negative value. 311 * @draft ICU 49 312 */ 313 virtual UChar32 314 composePair(UChar32 a, UChar32 b) const; 315 316 /** 317 * Gets the combining class of c. 318 * The default implementation returns 0 319 * but all standard implementations return the Unicode Canonical_Combining_Class value. 320 * @param c code point 321 * @return c's combining class 322 * @draft ICU 49 323 */ 324 virtual uint8_t 325 getCombiningClass(UChar32 c) const; 326 327 /** 328 * Tests if the string is normalized. 329 * Internally, in cases where the quickCheck() method would return "maybe" 330 * (which is only possible for the two COMPOSE modes) this method 331 * resolves to "yes" or "no" to provide a definitive result, 332 * at the cost of doing more work in those cases. 333 * @param s input string 334 * @param errorCode Standard ICU error code. Its input value must 335 * pass the U_SUCCESS() test, or else the function returns 336 * immediately. Check for U_FAILURE() on output or use with 337 * function chaining. (See User Guide for details.) 338 * @return TRUE if s is normalized 339 * @stable ICU 4.4 340 */ 341 virtual UBool 342 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; 343 344 /** 345 * Tests if the string is normalized. 346 * For the two COMPOSE modes, the result could be "maybe" in cases that 347 * would take a little more work to resolve definitively. 348 * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster 349 * combination of quick check + normalization, to avoid 350 * re-checking the "yes" prefix. 351 * @param s input string 352 * @param errorCode Standard ICU error code. Its input value must 353 * pass the U_SUCCESS() test, or else the function returns 354 * immediately. Check for U_FAILURE() on output or use with 355 * function chaining. (See User Guide for details.) 356 * @return UNormalizationCheckResult 357 * @stable ICU 4.4 358 */ 359 virtual UNormalizationCheckResult 360 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0; 361 362 /** 363 * Returns the end of the normalized substring of the input string. 364 * In other words, with <code>end=spanQuickCheckYes(s, ec);</code> 365 * the substring <code>UnicodeString(s, 0, end)</code> 366 * will pass the quick check with a "yes" result. 367 * 368 * The returned end index is usually one or more characters before the 369 * "no" or "maybe" character: The end index is at a normalization boundary. 370 * (See the class documentation for more about normalization boundaries.) 371 * 372 * When the goal is a normalized string and most input strings are expected 373 * to be normalized already, then call this method, 374 * and if it returns a prefix shorter than the input string, 375 * copy that prefix and use normalizeSecondAndAppend() for the remainder. 376 * @param s input string 377 * @param errorCode Standard ICU error code. Its input value must 378 * pass the U_SUCCESS() test, or else the function returns 379 * immediately. Check for U_FAILURE() on output or use with 380 * function chaining. (See User Guide for details.) 381 * @return "yes" span end index 382 * @stable ICU 4.4 383 */ 384 virtual int32_t 385 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0; 386 387 /** 388 * Tests if the character always has a normalization boundary before it, 389 * regardless of context. 390 * If true, then the character does not normalization-interact with 391 * preceding characters. 392 * In other words, a string containing this character can be normalized 393 * by processing portions before this character and starting from this 394 * character independently. 395 * This is used for iterative normalization. See the class documentation for details. 396 * @param c character to test 397 * @return TRUE if c has a normalization boundary before it 398 * @stable ICU 4.4 399 */ 400 virtual UBool hasBoundaryBefore(UChar32 c) const = 0; 401 402 /** 403 * Tests if the character always has a normalization boundary after it, 404 * regardless of context. 405 * If true, then the character does not normalization-interact with 406 * following characters. 407 * In other words, a string containing this character can be normalized 408 * by processing portions up to this character and after this 409 * character independently. 410 * This is used for iterative normalization. See the class documentation for details. 411 * Note that this operation may be significantly slower than hasBoundaryBefore(). 412 * @param c character to test 413 * @return TRUE if c has a normalization boundary after it 414 * @stable ICU 4.4 415 */ 416 virtual UBool hasBoundaryAfter(UChar32 c) const = 0; 417 418 /** 419 * Tests if the character is normalization-inert. 420 * If true, then the character does not change, nor normalization-interact with 421 * preceding or following characters. 422 * In other words, a string containing this character can be normalized 423 * by processing portions before this character and after this 424 * character independently. 425 * This is used for iterative normalization. See the class documentation for details. 426 * Note that this operation may be significantly slower than hasBoundaryBefore(). 427 * @param c character to test 428 * @return TRUE if c is normalization-inert 429 * @stable ICU 4.4 430 */ 431 virtual UBool isInert(UChar32 c) const = 0; 432 433 private: 434 // No ICU "poor man's RTTI" for this class nor its subclasses. 435 virtual UClassID getDynamicClassID() const; 436 }; 437 438 /** 439 * Normalization filtered by a UnicodeSet. 440 * Normalizes portions of the text contained in the filter set and leaves 441 * portions not contained in the filter set unchanged. 442 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE). 443 * Not-in-the-filter text is treated as "is normalized" and "quick check yes". 444 * This class implements all of (and only) the Normalizer2 API. 445 * An instance of this class is unmodifiable/immutable but is constructed and 446 * must be destructed by the owner. 447 * @stable ICU 4.4 448 */ 449 class U_COMMON_API FilteredNormalizer2 : public Normalizer2 { 450 public: 451 /** 452 * Constructs a filtered normalizer wrapping any Normalizer2 instance 453 * and a filter set. 454 * Both are aliased and must not be modified or deleted while this object 455 * is used. 456 * The filter set should be frozen; otherwise the performance will suffer greatly. 457 * @param n2 wrapped Normalizer2 instance 458 * @param filterSet UnicodeSet which determines the characters to be normalized 459 * @stable ICU 4.4 460 */ FilteredNormalizer2(const Normalizer2 & n2,const UnicodeSet & filterSet)461 FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) : 462 norm2(n2), set(filterSet) {} 463 464 /** 465 * Destructor. 466 * @stable ICU 4.4 467 */ 468 ~FilteredNormalizer2(); 469 470 /** 471 * Writes the normalized form of the source string to the destination string 472 * (replacing its contents) and returns the destination string. 473 * The source and destination strings must be different objects. 474 * @param src source string 475 * @param dest destination string; its contents is replaced with normalized src 476 * @param errorCode Standard ICU error code. Its input value must 477 * pass the U_SUCCESS() test, or else the function returns 478 * immediately. Check for U_FAILURE() on output or use with 479 * function chaining. (See User Guide for details.) 480 * @return dest 481 * @stable ICU 4.4 482 */ 483 virtual UnicodeString & 484 normalize(const UnicodeString &src, 485 UnicodeString &dest, 486 UErrorCode &errorCode) const; 487 /** 488 * Appends the normalized form of the second string to the first string 489 * (merging them at the boundary) and returns the first string. 490 * The result is normalized if the first string was normalized. 491 * The first and second strings must be different objects. 492 * @param first string, should be normalized 493 * @param second string, will be normalized 494 * @param errorCode Standard ICU error code. Its input value must 495 * pass the U_SUCCESS() test, or else the function returns 496 * immediately. Check for U_FAILURE() on output or use with 497 * function chaining. (See User Guide for details.) 498 * @return first 499 * @stable ICU 4.4 500 */ 501 virtual UnicodeString & 502 normalizeSecondAndAppend(UnicodeString &first, 503 const UnicodeString &second, 504 UErrorCode &errorCode) const; 505 /** 506 * Appends the second string to the first string 507 * (merging them at the boundary) and returns the first string. 508 * The result is normalized if both the strings were normalized. 509 * The first and second strings must be different objects. 510 * @param first string, should be normalized 511 * @param second string, should be normalized 512 * @param errorCode Standard ICU error code. Its input value must 513 * pass the U_SUCCESS() test, or else the function returns 514 * immediately. Check for U_FAILURE() on output or use with 515 * function chaining. (See User Guide for details.) 516 * @return first 517 * @stable ICU 4.4 518 */ 519 virtual UnicodeString & 520 append(UnicodeString &first, 521 const UnicodeString &second, 522 UErrorCode &errorCode) const; 523 524 /** 525 * Gets the decomposition mapping of c. 526 * For details see the base class documentation. 527 * 528 * This function is independent of the mode of the Normalizer2. 529 * @param c code point 530 * @param decomposition String object which will be set to c's 531 * decomposition mapping, if there is one. 532 * @return TRUE if c has a decomposition, otherwise FALSE 533 * @stable ICU 4.6 534 */ 535 virtual UBool 536 getDecomposition(UChar32 c, UnicodeString &decomposition) const; 537 538 /** 539 * Gets the raw decomposition mapping of c. 540 * For details see the base class documentation. 541 * 542 * This function is independent of the mode of the Normalizer2. 543 * @param c code point 544 * @param decomposition String object which will be set to c's 545 * raw decomposition mapping, if there is one. 546 * @return TRUE if c has a decomposition, otherwise FALSE 547 * @draft ICU 49 548 */ 549 virtual UBool 550 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; 551 552 /** 553 * Performs pairwise composition of a & b and returns the composite if there is one. 554 * For details see the base class documentation. 555 * 556 * This function is independent of the mode of the Normalizer2. 557 * @param a A (normalization starter) code point. 558 * @param b Another code point. 559 * @return The non-negative composite code point if there is one; otherwise a negative value. 560 * @draft ICU 49 561 */ 562 virtual UChar32 563 composePair(UChar32 a, UChar32 b) const; 564 565 /** 566 * Gets the combining class of c. 567 * The default implementation returns 0 568 * but all standard implementations return the Unicode Canonical_Combining_Class value. 569 * @param c code point 570 * @return c's combining class 571 * @draft ICU 49 572 */ 573 virtual uint8_t 574 getCombiningClass(UChar32 c) const; 575 576 /** 577 * Tests if the string is normalized. 578 * For details see the Normalizer2 base class documentation. 579 * @param s input string 580 * @param errorCode Standard ICU error code. Its input value must 581 * pass the U_SUCCESS() test, or else the function returns 582 * immediately. Check for U_FAILURE() on output or use with 583 * function chaining. (See User Guide for details.) 584 * @return TRUE if s is normalized 585 * @stable ICU 4.4 586 */ 587 virtual UBool 588 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; 589 /** 590 * Tests if the string is normalized. 591 * For details see the Normalizer2 base class documentation. 592 * @param s input string 593 * @param errorCode Standard ICU error code. Its input value must 594 * pass the U_SUCCESS() test, or else the function returns 595 * immediately. Check for U_FAILURE() on output or use with 596 * function chaining. (See User Guide for details.) 597 * @return UNormalizationCheckResult 598 * @stable ICU 4.4 599 */ 600 virtual UNormalizationCheckResult 601 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; 602 /** 603 * Returns the end of the normalized substring of the input string. 604 * For details see the Normalizer2 base class documentation. 605 * @param s input string 606 * @param errorCode Standard ICU error code. Its input value must 607 * pass the U_SUCCESS() test, or else the function returns 608 * immediately. Check for U_FAILURE() on output or use with 609 * function chaining. (See User Guide for details.) 610 * @return "yes" span end index 611 * @stable ICU 4.4 612 */ 613 virtual int32_t 614 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; 615 616 /** 617 * Tests if the character always has a normalization boundary before it, 618 * regardless of context. 619 * For details see the Normalizer2 base class documentation. 620 * @param c character to test 621 * @return TRUE if c has a normalization boundary before it 622 * @stable ICU 4.4 623 */ 624 virtual UBool hasBoundaryBefore(UChar32 c) const; 625 626 /** 627 * Tests if the character always has a normalization boundary after it, 628 * regardless of context. 629 * For details see the Normalizer2 base class documentation. 630 * @param c character to test 631 * @return TRUE if c has a normalization boundary after it 632 * @stable ICU 4.4 633 */ 634 virtual UBool hasBoundaryAfter(UChar32 c) const; 635 636 /** 637 * Tests if the character is normalization-inert. 638 * For details see the Normalizer2 base class documentation. 639 * @param c character to test 640 * @return TRUE if c is normalization-inert 641 * @stable ICU 4.4 642 */ 643 virtual UBool isInert(UChar32 c) const; 644 private: 645 UnicodeString & 646 normalize(const UnicodeString &src, 647 UnicodeString &dest, 648 USetSpanCondition spanCondition, 649 UErrorCode &errorCode) const; 650 651 UnicodeString & 652 normalizeSecondAndAppend(UnicodeString &first, 653 const UnicodeString &second, 654 UBool doNormalize, 655 UErrorCode &errorCode) const; 656 657 const Normalizer2 &norm2; 658 const UnicodeSet &set; 659 }; 660 661 U_NAMESPACE_END 662 663 #endif // !UCONFIG_NO_NORMALIZATION 664 #endif // __NORMALIZER2_H__ 665