// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/**
 *******************************************************************************
 * Copyright (C) 1996-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

package com.ibm.icu.lang;

/**
 * <p>Selection constants for Unicode properties.
 * <p>These constants are used in functions like
 * UCharacter.hasBinaryProperty(int) to select one of the Unicode properties.
 *
 * <p>The properties APIs are intended to reflect Unicode properties as
 * defined in the Unicode Character Database (UCD) and Unicode Technical
 * Reports (UTR).
 * <p>For details about the properties see
 * <a href="http://www.unicode.org/reports/tr44/">UAX #44: Unicode Character Database</a>.
 *
 * <p>Important: If ICU is built with UCD files from Unicode versions below
 * 3.2, then properties marked with "new" are not or not fully
 * available. Check UCharacter.getUnicodeVersion() to be sure.
 *
 * <p>The selectors are grouped into numeric ranges by value type; each range
 * begins at its <code>*_START</code> constant and ends just before its
 * (deprecated) <code>*_LIMIT</code> constant.
 * @author Syn Wee Quek
 * @stable ICU 2.6
 * @see com.ibm.icu.lang.UCharacter
 */
public interface UProperty {
    // public data member --------------------------------------------------

    /**
     * Special value indicating undefined property.
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final int UNDEFINED = -1;

    /**
     * <p>Binary property Alphabetic.
     * <p>Property for UCharacter.isUAlphabetic(), different from the property
     * in UCharacter.isalpha().
     * <p>Lu + Ll + Lt + Lm + Lo + Nl + Other_Alphabetic.
     * @stable ICU 2.6
     */
    public static final int ALPHABETIC = 0;

    /**
     * First constant for binary Unicode properties.
     * @stable ICU 2.6
     */
    public static final int BINARY_START = ALPHABETIC;

    /**
     * Binary property ASCII_Hex_Digit (0-9 A-F a-f).
     * @stable ICU 2.6
     */
    public static final int ASCII_HEX_DIGIT = 1;

    /**
     * <p>Binary property Bidi_Control.
     * <p>Format controls which have specific functions in the Bidi Algorithm.
     *
     * @stable ICU 2.6
     */
    public static final int BIDI_CONTROL = 2;

    /**
     * <p>Binary property Bidi_Mirrored.
     * <p>Characters that may change display in RTL text.
     * <p>Property for UCharacter.isMirrored().
     * <p>See Bidi Algorithm; UTR 9.
     * @stable ICU 2.6
     */
    public static final int BIDI_MIRRORED = 3;

    /**
     * <p>Binary property Dash.
     * <p>Variations of dashes.
     * @stable ICU 2.6
     */
    public static final int DASH = 4;

    /**
     * <p>Binary property Default_Ignorable_Code_Point (new).
     *
     * <p>Property that indicates codepoint is ignorable in most processing.
     *
     * <p>Codepoints (2060..206F, FFF0..FFFB, E0000..E0FFF) +
     * Other_Default_Ignorable_Code_Point + (Cf + Cc + Cs - White_Space)
     * @stable ICU 2.6
     */
    public static final int DEFAULT_IGNORABLE_CODE_POINT = 5;

    /**
     * <p>Binary property Deprecated (new).
     * <p>The usage of deprecated characters is strongly discouraged.
     * @stable ICU 2.6
     */
    public static final int DEPRECATED = 6;

    /**
     * <p>Binary property Diacritic.
     * <p>Characters that linguistically modify the meaning of another
     * character to which they apply.
     * @stable ICU 2.6
     */
    public static final int DIACRITIC = 7;

    /**
     * <p>Binary property Extender.
     * <p>Extend the value or shape of a preceding alphabetic character, e.g.
     * length and iteration marks.
     * @stable ICU 2.6
     */
    public static final int EXTENDER = 8;

    /**
     * <p>Binary property Full_Composition_Exclusion.
     * <p>CompositionExclusions.txt + Singleton Decompositions +
     * Non-Starter Decompositions.
     * @stable ICU 2.6
     */
    public static final int FULL_COMPOSITION_EXCLUSION = 9;

    /**
     * <p>Binary property Grapheme_Base (new).
     * <p>For programmatic determination of grapheme cluster boundaries.
     * <p>[0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ
     * @stable ICU 2.6
     */
    public static final int GRAPHEME_BASE = 10;

    /**
     * <p>Binary property Grapheme_Extend (new).
     * <p>For programmatic determination of grapheme cluster boundaries.
     * <p>Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ
     * @stable ICU 2.6
     */
    public static final int GRAPHEME_EXTEND = 11;

    /**
     * <p>Binary property Grapheme_Link (new).
     * <p>For programmatic determination of grapheme cluster boundaries.
     * @stable ICU 2.6
     */
    public static final int GRAPHEME_LINK = 12;

    /**
     * <p>Binary property Hex_Digit.
     * <p>Characters commonly used for hexadecimal numbers.
     * @stable ICU 2.6
     */
    public static final int HEX_DIGIT = 13;

    /**
     * <p>Binary property Hyphen.
     * <p>Dashes used to mark connections between pieces of words, plus the
     * Katakana middle dot.
     * @stable ICU 2.6
     */
    public static final int HYPHEN = 14;

    /**
     * <p>Binary property ID_Continue.
     * <p>Characters that can continue an identifier.
     * <p>ID_Start+Mn+Mc+Nd+Pc
     * @stable ICU 2.6
     */
    public static final int ID_CONTINUE = 15;

    /**
     * <p>Binary property ID_Start.
     * <p>Characters that can start an identifier.
     * <p>Lu+Ll+Lt+Lm+Lo+Nl
     * @stable ICU 2.6
     */
    public static final int ID_START = 16;

    /**
     * <p>Binary property Ideographic.
     * <p>CJKV ideographs.
     * @stable ICU 2.6
     */
    public static final int IDEOGRAPHIC = 17;

    /**
     * <p>Binary property IDS_Binary_Operator (new).
     * <p>For programmatic determination of Ideographic Description Sequences.
     *
     * @stable ICU 2.6
     */
    public static final int IDS_BINARY_OPERATOR = 18;

    /**
     * <p>Binary property IDS_Trinary_Operator (new).
     * <p>For programmatic determination of Ideographic Description
     * Sequences.
     * @stable ICU 2.6
     */
    public static final int IDS_TRINARY_OPERATOR = 19;

    /**
     * <p>Binary property Join_Control.
     * <p>Format controls for cursive joining and ligation.
     * @stable ICU 2.6
     */
    public static final int JOIN_CONTROL = 20;

    /**
     * <p>Binary property Logical_Order_Exception (new).
     * <p>Characters that do not use logical order and require special
     * handling in most processing.
     * @stable ICU 2.6
     */
    public static final int LOGICAL_ORDER_EXCEPTION = 21;

    /**
     * <p>Binary property Lowercase.
     * <p>Same as UCharacter.isULowercase(), different from
     * UCharacter.islower().
     * <p>Ll+Other_Lowercase
     * @stable ICU 2.6
     */
    public static final int LOWERCASE = 22;

    /**
     * <p>Binary property Math.
     * <p>Sm+Other_Math
     * @stable ICU 2.6
     */
    public static final int MATH = 23;

    /**
     * <p>Binary property Noncharacter_Code_Point.
     * <p>Code points that are explicitly defined as illegal for the encoding
     * of characters.
     * @stable ICU 2.6
     */
    public static final int NONCHARACTER_CODE_POINT = 24;

    /**
     * <p>Binary property Quotation_Mark.
     * @stable ICU 2.6
     */
    public static final int QUOTATION_MARK = 25;

    /**
     * <p>Binary property Radical (new).
     * <p>For programmatic determination of Ideographic Description
     * Sequences.
     * @stable ICU 2.6
     */
    public static final int RADICAL = 26;

    /**
     * <p>Binary property Soft_Dotted (new).
     * <p>Characters with a "soft dot", like i or j.
     * <p>An accent placed on these characters causes the dot to disappear.
     * @stable ICU 2.6
     */
    public static final int SOFT_DOTTED = 27;

    /**
     * <p>Binary property Terminal_Punctuation.
     * <p>Punctuation characters that generally mark the end of textual
     * units.
     * @stable ICU 2.6
     */
    public static final int TERMINAL_PUNCTUATION = 28;

    /**
     * <p>Binary property Unified_Ideograph (new).
     * <p>For programmatic determination of Ideographic Description
     * Sequences.
     * @stable ICU 2.6
     */
    public static final int UNIFIED_IDEOGRAPH = 29;

    /**
     * <p>Binary property Uppercase.
     * <p>Same as UCharacter.isUUppercase(), different from
     * UCharacter.isUpperCase().
     * <p>Lu+Other_Uppercase
     * @stable ICU 2.6
     */
    public static final int UPPERCASE = 30;

    /**
     * <p>Binary property White_Space.
     * <p>Same as UCharacter.isUWhiteSpace(), different from
     * UCharacter.isSpace() and UCharacter.isWhitespace().
     * <p>Space characters+TAB+CR+LF-ZWSP-ZWNBSP
     * @stable ICU 2.6
     */
    public static final int WHITE_SPACE = 31;

    /**
     * <p>Binary property XID_Continue.
     * <p>ID_Continue modified to allow closure under normalization forms
     * NFKC and NFKD.
     * @stable ICU 2.6
     */
    public static final int XID_CONTINUE = 32;

    /**
     * <p>Binary property XID_Start.
     * <p>ID_Start modified to allow closure under normalization forms NFKC
     * and NFKD.
     * @stable ICU 2.6
     */
    public static final int XID_START = 33;

    /**
     * <p>Binary property Case_Sensitive.
     * <p>Either the source of a case
     * mapping or _in_ the target of a case mapping. Not the same as
     * the general category Cased_Letter.
     * @stable ICU 2.6
     */
    public static final int CASE_SENSITIVE = 34;

    /**
     * Binary property STerm (new in Unicode 4.0.1).
     * Sentence Terminal. Used in UAX #29: Text Boundaries
     * (http://www.unicode.org/reports/tr29/)
     * @stable ICU 3.0
     */
    public static final int S_TERM = 35;

    /**
     * Binary property Variation_Selector (new in Unicode 4.0.1).
     * Indicates all those characters that qualify as Variation Selectors.
     * For details on the behavior of these characters,
     * see StandardizedVariants.html and 15.6 Variation Selectors.
     * @stable ICU 3.0
     */
    public static final int VARIATION_SELECTOR = 36;

    /**
     * Binary property NFD_Inert.
     * ICU-specific property for characters that are inert under NFD,
     * i.e., they do not interact with adjacent characters.
     * Used for example in normalizing transforms in incremental mode
     * to find the boundary of safely normalizable text despite possible
     * text additions.
     *
     * There is one such property per normalization form.
     * These properties are computed as follows - an inert character is:
     * a) unassigned, or ALL of the following:
     * b) of combining class 0.
     * c) not decomposed by this normalization form.
     * AND if NFC or NFKC,
     * d) can never compose with a previous character.
     * e) can never compose with a following character.
     * f) can never change if another character is added.
     * Example: a-breve might satisfy all but f, but if you
     * add an ogonek it changes to a-ogonek + breve
     *
     * See also com.ibm.text.UCD.NFSkippable in the ICU4J repository,
     * and icu/source/common/unormimp.h .
     * @stable ICU 3.0
     */
    public static final int NFD_INERT = 37;

    /**
     * Binary property NFKD_Inert.
     * ICU-specific property for characters that are inert under NFKD,
     * i.e., they do not interact with adjacent characters.
     * Used for example in normalizing transforms in incremental mode
     * to find the boundary of safely normalizable text despite possible
     * text additions.
     * @see #NFD_INERT
     * @stable ICU 3.0
     */
    public static final int NFKD_INERT = 38;

    /**
     * Binary property NFC_Inert.
     * ICU-specific property for characters that are inert under NFC,
     * i.e., they do not interact with adjacent characters.
     * Used for example in normalizing transforms in incremental mode
     * to find the boundary of safely normalizable text despite possible
     * text additions.
     * @see #NFD_INERT
     * @stable ICU 3.0
     */
    public static final int NFC_INERT = 39;

    /**
     * Binary property NFKC_Inert.
     * ICU-specific property for characters that are inert under NFKC,
     * i.e., they do not interact with adjacent characters.
     * Used for example in normalizing transforms in incremental mode
     * to find the boundary of safely normalizable text despite possible
     * text additions.
     * @see #NFD_INERT
     * @stable ICU 3.0
     */
    public static final int NFKC_INERT = 40;

    /**
     * Binary Property Segment_Starter.
     * ICU-specific property for characters that are starters in terms of
     * Unicode normalization and combining character sequences.
     * They have ccc=0 and do not occur in non-initial position of the
     * canonical decomposition of any character
     * (like " in NFD(a-umlaut) and a Jamo T in an NFD(Hangul LVT)).
     * ICU uses this property for segmenting a string for generating a set of
     * canonically equivalent strings, e.g. for canonical closure while
     * processing collation tailoring rules.
     * @stable ICU 3.0
     */
    public static final int SEGMENT_STARTER = 41;

    /**
     * Binary property Pattern_Syntax (new in Unicode 4.1).
     * See UAX #31 Identifier and Pattern Syntax
     * (http://www.unicode.org/reports/tr31/)
     * @stable ICU 3.4
     */
    public static final int PATTERN_SYNTAX = 42;

    /**
     * Binary property Pattern_White_Space (new in Unicode 4.1).
     * See UAX #31 Identifier and Pattern Syntax
     * (http://www.unicode.org/reports/tr31/)
     * @stable ICU 3.4
     */
    public static final int PATTERN_WHITE_SPACE = 43;

    /**
     * Binary property alnum (a C/POSIX character class).
     * Implemented according to the UTS #18 Annex C Standard Recommendation.
     * See the UCharacter class documentation.
     * @stable ICU 3.4
     */
    public static final int POSIX_ALNUM = 44;

    /**
     * Binary property blank (a C/POSIX character class).
     * Implemented according to the UTS #18 Annex C Standard Recommendation.
     * See the UCharacter class documentation.
     * @stable ICU 3.4
     */
    public static final int POSIX_BLANK = 45;

    /**
     * Binary property graph (a C/POSIX character class).
     * Implemented according to the UTS #18 Annex C Standard Recommendation.
     * See the UCharacter class documentation.
     * @stable ICU 3.4
     */
    public static final int POSIX_GRAPH = 46;

    /**
     * Binary property print (a C/POSIX character class).
     * Implemented according to the UTS #18 Annex C Standard Recommendation.
     * See the UCharacter class documentation.
     * @stable ICU 3.4
     */
    public static final int POSIX_PRINT = 47;

    /**
     * Binary property xdigit (a C/POSIX character class).
     * Implemented according to the UTS #18 Annex C Standard Recommendation.
     * See the UCharacter class documentation.
     * @stable ICU 3.4
     */
    public static final int POSIX_XDIGIT = 48;

    /**
     * Binary property Cased.
     * For Lowercase, Uppercase and Titlecase characters.
     * @stable ICU 4.4
     */
    public static final int CASED = 49;

    /**
     * Binary property Case_Ignorable.
     * Used in context-sensitive case mappings.
     * @stable ICU 4.4
     */
    public static final int CASE_IGNORABLE = 50;

    /**
     * Binary property Changes_When_Lowercased.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_LOWERCASED = 51;

    /**
     * Binary property Changes_When_Uppercased.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_UPPERCASED = 52;

    /**
     * Binary property Changes_When_Titlecased.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_TITLECASED = 53;

    /**
     * Binary property Changes_When_Casefolded.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_CASEFOLDED = 54;

    /**
     * Binary property Changes_When_Casemapped.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_CASEMAPPED = 55;

    /**
     * Binary property Changes_When_NFKC_Casefolded.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_NFKC_CASEFOLDED = 56;

    /**
     * Binary property Emoji.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    public static final int EMOJI = 57;

    /**
     * Binary property Emoji_Presentation.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    public static final int EMOJI_PRESENTATION = 58;

    /**
     * Binary property Emoji_Modifier.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    public static final int EMOJI_MODIFIER = 59;

    /**
     * Binary property Emoji_Modifier_Base.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    public static final int EMOJI_MODIFIER_BASE = 60;

    /**
     * Binary property Emoji_Component.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 60
     */
    public static final int EMOJI_COMPONENT = 61;

    /**
     * Binary property Regional_Indicator.
     *
     * @stable ICU 60
     */
    public static final int REGIONAL_INDICATOR = 62;

    /**
     * Binary property Prepended_Concatenation_Mark.
     *
     * @stable ICU 60
     */
    public static final int PREPENDED_CONCATENATION_MARK = 63;

    /**
     * Binary property Extended_Pictographic.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 62
     */
    public static final int EXTENDED_PICTOGRAPHIC = 64;

    /**
     * Binary property of strings Basic_Emoji.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int BASIC_EMOJI = 65;

    /**
     * Binary property of strings Emoji_Keycap_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int EMOJI_KEYCAP_SEQUENCE = 66;

    /**
     * Binary property of strings RGI_Emoji_Modifier_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int RGI_EMOJI_MODIFIER_SEQUENCE = 67;

    /**
     * Binary property of strings RGI_Emoji_Flag_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int RGI_EMOJI_FLAG_SEQUENCE = 68;

    /**
     * Binary property of strings RGI_Emoji_Tag_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int RGI_EMOJI_TAG_SEQUENCE = 69;

    /**
     * Binary property of strings RGI_Emoji_ZWJ_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int RGI_EMOJI_ZWJ_SEQUENCE = 70;

    /**
     * Binary property of strings RGI_Emoji.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int RGI_EMOJI = 71;

    /**
     * One more than the last constant for binary Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int BINARY_LIMIT = 72;

    /**
     * Enumerated property Bidi_Class.
     * Same as UCharacter.getDirection(int), returns UCharacterDirection values.
     * @stable ICU 2.4
     */
    public static final int BIDI_CLASS = 0x1000;

    /**
     * First constant for enumerated/integer Unicode properties.
     * @stable ICU 2.4
     */
    public static final int INT_START = BIDI_CLASS;

    /**
     * Enumerated property Block.
     * Same as UCharacter.UnicodeBlock.of(int), returns UCharacter.UnicodeBlock
     * values.
     * @stable ICU 2.4
     */
    public static final int BLOCK = 0x1001;

    /**
     * Enumerated property Canonical_Combining_Class.
     * Same as UCharacter.getCombiningClass(int), returns 8-bit numeric values.
     * @stable ICU 2.4
     */
    public static final int CANONICAL_COMBINING_CLASS = 0x1002;

    /**
     * Enumerated property Decomposition_Type.
     * Returns UCharacter.DecompositionType values.
     * @stable ICU 2.4
     */
    public static final int DECOMPOSITION_TYPE = 0x1003;

    /**
     * Enumerated property East_Asian_Width.
     * See http://www.unicode.org/reports/tr11/
     * Returns UCharacter.EastAsianWidth values.
     * @stable ICU 2.4
     */
    public static final int EAST_ASIAN_WIDTH = 0x1004;

    /**
     * Enumerated property General_Category.
     * Same as UCharacter.getType(int), returns UCharacterCategory values.
     * @stable ICU 2.4
     */
    public static final int GENERAL_CATEGORY = 0x1005;

    /**
     * Enumerated property Joining_Group.
     * Returns UCharacter.JoiningGroup values.
     * @stable ICU 2.4
     */
    public static final int JOINING_GROUP = 0x1006;

    /**
     * Enumerated property Joining_Type.
     * Returns UCharacter.JoiningType values.
     * @stable ICU 2.4
     */
    public static final int JOINING_TYPE = 0x1007;

    /**
     * Enumerated property Line_Break.
     * Returns UCharacter.LineBreak values.
     * @stable ICU 2.4
     */
    public static final int LINE_BREAK = 0x1008;

    /**
     * Enumerated property Numeric_Type.
     * Returns UCharacter.NumericType values.
     * @stable ICU 2.4
     */
    public static final int NUMERIC_TYPE = 0x1009;

    /**
     * Enumerated property Script.
     * Same as UScript.getScript(int), returns UScript values.
     * @stable ICU 2.4
     */
    public static final int SCRIPT = 0x100A;

    /**
     * Enumerated property Hangul_Syllable_Type, new in Unicode 4.
     * Returns UCharacter.HangulSyllableType values.
     * @stable ICU 2.6
     */
    public static final int HANGUL_SYLLABLE_TYPE = 0x100B;

    /**
     * Enumerated property NFD_Quick_Check.
     * Returns numeric values compatible with Normalizer.QuickCheckResult.
     * @stable ICU 3.0
     */
    public static final int NFD_QUICK_CHECK = 0x100C;

    /**
     * Enumerated property NFKD_Quick_Check.
     * Returns numeric values compatible with Normalizer.QuickCheckResult.
     * @stable ICU 3.0
     */
    public static final int NFKD_QUICK_CHECK = 0x100D;

    /**
     * Enumerated property NFC_Quick_Check.
     * Returns numeric values compatible with Normalizer.QuickCheckResult.
     * @stable ICU 3.0
     */
    public static final int NFC_QUICK_CHECK = 0x100E;

    /**
     * Enumerated property NFKC_Quick_Check.
     * Returns numeric values compatible with Normalizer.QuickCheckResult.
     * @stable ICU 3.0
     */
    public static final int NFKC_QUICK_CHECK = 0x100F;

    /**
     * Enumerated property Lead_Canonical_Combining_Class.
     * ICU-specific property for the ccc of the first code point
     * of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
     * Useful for checking for canonically ordered text;
     * see Normalizer.FCD and http://www.unicode.org/notes/tn5/#FCD .
     * Returns 8-bit numeric values like CANONICAL_COMBINING_CLASS.
     * @stable ICU 3.0
     */
    public static final int LEAD_CANONICAL_COMBINING_CLASS = 0x1010;

    /**
     * Enumerated property Trail_Canonical_Combining_Class.
     * ICU-specific property for the ccc of the last code point
     * of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
     * Useful for checking for canonically ordered text;
     * see Normalizer.FCD and http://www.unicode.org/notes/tn5/#FCD .
     * Returns 8-bit numeric values like CANONICAL_COMBINING_CLASS.
     * @stable ICU 3.0
     */
    public static final int TRAIL_CANONICAL_COMBINING_CLASS = 0x1011;

    /**
     * Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
     * Used in UAX #29: Text Boundaries
     * (http://www.unicode.org/reports/tr29/)
     * Returns UCharacter.GraphemeClusterBreak values.
     * @stable ICU 3.4
     */
    public static final int GRAPHEME_CLUSTER_BREAK = 0x1012;

    /**
     * Enumerated property Sentence_Break (new in Unicode 4.1).
     * Used in UAX #29: Text Boundaries
     * (http://www.unicode.org/reports/tr29/)
     * Returns UCharacter.SentenceBreak values.
     * @stable ICU 3.4
     */
    public static final int SENTENCE_BREAK = 0x1013;

    /**
     * Enumerated property Word_Break (new in Unicode 4.1).
     * Used in UAX #29: Text Boundaries
     * (http://www.unicode.org/reports/tr29/)
     * Returns UCharacter.WordBreak values.
     * @stable ICU 3.4
     */
    public static final int WORD_BREAK = 0x1014;

    /**
     * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
     * Used in UAX #9: Unicode Bidirectional Algorithm
     * (http://www.unicode.org/reports/tr9/)
     * Returns UCharacter.BidiPairedBracketType values.
     * @stable ICU 52
     */
    public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;

    /**
     * Enumerated property Indic_Positional_Category.
     * New in Unicode 6.0 as provisional property Indic_Matra_Category;
     * renamed and changed to informative in Unicode 8.0.
     * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
     * @stable ICU 63
     */
    public static final int INDIC_POSITIONAL_CATEGORY = 0x1016;

    /**
     * Enumerated property Indic_Syllabic_Category.
     * New in Unicode 6.0 as provisional; informative since Unicode 8.0.
     * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
     * @stable ICU 63
     */
    public static final int INDIC_SYLLABIC_CATEGORY = 0x1017;

    /**
     * Enumerated property Vertical_Orientation.
     * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/).
     * New as a UCD property in Unicode 10.0.
     * @stable ICU 63
     */
    public static final int VERTICAL_ORIENTATION = 0x1018;

    /**
     * One more than the last constant for enumerated/integer Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int INT_LIMIT = 0x1019;

    /**
     * Bitmask property General_Category_Mask.
     * This is the General_Category property returned as a bit mask.
     * When used in UCharacter.getIntPropertyValue(c),
     * returns bit masks for UCharacterCategory values where exactly one bit is set.
     * When used with UCharacter.getPropertyValueName() and UCharacter.getPropertyValueEnum(),
     * a multi-bit mask is used for sets of categories like "Letters".
     * @stable ICU 2.4
     */
    public static final int GENERAL_CATEGORY_MASK = 0x2000;

    /**
     * First constant for bit-mask Unicode properties.
     * @stable ICU 2.4
     */
    public static final int MASK_START = GENERAL_CATEGORY_MASK;

    /**
     * One more than the last constant for bit-mask Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int MASK_LIMIT = 0x2001;

    /**
     * Double property Numeric_Value.
     * Corresponds to UCharacter.getUnicodeNumericValue(int).
     * @stable ICU 2.4
     */
    public static final int NUMERIC_VALUE = 0x3000;

    /**
     * First constant for double Unicode properties.
     * @stable ICU 2.4
     */
    public static final int DOUBLE_START = NUMERIC_VALUE;

    /**
     * One more than the last constant for double Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int DOUBLE_LIMIT = 0x3001;

    /**
     * String property Age.
     * Corresponds to UCharacter.getAge(int).
     * @stable ICU 2.4
     */
    public static final int AGE = 0x4000;

    /**
     * First constant for string Unicode properties.
     * @stable ICU 2.4
     */
    public static final int STRING_START = AGE;

    /**
     * String property Bidi_Mirroring_Glyph.
     * Corresponds to UCharacter.getMirror(int).
     * @stable ICU 2.4
     */
    public static final int BIDI_MIRRORING_GLYPH = 0x4001;

    /**
     * String property Case_Folding.
     * Corresponds to UCharacter.foldCase(String, boolean).
     * @stable ICU 2.4
     */
    public static final int CASE_FOLDING = 0x4002;

    /**
     * Deprecated string property ISO_Comment.
     * Corresponds to UCharacter.getISOComment(int).
     * @deprecated ICU 49
     */
    @Deprecated
    public static final int ISO_COMMENT = 0x4003;

    /**
     * String property Lowercase_Mapping.
     * Corresponds to UCharacter.toLowerCase(String).
     * @stable ICU 2.4
     */
    public static final int LOWERCASE_MAPPING = 0x4004;

    /**
     * String property Name.
     * Corresponds to UCharacter.getName(int).
     * @stable ICU 2.4
     */
    public static final int NAME = 0x4005;

    /**
     * String property Simple_Case_Folding.
     * Corresponds to UCharacter.foldCase(int, boolean).
     * @stable ICU 2.4
     */
    public static final int SIMPLE_CASE_FOLDING = 0x4006;

    /**
     * String property Simple_Lowercase_Mapping.
     * Corresponds to UCharacter.toLowerCase(int).
     * @stable ICU 2.4
     */
    public static final int SIMPLE_LOWERCASE_MAPPING = 0x4007;

    /**
     * String property Simple_Titlecase_Mapping.
     * Corresponds to UCharacter.toTitleCase(int).
     * @stable ICU 2.4
     */
    public static final int SIMPLE_TITLECASE_MAPPING = 0x4008;

    /**
     * String property Simple_Uppercase_Mapping.
     * Corresponds to UCharacter.toUpperCase(int).
     * @stable ICU 2.4
     */
    public static final int SIMPLE_UPPERCASE_MAPPING = 0x4009;

    /**
     * String property Titlecase_Mapping.
     * Corresponds to UCharacter.toTitleCase(String).
     * @stable ICU 2.4
     */
    public static final int TITLECASE_MAPPING = 0x400A;

    /**
     * String property Unicode_1_Name.
     * This property is of little practical value.
     * Beginning with ICU 49, ICU APIs return null or an empty string for this property.
     * Corresponds to UCharacter.getName1_0(int).
     * @deprecated ICU 49
     */
    @Deprecated
    public static final int UNICODE_1_NAME = 0x400B;

    /**
     * String property Uppercase_Mapping.
     * Corresponds to UCharacter.toUpperCase(String).
     * @stable ICU 2.4
     */
    public static final int UPPERCASE_MAPPING = 0x400C;

    /**
     * String property Bidi_Paired_Bracket (new in Unicode 6.3).
     * Corresponds to UCharacter.getBidiPairedBracket.
     * @stable ICU 52
     */
    public static final int BIDI_PAIRED_BRACKET = 0x400D;

    /**
     * One more than the last constant for string Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int STRING_LIMIT = 0x400E;

    /**
     * Miscellaneous property Script_Extensions (new in Unicode 6.0).
     * Some characters are commonly used in multiple scripts.
     * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
     * Corresponds to UScript.hasScript and UScript.getScriptExtensions.
     * @stable ICU 4.6
     */
    public static final int SCRIPT_EXTENSIONS = 0x7000;

    /**
     * First constant for Unicode properties with unusual value types.
     * @stable ICU 4.6
     */
    public static final int OTHER_PROPERTY_START = SCRIPT_EXTENSIONS;

    /**
     * One more than the last constant for Unicode properties with unusual value types.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int OTHER_PROPERTY_LIMIT = 0x7001;

    /**
     * Selector constants for UCharacter.getPropertyName() and
     * UCharacter.getPropertyValueName().  These selectors are used to
     * choose which name is returned for a given property or value.
     * All properties and values have a long name.  Most have a short
     * name, but some do not.  Unicode allows for additional names,
     * beyond the long and short name, which would be indicated by
     * LONG + i, where i=1, 2,...
     *
     * @see UCharacter#getPropertyName
     * @see UCharacter#getPropertyValueName
     * @stable ICU 2.4
     */
    public interface NameChoice {
        /**
         * Selector for the abbreviated name of a property or value.
         * Most properties and values have a short name; those that do
         * not return null.
         * @stable ICU 2.4
         */
        public static final int SHORT = 0;

        /**
         * Selector for the long name of a property or value.  All
         * properties and values have a long name.
         * @stable ICU 2.4
         */
        public static final int LONG = 1;

        /**
         * The number of predefined property name choices.  Individual
         * properties or values may have more than COUNT aliases.
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 2;
    }
}