// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/**
*******************************************************************************
* Copyright (C) 1996-2016, International Business Machines Corporation and
* others. All Rights Reserved.
*******************************************************************************
*/

package com.ibm.icu.lang;

/**
 * <p>Selection constants for Unicode properties.
 * <p>These constants are used in functions like
 * UCharacter.hasBinaryProperty(int) to select one of the Unicode properties.
 *
 * <p>The properties APIs are intended to reflect Unicode properties as
 * defined in the Unicode Character Database (UCD) and Unicode Technical
 * Reports (UTR).
 * <p>For details about the properties see
 * <a href="http://www.unicode.org/reports/tr44/">UAX #44: Unicode Character Database</a>.
 *
 * <p>The constants are grouped by value type, and each group occupies its own
 * numeric range: binary properties start at {@link #BINARY_START} (0),
 * enumerated/integer properties at {@link #INT_START} (0x1000),
 * bit-mask properties at {@link #MASK_START} (0x2000),
 * double properties at {@link #DOUBLE_START} (0x3000),
 * string properties at {@link #STRING_START} (0x4000),
 * and properties with unusual value types at
 * {@link #OTHER_PROPERTY_START} (0x7000).
 *
 * <p>Important: If ICU is built with UCD files from Unicode versions below
 * 3.2, then properties marked with "new" are not or not fully
 * available. Check UCharacter.getUnicodeVersion() to be sure.
 * @author Syn Wee Quek
 * @stable ICU 2.6
 * @see com.ibm.icu.lang.UCharacter
 */
public interface UProperty
{
    // public data member --------------------------------------------------

    /**
     * Special value indicating undefined property.
     * @internal
     * @deprecated This API is ICU internal only.
     */
    @Deprecated
    public static final int UNDEFINED = -1;

    /**
     * <p>Binary property Alphabetic.
     * <p>Property for UCharacter.isUAlphabetic(), different from the property
     * in UCharacter.isalpha().
     * <p>Lu + Ll + Lt + Lm + Lo + Nl + Other_Alphabetic.
     * @stable ICU 2.6
     */
    public static final int ALPHABETIC = 0;

    /**
     * First constant for binary Unicode properties.
     * @stable ICU 2.6
     */
    public static final int BINARY_START = ALPHABETIC;

    /**
     * Binary property ASCII_Hex_Digit (0-9 A-F a-f).
     * @stable ICU 2.6
     */
    public static final int ASCII_HEX_DIGIT = 1;

    /**
     * <p>Binary property Bidi_Control.
     * <p>Format controls which have specific functions in the Bidi Algorithm.
     *
     * @stable ICU 2.6
     */
    public static final int BIDI_CONTROL = 2;

    /**
     * <p>Binary property Bidi_Mirrored.
     * <p>Characters that may change display in RTL text.
     * <p>Property for UCharacter.isMirrored().
     * <p>See Bidi Algorithm; UTR 9.
     * @stable ICU 2.6
     */
    public static final int BIDI_MIRRORED = 3;

    /**
     * <p>Binary property Dash.
     * <p>Variations of dashes.
     * @stable ICU 2.6
     */
    public static final int DASH = 4;

    /**
     * <p>Binary property Default_Ignorable_Code_Point (new).
     *
     * <p>Property that indicates codepoint is ignorable in most processing.
     *
     * <p>Codepoints (2060..206F, FFF0..FFFB, E0000..E0FFF) +
     * Other_Default_Ignorable_Code_Point + (Cf + Cc + Cs - White_Space)
     * @stable ICU 2.6
     */
    public static final int DEFAULT_IGNORABLE_CODE_POINT = 5;

    /**
     * <p>Binary property Deprecated (new).
     * <p>The usage of deprecated characters is strongly discouraged.
     * @stable ICU 2.6
     */
    public static final int DEPRECATED = 6;

    /**
     * <p>Binary property Diacritic.
     * <p>Characters that linguistically modify the meaning of another
     * character to which they apply.
     * @stable ICU 2.6
     */
    public static final int DIACRITIC = 7;

    /**
     * <p>Binary property Extender.
     * <p>Extend the value or shape of a preceding alphabetic character, e.g.
     * length and iteration marks.
     * @stable ICU 2.6
     */
    public static final int EXTENDER = 8;

    /**
     * <p>Binary property Full_Composition_Exclusion.
     * <p>CompositionExclusions.txt + Singleton Decompositions +
     * Non-Starter Decompositions.
     * @stable ICU 2.6
     */
    public static final int FULL_COMPOSITION_EXCLUSION = 9;

    /**
     * <p>Binary property Grapheme_Base (new).
     * <p>For programmatic determination of grapheme cluster boundaries.
     * [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ
     * @stable ICU 2.6
     */
    public static final int GRAPHEME_BASE = 10;

    /**
     * <p>Binary property Grapheme_Extend (new).
     * <p>For programmatic determination of grapheme cluster boundaries.
     * <p>Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ
     * @stable ICU 2.6
     */
    public static final int GRAPHEME_EXTEND = 11;

    /**
     * <p>Binary property Grapheme_Link (new).
     * <p>For programmatic determination of grapheme cluster boundaries.
     * @stable ICU 2.6
     */
    public static final int GRAPHEME_LINK = 12;

    /**
     * <p>Binary property Hex_Digit.
     * <p>Characters commonly used for hexadecimal numbers.
     * @stable ICU 2.6
     */
    public static final int HEX_DIGIT = 13;

    /**
     * <p>Binary property Hyphen.
     * <p>Dashes used to mark connections between pieces of words, plus the
     * Katakana middle dot.
     * @stable ICU 2.6
     */
    public static final int HYPHEN = 14;

    /**
     * <p>Binary property ID_Continue.
     * <p>Characters that can continue an identifier.
     * <p>ID_Start+Mn+Mc+Nd+Pc
     * @stable ICU 2.6
     */
    public static final int ID_CONTINUE = 15;

    /**
     * <p>Binary property ID_Start.
     * <p>Characters that can start an identifier.
     * <p>Lu+Ll+Lt+Lm+Lo+Nl
     * @stable ICU 2.6
     */
    public static final int ID_START = 16;

    /**
     * <p>Binary property Ideographic.
     * <p>CJKV ideographs.
     * @stable ICU 2.6
     */
    public static final int IDEOGRAPHIC = 17;

    /**
     * <p>Binary property IDS_Binary_Operator (new).
     * <p>For programmatic determination of Ideographic Description Sequences.
     *
     * @stable ICU 2.6
     */
    public static final int IDS_BINARY_OPERATOR = 18;

    /**
     * <p>Binary property IDS_Trinary_Operator (new).
     * <p>For programmatic determination of Ideographic Description
     * Sequences.
     * @stable ICU 2.6
     */
    public static final int IDS_TRINARY_OPERATOR = 19;

    /**
     * <p>Binary property Join_Control.
     * <p>Format controls for cursive joining and ligation.
     * @stable ICU 2.6
     */
    public static final int JOIN_CONTROL = 20;

    /**
     * <p>Binary property Logical_Order_Exception (new).
     * <p>Characters that do not use logical order and require special
     * handling in most processing.
     * @stable ICU 2.6
     */
    public static final int LOGICAL_ORDER_EXCEPTION = 21;

    /**
     * <p>Binary property Lowercase.
     * <p>Same as UCharacter.isULowercase(), different from
     * UCharacter.islower().
     * <p>Ll+Other_Lowercase
     * @stable ICU 2.6
     */
    public static final int LOWERCASE = 22;

    /**
     * <p>Binary property Math.
     * <p>Sm+Other_Math
     * @stable ICU 2.6
     */
    public static final int MATH = 23;

    /**
     * <p>Binary property Noncharacter_Code_Point.
     * <p>Code points that are explicitly defined as illegal for the encoding
     * of characters.
     * @stable ICU 2.6
     */
    public static final int NONCHARACTER_CODE_POINT = 24;

    /**
     * <p>Binary property Quotation_Mark.
     * @stable ICU 2.6
     */
    public static final int QUOTATION_MARK = 25;

    /**
     * <p>Binary property Radical (new).
     * <p>For programmatic determination of Ideographic Description
     * Sequences.
     * @stable ICU 2.6
     */
    public static final int RADICAL = 26;

    /**
     * <p>Binary property Soft_Dotted (new).
     * <p>Characters with a "soft dot", like i or j.
     * <p>An accent placed on these characters causes the dot to disappear.
     * @stable ICU 2.6
     */
    public static final int SOFT_DOTTED = 27;

    /**
     * <p>Binary property Terminal_Punctuation.
     * <p>Punctuation characters that generally mark the end of textual
     * units.
     * @stable ICU 2.6
     */
    public static final int TERMINAL_PUNCTUATION = 28;

    /**
     * <p>Binary property Unified_Ideograph (new).
     * <p>For programmatic determination of Ideographic Description
     * Sequences.
     * @stable ICU 2.6
     */
    public static final int UNIFIED_IDEOGRAPH = 29;

    /**
     * <p>Binary property Uppercase.
     * <p>Same as UCharacter.isUUppercase(), different from
     * UCharacter.isUpperCase().
     * <p>Lu+Other_Uppercase
     * @stable ICU 2.6
     */
    public static final int UPPERCASE = 30;

    /**
     * <p>Binary property White_Space.
     * <p>Same as UCharacter.isUWhiteSpace(), different from
     * UCharacter.isSpace() and UCharacter.isWhitespace().
     * Space characters+TAB+CR+LF-ZWSP-ZWNBSP
     * @stable ICU 2.6
     */
    public static final int WHITE_SPACE = 31;

    /**
     * <p>Binary property XID_Continue.
     * <p>ID_Continue modified to allow closure under normalization forms
     * NFKC and NFKD.
     * @stable ICU 2.6
     */
    public static final int XID_CONTINUE = 32;

    /**
     * <p>Binary property XID_Start.
     * <p>ID_Start modified to allow closure under normalization forms NFKC
     * and NFKD.
     * @stable ICU 2.6
     */
    public static final int XID_START = 33;

    /**
     * <p>Binary property Case_Sensitive.
     * <p>Either the source of a case
     * mapping or <i>in</i> the target of a case mapping. Not the same as
     * the general category Cased_Letter.
     * @stable ICU 2.6
     */
    public static final int CASE_SENSITIVE = 34;

    /**
     * Binary property STerm (new in Unicode 4.0.1).
     * Sentence Terminal. Used in UAX #29: Text Boundaries
     * (http://www.unicode.org/reports/tr29/)
     * @stable ICU 3.0
     */
    public static final int S_TERM = 35;

    /**
     * Binary property Variation_Selector (new in Unicode 4.0.1).
     * Indicates all those characters that qualify as Variation Selectors.
     * For details on the behavior of these characters,
     * see StandardizedVariants.html and 15.6 Variation Selectors.
     * @stable ICU 3.0
     */
    public static final int VARIATION_SELECTOR = 36;

    /**
     * Binary property NFD_Inert.
     * ICU-specific property for characters that are inert under NFD,
     * i.e., they do not interact with adjacent characters.
     * Used for example in normalizing transforms in incremental mode
     * to find the boundary of safely normalizable text despite possible
     * text additions.
     *
     * There is one such property per normalization form.
     * These properties are computed as follows - an inert character is:
     * a) unassigned, or ALL of the following:
     * b) of combining class 0.
     * c) not decomposed by this normalization form.
     * AND if NFC or NFKC,
     * d) can never compose with a previous character.
     * e) can never compose with a following character.
     * f) can never change if another character is added.
     * Example: a-breve might satisfy all but f, but if you
     * add an ogonek it changes to a-ogonek + breve
     *
     * See also com.ibm.text.UCD.NFSkippable in the ICU4J repository,
     * and icu/source/common/unormimp.h .
     * @stable ICU 3.0
     */
    public static final int NFD_INERT = 37;

    /**
     * Binary property NFKD_Inert.
     * ICU-specific property for characters that are inert under NFKD,
     * i.e., they do not interact with adjacent characters.
     * Used for example in normalizing transforms in incremental mode
     * to find the boundary of safely normalizable text despite possible
     * text additions.
     * @see #NFD_INERT
     * @stable ICU 3.0
     */
    public static final int NFKD_INERT = 38;

    /**
     * Binary property NFC_Inert.
     * ICU-specific property for characters that are inert under NFC,
     * i.e., they do not interact with adjacent characters.
     * Used for example in normalizing transforms in incremental mode
     * to find the boundary of safely normalizable text despite possible
     * text additions.
     * @see #NFD_INERT
     * @stable ICU 3.0
     */
    public static final int NFC_INERT = 39;

    /**
     * Binary property NFKC_Inert.
     * ICU-specific property for characters that are inert under NFKC,
     * i.e., they do not interact with adjacent characters.
     * Used for example in normalizing transforms in incremental mode
     * to find the boundary of safely normalizable text despite possible
     * text additions.
     * @see #NFD_INERT
     * @stable ICU 3.0
     */
    public static final int NFKC_INERT = 40;

    /**
     * Binary Property Segment_Starter.
     * ICU-specific property for characters that are starters in terms of
     * Unicode normalization and combining character sequences.
     * They have ccc=0 and do not occur in non-initial position of the
     * canonical decomposition of any character
     * (like " in NFD(a-umlaut) and a Jamo T in an NFD(Hangul LVT)).
     * ICU uses this property for segmenting a string for generating a set of
     * canonically equivalent strings, e.g. for canonical closure while
     * processing collation tailoring rules.
     * @stable ICU 3.0
     */
    public static final int SEGMENT_STARTER = 41;

    /**
     * Binary property Pattern_Syntax (new in Unicode 4.1).
     * See UAX #31 Identifier and Pattern Syntax
     * (http://www.unicode.org/reports/tr31/)
     * @stable ICU 3.4
     */
    public static final int PATTERN_SYNTAX = 42;

    /**
     * Binary property Pattern_White_Space (new in Unicode 4.1).
     * See UAX #31 Identifier and Pattern Syntax
     * (http://www.unicode.org/reports/tr31/)
     * @stable ICU 3.4
     */
    public static final int PATTERN_WHITE_SPACE = 43;

    /**
     * Binary property alnum (a C/POSIX character class).
     * Implemented according to the UTS #18 Annex C Standard Recommendation.
     * See the UCharacter class documentation.
     * @stable ICU 3.4
     */
    public static final int POSIX_ALNUM = 44;

    /**
     * Binary property blank (a C/POSIX character class).
     * Implemented according to the UTS #18 Annex C Standard Recommendation.
     * See the UCharacter class documentation.
     * @stable ICU 3.4
     */
    public static final int POSIX_BLANK = 45;

    /**
     * Binary property graph (a C/POSIX character class).
     * Implemented according to the UTS #18 Annex C Standard Recommendation.
     * See the UCharacter class documentation.
     * @stable ICU 3.4
     */
    public static final int POSIX_GRAPH = 46;

    /**
     * Binary property print (a C/POSIX character class).
     * Implemented according to the UTS #18 Annex C Standard Recommendation.
     * See the UCharacter class documentation.
     * @stable ICU 3.4
     */
    public static final int POSIX_PRINT = 47;

    /**
     * Binary property xdigit (a C/POSIX character class).
     * Implemented according to the UTS #18 Annex C Standard Recommendation.
     * See the UCharacter class documentation.
     * @stable ICU 3.4
     */
    public static final int POSIX_XDIGIT = 48;

    /**
     * Binary property Cased.
     * For Lowercase, Uppercase and Titlecase characters.
     * @stable ICU 4.4
     */
    public static final int CASED = 49;

    /**
     * Binary property Case_Ignorable.
     * Used in context-sensitive case mappings.
     * @stable ICU 4.4
     */
    public static final int CASE_IGNORABLE = 50;

    /**
     * Binary property Changes_When_Lowercased.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_LOWERCASED = 51;

    /**
     * Binary property Changes_When_Uppercased.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_UPPERCASED = 52;

    /**
     * Binary property Changes_When_Titlecased.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_TITLECASED = 53;

    /**
     * Binary property Changes_When_Casefolded.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_CASEFOLDED = 54;

    /**
     * Binary property Changes_When_Casemapped.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_CASEMAPPED = 55;

    /**
     * Binary property Changes_When_NFKC_Casefolded.
     * @stable ICU 4.4
     */
    public static final int CHANGES_WHEN_NFKC_CASEFOLDED = 56;

    /**
     * Binary property Emoji.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    public static final int EMOJI = 57;

    /**
     * Binary property Emoji_Presentation.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    public static final int EMOJI_PRESENTATION = 58;

    /**
     * Binary property Emoji_Modifier.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    public static final int EMOJI_MODIFIER = 59;

    /**
     * Binary property Emoji_Modifier_Base.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 57
     */
    public static final int EMOJI_MODIFIER_BASE = 60;

    /**
     * Binary property Emoji_Component.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 60
     */
    public static final int EMOJI_COMPONENT = 61;

    /**
     * Binary property Regional_Indicator.
     *
     * @stable ICU 60
     */
    public static final int REGIONAL_INDICATOR = 62;

    /**
     * Binary property Prepended_Concatenation_Mark.
     *
     * @stable ICU 60
     */
    public static final int PREPENDED_CONCATENATION_MARK = 63;

    /**
     * Binary property Extended_Pictographic.
     * See http://www.unicode.org/reports/tr51/#Emoji_Properties
     *
     * @stable ICU 62
     */
    public static final int EXTENDED_PICTOGRAPHIC = 64;

    /**
     * Binary property of strings Basic_Emoji.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int BASIC_EMOJI = 65;

    /**
     * Binary property of strings Emoji_Keycap_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int EMOJI_KEYCAP_SEQUENCE = 66;

    /**
     * Binary property of strings RGI_Emoji_Modifier_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int RGI_EMOJI_MODIFIER_SEQUENCE = 67;

    /**
     * Binary property of strings RGI_Emoji_Flag_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int RGI_EMOJI_FLAG_SEQUENCE = 68;

    /**
     * Binary property of strings RGI_Emoji_Tag_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int RGI_EMOJI_TAG_SEQUENCE = 69;

    /**
     * Binary property of strings RGI_Emoji_ZWJ_Sequence.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int RGI_EMOJI_ZWJ_SEQUENCE = 70;

    /**
     * Binary property of strings RGI_Emoji.
     * See https://www.unicode.org/reports/tr51/#Emoji_Sets
     *
     * @stable ICU 70
     */
    public static final int RGI_EMOJI = 71;

    /**
     * Binary property IDS_Unary_Operator.
     * For programmatic determination of Ideographic Description Sequences.
     *
     * @draft ICU 74
     */
    public static final int IDS_UNARY_OPERATOR = 72;

    /**
     * Binary property ID_Compat_Math_Start.
     * <p>Used in mathematical identifier profile in UAX #31.
     *
     * @draft ICU 74
     */
    public static final int ID_COMPAT_MATH_START = 73;

    /**
     * Binary property ID_Compat_Math_Continue.
     * <p>Used in mathematical identifier profile in UAX #31.
     *
     * @draft ICU 74
     */
    public static final int ID_COMPAT_MATH_CONTINUE = 74;

    /**
     * One more than the last constant for binary Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int BINARY_LIMIT = 75;

    /**
     * Enumerated property Bidi_Class.
     * Same as UCharacter.getDirection(int), returns UCharacterDirection values.
     * @stable ICU 2.4
     */
    public static final int BIDI_CLASS = 0x1000;

    /**
     * First constant for enumerated/integer Unicode properties.
     * @stable ICU 2.4
     */
    public static final int INT_START = BIDI_CLASS;

    /**
     * Enumerated property Block.
     * Same as UCharacter.UnicodeBlock.of(int), returns UCharacter.UnicodeBlock
     * values.
     * @stable ICU 2.4
     */
    public static final int BLOCK = 0x1001;

    /**
     * Enumerated property Canonical_Combining_Class.
     * Same as UCharacter.getCombiningClass(int), returns 8-bit numeric values.
     * @stable ICU 2.4
     */
    public static final int CANONICAL_COMBINING_CLASS = 0x1002;

    /**
     * Enumerated property Decomposition_Type.
     * Returns UCharacter.DecompositionType values.
     * @stable ICU 2.4
     */
    public static final int DECOMPOSITION_TYPE = 0x1003;

    /**
     * Enumerated property East_Asian_Width.
     * See http://www.unicode.org/reports/tr11/
     * Returns UCharacter.EastAsianWidth values.
     * @stable ICU 2.4
     */
    public static final int EAST_ASIAN_WIDTH = 0x1004;

    /**
     * Enumerated property General_Category.
     * Same as UCharacter.getType(int), returns UCharacterCategory values.
     * @stable ICU 2.4
     */
    public static final int GENERAL_CATEGORY = 0x1005;

    /**
     * Enumerated property Joining_Group.
     * Returns UCharacter.JoiningGroup values.
     * @stable ICU 2.4
     */
    public static final int JOINING_GROUP = 0x1006;

    /**
     * Enumerated property Joining_Type.
     * Returns UCharacter.JoiningType values.
     * @stable ICU 2.4
     */
    public static final int JOINING_TYPE = 0x1007;

    /**
     * Enumerated property Line_Break.
     * Returns UCharacter.LineBreak values.
     * @stable ICU 2.4
     */
    public static final int LINE_BREAK = 0x1008;

    /**
     * Enumerated property Numeric_Type.
     * Returns UCharacter.NumericType values.
     * @stable ICU 2.4
     */
    public static final int NUMERIC_TYPE = 0x1009;

    /**
     * Enumerated property Script.
     * Same as UScript.getScript(int), returns UScript values.
     * @stable ICU 2.4
     */
    public static final int SCRIPT = 0x100A;

    /**
     * Enumerated property Hangul_Syllable_Type, new in Unicode 4.
     * Returns UCharacter.HangulSyllableType values.
     * @stable ICU 2.6
     */
    public static final int HANGUL_SYLLABLE_TYPE = 0x100B;

    /**
     * Enumerated property NFD_Quick_Check.
     * Returns numeric values compatible with Normalizer.QuickCheckResult.
     * @stable ICU 3.0
     */
    public static final int NFD_QUICK_CHECK = 0x100C;

    /**
     * Enumerated property NFKD_Quick_Check.
     * Returns numeric values compatible with Normalizer.QuickCheckResult.
     * @stable ICU 3.0
     */
    public static final int NFKD_QUICK_CHECK = 0x100D;

    /**
     * Enumerated property NFC_Quick_Check.
     * Returns numeric values compatible with Normalizer.QuickCheckResult.
     * @stable ICU 3.0
     */
    public static final int NFC_QUICK_CHECK = 0x100E;

    /**
     * Enumerated property NFKC_Quick_Check.
     * Returns numeric values compatible with Normalizer.QuickCheckResult.
     * @stable ICU 3.0
     */
    public static final int NFKC_QUICK_CHECK = 0x100F;

    /**
     * Enumerated property Lead_Canonical_Combining_Class.
     * ICU-specific property for the ccc of the first code point
     * of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
     * Useful for checking for canonically ordered text;
     * see Normalizer.FCD and http://www.unicode.org/notes/tn5/#FCD .
     * Returns 8-bit numeric values like CANONICAL_COMBINING_CLASS.
     * @stable ICU 3.0
     */
    public static final int LEAD_CANONICAL_COMBINING_CLASS = 0x1010;

    /**
     * Enumerated property Trail_Canonical_Combining_Class.
     * ICU-specific property for the ccc of the last code point
     * of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
     * Useful for checking for canonically ordered text;
     * see Normalizer.FCD and http://www.unicode.org/notes/tn5/#FCD .
     * Returns 8-bit numeric values like CANONICAL_COMBINING_CLASS.
     * @stable ICU 3.0
     */
    public static final int TRAIL_CANONICAL_COMBINING_CLASS = 0x1011;

    /**
     * Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
     * Used in UAX #29: Text Boundaries
     * (http://www.unicode.org/reports/tr29/)
     * Returns UCharacter.GraphemeClusterBreak values.
     * @stable ICU 3.4
     */
    public static final int GRAPHEME_CLUSTER_BREAK = 0x1012;

    /**
     * Enumerated property Sentence_Break (new in Unicode 4.1).
     * Used in UAX #29: Text Boundaries
     * (http://www.unicode.org/reports/tr29/)
     * Returns UCharacter.SentenceBreak values.
     * @stable ICU 3.4
     */
    public static final int SENTENCE_BREAK = 0x1013;

    /**
     * Enumerated property Word_Break (new in Unicode 4.1).
     * Used in UAX #29: Text Boundaries
     * (http://www.unicode.org/reports/tr29/)
     * Returns UCharacter.WordBreak values.
     * @stable ICU 3.4
     */
    public static final int WORD_BREAK = 0x1014;

    /**
     * Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
     * Used in UAX #9: Unicode Bidirectional Algorithm
     * (http://www.unicode.org/reports/tr9/)
     * Returns UCharacter.BidiPairedBracketType values.
     * @stable ICU 52
     */
    public static final int BIDI_PAIRED_BRACKET_TYPE = 0x1015;

    /**
     * Enumerated property Indic_Positional_Category.
     * New in Unicode 6.0 as provisional property Indic_Matra_Category;
     * renamed and changed to informative in Unicode 8.0.
     * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
     * @stable ICU 63
     */
    public static final int INDIC_POSITIONAL_CATEGORY = 0x1016;

    /**
     * Enumerated property Indic_Syllabic_Category.
     * New in Unicode 6.0 as provisional; informative since Unicode 8.0.
     * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
     * @stable ICU 63
     */
    public static final int INDIC_SYLLABIC_CATEGORY = 0x1017;

    /**
     * Enumerated property Vertical_Orientation.
     * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/).
     * New as a UCD property in Unicode 10.0.
     * @stable ICU 63
     */
    public static final int VERTICAL_ORIENTATION = 0x1018;

    /**
     * Enumerated property Identifier_Status.
     * Used for UTS #39 General Security Profile for Identifiers
     * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
     * @draft ICU 75
     */
    public static final int IDENTIFIER_STATUS = 0x1019;

    /**
     * One more than the last constant for enumerated/integer Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int INT_LIMIT = 0x101A;

    /**
     * Bitmask property General_Category_Mask.
     * This is the General_Category property returned as a bit mask.
     * When used in UCharacter.getIntPropertyValue(c),
     * returns bit masks for UCharacterCategory values where exactly one bit is set.
     * When used with UCharacter.getPropertyValueName() and UCharacter.getPropertyValueEnum(),
     * a multi-bit mask is used for sets of categories like "Letters".
     * @stable ICU 2.4
     */
    public static final int GENERAL_CATEGORY_MASK = 0x2000;

    /**
     * First constant for bit-mask Unicode properties.
     * @stable ICU 2.4
     */
    public static final int MASK_START = GENERAL_CATEGORY_MASK;

    /**
     * One more than the last constant for bit-mask Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int MASK_LIMIT = 0x2001;

    /**
     * Double property Numeric_Value.
     * Corresponds to UCharacter.getUnicodeNumericValue(int).
     * @stable ICU 2.4
     */
    public static final int NUMERIC_VALUE = 0x3000;

    /**
     * First constant for double Unicode properties.
     * @stable ICU 2.4
     */
    public static final int DOUBLE_START = NUMERIC_VALUE;

    /**
     * One more than the last constant for double Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int DOUBLE_LIMIT = 0x3001;

    /**
     * String property Age.
     * Corresponds to UCharacter.getAge(int).
     * @stable ICU 2.4
     */
    public static final int AGE = 0x4000;

    /**
     * First constant for string Unicode properties.
     * @stable ICU 2.4
     */
    public static final int STRING_START = AGE;

    /**
     * String property Bidi_Mirroring_Glyph.
     * Corresponds to UCharacter.getMirror(int).
     * @stable ICU 2.4
     */
    public static final int BIDI_MIRRORING_GLYPH = 0x4001;

    /**
     * String property Case_Folding.
     * Corresponds to UCharacter.foldCase(String, boolean).
     * @stable ICU 2.4
     */
    public static final int CASE_FOLDING = 0x4002;

    /**
     * Deprecated string property ISO_Comment.
     * Corresponds to UCharacter.getISOComment(int).
     * @deprecated ICU 49
     */
    @Deprecated
    public static final int ISO_COMMENT = 0x4003;

    /**
     * String property Lowercase_Mapping.
     * Corresponds to UCharacter.toLowerCase(String).
     * @stable ICU 2.4
     */
    public static final int LOWERCASE_MAPPING = 0x4004;

    /**
     * String property Name.
     * Corresponds to UCharacter.getName(int).
     * @stable ICU 2.4
     */
    public static final int NAME = 0x4005;

    /**
     * String property Simple_Case_Folding.
     * Corresponds to UCharacter.foldCase(int, boolean).
     * @stable ICU 2.4
     */
    public static final int SIMPLE_CASE_FOLDING = 0x4006;

    /**
     * String property Simple_Lowercase_Mapping.
     * Corresponds to UCharacter.toLowerCase(int).
     * @stable ICU 2.4
     */
    public static final int SIMPLE_LOWERCASE_MAPPING = 0x4007;

    /**
     * String property Simple_Titlecase_Mapping.
     * Corresponds to UCharacter.toTitleCase(int).
     * @stable ICU 2.4
     */
    public static final int SIMPLE_TITLECASE_MAPPING = 0x4008;

    /**
     * String property Simple_Uppercase_Mapping.
     * Corresponds to UCharacter.toUpperCase(int).
     * @stable ICU 2.4
     */
    public static final int SIMPLE_UPPERCASE_MAPPING = 0x4009;

    /**
     * String property Titlecase_Mapping.
     * Corresponds to UCharacter.toTitleCase(String).
     * @stable ICU 2.4
     */
    public static final int TITLECASE_MAPPING = 0x400A;

    /**
     * String property Unicode_1_Name.
     * This property is of little practical value.
     * Beginning with ICU 49, ICU APIs return null or an empty string for this property.
     * Corresponds to UCharacter.getName1_0(int).
     * @deprecated ICU 49
     */
    @Deprecated
    public static final int UNICODE_1_NAME = 0x400B;

    /**
     * String property Uppercase_Mapping.
     * Corresponds to UCharacter.toUpperCase(String).
     * @stable ICU 2.4
     */
    public static final int UPPERCASE_MAPPING = 0x400C;

    /**
     * String property Bidi_Paired_Bracket (new in Unicode 6.3).
     * Corresponds to UCharacter.getBidiPairedBracket.
     * @stable ICU 52
     */
    public static final int BIDI_PAIRED_BRACKET = 0x400D;

    /**
     * One more than the last constant for string Unicode properties.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int STRING_LIMIT = 0x400E;

    /**
     * Miscellaneous property Script_Extensions (new in Unicode 6.0).
     * Some characters are commonly used in multiple scripts.
     * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
     * Corresponds to UScript.hasScript and UScript.getScriptExtensions.
     * @stable ICU 4.6
     */
    public static final int SCRIPT_EXTENSIONS = 0x7000;

    /**
     * First constant for Unicode properties with unusual value types.
     * @stable ICU 4.6
     */
    public static final int OTHER_PROPERTY_START = SCRIPT_EXTENSIONS;

    /**
     * Miscellaneous property Identifier_Type.
     * Used for UTS #39 General Security Profile for Identifiers
     * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
     *
     * <p>Corresponds to {@link UCharacter#hasIdentifierType(int, UCharacter.IdentifierType)} and
     * {@link UCharacter#getIdentifierTypes(int, java.util.EnumSet)}.
     *
     * <p>Each code point maps to a <i>set</i> of IdentifierType values.
     *
     * @see UCharacter#hasIdentifierType(int, UCharacter.IdentifierType)
     * @see UCharacter#getIdentifierTypes(int, java.util.EnumSet)
     * @draft ICU 75
     */
    public static final int IDENTIFIER_TYPE = 0x7001;

    /**
     * One more than the last constant for Unicode properties with unusual value types.
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int OTHER_PROPERTY_LIMIT = 0x7002;

    /**
     * Selector constants for UCharacter.getPropertyName() and
     * UCharacter.getPropertyValueName(). These selectors are used to
     * choose which name is returned for a given property or value.
     * All properties and values have a long name. Most have a short
     * name, but some do not. Unicode allows for additional names,
     * beyond the long and short name, which would be indicated by
     * LONG + i, where i=1, 2,...
     *
     * @see UCharacter#getPropertyName
     * @see UCharacter#getPropertyValueName
     * @stable ICU 2.4
     */
    public interface NameChoice {
        /**
         * Selector for the abbreviated name of a property or value.
         * Most properties and values have a short name; those that do
         * not return null.
         * @stable ICU 2.4
         */
        public static final int SHORT = 0;

        /**
         * Selector for the long name of a property or value. All
         * properties and values have a long name.
         * @stable ICU 2.4
         */
        public static final int LONG = 1;

        /**
         * The number of predefined property name choices. Individual
         * properties or values may have more than COUNT aliases.
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 2;
    }
}