1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package android.icu.lang; 12 13 import java.lang.ref.SoftReference; 14 import java.util.HashMap; 15 import java.util.Iterator; 16 import java.util.Locale; 17 import java.util.Map; 18 19 import android.icu.impl.CaseMapImpl; 20 import android.icu.impl.IllegalIcuArgumentException; 21 import android.icu.impl.Trie2; 22 import android.icu.impl.UBiDiProps; 23 import android.icu.impl.UCaseProps; 24 import android.icu.impl.UCharacterName; 25 import android.icu.impl.UCharacterNameChoice; 26 import android.icu.impl.UCharacterProperty; 27 import android.icu.impl.UCharacterUtility; 28 import android.icu.impl.UPropertyAliases; 29 import android.icu.lang.UCharacterEnums.ECharacterCategory; 30 import android.icu.lang.UCharacterEnums.ECharacterDirection; 31 import android.icu.text.BreakIterator; 32 import android.icu.text.Edits; 33 import android.icu.text.Normalizer2; 34 import android.icu.util.RangeValueIterator; 35 import android.icu.util.ULocale; 36 import android.icu.util.ValueIterator; 37 import android.icu.util.VersionInfo; 38 39 /** 40 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 41 * 42 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 43 * These extensions provide support for more Unicode properties. 44 * Each ICU release supports the latest version of Unicode available at that time. 
45 * 46 * <p>For some time before Java 5 added support for supplementary Unicode code points, 47 * the ICU UCharacter class and many other ICU classes already supported them. 48 * Some UCharacter methods and constants were widened slightly differently than 49 * how the Character class methods and constants were widened later. 50 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 51 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 52 * 53 * <p>Code points are represented in this API using ints. While it would be 54 * more convenient in Java to have a separate primitive datatype for them, 55 * ints suffice in the meantime. 56 * 57 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 58 * properties, the main differences between UCharacter and Character are: 59 * <ul> 60 * <li> UCharacter is not designed to be a char wrapper and does not have 61 * APIs that involve management of that single char.<br> 62 * These include: 63 * <ul> 64 * <li> char charValue(), 65 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 66 * </ul> 67 * <li> UCharacter does not include Character APIs that are deprecated, nor 68 * does it include the Java-specific character information, such as 69 * boolean isJavaIdentifierPart(char ch). 70 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 71 * values '10' - '35'. UCharacter also does this in digit and 72 * getNumericValue, to adhere to the Java semantics of these 73 * methods. The new methods unicodeDigit and 74 * getUnicodeNumericValue do not treat the above code points 75 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 76 * </ul> 77 * <p> 78 * In addition to Java compatibility functions, which calculate derived properties, 79 * this API provides low-level access to the Unicode Character Database. 
80 * <p> 81 * Unicode assigns each code point (not just assigned characters) values for 82 * many properties. 83 * Most of them are simple boolean flags, or constants from a small enumerated list. 84 * For some properties, values are strings or other relatively more complex types. 85 * <p> 86 * For more information see 87 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 88 * (http://www.unicode.org/ucd/) 89 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU 90 * User Guide chapter on Properties</a> 91 * (http://www.icu-project.org/userguide/properties.html). 92 * <p> 93 * There are also functions that provide easy migration from C/POSIX functions 94 * like isblank(). Their use is generally discouraged because the C/POSIX 95 * standards do not define their semantics beyond the ASCII range, which means 96 * that different implementations exhibit very different behavior. 97 * Instead, Unicode properties should be used directly. 98 * <p> 99 * There are also only a few, broad C/POSIX character classes, and they tend 100 * to be used for conflicting purposes. For example, the "isalpha()" class 101 * is sometimes used to determine word boundaries, while a more sophisticated 102 * approach would at least distinguish initial letters from continuation 103 * characters (the latter including combining marks). 104 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 105 * Another example: There is no "istitle()" class for titlecase characters. 106 * <p> 107 * ICU 3.4 and later provide API access for all twelve C/POSIX character classes. 108 * ICU implements them according to the Standard Recommendations in 109 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 110 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 
111 * <p> 112 * API access for C/POSIX character classes is as follows: 113 * <pre>{@code 114 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 115 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 116 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 117 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 118 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 119 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 120 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 121 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 122 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 123 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 124 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 125 * - cntrl: getType(c)==CONTROL 126 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 127 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 128 * <p> 129 * The C/POSIX character classes are also available in UnicodeSet patterns, 130 * using patterns like [:graph:] or \p{graph}. 131 * 132 * <p><strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions. 133 * Comparison:<ul> 134 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 135 * most of general categories "Z" (separators) + most whitespace ISO controls 136 * (including no-break spaces, but excluding IS1..IS4 and ZWSP) 137 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 138 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 139 * 140 * <p> 141 * This class is not subclassable. 
142 * 143 * @author Syn Wee Quek 144 * @see android.icu.lang.UCharacterEnums 145 */ 146 147 public final class UCharacter implements ECharacterCategory, ECharacterDirection 148 { 149 // public inner classes ---------------------------------------------- 150 151 /** 152 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 153 * 154 * A family of character subsets representing the character blocks in the 155 * Unicode specification, generated from Unicode Data file Blocks.txt. 156 * Character blocks generally define characters used for a specific script 157 * or purpose. A character is contained by at most one Unicode block. 158 * 159 * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU. 160 */ 161 public static final class UnicodeBlock extends Character.Subset 162 { 163 // block id corresponding to icu4c ----------------------------------- 164 165 /** 166 */ 167 public static final int INVALID_CODE_ID = -1; 168 /** 169 */ 170 public static final int BASIC_LATIN_ID = 1; 171 /** 172 */ 173 public static final int LATIN_1_SUPPLEMENT_ID = 2; 174 /** 175 */ 176 public static final int LATIN_EXTENDED_A_ID = 3; 177 /** 178 */ 179 public static final int LATIN_EXTENDED_B_ID = 4; 180 /** 181 */ 182 public static final int IPA_EXTENSIONS_ID = 5; 183 /** 184 */ 185 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 186 /** 187 */ 188 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 189 /** 190 * Unicode 3.2 renames this block to "Greek and Coptic". 
191 */ 192 public static final int GREEK_ID = 8; 193 /** 194 */ 195 public static final int CYRILLIC_ID = 9; 196 /** 197 */ 198 public static final int ARMENIAN_ID = 10; 199 /** 200 */ 201 public static final int HEBREW_ID = 11; 202 /** 203 */ 204 public static final int ARABIC_ID = 12; 205 /** 206 */ 207 public static final int SYRIAC_ID = 13; 208 /** 209 */ 210 public static final int THAANA_ID = 14; 211 /** 212 */ 213 public static final int DEVANAGARI_ID = 15; 214 /** 215 */ 216 public static final int BENGALI_ID = 16; 217 /** 218 */ 219 public static final int GURMUKHI_ID = 17; 220 /** 221 */ 222 public static final int GUJARATI_ID = 18; 223 /** 224 */ 225 public static final int ORIYA_ID = 19; 226 /** 227 */ 228 public static final int TAMIL_ID = 20; 229 /** 230 */ 231 public static final int TELUGU_ID = 21; 232 /** 233 */ 234 public static final int KANNADA_ID = 22; 235 /** 236 */ 237 public static final int MALAYALAM_ID = 23; 238 /** 239 */ 240 public static final int SINHALA_ID = 24; 241 /** 242 */ 243 public static final int THAI_ID = 25; 244 /** 245 */ 246 public static final int LAO_ID = 26; 247 /** 248 */ 249 public static final int TIBETAN_ID = 27; 250 /** 251 */ 252 public static final int MYANMAR_ID = 28; 253 /** 254 */ 255 public static final int GEORGIAN_ID = 29; 256 /** 257 */ 258 public static final int HANGUL_JAMO_ID = 30; 259 /** 260 */ 261 public static final int ETHIOPIC_ID = 31; 262 /** 263 */ 264 public static final int CHEROKEE_ID = 32; 265 /** 266 */ 267 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 268 /** 269 */ 270 public static final int OGHAM_ID = 34; 271 /** 272 */ 273 public static final int RUNIC_ID = 35; 274 /** 275 */ 276 public static final int KHMER_ID = 36; 277 /** 278 */ 279 public static final int MONGOLIAN_ID = 37; 280 /** 281 */ 282 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 283 /** 284 */ 285 public static final int GREEK_EXTENDED_ID = 39; 286 /** 287 */ 288 public static final 
int GENERAL_PUNCTUATION_ID = 40; 289 /** 290 */ 291 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 292 /** 293 */ 294 public static final int CURRENCY_SYMBOLS_ID = 42; 295 /** 296 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 297 * Symbols". 298 */ 299 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 300 /** 301 */ 302 public static final int LETTERLIKE_SYMBOLS_ID = 44; 303 /** 304 */ 305 public static final int NUMBER_FORMS_ID = 45; 306 /** 307 */ 308 public static final int ARROWS_ID = 46; 309 /** 310 */ 311 public static final int MATHEMATICAL_OPERATORS_ID = 47; 312 /** 313 */ 314 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 315 /** 316 */ 317 public static final int CONTROL_PICTURES_ID = 49; 318 /** 319 */ 320 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 321 /** 322 */ 323 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 324 /** 325 */ 326 public static final int BOX_DRAWING_ID = 52; 327 /** 328 */ 329 public static final int BLOCK_ELEMENTS_ID = 53; 330 /** 331 */ 332 public static final int GEOMETRIC_SHAPES_ID = 54; 333 /** 334 */ 335 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 336 /** 337 */ 338 public static final int DINGBATS_ID = 56; 339 /** 340 */ 341 public static final int BRAILLE_PATTERNS_ID = 57; 342 /** 343 */ 344 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 345 /** 346 */ 347 public static final int KANGXI_RADICALS_ID = 59; 348 /** 349 */ 350 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 351 /** 352 */ 353 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 354 /** 355 */ 356 public static final int HIRAGANA_ID = 62; 357 /** 358 */ 359 public static final int KATAKANA_ID = 63; 360 /** 361 */ 362 public static final int BOPOMOFO_ID = 64; 363 /** 364 */ 365 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 366 /** 367 */ 368 public static final int KANBUN_ID = 66; 369 /** 370 */ 371 public static final 
int BOPOMOFO_EXTENDED_ID = 67; 372 /** 373 */ 374 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 375 /** 376 */ 377 public static final int CJK_COMPATIBILITY_ID = 69; 378 /** 379 */ 380 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 381 /** 382 */ 383 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 384 /** 385 */ 386 public static final int YI_SYLLABLES_ID = 72; 387 /** 388 */ 389 public static final int YI_RADICALS_ID = 73; 390 /** 391 */ 392 public static final int HANGUL_SYLLABLES_ID = 74; 393 /** 394 */ 395 public static final int HIGH_SURROGATES_ID = 75; 396 /** 397 */ 398 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 399 /** 400 */ 401 public static final int LOW_SURROGATES_ID = 77; 402 /** 403 * Same as public static final int PRIVATE_USE. 404 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 405 * and multiple code point ranges had this block. 406 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 407 * and adds separate blocks for the supplementary PUAs. 408 */ 409 public static final int PRIVATE_USE_AREA_ID = 78; 410 /** 411 * Same as public static final int PRIVATE_USE_AREA. 412 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 413 * and multiple code point ranges had this block. 414 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 415 * and adds separate blocks for the supplementary PUAs. 
416 */ 417 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 418 /** 419 */ 420 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 421 /** 422 */ 423 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 424 /** 425 */ 426 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 427 /** 428 */ 429 public static final int COMBINING_HALF_MARKS_ID = 82; 430 /** 431 */ 432 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 433 /** 434 */ 435 public static final int SMALL_FORM_VARIANTS_ID = 84; 436 /** 437 */ 438 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 439 /** 440 */ 441 public static final int SPECIALS_ID = 86; 442 /** 443 */ 444 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 445 /** 446 */ 447 public static final int OLD_ITALIC_ID = 88; 448 /** 449 */ 450 public static final int GOTHIC_ID = 89; 451 /** 452 */ 453 public static final int DESERET_ID = 90; 454 /** 455 */ 456 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 457 /** 458 */ 459 public static final int MUSICAL_SYMBOLS_ID = 92; 460 /** 461 */ 462 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 463 /** 464 */ 465 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 466 /** 467 */ 468 public static final int 469 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 470 /** 471 */ 472 public static final int TAGS_ID = 96; 473 474 // New blocks in Unicode 3.2 475 476 /** 477 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 478 */ 479 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 480 /** 481 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
482 */ 483 484 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 485 /** 486 */ 487 public static final int TAGALOG_ID = 98; 488 /** 489 */ 490 public static final int HANUNOO_ID = 99; 491 /** 492 */ 493 public static final int BUHID_ID = 100; 494 /** 495 */ 496 public static final int TAGBANWA_ID = 101; 497 /** 498 */ 499 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 500 /** 501 */ 502 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 503 /** 504 */ 505 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 506 /** 507 */ 508 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 509 /** 510 */ 511 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 512 /** 513 */ 514 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 515 /** 516 */ 517 public static final int VARIATION_SELECTORS_ID = 108; 518 /** 519 */ 520 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 521 /** 522 */ 523 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 524 525 /** 526 */ 527 public static final int LIMBU_ID = 111; /*[1900]*/ 528 /** 529 */ 530 public static final int TAI_LE_ID = 112; /*[1950]*/ 531 /** 532 */ 533 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 534 /** 535 */ 536 public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 537 /** 538 */ 539 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 540 /** 541 */ 542 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 543 /** 544 */ 545 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 546 /** 547 */ 548 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 549 /** 550 */ 551 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 552 /** 553 */ 554 public static final int UGARITIC_ID = 120; /*[10380]*/ 555 /** 556 */ 557 public static final int SHAVIAN_ID = 121; /*[10450]*/ 558 /** 559 */ 560 public static final int OSMANYA_ID = 
122; /*[10480]*/ 561 /** 562 */ 563 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 564 /** 565 */ 566 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 567 /** 568 */ 569 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 570 571 /* New blocks in Unicode 4.1 */ 572 573 /** 574 */ 575 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 576 577 /** 578 */ 579 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 580 581 /** 582 */ 583 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 584 585 /** 586 */ 587 public static final int BUGINESE_ID = 129; /*[1A00]*/ 588 589 /** 590 */ 591 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 592 593 /** 594 */ 595 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 596 597 /** 598 */ 599 public static final int COPTIC_ID = 132; /*[2C80]*/ 600 601 /** 602 */ 603 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 604 605 /** 606 */ 607 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 608 609 /** 610 */ 611 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 612 613 /** 614 */ 615 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 616 617 /** 618 */ 619 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 620 621 /** 622 */ 623 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 624 625 /** 626 */ 627 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 628 629 /** 630 */ 631 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 632 633 /** 634 */ 635 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 636 637 /** 638 */ 639 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 640 641 /** 642 */ 643 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 644 645 /** 646 */ 647 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 648 649 /** 650 */ 651 public static 
final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 652 653 /* New blocks in Unicode 5.0 */ 654 655 /** 656 */ 657 public static final int NKO_ID = 146; /*[07C0]*/ 658 /** 659 */ 660 public static final int BALINESE_ID = 147; /*[1B00]*/ 661 /** 662 */ 663 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 664 /** 665 */ 666 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 667 /** 668 */ 669 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 670 /** 671 */ 672 public static final int PHOENICIAN_ID = 151; /*[10900]*/ 673 /** 674 */ 675 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 676 /** 677 */ 678 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 679 /** 680 */ 681 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 682 683 /** 684 */ 685 public static final int SUNDANESE_ID = 155; /* [1B80] */ 686 687 /** 688 */ 689 public static final int LEPCHA_ID = 156; /* [1C00] */ 690 691 /** 692 */ 693 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 694 695 /** 696 */ 697 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 698 699 /** 700 */ 701 public static final int VAI_ID = 159; /* [A500] */ 702 703 /** 704 */ 705 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 706 707 /** 708 */ 709 public static final int SAURASHTRA_ID = 161; /* [A880] */ 710 711 /** 712 */ 713 public static final int KAYAH_LI_ID = 162; /* [A900] */ 714 715 /** 716 */ 717 public static final int REJANG_ID = 163; /* [A930] */ 718 719 /** 720 */ 721 public static final int CHAM_ID = 164; /* [AA00] */ 722 723 /** 724 */ 725 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 726 727 /** 728 */ 729 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 730 731 /** 732 */ 733 public static final int LYCIAN_ID = 167; /* [10280] */ 734 735 /** 736 */ 737 public static final int CARIAN_ID = 168; /* [102A0] */ 738 739 /** 740 */ 741 public static final int LYDIAN_ID = 169; /* 
[10920] */ 742 743 /** 744 */ 745 public static final int MAHJONG_TILES_ID = 170; /* [1F000] */ 746 747 /** 748 */ 749 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 750 751 /* New blocks in Unicode 5.2 */ 752 753 /***/ 754 public static final int SAMARITAN_ID = 172; /*[0800]*/ 755 /***/ 756 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 757 /***/ 758 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 759 /***/ 760 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 761 /***/ 762 public static final int LISU_ID = 176; /*[A4D0]*/ 763 /***/ 764 public static final int BAMUM_ID = 177; /*[A6A0]*/ 765 /***/ 766 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 767 /***/ 768 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 769 /***/ 770 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 771 /***/ 772 public static final int JAVANESE_ID = 181; /*[A980]*/ 773 /***/ 774 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 775 /***/ 776 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 777 /***/ 778 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 779 /***/ 780 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 781 /***/ 782 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 783 /***/ 784 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 785 /***/ 786 public static final int AVESTAN_ID = 188; /*[10B00]*/ 787 /***/ 788 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 789 /***/ 790 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 791 /***/ 792 public static final int OLD_TURKIC_ID = 191; /*[10C00]*/ 793 /***/ 794 public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ 795 /***/ 796 public static final int KAITHI_ID = 193; /*[11080]*/ 797 /***/ 798 public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ 799 /***/ 800 public 
static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ 801 /***/ 802 public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ 803 /***/ 804 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ 805 806 /* New blocks in Unicode 6.0 */ 807 808 /***/ 809 public static final int MANDAIC_ID = 198; /*[0840]*/ 810 /***/ 811 public static final int BATAK_ID = 199; /*[1BC0]*/ 812 /***/ 813 public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/ 814 /***/ 815 public static final int BRAHMI_ID = 201; /*[11000]*/ 816 /***/ 817 public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/ 818 /***/ 819 public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/ 820 /***/ 821 public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/ 822 /***/ 823 public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/ 824 /***/ 825 public static final int EMOTICONS_ID = 206; /*[1F600]*/ 826 /***/ 827 public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/ 828 /***/ 829 public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/ 830 /***/ 831 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/ 832 833 /* New blocks in Unicode 6.1 */ 834 835 /***/ 836 public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/ 837 /***/ 838 public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/ 839 /***/ 840 public static final int CHAKMA_ID = 212; /*[11100]*/ 841 /***/ 842 public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/ 843 /***/ 844 public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/ 845 /***/ 846 public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/ 847 /***/ 848 public static final int MIAO_ID = 216; /*[16F00]*/ 849 /***/ 850 public static final int SHARADA_ID = 217; /*[11180]*/ 851 /***/ 852 public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/ 853 /***/ 854 public static 
final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/ 855 /***/ 856 public static final int TAKRI_ID = 220; /*[11680]*/ 857 858 /* New blocks in Unicode 7.0 */ 859 860 /***/ 861 public static final int BASSA_VAH_ID = 221; /*[16AD0]*/ 862 /***/ 863 public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/ 864 /***/ 865 public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/ 866 /***/ 867 public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/ 868 /***/ 869 public static final int DUPLOYAN_ID = 225; /*[1BC00]*/ 870 /***/ 871 public static final int ELBASAN_ID = 226; /*[10500]*/ 872 /***/ 873 public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/ 874 /***/ 875 public static final int GRANTHA_ID = 228; /*[11300]*/ 876 /***/ 877 public static final int KHOJKI_ID = 229; /*[11200]*/ 878 /***/ 879 public static final int KHUDAWADI_ID = 230; /*[112B0]*/ 880 /***/ 881 public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/ 882 /***/ 883 public static final int LINEAR_A_ID = 232; /*[10600]*/ 884 /***/ 885 public static final int MAHAJANI_ID = 233; /*[11150]*/ 886 /***/ 887 public static final int MANICHAEAN_ID = 234; /*[10AC0]*/ 888 /***/ 889 public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/ 890 /***/ 891 public static final int MODI_ID = 236; /*[11600]*/ 892 /***/ 893 public static final int MRO_ID = 237; /*[16A40]*/ 894 /***/ 895 public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/ 896 /***/ 897 public static final int NABATAEAN_ID = 239; /*[10880]*/ 898 /***/ 899 public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/ 900 /***/ 901 public static final int OLD_PERMIC_ID = 241; /*[10350]*/ 902 /***/ 903 public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/ 904 /***/ 905 public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/ 906 /***/ 907 public static final int PALMYRENE_ID = 244; /*[10860]*/ 908 /***/ 909 public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/ 910 
/***/ 911 public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/ 912 /***/ 913 public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/ 914 /***/ 915 public static final int SIDDHAM_ID = 248; /*[11580]*/ 916 /***/ 917 public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/ 918 /***/ 919 public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/ 920 /***/ 921 public static final int TIRHUTA_ID = 251; /*[11480]*/ 922 /***/ 923 public static final int WARANG_CITI_ID = 252; /*[118A0]*/ 924 925 /* New blocks in Unicode 8.0 */ 926 927 /***/ 928 public static final int AHOM_ID = 253; /*[11700]*/ 929 /***/ 930 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 931 /***/ 932 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 933 /***/ 934 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 935 /***/ 936 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 937 /***/ 938 public static final int HATRAN_ID = 258; /*[108E0]*/ 939 /***/ 940 public static final int MULTANI_ID = 259; /*[11280]*/ 941 /***/ 942 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 943 /***/ 944 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 945 /***/ 946 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 947 948 /* New blocks in Unicode 9.0 */ 949 950 /***/ 951 public static final int ADLAM_ID = 263; /*[1E900]*/ 952 /***/ 953 public static final int BHAIKSUKI_ID = 264; /*[11C00]*/ 954 /***/ 955 public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/ 956 /***/ 957 public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/ 958 /***/ 959 public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/ 960 /***/ 961 public static final int MARCHEN_ID = 268; /*[11C70]*/ 962 /***/ 963 public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/ 964 /***/ 965 public static final int NEWA_ID 
= 270; /*[11400]*/ 966 /***/ 967 public static final int OSAGE_ID = 271; /*[104B0]*/ 968 /***/ 969 public static final int TANGUT_ID = 272; /*[17000]*/ 970 /***/ 971 public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/ 972 973 /** 974 * One more than the highest normal UnicodeBlock value. 975 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 976 * 977 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 978 * @hide unsupported on Android 979 */ 980 @Deprecated 981 public static final int COUNT = 274; 982 983 // blocks objects --------------------------------------------------- 984 985 /** 986 * Array of UnicodeBlocks, for easy access in getInstance(int) 987 */ 988 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 989 990 /** 991 */ 992 public static final UnicodeBlock NO_BLOCK 993 = new UnicodeBlock("NO_BLOCK", 0); 994 995 /** 996 */ 997 public static final UnicodeBlock BASIC_LATIN 998 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 999 /** 1000 */ 1001 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1002 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1003 /** 1004 */ 1005 public static final UnicodeBlock LATIN_EXTENDED_A 1006 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1007 /** 1008 */ 1009 public static final UnicodeBlock LATIN_EXTENDED_B 1010 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1011 /** 1012 */ 1013 public static final UnicodeBlock IPA_EXTENSIONS 1014 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1015 /** 1016 */ 1017 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1018 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1019 /** 1020 */ 1021 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1022 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1023 /** 1024 * Unicode 3.2 renames this block to "Greek and 
Coptic". 1025 */ 1026 public static final UnicodeBlock GREEK 1027 = new UnicodeBlock("GREEK", GREEK_ID); 1028 /** 1029 */ 1030 public static final UnicodeBlock CYRILLIC 1031 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1032 /** 1033 */ 1034 public static final UnicodeBlock ARMENIAN 1035 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1036 /** 1037 */ 1038 public static final UnicodeBlock HEBREW 1039 = new UnicodeBlock("HEBREW", HEBREW_ID); 1040 /** 1041 */ 1042 public static final UnicodeBlock ARABIC 1043 = new UnicodeBlock("ARABIC", ARABIC_ID); 1044 /** 1045 */ 1046 public static final UnicodeBlock SYRIAC 1047 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1048 /** 1049 */ 1050 public static final UnicodeBlock THAANA 1051 = new UnicodeBlock("THAANA", THAANA_ID); 1052 /** 1053 */ 1054 public static final UnicodeBlock DEVANAGARI 1055 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1056 /** 1057 */ 1058 public static final UnicodeBlock BENGALI 1059 = new UnicodeBlock("BENGALI", BENGALI_ID); 1060 /** 1061 */ 1062 public static final UnicodeBlock GURMUKHI 1063 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1064 /** 1065 */ 1066 public static final UnicodeBlock GUJARATI 1067 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1068 /** 1069 */ 1070 public static final UnicodeBlock ORIYA 1071 = new UnicodeBlock("ORIYA", ORIYA_ID); 1072 /** 1073 */ 1074 public static final UnicodeBlock TAMIL 1075 = new UnicodeBlock("TAMIL", TAMIL_ID); 1076 /** 1077 */ 1078 public static final UnicodeBlock TELUGU 1079 = new UnicodeBlock("TELUGU", TELUGU_ID); 1080 /** 1081 */ 1082 public static final UnicodeBlock KANNADA 1083 = new UnicodeBlock("KANNADA", KANNADA_ID); 1084 /** 1085 */ 1086 public static final UnicodeBlock MALAYALAM 1087 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1088 /** 1089 */ 1090 public static final UnicodeBlock SINHALA 1091 = new UnicodeBlock("SINHALA", SINHALA_ID); 1092 /** 1093 */ 1094 public static final UnicodeBlock THAI 1095 = new UnicodeBlock("THAI", THAI_ID); 1096 /** 1097 */ 
1098 public static final UnicodeBlock LAO 1099 = new UnicodeBlock("LAO", LAO_ID); 1100 /** 1101 */ 1102 public static final UnicodeBlock TIBETAN 1103 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1104 /** 1105 */ 1106 public static final UnicodeBlock MYANMAR 1107 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1108 /** 1109 */ 1110 public static final UnicodeBlock GEORGIAN 1111 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1112 /** 1113 */ 1114 public static final UnicodeBlock HANGUL_JAMO 1115 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1116 /** 1117 */ 1118 public static final UnicodeBlock ETHIOPIC 1119 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1120 /** 1121 */ 1122 public static final UnicodeBlock CHEROKEE 1123 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1124 /** 1125 */ 1126 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1127 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1128 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1129 /** 1130 */ 1131 public static final UnicodeBlock OGHAM 1132 = new UnicodeBlock("OGHAM", OGHAM_ID); 1133 /** 1134 */ 1135 public static final UnicodeBlock RUNIC 1136 = new UnicodeBlock("RUNIC", RUNIC_ID); 1137 /** 1138 */ 1139 public static final UnicodeBlock KHMER 1140 = new UnicodeBlock("KHMER", KHMER_ID); 1141 /** 1142 */ 1143 public static final UnicodeBlock MONGOLIAN 1144 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1145 /** 1146 */ 1147 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1148 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1149 /** 1150 */ 1151 public static final UnicodeBlock GREEK_EXTENDED 1152 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1153 /** 1154 */ 1155 public static final UnicodeBlock GENERAL_PUNCTUATION 1156 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1157 /** 1158 */ 1159 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1160 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 
SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1161 /** 1162 */ 1163 public static final UnicodeBlock CURRENCY_SYMBOLS 1164 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1165 /** 1166 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1167 * Symbols". 1168 */ 1169 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1170 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1171 /** 1172 */ 1173 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1174 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1175 /** 1176 */ 1177 public static final UnicodeBlock NUMBER_FORMS 1178 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1179 /** 1180 */ 1181 public static final UnicodeBlock ARROWS 1182 = new UnicodeBlock("ARROWS", ARROWS_ID); 1183 /** 1184 */ 1185 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1186 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1187 /** 1188 */ 1189 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1190 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1191 /** 1192 */ 1193 public static final UnicodeBlock CONTROL_PICTURES 1194 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1195 /** 1196 */ 1197 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1198 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1199 /** 1200 */ 1201 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1202 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1203 /** 1204 */ 1205 public static final UnicodeBlock BOX_DRAWING 1206 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1207 /** 1208 */ 1209 public static final UnicodeBlock BLOCK_ELEMENTS 1210 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1211 /** 1212 */ 1213 public static final UnicodeBlock GEOMETRIC_SHAPES 1214 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1215 /** 
1216 */ 1217 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1218 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1219 /** 1220 */ 1221 public static final UnicodeBlock DINGBATS 1222 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1223 /** 1224 */ 1225 public static final UnicodeBlock BRAILLE_PATTERNS 1226 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1227 /** 1228 */ 1229 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1230 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1231 /** 1232 */ 1233 public static final UnicodeBlock KANGXI_RADICALS 1234 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1235 /** 1236 */ 1237 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1238 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1239 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1240 /** 1241 */ 1242 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1243 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1244 /** 1245 */ 1246 public static final UnicodeBlock HIRAGANA 1247 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1248 /** 1249 */ 1250 public static final UnicodeBlock KATAKANA 1251 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1252 /** 1253 */ 1254 public static final UnicodeBlock BOPOMOFO 1255 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1256 /** 1257 */ 1258 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1259 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1260 /** 1261 */ 1262 public static final UnicodeBlock KANBUN 1263 = new UnicodeBlock("KANBUN", KANBUN_ID); 1264 /** 1265 */ 1266 public static final UnicodeBlock BOPOMOFO_EXTENDED 1267 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1268 /** 1269 */ 1270 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1271 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1272 
ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1273 /** 1274 */ 1275 public static final UnicodeBlock CJK_COMPATIBILITY 1276 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1277 /** 1278 */ 1279 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1280 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1281 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1282 /** 1283 */ 1284 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1285 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1286 /** 1287 */ 1288 public static final UnicodeBlock YI_SYLLABLES 1289 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1290 /** 1291 */ 1292 public static final UnicodeBlock YI_RADICALS 1293 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1294 /** 1295 */ 1296 public static final UnicodeBlock HANGUL_SYLLABLES 1297 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1298 /** 1299 */ 1300 public static final UnicodeBlock HIGH_SURROGATES 1301 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1302 /** 1303 */ 1304 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1305 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1306 /** 1307 */ 1308 public static final UnicodeBlock LOW_SURROGATES 1309 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1310 /** 1311 * Same as public static final int PRIVATE_USE. 1312 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1313 * and multiple code point ranges had this block. 1314 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1315 * and adds separate blocks for the supplementary PUAs. 1316 */ 1317 public static final UnicodeBlock PRIVATE_USE_AREA 1318 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1319 /** 1320 * Same as public static final int PRIVATE_USE_AREA. 1321 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1322 * and multiple code point ranges had this block. 
1323 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1324 * and adds separate blocks for the supplementary PUAs. 1325 */ 1326 public static final UnicodeBlock PRIVATE_USE 1327 = PRIVATE_USE_AREA; 1328 /** 1329 */ 1330 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1331 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1332 /** 1333 */ 1334 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1335 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1336 /** 1337 */ 1338 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1339 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1340 /** 1341 */ 1342 public static final UnicodeBlock COMBINING_HALF_MARKS 1343 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1344 /** 1345 */ 1346 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1347 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1348 /** 1349 */ 1350 public static final UnicodeBlock SMALL_FORM_VARIANTS 1351 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1352 /** 1353 */ 1354 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1355 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1356 /** 1357 */ 1358 public static final UnicodeBlock SPECIALS 1359 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1360 /** 1361 */ 1362 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1363 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1364 /** 1365 */ 1366 public static final UnicodeBlock OLD_ITALIC 1367 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1368 /** 1369 */ 1370 public static final UnicodeBlock GOTHIC 1371 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1372 /** 1373 */ 1374 public static final UnicodeBlock DESERET 1375 = new UnicodeBlock("DESERET", DESERET_ID); 
1376 /** 1377 */ 1378 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1379 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1380 /** 1381 */ 1382 public static final UnicodeBlock MUSICAL_SYMBOLS 1383 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1384 /** 1385 */ 1386 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1387 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1388 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1389 /** 1390 */ 1391 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1392 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1393 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1394 /** 1395 */ 1396 public static final UnicodeBlock 1397 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1398 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1399 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1400 /** 1401 */ 1402 public static final UnicodeBlock TAGS 1403 = new UnicodeBlock("TAGS", TAGS_ID); 1404 1405 // New blocks in Unicode 3.2 1406 1407 /** 1408 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1409 */ 1410 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1411 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1412 /** 1413 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
1414 */ 1415 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1416 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1417 /** 1418 */ 1419 public static final UnicodeBlock TAGALOG 1420 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1421 /** 1422 */ 1423 public static final UnicodeBlock HANUNOO 1424 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1425 /** 1426 */ 1427 public static final UnicodeBlock BUHID 1428 = new UnicodeBlock("BUHID", BUHID_ID); 1429 /** 1430 */ 1431 public static final UnicodeBlock TAGBANWA 1432 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1433 /** 1434 */ 1435 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1436 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1437 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1438 /** 1439 */ 1440 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1441 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1442 /** 1443 */ 1444 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1445 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1446 /** 1447 */ 1448 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1449 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1450 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1451 /** 1452 */ 1453 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1454 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1455 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1456 /** 1457 */ 1458 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1459 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1460 /** 1461 */ 1462 public static final UnicodeBlock VARIATION_SELECTORS 1463 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1464 /** 1465 */ 1466 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1467 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1468 
SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1469 /** 1470 */ 1471 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1472 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1473 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1474 1475 /** 1476 */ 1477 public static final UnicodeBlock LIMBU 1478 = new UnicodeBlock("LIMBU", LIMBU_ID); 1479 /** 1480 */ 1481 public static final UnicodeBlock TAI_LE 1482 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1483 /** 1484 */ 1485 public static final UnicodeBlock KHMER_SYMBOLS 1486 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1487 1488 /** 1489 */ 1490 public static final UnicodeBlock PHONETIC_EXTENSIONS 1491 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1492 1493 /** 1494 */ 1495 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1496 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1497 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1498 /** 1499 */ 1500 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1501 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1502 /** 1503 */ 1504 public static final UnicodeBlock LINEAR_B_SYLLABARY 1505 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1506 /** 1507 */ 1508 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1509 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1510 /** 1511 */ 1512 public static final UnicodeBlock AEGEAN_NUMBERS 1513 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1514 /** 1515 */ 1516 public static final UnicodeBlock UGARITIC 1517 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1518 /** 1519 */ 1520 public static final UnicodeBlock SHAVIAN 1521 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1522 /** 1523 */ 1524 public static final UnicodeBlock OSMANYA 1525 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1526 /** 1527 */ 1528 public static final UnicodeBlock CYPRIOT_SYLLABARY 1529 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1530 /** 1531 */ 
1532 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1533 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1534 1535 /** 1536 */ 1537 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1538 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1539 1540 /* New blocks in Unicode 4.1 */ 1541 1542 /** 1543 */ 1544 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1545 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1546 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1547 1548 /** 1549 */ 1550 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1551 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 1552 1553 /** 1554 */ 1555 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1556 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 1557 1558 /** 1559 */ 1560 public static final UnicodeBlock BUGINESE = 1561 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 1562 1563 /** 1564 */ 1565 public static final UnicodeBlock CJK_STROKES = 1566 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 1567 1568 /** 1569 */ 1570 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1571 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1572 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 1573 1574 /** 1575 */ 1576 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 1577 1578 /** 1579 */ 1580 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1581 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 1582 1583 /** 1584 */ 1585 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1586 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 1587 1588 /** 1589 */ 1590 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1591 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 1592 1593 
/** 1594 */ 1595 public static final UnicodeBlock GLAGOLITIC = 1596 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 1597 1598 /** 1599 */ 1600 public static final UnicodeBlock KHAROSHTHI = 1601 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 1602 1603 /** 1604 */ 1605 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 1606 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 1607 1608 /** 1609 */ 1610 public static final UnicodeBlock NEW_TAI_LUE = 1611 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 1612 1613 /** 1614 */ 1615 public static final UnicodeBlock OLD_PERSIAN = 1616 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 1617 1618 /** 1619 */ 1620 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1621 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1622 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 1623 1624 /** 1625 */ 1626 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1627 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 1628 1629 /** 1630 */ 1631 public static final UnicodeBlock SYLOTI_NAGRI = 1632 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 1633 1634 /** 1635 */ 1636 public static final UnicodeBlock TIFINAGH = 1637 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 1638 1639 /** 1640 */ 1641 public static final UnicodeBlock VERTICAL_FORMS = 1642 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 1643 1644 /** 1645 */ 1646 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 1647 /** 1648 */ 1649 public static final UnicodeBlock BALINESE = 1650 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 1651 /** 1652 */ 1653 public static final UnicodeBlock LATIN_EXTENDED_C = 1654 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 1655 /** 1656 */ 1657 public static final UnicodeBlock LATIN_EXTENDED_D = 1658 new 
UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 1659 /** 1660 */ 1661 public static final UnicodeBlock PHAGS_PA = 1662 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 1663 /** 1664 */ 1665 public static final UnicodeBlock PHOENICIAN = 1666 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/ 1667 /** 1668 */ 1669 public static final UnicodeBlock CUNEIFORM = 1670 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 1671 /** 1672 */ 1673 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 1674 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 1675 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 1676 /** 1677 */ 1678 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 1679 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 1680 1681 /** 1682 */ 1683 public static final UnicodeBlock SUNDANESE = 1684 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 1685 1686 /** 1687 */ 1688 public static final UnicodeBlock LEPCHA = 1689 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 1690 1691 /** 1692 */ 1693 public static final UnicodeBlock OL_CHIKI = 1694 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 1695 1696 /** 1697 */ 1698 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1699 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 1700 1701 /** 1702 */ 1703 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 1704 1705 /** 1706 */ 1707 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1708 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 1709 1710 /** 1711 */ 1712 public static final UnicodeBlock SAURASHTRA = 1713 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 1714 1715 /** 1716 */ 1717 public static final UnicodeBlock KAYAH_LI = 1718 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 1719 1720 /** 1721 */ 1722 public static final UnicodeBlock REJANG = 
1723 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */ 1724 1725 /** 1726 */ 1727 public static final UnicodeBlock CHAM = 1728 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 1729 1730 /** 1731 */ 1732 public static final UnicodeBlock ANCIENT_SYMBOLS = 1733 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 1734 1735 /** 1736 */ 1737 public static final UnicodeBlock PHAISTOS_DISC = 1738 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 1739 1740 /** 1741 */ 1742 public static final UnicodeBlock LYCIAN = 1743 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 1744 1745 /** 1746 */ 1747 public static final UnicodeBlock CARIAN = 1748 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 1749 1750 /** 1751 */ 1752 public static final UnicodeBlock LYDIAN = 1753 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 1754 1755 /** 1756 */ 1757 public static final UnicodeBlock MAHJONG_TILES = 1758 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 1759 1760 /** 1761 */ 1762 public static final UnicodeBlock DOMINO_TILES = 1763 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 1764 1765 /* New blocks in Unicode 5.2 */ 1766 1767 /***/ 1768 public static final UnicodeBlock SAMARITAN = 1769 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 1770 /***/ 1771 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1772 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1773 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 1774 /***/ 1775 public static final UnicodeBlock TAI_THAM = 1776 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 1777 /***/ 1778 public static final UnicodeBlock VEDIC_EXTENSIONS = 1779 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 1780 /***/ 1781 public static final UnicodeBlock LISU = 1782 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 1783 /***/ 1784 public static final UnicodeBlock BAMUM = 1785 new 
UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 1786 /***/ 1787 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 1788 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 1789 /***/ 1790 public static final UnicodeBlock DEVANAGARI_EXTENDED = 1791 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 1792 /***/ 1793 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 1794 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 1795 /***/ 1796 public static final UnicodeBlock JAVANESE = 1797 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 1798 /***/ 1799 public static final UnicodeBlock MYANMAR_EXTENDED_A = 1800 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 1801 /***/ 1802 public static final UnicodeBlock TAI_VIET = 1803 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 1804 /***/ 1805 public static final UnicodeBlock MEETEI_MAYEK = 1806 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 1807 /***/ 1808 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 1809 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 1810 /***/ 1811 public static final UnicodeBlock IMPERIAL_ARAMAIC = 1812 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 1813 /***/ 1814 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 1815 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 1816 /***/ 1817 public static final UnicodeBlock AVESTAN = 1818 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 1819 /***/ 1820 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 1821 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 1822 /***/ 1823 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 1824 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 1825 /***/ 1826 public static final UnicodeBlock OLD_TURKIC = 
1827 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 1828 /***/ 1829 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 1830 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 1831 /***/ 1832 public static final UnicodeBlock KAITHI = 1833 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 1834 /***/ 1835 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 1836 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 1837 /***/ 1838 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 1839 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 1840 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 1841 /***/ 1842 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 1843 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 1844 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 1845 /***/ 1846 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 1847 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 1848 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 1849 1850 /* New blocks in Unicode 6.0 */ 1851 1852 /***/ 1853 public static final UnicodeBlock MANDAIC = 1854 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 1855 /***/ 1856 public static final UnicodeBlock BATAK = 1857 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/ 1858 /***/ 1859 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 1860 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 1861 /***/ 1862 public static final UnicodeBlock BRAHMI = 1863 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 1864 /***/ 1865 public static final UnicodeBlock BAMUM_SUPPLEMENT = 1866 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 1867 /***/ 1868 public static final UnicodeBlock KANA_SUPPLEMENT = 1869 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 1870 /***/ 1871 public static final UnicodeBlock PLAYING_CARDS = 1872 new 
UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 1873 /***/ 1874 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 1875 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 1876 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 1877 /***/ 1878 public static final UnicodeBlock EMOTICONS = 1879 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 1880 /***/ 1881 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 1882 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 1883 /***/ 1884 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 1885 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 1886 /***/ 1887 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 1888 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 1889 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 1890 1891 /* New blocks in Unicode 6.1 */ 1892 1893 /***/ 1894 public static final UnicodeBlock ARABIC_EXTENDED_A = 1895 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 1896 /***/ 1897 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 1898 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 1899 /***/ 1900 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 1901 /***/ 1902 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 1903 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 1904 /***/ 1905 public static final UnicodeBlock MEROITIC_CURSIVE = 1906 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 1907 /***/ 1908 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 1909 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 1910 /***/ 1911 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 
1912 /***/ 1913 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 1914 /***/ 1915 public static final UnicodeBlock SORA_SOMPENG = 1916 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 1917 /***/ 1918 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 1919 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 1920 /***/ 1921 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 1922 1923 /* New blocks in Unicode 7.0 */ 1924 1925 /***/ 1926 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 1927 /***/ 1928 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 1929 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 1930 /***/ 1931 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 1932 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/ 1933 /***/ 1934 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 1935 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 1936 /***/ 1937 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 1938 /***/ 1939 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 1940 /***/ 1941 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 1942 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 1943 /***/ 1944 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 1945 /***/ 1946 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 1947 /***/ 1948 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 1949 /***/ 1950 public static final UnicodeBlock LATIN_EXTENDED_E = 1951 new 
UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 1952 /***/ 1953 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 1954 /***/ 1955 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 1956 /***/ 1957 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 1958 /***/ 1959 public static final UnicodeBlock MENDE_KIKAKUI = 1960 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 1961 /***/ 1962 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 1963 /***/ 1964 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/ 1965 /***/ 1966 public static final UnicodeBlock MYANMAR_EXTENDED_B = 1967 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 1968 /***/ 1969 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 1970 /***/ 1971 public static final UnicodeBlock OLD_NORTH_ARABIAN = 1972 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 1973 /***/ 1974 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 1975 /***/ 1976 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 1977 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 1978 /***/ 1979 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 1980 /***/ 1981 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 1982 /***/ 1983 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 1984 /***/ 1985 public static final UnicodeBlock PSALTER_PAHLAVI = 1986 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 1987 /***/ 1988 public static final 
UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 1989 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 1990 /***/ 1991 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 1992 /***/ 1993 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 1994 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 1995 /***/ 1996 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 1997 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 1998 /***/ 1999 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 2000 /***/ 2001 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 2002 2003 /* New blocks in Unicode 8.0 */ 2004 2005 /***/ 2006 public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/ 2007 /***/ 2008 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2009 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/ 2010 /***/ 2011 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2012 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/ 2013 /***/ 2014 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2015 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2016 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/ 2017 /***/ 2018 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2019 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/ 2020 /***/ 2021 public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/ 2022 /***/ 2023 public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/ 2024 /***/ 2025 public static final UnicodeBlock OLD_HUNGARIAN = 2026 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/ 2027 
/***/ 2028 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2029 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2030 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2031 /***/ 2032 public static final UnicodeBlock SUTTON_SIGNWRITING = 2033 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2034 2035 /* New blocks in Unicode 9.0 */ 2036 2037 /***/ 2038 public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/ 2039 /***/ 2040 public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/ 2041 /***/ 2042 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2043 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/ 2044 /***/ 2045 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 2046 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/ 2047 /***/ 2048 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 2049 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/ 2050 /***/ 2051 public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/ 2052 /***/ 2053 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 2054 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/ 2055 /***/ 2056 public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/ 2057 /***/ 2058 public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/ 2059 /***/ 2060 public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/ 2061 /***/ 2062 public static final UnicodeBlock TANGUT_COMPONENTS = 2063 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/ 2064 2065 /** 2066 */ 2067 public static final UnicodeBlock INVALID_CODE 2068 = new UnicodeBlock("INVALID_CODE", 
INVALID_CODE_ID); 2069 2070 static { 2071 for (int blockId = 0; blockId < COUNT; ++blockId) { 2072 if (BLOCKS_[blockId] == null) { 2073 throw new java.lang.IllegalStateException( 2074 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2075 } 2076 } 2077 } 2078 2079 // public methods -------------------------------------------------- 2080 2081 /** 2082 * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID. 2083 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2084 * @param id UnicodeBlock ID 2085 * @return the only instance of the UnicodeBlock with the argument ID 2086 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2087 * returned. 2088 */ getInstance(int id)2089 public static UnicodeBlock getInstance(int id) 2090 { 2091 if (id >= 0 && id < BLOCKS_.length) { 2092 return BLOCKS_[id]; 2093 } 2094 return INVALID_CODE; 2095 } 2096 2097 /** 2098 * Returns the Unicode allocation block that contains the code point, 2099 * or null if the code point is not a member of a defined block. 2100 * @param ch code point to be tested 2101 * @return the Unicode allocation block that contains the code point 2102 */ of(int ch)2103 public static UnicodeBlock of(int ch) 2104 { 2105 if (ch > MAX_VALUE) { 2106 return INVALID_CODE; 2107 } 2108 2109 return UnicodeBlock.getInstance( 2110 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2111 } 2112 2113 /** 2114 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2115 * Returns the Unicode block with the given name. <strong>[icu] Note:</strong> Unlike 2116 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2117 * against the official UCD name and the Java block name 2118 * (ignoring case). 
2119 * @param blockName the name of the block to match 2120 * @return the UnicodeBlock with that name 2121 * @throws IllegalArgumentException if the blockName could not be matched 2122 */ forName(String blockName)2123 public static final UnicodeBlock forName(String blockName) { 2124 Map<String, UnicodeBlock> m = null; 2125 if (mref != null) { 2126 m = mref.get(); 2127 } 2128 if (m == null) { 2129 m = new HashMap<String, UnicodeBlock>(BLOCKS_.length); 2130 for (int i = 0; i < BLOCKS_.length; ++i) { 2131 UnicodeBlock b = BLOCKS_[i]; 2132 String name = trimBlockName( 2133 getPropertyValueName(UProperty.BLOCK, b.getID(), 2134 UProperty.NameChoice.LONG)); 2135 m.put(name, b); 2136 } 2137 mref = new SoftReference<Map<String, UnicodeBlock>>(m); 2138 } 2139 UnicodeBlock b = m.get(trimBlockName(blockName)); 2140 if (b == null) { 2141 throw new IllegalArgumentException(); 2142 } 2143 return b; 2144 } 2145 private static SoftReference<Map<String, UnicodeBlock>> mref; 2146 trimBlockName(String name)2147 private static String trimBlockName(String name) { 2148 String upper = name.toUpperCase(Locale.ENGLISH); 2149 StringBuilder result = new StringBuilder(upper.length()); 2150 for (int i = 0; i < upper.length(); i++) { 2151 char c = upper.charAt(i); 2152 if (c != ' ' && c != '_' && c != '-') { 2153 result.append(c); 2154 } 2155 } 2156 return result.toString(); 2157 } 2158 2159 /** 2160 * {icu} Returns the type ID of this Unicode block 2161 * @return integer type ID of this Unicode block 2162 */ getID()2163 public int getID() 2164 { 2165 return m_id_; 2166 } 2167 2168 // private data members --------------------------------------------- 2169 2170 /** 2171 * Identification code for this UnicodeBlock 2172 */ 2173 private int m_id_; 2174 2175 // private constructor ---------------------------------------------- 2176 2177 /** 2178 * UnicodeBlock constructor 2179 * @param name name of this UnicodeBlock 2180 * @param id unique id of this UnicodeBlock 2181 * @exception 
NullPointerException if name is <code>null</code> 2182 */ UnicodeBlock(String name, int id)2183 private UnicodeBlock(String name, int id) 2184 { 2185 super(name); 2186 m_id_ = id; 2187 if (id >= 0) { 2188 BLOCKS_[id] = this; 2189 } 2190 } 2191 } 2192 2193 /** 2194 * East Asian Width constants. 2195 * @see UProperty#EAST_ASIAN_WIDTH 2196 * @see UCharacter#getIntPropertyValue 2197 */ 2198 public static interface EastAsianWidth 2199 { 2200 /** 2201 */ 2202 public static final int NEUTRAL = 0; 2203 /** 2204 */ 2205 public static final int AMBIGUOUS = 1; 2206 /** 2207 */ 2208 public static final int HALFWIDTH = 2; 2209 /** 2210 */ 2211 public static final int FULLWIDTH = 3; 2212 /** 2213 */ 2214 public static final int NARROW = 4; 2215 /** 2216 */ 2217 public static final int WIDE = 5; 2218 /** 2219 * One more than the highest normal EastAsianWidth value. 2220 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH). 2221 * 2222 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2223 * @hide unsupported on Android 2224 */ 2225 @Deprecated 2226 public static final int COUNT = 6; 2227 } 2228 2229 /** 2230 * Decomposition Type constants. 
2231 * @see UProperty#DECOMPOSITION_TYPE 2232 */ 2233 public static interface DecompositionType 2234 { 2235 /** 2236 */ 2237 public static final int NONE = 0; 2238 /** 2239 */ 2240 public static final int CANONICAL = 1; 2241 /** 2242 */ 2243 public static final int COMPAT = 2; 2244 /** 2245 */ 2246 public static final int CIRCLE = 3; 2247 /** 2248 */ 2249 public static final int FINAL = 4; 2250 /** 2251 */ 2252 public static final int FONT = 5; 2253 /** 2254 */ 2255 public static final int FRACTION = 6; 2256 /** 2257 */ 2258 public static final int INITIAL = 7; 2259 /** 2260 */ 2261 public static final int ISOLATED = 8; 2262 /** 2263 */ 2264 public static final int MEDIAL = 9; 2265 /** 2266 */ 2267 public static final int NARROW = 10; 2268 /** 2269 */ 2270 public static final int NOBREAK = 11; 2271 /** 2272 */ 2273 public static final int SMALL = 12; 2274 /** 2275 */ 2276 public static final int SQUARE = 13; 2277 /** 2278 */ 2279 public static final int SUB = 14; 2280 /** 2281 */ 2282 public static final int SUPER = 15; 2283 /** 2284 */ 2285 public static final int VERTICAL = 16; 2286 /** 2287 */ 2288 public static final int WIDE = 17; 2289 /** 2290 * One more than the highest normal DecompositionType value. 2291 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE). 2292 * 2293 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2294 * @hide unsupported on Android 2295 */ 2296 @Deprecated 2297 public static final int COUNT = 18; 2298 } 2299 2300 /** 2301 * Joining Type constants. 
2302 * @see UProperty#JOINING_TYPE 2303 */ 2304 public static interface JoiningType 2305 { 2306 /** 2307 */ 2308 public static final int NON_JOINING = 0; 2309 /** 2310 */ 2311 public static final int JOIN_CAUSING = 1; 2312 /** 2313 */ 2314 public static final int DUAL_JOINING = 2; 2315 /** 2316 */ 2317 public static final int LEFT_JOINING = 3; 2318 /** 2319 */ 2320 public static final int RIGHT_JOINING = 4; 2321 /** 2322 */ 2323 public static final int TRANSPARENT = 5; 2324 /** 2325 * One more than the highest normal JoiningType value. 2326 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE). 2327 * 2328 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2329 * @hide unsupported on Android 2330 */ 2331 @Deprecated 2332 public static final int COUNT = 6; 2333 } 2334 2335 /** 2336 * Joining Group constants. 2337 * @see UProperty#JOINING_GROUP 2338 */ 2339 public static interface JoiningGroup 2340 { 2341 /** 2342 */ 2343 public static final int NO_JOINING_GROUP = 0; 2344 /** 2345 */ 2346 public static final int AIN = 1; 2347 /** 2348 */ 2349 public static final int ALAPH = 2; 2350 /** 2351 */ 2352 public static final int ALEF = 3; 2353 /** 2354 */ 2355 public static final int BEH = 4; 2356 /** 2357 */ 2358 public static final int BETH = 5; 2359 /** 2360 */ 2361 public static final int DAL = 6; 2362 /** 2363 */ 2364 public static final int DALATH_RISH = 7; 2365 /** 2366 */ 2367 public static final int E = 8; 2368 /** 2369 */ 2370 public static final int FEH = 9; 2371 /** 2372 */ 2373 public static final int FINAL_SEMKATH = 10; 2374 /** 2375 */ 2376 public static final int GAF = 11; 2377 /** 2378 */ 2379 public static final int GAMAL = 12; 2380 /** 2381 */ 2382 public static final int HAH = 13; 2383 /***/ 2384 public static final int TEH_MARBUTA_GOAL = 14; 2385 /** 2386 */ 2387 public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL; 2388 /** 2389 */ 2390 public static final int HE = 15; 
2391 /** 2392 */ 2393 public static final int HEH = 16; 2394 /** 2395 */ 2396 public static final int HEH_GOAL = 17; 2397 /** 2398 */ 2399 public static final int HETH = 18; 2400 /** 2401 */ 2402 public static final int KAF = 19; 2403 /** 2404 */ 2405 public static final int KAPH = 20; 2406 /** 2407 */ 2408 public static final int KNOTTED_HEH = 21; 2409 /** 2410 */ 2411 public static final int LAM = 22; 2412 /** 2413 */ 2414 public static final int LAMADH = 23; 2415 /** 2416 */ 2417 public static final int MEEM = 24; 2418 /** 2419 */ 2420 public static final int MIM = 25; 2421 /** 2422 */ 2423 public static final int NOON = 26; 2424 /** 2425 */ 2426 public static final int NUN = 27; 2427 /** 2428 */ 2429 public static final int PE = 28; 2430 /** 2431 */ 2432 public static final int QAF = 29; 2433 /** 2434 */ 2435 public static final int QAPH = 30; 2436 /** 2437 */ 2438 public static final int REH = 31; 2439 /** 2440 */ 2441 public static final int REVERSED_PE = 32; 2442 /** 2443 */ 2444 public static final int SAD = 33; 2445 /** 2446 */ 2447 public static final int SADHE = 34; 2448 /** 2449 */ 2450 public static final int SEEN = 35; 2451 /** 2452 */ 2453 public static final int SEMKATH = 36; 2454 /** 2455 */ 2456 public static final int SHIN = 37; 2457 /** 2458 */ 2459 public static final int SWASH_KAF = 38; 2460 /** 2461 */ 2462 public static final int SYRIAC_WAW = 39; 2463 /** 2464 */ 2465 public static final int TAH = 40; 2466 /** 2467 */ 2468 public static final int TAW = 41; 2469 /** 2470 */ 2471 public static final int TEH_MARBUTA = 42; 2472 /** 2473 */ 2474 public static final int TETH = 43; 2475 /** 2476 */ 2477 public static final int WAW = 44; 2478 /** 2479 */ 2480 public static final int YEH = 45; 2481 /** 2482 */ 2483 public static final int YEH_BARREE = 46; 2484 /** 2485 */ 2486 public static final int YEH_WITH_TAIL = 47; 2487 /** 2488 */ 2489 public static final int YUDH = 48; 2490 /** 2491 */ 2492 public static final int YUDH_HE = 49; 2493 /** 2494 
*/ 2495 public static final int ZAIN = 50; 2496 /** 2497 */ 2498 public static final int FE = 51; 2499 /** 2500 */ 2501 public static final int KHAPH = 52; 2502 /** 2503 */ 2504 public static final int ZHAIN = 53; 2505 /** 2506 */ 2507 public static final int BURUSHASKI_YEH_BARREE = 54; 2508 /***/ 2509 public static final int FARSI_YEH = 55; 2510 /***/ 2511 public static final int NYA = 56; 2512 /***/ 2513 public static final int ROHINGYA_YEH = 57; 2514 2515 /***/ 2516 public static final int MANICHAEAN_ALEPH = 58; 2517 /***/ 2518 public static final int MANICHAEAN_AYIN = 59; 2519 /***/ 2520 public static final int MANICHAEAN_BETH = 60; 2521 /***/ 2522 public static final int MANICHAEAN_DALETH = 61; 2523 /***/ 2524 public static final int MANICHAEAN_DHAMEDH = 62; 2525 /***/ 2526 public static final int MANICHAEAN_FIVE = 63; 2527 /***/ 2528 public static final int MANICHAEAN_GIMEL = 64; 2529 /***/ 2530 public static final int MANICHAEAN_HETH = 65; 2531 /***/ 2532 public static final int MANICHAEAN_HUNDRED = 66; 2533 /***/ 2534 public static final int MANICHAEAN_KAPH = 67; 2535 /***/ 2536 public static final int MANICHAEAN_LAMEDH = 68; 2537 /***/ 2538 public static final int MANICHAEAN_MEM = 69; 2539 /***/ 2540 public static final int MANICHAEAN_NUN = 70; 2541 /***/ 2542 public static final int MANICHAEAN_ONE = 71; 2543 /***/ 2544 public static final int MANICHAEAN_PE = 72; 2545 /***/ 2546 public static final int MANICHAEAN_QOPH = 73; 2547 /***/ 2548 public static final int MANICHAEAN_RESH = 74; 2549 /***/ 2550 public static final int MANICHAEAN_SADHE = 75; 2551 /***/ 2552 public static final int MANICHAEAN_SAMEKH = 76; 2553 /***/ 2554 public static final int MANICHAEAN_TAW = 77; 2555 /***/ 2556 public static final int MANICHAEAN_TEN = 78; 2557 /***/ 2558 public static final int MANICHAEAN_TETH = 79; 2559 /***/ 2560 public static final int MANICHAEAN_THAMEDH = 80; 2561 /***/ 2562 public static final int MANICHAEAN_TWENTY = 81; 2563 /***/ 2564 public static final int 
MANICHAEAN_WAW = 82; 2565 /***/ 2566 public static final int MANICHAEAN_YODH = 83; 2567 /***/ 2568 public static final int MANICHAEAN_ZAYIN = 84; 2569 /***/ 2570 public static final int STRAIGHT_WAW = 85; 2571 2572 /***/ 2573 public static final int AFRICAN_FEH = 86; 2574 /***/ 2575 public static final int AFRICAN_NOON = 87; 2576 /***/ 2577 public static final int AFRICAN_QAF = 88; 2578 2579 /** 2580 * One more than the highest normal JoiningGroup value. 2581 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup). 2582 * 2583 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2584 * @hide unsupported on Android 2585 */ 2586 @Deprecated 2587 public static final int COUNT = 89; 2588 } 2589 2590 /** 2591 * Grapheme Cluster Break constants. 2592 * @see UProperty#GRAPHEME_CLUSTER_BREAK 2593 */ 2594 public static interface GraphemeClusterBreak { 2595 /** 2596 */ 2597 public static final int OTHER = 0; 2598 /** 2599 */ 2600 public static final int CONTROL = 1; 2601 /** 2602 */ 2603 public static final int CR = 2; 2604 /** 2605 */ 2606 public static final int EXTEND = 3; 2607 /** 2608 */ 2609 public static final int L = 4; 2610 /** 2611 */ 2612 public static final int LF = 5; 2613 /** 2614 */ 2615 public static final int LV = 6; 2616 /** 2617 */ 2618 public static final int LVT = 7; 2619 /** 2620 */ 2621 public static final int T = 8; 2622 /** 2623 */ 2624 public static final int V = 9; 2625 /** 2626 */ 2627 public static final int SPACING_MARK = 10; 2628 /** 2629 */ 2630 public static final int PREPEND = 11; 2631 /***/ 2632 public static final int REGIONAL_INDICATOR = 12; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 2633 /***/ 2634 public static final int E_BASE = 13; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 2635 /***/ 2636 public static final int E_BASE_GAZ = 14; /*[EBG]*/ 2637 /***/ 2638 public static final int E_MODIFIER = 15; /*[EM]*/ 2639 /***/ 2640 public static final int 
GLUE_AFTER_ZWJ = 16; /*[GAZ]*/ 2641 /***/ 2642 public static final int ZWJ = 17; /*[ZWJ]*/ 2643 /** 2644 * One more than the highest normal GraphemeClusterBreak value. 2645 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK). 2646 * 2647 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2648 * @hide unsupported on Android 2649 */ 2650 @Deprecated 2651 public static final int COUNT = 18; 2652 } 2653 2654 /** 2655 * Word Break constants. 2656 * @see UProperty#WORD_BREAK 2657 */ 2658 public static interface WordBreak { 2659 /** 2660 */ 2661 public static final int OTHER = 0; 2662 /** 2663 */ 2664 public static final int ALETTER = 1; 2665 /** 2666 */ 2667 public static final int FORMAT = 2; 2668 /** 2669 */ 2670 public static final int KATAKANA = 3; 2671 /** 2672 */ 2673 public static final int MIDLETTER = 4; 2674 /** 2675 */ 2676 public static final int MIDNUM = 5; 2677 /** 2678 */ 2679 public static final int NUMERIC = 6; 2680 /** 2681 */ 2682 public static final int EXTENDNUMLET = 7; 2683 /** 2684 */ 2685 public static final int CR = 8; 2686 /** 2687 */ 2688 public static final int EXTEND = 9; 2689 /** 2690 */ 2691 public static final int LF = 10; 2692 /** 2693 */ 2694 public static final int MIDNUMLET = 11; 2695 /** 2696 */ 2697 public static final int NEWLINE = 12; 2698 /***/ 2699 public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 2700 /***/ 2701 public static final int HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 2702 /***/ 2703 public static final int SINGLE_QUOTE = 15; /*[SQ]*/ 2704 /***/ 2705 public static final int DOUBLE_QUOTE = 16; /*[DQ]*/ 2706 /***/ 2707 public static final int E_BASE = 17; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 2708 /***/ 2709 public static final int E_BASE_GAZ = 18; /*[EBG]*/ 2710 /***/ 2711 public static final int E_MODIFIER = 19; /*[EM]*/ 2712 /***/ 2713 public 
static final int GLUE_AFTER_ZWJ = 20; /*[GAZ]*/ 2714 /***/ 2715 public static final int ZWJ = 21; /*[ZWJ]*/ 2716 /** 2717 * One more than the highest normal WordBreak value. 2718 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK). 2719 * 2720 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2721 * @hide unsupported on Android 2722 */ 2723 @Deprecated 2724 public static final int COUNT = 22; 2725 } 2726 2727 /** 2728 * Sentence Break constants. 2729 * @see UProperty#SENTENCE_BREAK 2730 */ 2731 public static interface SentenceBreak { 2732 /** 2733 */ 2734 public static final int OTHER = 0; 2735 /** 2736 */ 2737 public static final int ATERM = 1; 2738 /** 2739 */ 2740 public static final int CLOSE = 2; 2741 /** 2742 */ 2743 public static final int FORMAT = 3; 2744 /** 2745 */ 2746 public static final int LOWER = 4; 2747 /** 2748 */ 2749 public static final int NUMERIC = 5; 2750 /** 2751 */ 2752 public static final int OLETTER = 6; 2753 /** 2754 */ 2755 public static final int SEP = 7; 2756 /** 2757 */ 2758 public static final int SP = 8; 2759 /** 2760 */ 2761 public static final int STERM = 9; 2762 /** 2763 */ 2764 public static final int UPPER = 10; 2765 /** 2766 */ 2767 public static final int CR = 11; 2768 /** 2769 */ 2770 public static final int EXTEND = 12; 2771 /** 2772 */ 2773 public static final int LF = 13; 2774 /** 2775 */ 2776 public static final int SCONTINUE = 14; 2777 /** 2778 * One more than the highest normal SentenceBreak value. 2779 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK). 2780 * 2781 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2782 * @hide unsupported on Android 2783 */ 2784 @Deprecated 2785 public static final int COUNT = 15; 2786 } 2787 2788 /** 2789 * Line Break constants. 
2790 * @see UProperty#LINE_BREAK 2791 */ 2792 public static interface LineBreak 2793 { 2794 /** 2795 */ 2796 public static final int UNKNOWN = 0; 2797 /** 2798 */ 2799 public static final int AMBIGUOUS = 1; 2800 /** 2801 */ 2802 public static final int ALPHABETIC = 2; 2803 /** 2804 */ 2805 public static final int BREAK_BOTH = 3; 2806 /** 2807 */ 2808 public static final int BREAK_AFTER = 4; 2809 /** 2810 */ 2811 public static final int BREAK_BEFORE = 5; 2812 /** 2813 */ 2814 public static final int MANDATORY_BREAK = 6; 2815 /** 2816 */ 2817 public static final int CONTINGENT_BREAK = 7; 2818 /** 2819 */ 2820 public static final int CLOSE_PUNCTUATION = 8; 2821 /** 2822 */ 2823 public static final int COMBINING_MARK = 9; 2824 /** 2825 */ 2826 public static final int CARRIAGE_RETURN = 10; 2827 /** 2828 */ 2829 public static final int EXCLAMATION = 11; 2830 /** 2831 */ 2832 public static final int GLUE = 12; 2833 /** 2834 */ 2835 public static final int HYPHEN = 13; 2836 /** 2837 */ 2838 public static final int IDEOGRAPHIC = 14; 2839 /** 2840 * @see #INSEPARABLE 2841 */ 2842 public static final int INSEPERABLE = 15; 2843 /** 2844 * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
2845 */ 2846 public static final int INSEPARABLE = 15; 2847 /** 2848 */ 2849 public static final int INFIX_NUMERIC = 16; 2850 /** 2851 */ 2852 public static final int LINE_FEED = 17; 2853 /** 2854 */ 2855 public static final int NONSTARTER = 18; 2856 /** 2857 */ 2858 public static final int NUMERIC = 19; 2859 /** 2860 */ 2861 public static final int OPEN_PUNCTUATION = 20; 2862 /** 2863 */ 2864 public static final int POSTFIX_NUMERIC = 21; 2865 /** 2866 */ 2867 public static final int PREFIX_NUMERIC = 22; 2868 /** 2869 */ 2870 public static final int QUOTATION = 23; 2871 /** 2872 */ 2873 public static final int COMPLEX_CONTEXT = 24; 2874 /** 2875 */ 2876 public static final int SURROGATE = 25; 2877 /** 2878 */ 2879 public static final int SPACE = 26; 2880 /** 2881 */ 2882 public static final int BREAK_SYMBOLS = 27; 2883 /** 2884 */ 2885 public static final int ZWSPACE = 28; 2886 /** 2887 */ 2888 public static final int NEXT_LINE = 29; /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 2889 /** 2890 */ 2891 public static final int WORD_JOINER = 30; /*[WJ]*/ 2892 /** 2893 */ 2894 public static final int H2 = 31; /* from here on: new in Unicode 4.1/ICU 3.4 */ 2895 /** 2896 */ 2897 public static final int H3 = 32; 2898 /** 2899 */ 2900 public static final int JL = 33; 2901 /** 2902 */ 2903 public static final int JT = 34; 2904 /** 2905 */ 2906 public static final int JV = 35; 2907 /***/ 2908 public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ 2909 /***/ 2910 public static final int CONDITIONAL_JAPANESE_STARTER = 37; /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 2911 /***/ 2912 public static final int HEBREW_LETTER = 38; /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 2913 /***/ 2914 public static final int REGIONAL_INDICATOR = 39; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 2915 /***/ 2916 public static final int E_BASE = 40; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 2917 /***/ 2918 public static final int E_MODIFIER = 41; /*[EM]*/ 
2919 /***/ 2920 public static final int ZWJ = 42; /*[ZWJ]*/ 2921 /** 2922 * One more than the highest normal LineBreak value. 2923 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK). 2924 * 2925 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2926 * @hide unsupported on Android 2927 */ 2928 @Deprecated 2929 public static final int COUNT = 43; 2930 } 2931 2932 /** 2933 * Numeric Type constants. 2934 * @see UProperty#NUMERIC_TYPE 2935 */ 2936 public static interface NumericType 2937 { 2938 /** 2939 */ 2940 public static final int NONE = 0; 2941 /** 2942 */ 2943 public static final int DECIMAL = 1; 2944 /** 2945 */ 2946 public static final int DIGIT = 2; 2947 /** 2948 */ 2949 public static final int NUMERIC = 3; 2950 /** 2951 * One more than the highest normal NumericType value. 2952 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE). 2953 * 2954 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2955 * @hide unsupported on Android 2956 */ 2957 @Deprecated 2958 public static final int COUNT = 4; 2959 } 2960 2961 /** 2962 * Hangul Syllable Type constants. 2963 * 2964 * @see UProperty#HANGUL_SYLLABLE_TYPE 2965 */ 2966 public static interface HangulSyllableType 2967 { 2968 /** 2969 */ 2970 public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ 2971 /** 2972 */ 2973 public static final int LEADING_JAMO = 1; /*[L]*/ 2974 /** 2975 */ 2976 public static final int VOWEL_JAMO = 2; /*[V]*/ 2977 /** 2978 */ 2979 public static final int TRAILING_JAMO = 3; /*[T]*/ 2980 /** 2981 */ 2982 public static final int LV_SYLLABLE = 4; /*[LV]*/ 2983 /** 2984 */ 2985 public static final int LVT_SYLLABLE = 5; /*[LVT]*/ 2986 /** 2987 * One more than the highest normal HangulSyllableType value. 2988 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE). 
2989 * 2990 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2991 * @hide unsupported on Android 2992 */ 2993 @Deprecated 2994 public static final int COUNT = 6; 2995 } 2996 2997 /** 2998 * Bidi Paired Bracket Type constants. 2999 * 3000 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3001 */ 3002 public static interface BidiPairedBracketType { 3003 /** 3004 * Not a paired bracket. 3005 */ 3006 public static final int NONE = 0; 3007 /** 3008 * Open paired bracket. 3009 */ 3010 public static final int OPEN = 1; 3011 /** 3012 * Close paired bracket. 3013 */ 3014 public static final int CLOSE = 2; 3015 /** 3016 * One more than the highest normal BidiPairedBracketType value. 3017 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE). 3018 * 3019 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3020 * @hide unsupported on Android 3021 */ 3022 @Deprecated 3023 public static final int COUNT = 3; 3024 } 3025 3026 // public data members ----------------------------------------------- 3027 3028 /** 3029 * The lowest Unicode code point value, constant 0. 3030 * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}. 3031 */ 3032 public static final int MIN_VALUE = Character.MIN_CODE_POINT; 3033 3034 /** 3035 * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits). 3036 * Same as {@link Character#MAX_CODE_POINT}. 3037 * 3038 * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE} 3039 * which is still a char with the value U+FFFF. 3040 */ 3041 public static final int MAX_VALUE = Character.MAX_CODE_POINT; 3042 3043 /** 3044 * The minimum value for Supplementary code points, constant U+10000. 3045 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 
3046 */ 3047 public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT; 3048 3049 /** 3050 * Unicode value used when translating into Unicode encoding form and there 3051 * is no existing character. 3052 */ 3053 public static final int REPLACEMENT_CHAR = '\uFFFD'; 3054 3055 /** 3056 * Special value that is returned by getUnicodeNumericValue(int) when no 3057 * numeric value is defined for a code point. 3058 * @see #getUnicodeNumericValue 3059 */ 3060 public static final double NO_NUMERIC_VALUE = -123456789; 3061 3062 /** 3063 * Compatibility constant for Java Character's MIN_RADIX. 3064 */ 3065 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 3066 3067 /** 3068 * Compatibility constant for Java Character's MAX_RADIX. 3069 */ 3070 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 3071 3072 /** 3073 * Do not lowercase non-initial parts of words when titlecasing. 3074 * Option bit for titlecasing APIs that take an options bit set. 3075 * 3076 * By default, titlecasing will titlecase the first cased character 3077 * of a word and lowercase all other characters. 3078 * With this option, the other characters will not be modified. 3079 * 3080 * @see #toTitleCase 3081 */ 3082 public static final int TITLECASE_NO_LOWERCASE = 0x100; 3083 3084 /** 3085 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; 3086 * titlecase exactly the characters at breaks from the iterator. 3087 * Option bit for titlecasing APIs that take an options bit set. 3088 * 3089 * By default, titlecasing will take each break iterator index, 3090 * adjust it by looking for the next cased character, and titlecase that one. 3091 * Other characters are lowercased. 3092 * 3093 * This follows Unicode 4 & 5 section 3.13 Default Case Operations: 3094 * 3095 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 3096 * #29, "Text Boundaries." 
Between each pair of word boundaries, find the first 3097 * cased character F. If F exists, map F to default_title(F); then map each 3098 * subsequent character C to default_lower(C). 3099 * 3100 * @see #toTitleCase 3101 * @see #TITLECASE_NO_LOWERCASE 3102 */ 3103 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 3104 3105 // public methods ---------------------------------------------------- 3106 3107 /** 3108 * Returnss the numeric value of a decimal digit code point. 3109 * <br>This method observes the semantics of 3110 * <code>java.lang.Character.digit()</code>. Note that this 3111 * will return positive values for code points for which isDigit 3112 * returns false, just like java.lang.Character. 3113 * <br><em>Semantic Change:</em> In release 1.3.1 and 3114 * prior, this did not treat the European letters as having a 3115 * digit value, and also treated numeric letters and other numbers as 3116 * digits. 3117 * This has been changed to conform to the java semantics. 3118 * <br>A code point is a valid digit if and only if: 3119 * <ul> 3120 * <li>ch is a decimal digit or one of the european letters, and 3121 * <li>the value of ch is less than the specified radix. 3122 * </ul> 3123 * @param ch the code point to query 3124 * @param radix the radix 3125 * @return the numeric value represented by the code point in the 3126 * specified radix, or -1 if the code point is not a decimal digit 3127 * or if its value is too large for the radix 3128 */ digit(int ch, int radix)3129 public static int digit(int ch, int radix) 3130 { 3131 if (2 <= radix && radix <= 36) { 3132 int value = digit(ch); 3133 if (value < 0) { 3134 // ch is not a decimal digit, try latin letters 3135 value = UCharacterProperty.getEuropeanDigit(ch); 3136 } 3137 return (value < radix) ? value : -1; 3138 } else { 3139 return -1; // invalid radix 3140 } 3141 } 3142 3143 /** 3144 * Returnss the numeric value of a decimal digit code point. 
3145 * <br>This is a convenience overload of <code>digit(int, int)</code> 3146 * that provides a decimal radix. 3147 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 3148 * treated numeric letters and other numbers as digits. This has 3149 * been changed to conform to the java semantics. 3150 * @param ch the code point to query 3151 * @return the numeric value represented by the code point, 3152 * or -1 if the code point is not a decimal digit or if its 3153 * value is too large for a decimal radix 3154 */ digit(int ch)3155 public static int digit(int ch) 3156 { 3157 return UCharacterProperty.INSTANCE.digit(ch); 3158 } 3159 3160 /** 3161 * Returns the numeric value of the code point as a nonnegative 3162 * integer. 3163 * <br>If the code point does not have a numeric value, then -1 is returned. 3164 * <br> 3165 * If the code point has a numeric value that cannot be represented as a 3166 * nonnegative integer (for example, a fractional value), then -2 is 3167 * returned. 3168 * @param ch the code point to query 3169 * @return the numeric value of the code point, or -1 if it has no numeric 3170 * value, or -2 if it has a numeric value that cannot be represented as a 3171 * nonnegative integer 3172 */ getNumericValue(int ch)3173 public static int getNumericValue(int ch) 3174 { 3175 return UCharacterProperty.INSTANCE.getNumericValue(ch); 3176 } 3177 3178 /** 3179 * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the 3180 * Unicode Character Database. 3181 * <p>A "double" return type is necessary because some numeric values are 3182 * fractions, negative, or too large for int. 3183 * <p>For characters without any numeric values in the Unicode Character 3184 * Database, this function will return NO_NUMERIC_VALUE. 3185 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 
3186 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 3187 * return type int and returns -1 when the argument ch does not have a 3188 * corresponding numeric value. This has been changed to synch with ICU4C 3189 * 3190 * This corresponds to the ICU4C function u_getNumericValue. 3191 * @param ch Code point to get the numeric value for. 3192 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 3193 */ getUnicodeNumericValue(int ch)3194 public static double getUnicodeNumericValue(int ch) 3195 { 3196 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 3197 } 3198 3199 /** 3200 * Compatibility override of Java deprecated method. This 3201 * method will always remain deprecated. 3202 * Same as java.lang.Character.isSpace(). 3203 * @param ch the code point 3204 * @return true if the code point is a space character as 3205 * defined by java.lang.Character.isSpace. 3206 * @deprecated ICU 3.4 (Java) 3207 * @hide original deprecated declaration 3208 */ 3209 @Deprecated isSpace(int ch)3210 public static boolean isSpace(int ch) { 3211 return ch <= 0x20 && 3212 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 3213 } 3214 3215 /** 3216 * Returns a value indicating a code point's Unicode category. 3217 * Up-to-date Unicode implementation of java.lang.Character.getType() 3218 * except for the above mentioned code points that had their category 3219 * changed.<br> 3220 * Return results are constants from the interface 3221 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 3222 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 3223 * those returned by java.lang.Character.getType. UCharacterCategory values 3224 * match the ones used in ICU4C, while java.lang.Character type 3225 * values, though similar, skip the value 17. 
3226 * @param ch code point whose type is to be determined 3227 * @return category which is a value of UCharacterCategory 3228 */ getType(int ch)3229 public static int getType(int ch) 3230 { 3231 return UCharacterProperty.INSTANCE.getType(ch); 3232 } 3233 3234 /** 3235 * Determines if a code point has a defined meaning in the up-to-date 3236 * Unicode standard. 3237 * E.g. supplementary code points though allocated space are not defined in 3238 * Unicode yet.<br> 3239 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 3240 * @param ch code point to be determined if it is defined in the most 3241 * current version of Unicode 3242 * @return true if this code point is defined in unicode 3243 */ isDefined(int ch)3244 public static boolean isDefined(int ch) 3245 { 3246 return getType(ch) != 0; 3247 } 3248 3249 /** 3250 * Determines if a code point is a Java digit. 3251 * <br>This method observes the semantics of 3252 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 3253 * digits only. 3254 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 3255 * numeric letters and other numbers as digits. 3256 * This has been changed to conform to the java semantics. 3257 * @param ch code point to query 3258 * @return true if this code point is a digit 3259 */ isDigit(int ch)3260 public static boolean isDigit(int ch) 3261 { 3262 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 3263 } 3264 3265 /** 3266 * Determines if the specified code point is an ISO control character. 
3267 * A code point is considered to be an ISO control character if it is in 3268 * the range \u0000 through \u001F or in the range \u007F through 3269 * \u009F.<br> 3270 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 3271 * @param ch code point to determine if it is an ISO control character 3272 * @return true if code point is a ISO control character 3273 */ isISOControl(int ch)3274 public static boolean isISOControl(int ch) 3275 { 3276 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 3277 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 3278 } 3279 3280 /** 3281 * Determines if the specified code point is a letter. 3282 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 3283 * @param ch code point to determine if it is a letter 3284 * @return true if code point is a letter 3285 */ isLetter(int ch)3286 public static boolean isLetter(int ch) 3287 { 3288 // if props == 0, it will just fall through and return false 3289 return ((1 << getType(ch)) 3290 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3291 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3292 | (1 << UCharacterCategory.TITLECASE_LETTER) 3293 | (1 << UCharacterCategory.MODIFIER_LETTER) 3294 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 3295 } 3296 3297 /** 3298 * Determines if the specified code point is a letter or digit. 3299 * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii 3300 * characters 'A' - 'Z' and 'a' - 'z' as digits. 
3301 * @param ch code point to determine if it is a letter or a digit 3302 * @return true if code point is a letter or a digit 3303 */ isLetterOrDigit(int ch)3304 public static boolean isLetterOrDigit(int ch) 3305 { 3306 return ((1 << getType(ch)) 3307 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3308 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3309 | (1 << UCharacterCategory.TITLECASE_LETTER) 3310 | (1 << UCharacterCategory.MODIFIER_LETTER) 3311 | (1 << UCharacterCategory.OTHER_LETTER) 3312 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 3313 } 3314 3315 /** 3316 * Compatibility override of Java deprecated method. This 3317 * method will always remain deprecated. Delegates to 3318 * java.lang.Character.isJavaIdentifierStart. 3319 * @param cp the code point 3320 * @return true if the code point can start a java identifier. 3321 * @deprecated ICU 3.4 (Java) 3322 * @hide original deprecated declaration 3323 */ 3324 @Deprecated isJavaLetter(int cp)3325 public static boolean isJavaLetter(int cp) { 3326 return isJavaIdentifierStart(cp); 3327 } 3328 3329 /** 3330 * Compatibility override of Java deprecated method. This 3331 * method will always remain deprecated. Delegates to 3332 * java.lang.Character.isJavaIdentifierPart. 3333 * @param cp the code point 3334 * @return true if the code point can continue a java identifier. 3335 * @deprecated ICU 3.4 (Java) 3336 * @hide original deprecated declaration 3337 */ 3338 @Deprecated isJavaLetterOrDigit(int cp)3339 public static boolean isJavaLetterOrDigit(int cp) { 3340 return isJavaIdentifierPart(cp); 3341 } 3342 3343 /** 3344 * Compatibility override of Java method, delegates to 3345 * java.lang.Character.isJavaIdentifierStart. 3346 * @param cp the code point 3347 * @return true if the code point can start a java identifier. 
3348 */ isJavaIdentifierStart(int cp)3349 public static boolean isJavaIdentifierStart(int cp) { 3350 // note, downcast to char for jdk 1.4 compatibility 3351 return java.lang.Character.isJavaIdentifierStart((char)cp); 3352 } 3353 3354 /** 3355 * Compatibility override of Java method, delegates to 3356 * java.lang.Character.isJavaIdentifierPart. 3357 * @param cp the code point 3358 * @return true if the code point can continue a java identifier. 3359 */ isJavaIdentifierPart(int cp)3360 public static boolean isJavaIdentifierPart(int cp) { 3361 // note, downcast to char for jdk 1.4 compatibility 3362 return java.lang.Character.isJavaIdentifierPart((char)cp); 3363 } 3364 3365 /** 3366 * Determines if the specified code point is a lowercase character. 3367 * UnicodeData only contains case mappings for code points where they are 3368 * one-to-one mappings; it also omits information about context-sensitive 3369 * case mappings.<br> For more information about Unicode case mapping 3370 * please refer to the 3371 * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 3372 * #21</a>.<br> 3373 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 3374 * @param ch code point to determine if it is in lowercase 3375 * @return true if code point is a lowercase character 3376 */ isLowerCase(int ch)3377 public static boolean isLowerCase(int ch) 3378 { 3379 // if props == 0, it will just fall through and return false 3380 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 3381 } 3382 3383 /** 3384 * Determines if the specified code point is a white space character. 3385 * A code point is considered to be an whitespace character if and only 3386 * if it satisfies one of the following criteria: 3387 * <ul> 3388 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 3389 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 3390 * <li> It is \u0009, HORIZONTAL TABULATION. 
3391 * <li> It is \u000A, LINE FEED. 3392 * <li> It is \u000B, VERTICAL TABULATION. 3393 * <li> It is \u000C, FORM FEED. 3394 * <li> It is \u000D, CARRIAGE RETURN. 3395 * <li> It is \u001C, FILE SEPARATOR. 3396 * <li> It is \u001D, GROUP SEPARATOR. 3397 * <li> It is \u001E, RECORD SEPARATOR. 3398 * <li> It is \u001F, UNIT SEPARATOR. 3399 * </ul> 3400 * 3401 * This API tries to sync with the semantics of Java's 3402 * java.lang.Character.isWhitespace(), but it may not return 3403 * the exact same results because of the Unicode version 3404 * difference. 3405 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 3406 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 3407 * See http://www.unicode.org/versions/Unicode4.0.1/ 3408 * @param ch code point to determine if it is a white space 3409 * @return true if the specified code point is a white space character 3410 */ isWhitespace(int ch)3411 public static boolean isWhitespace(int ch) 3412 { 3413 // exclude no-break spaces 3414 // if props == 0, it will just fall through and return false 3415 return ((1 << getType(ch)) & 3416 ((1 << UCharacterCategory.SPACE_SEPARATOR) 3417 | (1 << UCharacterCategory.LINE_SEPARATOR) 3418 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 3419 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 3420 // TAB VT LF FF CR FS GS RS US NL are all control characters 3421 // that are white spaces. 3422 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 3423 } 3424 3425 /** 3426 * Determines if the specified code point is a Unicode specified space 3427 * character, i.e. if code point is in the category Zs, Zl and Zp. 3428 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
3429 * @param ch code point to determine if it is a space 3430 * @return true if the specified code point is a space character 3431 */ isSpaceChar(int ch)3432 public static boolean isSpaceChar(int ch) 3433 { 3434 // if props == 0, it will just fall through and return false 3435 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 3436 | (1 << UCharacterCategory.LINE_SEPARATOR) 3437 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 3438 != 0; 3439 } 3440 3441 /** 3442 * Determines if the specified code point is a titlecase character. 3443 * UnicodeData only contains case mappings for code points where they are 3444 * one-to-one mappings; it also omits information about context-sensitive 3445 * case mappings.<br> 3446 * For more information about Unicode case mapping please refer to the 3447 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3448 * Technical report #21</a>.<br> 3449 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 3450 * @param ch code point to determine if it is in title case 3451 * @return true if the specified code point is a titlecase character 3452 */ isTitleCase(int ch)3453 public static boolean isTitleCase(int ch) 3454 { 3455 // if props == 0, it will just fall through and return false 3456 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 3457 } 3458 3459 /** 3460 * Determines if the specified code point may be any part of a Unicode 3461 * identifier other than the starting character. 
3462 * A code point may be part of a Unicode identifier if and only if it is 3463 * one of the following: 3464 * <ul> 3465 * <li> Lu Uppercase letter 3466 * <li> Ll Lowercase letter 3467 * <li> Lt Titlecase letter 3468 * <li> Lm Modifier letter 3469 * <li> Lo Other letter 3470 * <li> Nl Letter number 3471 * <li> Pc Connecting punctuation character 3472 * <li> Nd decimal number 3473 * <li> Mc Spacing combining mark 3474 * <li> Mn Non-spacing mark 3475 * <li> Cf formatting code 3476 * </ul> 3477 * Up-to-date Unicode implementation of 3478 * java.lang.Character.isUnicodeIdentifierPart().<br> 3479 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3480 * @param ch code point to determine if is can be part of a Unicode 3481 * identifier 3482 * @return true if code point is any character belonging a unicode 3483 * identifier suffix after the first character 3484 */ isUnicodeIdentifierPart(int ch)3485 public static boolean isUnicodeIdentifierPart(int ch) 3486 { 3487 // if props == 0, it will just fall through and return false 3488 // cat == format 3489 return ((1 << getType(ch)) 3490 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3491 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3492 | (1 << UCharacterCategory.TITLECASE_LETTER) 3493 | (1 << UCharacterCategory.MODIFIER_LETTER) 3494 | (1 << UCharacterCategory.OTHER_LETTER) 3495 | (1 << UCharacterCategory.LETTER_NUMBER) 3496 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 3497 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 3498 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 3499 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 3500 || isIdentifierIgnorable(ch); 3501 } 3502 3503 /** 3504 * Determines if the specified code point is permissible as the first 3505 * character in a Unicode identifier. 
3506 * A code point may start a Unicode identifier if it is of type either 3507 * <ul> 3508 * <li> Lu Uppercase letter 3509 * <li> Ll Lowercase letter 3510 * <li> Lt Titlecase letter 3511 * <li> Lm Modifier letter 3512 * <li> Lo Other letter 3513 * <li> Nl Letter number 3514 * </ul> 3515 * Up-to-date Unicode implementation of 3516 * java.lang.Character.isUnicodeIdentifierStart().<br> 3517 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3518 * @param ch code point to determine if it can start a Unicode identifier 3519 * @return true if code point is the first character belonging a unicode 3520 * identifier 3521 */ isUnicodeIdentifierStart(int ch)3522 public static boolean isUnicodeIdentifierStart(int ch) 3523 { 3524 /*int cat = getType(ch);*/ 3525 // if props == 0, it will just fall through and return false 3526 return ((1 << getType(ch)) 3527 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3528 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3529 | (1 << UCharacterCategory.TITLECASE_LETTER) 3530 | (1 << UCharacterCategory.MODIFIER_LETTER) 3531 | (1 << UCharacterCategory.OTHER_LETTER) 3532 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 3533 } 3534 3535 /** 3536 * Determines if the specified code point should be regarded as an 3537 * ignorable character in a Java identifier. 3538 * A character is Java-identifier-ignorable if it has the general category 3539 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 3540 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 3541 * Up-to-date Unicode implementation of 3542 * java.lang.Character.isIdentifierIgnorable().<br> 3543 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3544 * <p>Note that Unicode just recommends to ignore Cf (format controls). 3545 * @param ch code point to be determined if it can be ignored in a Unicode 3546 * identifier. 
3547 * @return true if the code point is ignorable 3548 */ isIdentifierIgnorable(int ch)3549 public static boolean isIdentifierIgnorable(int ch) 3550 { 3551 // see java.lang.Character.isIdentifierIgnorable() on range of 3552 // ignorable characters. 3553 if (ch <= 0x9f) { 3554 return isISOControl(ch) 3555 && !((ch >= 0x9 && ch <= 0xd) 3556 || (ch >= 0x1c && ch <= 0x1f)); 3557 } 3558 return getType(ch) == UCharacterCategory.FORMAT; 3559 } 3560 3561 /** 3562 * Determines if the specified code point is an uppercase character. 3563 * UnicodeData only contains case mappings for code point where they are 3564 * one-to-one mappings; it also omits information about context-sensitive 3565 * case mappings.<br> 3566 * For language specific case conversion behavior, use 3567 * toUpperCase(locale, str). <br> 3568 * For example, the case conversion for dot-less i and dotted I in Turkish, 3569 * or for final sigma in Greek. 3570 * For more information about Unicode case mapping please refer to the 3571 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3572 * Technical report #21</a>.<br> 3573 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 3574 * @param ch code point to determine if it is in uppercase 3575 * @return true if the code point is an uppercase character 3576 */ isUpperCase(int ch)3577 public static boolean isUpperCase(int ch) 3578 { 3579 // if props == 0, it will just fall through and return false 3580 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 3581 } 3582 3583 /** 3584 * The given code point is mapped to its lowercase equivalent; if the code 3585 * point has no lowercase equivalent, the code point itself is returned. 3586 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 3587 * 3588 * <p>This function only returns the simple, single-code point case mapping. 3589 * Full case mappings should be used whenever possible because they produce 3590 * better results by working on whole strings. 
3591 * They take into account the string context and the language and can map 3592 * to a result string with a different length as appropriate. 3593 * Full case mappings are applied by the case mapping functions 3594 * that take String parameters rather than code points (int). 3595 * See also the User Guide chapter on C/POSIX migration: 3596 * http://www.icu-project.org/userguide/posix.html#case_mappings 3597 * 3598 * @param ch code point whose lowercase equivalent is to be retrieved 3599 * @return the lowercase equivalent code point 3600 */ toLowerCase(int ch)3601 public static int toLowerCase(int ch) { 3602 return UCaseProps.INSTANCE.tolower(ch); 3603 } 3604 3605 /** 3606 * Converts argument code point and returns a String object representing 3607 * the code point's value in UTF-16 format. 3608 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 3609 * 3610 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 3611 * 3612 * @param ch code point 3613 * @return string representation of the code point, null if code point is not 3614 * defined in unicode 3615 */ toString(int ch)3616 public static String toString(int ch) 3617 { 3618 if (ch < MIN_VALUE || ch > MAX_VALUE) { 3619 return null; 3620 } 3621 3622 if (ch < SUPPLEMENTARY_MIN_VALUE) { 3623 return String.valueOf((char)ch); 3624 } 3625 3626 return new String(Character.toChars(ch)); 3627 } 3628 3629 /** 3630 * Converts the code point argument to titlecase. 3631 * If no titlecase is available, the uppercase is returned. If no uppercase 3632 * is available, the code point itself is returned. 3633 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 3634 * 3635 * <p>This function only returns the simple, single-code point case mapping. 3636 * Full case mappings should be used whenever possible because they produce 3637 * better results by working on whole strings. 
3638 * They take into account the string context and the language and can map 3639 * to a result string with a different length as appropriate. 3640 * Full case mappings are applied by the case mapping functions 3641 * that take String parameters rather than code points (int). 3642 * See also the User Guide chapter on C/POSIX migration: 3643 * http://www.icu-project.org/userguide/posix.html#case_mappings 3644 * 3645 * @param ch code point whose title case is to be retrieved 3646 * @return titlecase code point 3647 */ toTitleCase(int ch)3648 public static int toTitleCase(int ch) { 3649 return UCaseProps.INSTANCE.totitle(ch); 3650 } 3651 3652 /** 3653 * Converts the character argument to uppercase. 3654 * If no uppercase is available, the character itself is returned. 3655 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 3656 * 3657 * <p>This function only returns the simple, single-code point case mapping. 3658 * Full case mappings should be used whenever possible because they produce 3659 * better results by working on whole strings. 3660 * They take into account the string context and the language and can map 3661 * to a result string with a different length as appropriate. 3662 * Full case mappings are applied by the case mapping functions 3663 * that take String parameters rather than code points (int). 3664 * See also the User Guide chapter on C/POSIX migration: 3665 * http://www.icu-project.org/userguide/posix.html#case_mappings 3666 * 3667 * @param ch code point whose uppercase is to be retrieved 3668 * @return uppercase code point 3669 */ toUpperCase(int ch)3670 public static int toUpperCase(int ch) { 3671 return UCaseProps.INSTANCE.toupper(ch); 3672 } 3673 3674 // extra methods not in java.lang.Character -------------------------- 3675 3676 /** 3677 * <strong>[icu]</strong> Determines if the code point is a supplementary character. 
3678 * A code point is a supplementary character if and only if it is greater 3679 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 3680 * @param ch code point to be determined if it is in the supplementary 3681 * plane 3682 * @return true if code point is a supplementary character 3683 */ isSupplementary(int ch)3684 public static boolean isSupplementary(int ch) 3685 { 3686 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 3687 ch <= UCharacter.MAX_VALUE; 3688 } 3689 3690 /** 3691 * <strong>[icu]</strong> Determines if the code point is in the BMP plane. 3692 * @param ch code point to be determined if it is not a supplementary 3693 * character 3694 * @return true if code point is not a supplementary character 3695 */ isBMP(int ch)3696 public static boolean isBMP(int ch) 3697 { 3698 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 3699 } 3700 3701 /** 3702 * <strong>[icu]</strong> Determines whether the specified code point is a printable character 3703 * according to the Unicode standard. 3704 * @param ch code point to be determined if it is printable 3705 * @return true if the code point is a printable character 3706 */ isPrintable(int ch)3707 public static boolean isPrintable(int ch) 3708 { 3709 int cat = getType(ch); 3710 // if props == 0, it will just fall through and return false 3711 return (cat != UCharacterCategory.UNASSIGNED && 3712 cat != UCharacterCategory.CONTROL && 3713 cat != UCharacterCategory.FORMAT && 3714 cat != UCharacterCategory.PRIVATE_USE && 3715 cat != UCharacterCategory.SURROGATE && 3716 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 3717 } 3718 3719 /** 3720 * <strong>[icu]</strong> Determines whether the specified code point is of base form. 3721 * A code point of base form does not graphically combine with preceding 3722 * characters, and is neither a control nor a format character. 
3723 * @param ch code point to be determined if it is of base form 3724 * @return true if the code point is of base form 3725 */ isBaseForm(int ch)3726 public static boolean isBaseForm(int ch) 3727 { 3728 int cat = getType(ch); 3729 // if props == 0, it will just fall through and return false 3730 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 3731 cat == UCharacterCategory.OTHER_NUMBER || 3732 cat == UCharacterCategory.LETTER_NUMBER || 3733 cat == UCharacterCategory.UPPERCASE_LETTER || 3734 cat == UCharacterCategory.LOWERCASE_LETTER || 3735 cat == UCharacterCategory.TITLECASE_LETTER || 3736 cat == UCharacterCategory.MODIFIER_LETTER || 3737 cat == UCharacterCategory.OTHER_LETTER || 3738 cat == UCharacterCategory.NON_SPACING_MARK || 3739 cat == UCharacterCategory.ENCLOSING_MARK || 3740 cat == UCharacterCategory.COMBINING_SPACING_MARK; 3741 } 3742 3743 /** 3744 * <strong>[icu]</strong> Returns the Bidirection property of a code point. 3745 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 3746 * property.<br> 3747 * Result returned belongs to the interface 3748 * <a href=UCharacterDirection.html>UCharacterDirection</a> 3749 * @param ch the code point to be determined its direction 3750 * @return direction constant from UCharacterDirection. 3751 */ getDirection(int ch)3752 public static int getDirection(int ch) 3753 { 3754 return UBiDiProps.INSTANCE.getClass(ch); 3755 } 3756 3757 /** 3758 * Determines whether the code point has the "mirrored" property. 3759 * This property is set for characters that are commonly used in 3760 * Right-To-Left contexts and need to be displayed with a "mirrored" 3761 * glyph. 
3762 * @param ch code point whose mirror is to be determined 3763 * @return true if the code point has the "mirrored" property 3764 */ isMirrored(int ch)3765 public static boolean isMirrored(int ch) 3766 { 3767 return UBiDiProps.INSTANCE.isMirrored(ch); 3768 } 3769 3770 /** 3771 * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point. 3772 * For code points with the "mirrored" property, implementations sometimes 3773 * need a "poor man's" mapping to another code point such that the default 3774 * glyph may serve as the mirror-image of the default glyph of the 3775 * specified code point.<br> 3776 * This is useful for text conversion to and from codepages with visual 3777 * order, and for displays without glyph selection capabilities. 3778 * @param ch code point whose mirror is to be retrieved 3779 * @return another code point that may serve as a mirror-image substitute, 3780 * or ch itself if there is no such mapping or ch does not have the 3781 * "mirrored" property 3782 */ getMirror(int ch)3783 public static int getMirror(int ch) 3784 { 3785 return UBiDiProps.INSTANCE.getMirror(ch); 3786 } 3787 3788 /** 3789 * <strong>[icu]</strong> Maps the specified character to its paired bracket character. 3790 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 3791 * Otherwise c itself is returned. 
3792 * See http://www.unicode.org/reports/tr9/ 3793 * 3794 * @param c the code point to be mapped 3795 * @return the paired bracket code point, 3796 * or c itself if there is no such mapping 3797 * (Bidi_Paired_Bracket_Type=None) 3798 * 3799 * @see UProperty#BIDI_PAIRED_BRACKET 3800 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3801 * @see #getMirror(int) 3802 */ getBidiPairedBracket(int c)3803 public static int getBidiPairedBracket(int c) { 3804 return UBiDiProps.INSTANCE.getPairedBracket(c); 3805 } 3806 3807 /** 3808 * <strong>[icu]</strong> Returns the combining class of the argument codepoint 3809 * @param ch code point whose combining is to be retrieved 3810 * @return the combining class of the codepoint 3811 */ getCombiningClass(int ch)3812 public static int getCombiningClass(int ch) 3813 { 3814 return Normalizer2.getNFDInstance().getCombiningClass(ch); 3815 } 3816 3817 /** 3818 * <strong>[icu]</strong> A code point is illegal if and only if 3819 * <ul> 3820 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 3821 * <li> A surrogate value, 0xD800 to 0xDFFF 3822 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 3823 * </ul> 3824 * Note: legal does not mean that it is assigned in this version of Unicode. 3825 * @param ch code point to determine if it is a legal code point by itself 3826 * @return true if and only if legal. 3827 */ isLegal(int ch)3828 public static boolean isLegal(int ch) 3829 { 3830 if (ch < MIN_VALUE) { 3831 return false; 3832 } 3833 if (ch < Character.MIN_SURROGATE) { 3834 return true; 3835 } 3836 if (ch <= Character.MAX_SURROGATE) { 3837 return false; 3838 } 3839 if (UCharacterUtility.isNonCharacter(ch)) { 3840 return false; 3841 } 3842 return (ch <= MAX_VALUE); 3843 } 3844 3845 /** 3846 * <strong>[icu]</strong> A string is legal iff all its code points are legal. 
3847 * A code point is illegal if and only if 3848 * <ul> 3849 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 3850 * <li> A surrogate value, 0xD800 to 0xDFFF 3851 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 3852 * </ul> 3853 * Note: legal does not mean that it is assigned in this version of Unicode. 3854 * @param str containing code points to examin 3855 * @return true if and only if legal. 3856 */ isLegal(String str)3857 public static boolean isLegal(String str) 3858 { 3859 int size = str.length(); 3860 int codepoint; 3861 for (int i = 0; i < size; i += Character.charCount(codepoint)) 3862 { 3863 codepoint = str.codePointAt(i); 3864 if (!isLegal(codepoint)) { 3865 return false; 3866 } 3867 } 3868 return true; 3869 } 3870 3871 /** 3872 * <strong>[icu]</strong> Returns the version of Unicode data used. 3873 * @return the unicode version number used 3874 */ getUnicodeVersion()3875 public static VersionInfo getUnicodeVersion() 3876 { 3877 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 3878 } 3879 3880 /** 3881 * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or 3882 * null if the character is unassigned or outside the range 3883 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 3884 * <br> 3885 * Note calling any methods related to code point names, e.g. get*Name*() 3886 * incurs a one-time initialisation cost to construct the name tables. 
3887 * @param ch the code point for which to get the name 3888 * @return most current Unicode name 3889 */ getName(int ch)3890 public static String getName(int ch) 3891 { 3892 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 3893 } 3894 3895 /** 3896 * <strong>[icu]</strong> Returns the names for each of the characters in a string 3897 * @param s string to format 3898 * @param separator string to go between names 3899 * @return string of names 3900 */ getName(String s, String separator)3901 public static String getName(String s, String separator) { 3902 if (s.length() == 1) { // handle common case 3903 return getName(s.charAt(0)); 3904 } 3905 int cp; 3906 StringBuilder sb = new StringBuilder(); 3907 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 3908 cp = s.codePointAt(i); 3909 if (i != 0) sb.append(separator); 3910 sb.append(UCharacter.getName(cp)); 3911 } 3912 return sb.toString(); 3913 } 3914 3915 /** 3916 * <strong>[icu]</strong> Returns null. 3917 * Used to return the Unicode_1_Name property value which was of little practical value. 3918 * @param ch the code point for which to get the name 3919 * @return null 3920 * @deprecated ICU 49 3921 * @hide original deprecated declaration 3922 */ 3923 @Deprecated getName1_0(int ch)3924 public static String getName1_0(int ch) 3925 { 3926 return null; 3927 } 3928 3929 /** 3930 * <strong>[icu]</strong> Returns a name for a valid codepoint. Unlike, getName(int) and 3931 * getName1_0(int), this method will return a name even for codepoints that 3932 * are not assigned a name in UnicodeData.txt. 3933 * 3934 * <p>The names are returned in the following order. 3935 * <ul> 3936 * <li> Most current Unicode name if there is any 3937 * <li> Unicode 1.0 name if there is any 3938 * <li> Extended name in the form of 3939 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 3940 * </ul> 3941 * Note calling any methods related to code point names, e.g. 
get*Name*() 3942 * incurs a one-time initialisation cost to construct the name tables. 3943 * @param ch the code point for which to get the name 3944 * @return a name for the argument codepoint 3945 */ getExtendedName(int ch)3946 public static String getExtendedName(int ch) { 3947 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 3948 } 3949 3950 /** 3951 * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one. 3952 * Returns null if the character is unassigned or outside the range 3953 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 3954 * <br> 3955 * Note calling any methods related to code point names, e.g. get*Name*() 3956 * incurs a one-time initialisation cost to construct the name tables. 3957 * @param ch the code point for which to get the name alias 3958 * @return Unicode name alias, or null 3959 */ getNameAlias(int ch)3960 public static String getNameAlias(int ch) 3961 { 3962 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 3963 } 3964 3965 /** 3966 * <strong>[icu]</strong> Returns null. 3967 * Used to return the ISO 10646 comment for a character. 3968 * The Unicode ISO_Comment property is deprecated and has no values. 3969 * 3970 * @param ch The code point for which to get the ISO comment. 3971 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 3972 * @return null 3973 * @deprecated ICU 49 3974 * @hide original deprecated declaration 3975 */ 3976 @Deprecated getISOComment(int ch)3977 public static String getISOComment(int ch) 3978 { 3979 return null; 3980 } 3981 3982 /** 3983 * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and 3984 * return its code point value. All Unicode names are in uppercase. 3985 * Note calling any methods related to code point names, e.g. get*Name*() 3986 * incurs a one-time initialisation cost to construct the name tables. 
3987 * @param name most current Unicode character name whose code point is to 3988 * be returned 3989 * @return code point or -1 if name is not found 3990 */ getCharFromName(String name)3991 public static int getCharFromName(String name){ 3992 return UCharacterName.INSTANCE.getCharFromName( 3993 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 3994 } 3995 3996 /** 3997 * <strong>[icu]</strong> Returns -1. 3998 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 3999 * its code point value. 4000 * @param name Unicode 1.0 code point name whose code point is to be 4001 * returned 4002 * @return -1 4003 * @deprecated ICU 49 4004 * @see #getName1_0(int) 4005 * @hide original deprecated declaration 4006 */ 4007 @Deprecated getCharFromName1_0(String name)4008 public static int getCharFromName1_0(String name){ 4009 return -1; 4010 } 4011 4012 /** 4013 * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code 4014 * point value. All Unicode names are in uppercase. 4015 * Extended names are all lowercase except for numbers and are contained 4016 * within angle brackets. 4017 * The names are searched in the following order 4018 * <ul> 4019 * <li> Most current Unicode name if there is any 4020 * <li> Unicode 1.0 name if there is any 4021 * <li> Extended name in the form of 4022 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 4023 * </ul> 4024 * Note calling any methods related to code point names, e.g. get*Name*() 4025 * incurs a one-time initialisation cost to construct the name tables. 4026 * @param name codepoint name 4027 * @return code point associated with the name or -1 if the name is not 4028 * found. 
4029 */ getCharFromExtendedName(String name)4030 public static int getCharFromExtendedName(String name){ 4031 return UCharacterName.INSTANCE.getCharFromName( 4032 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 4033 } 4034 4035 /** 4036 * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return 4037 * its code point value. All Unicode names are in uppercase. 4038 * Note calling any methods related to code point names, e.g. get*Name*() 4039 * incurs a one-time initialisation cost to construct the name tables. 4040 * @param name Unicode name alias whose code point is to be returned 4041 * @return code point or -1 if name is not found 4042 */ getCharFromNameAlias(String name)4043 public static int getCharFromNameAlias(String name){ 4044 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 4045 } 4046 4047 /** 4048 * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the 4049 * Unicode database file PropertyAliases.txt. Most properties 4050 * have more than one name. The nameChoice determines which one 4051 * is returned. 4052 * 4053 * In addition, this function maps the property 4054 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 4055 * "General_Category_Mask". These names are not in 4056 * PropertyAliases.txt. 4057 * 4058 * @param property UProperty selector. 4059 * 4060 * @param nameChoice UProperty.NameChoice selector for which name 4061 * to get. All properties have a long name. Most have a short 4062 * name, but some do not. Unicode allows for additional names; if 4063 * present these will be returned by UProperty.NameChoice.LONG + i, 4064 * where i=1, 2,... 4065 * 4066 * @return a name, or null if Unicode explicitly defines no name 4067 * ("n/a") for a given property/nameChoice. If a given nameChoice 4068 * throws an exception, then all larger values of nameChoice will 4069 * throw an exception. 
If null is returned for a given 4070 * nameChoice, then other nameChoice values may return non-null 4071 * results. 4072 * 4073 * @exception IllegalArgumentException thrown if property or 4074 * nameChoice are invalid. 4075 * 4076 * @see UProperty 4077 * @see UProperty.NameChoice 4078 */ getPropertyName(int property, int nameChoice)4079 public static String getPropertyName(int property, 4080 int nameChoice) { 4081 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 4082 } 4083 4084 /** 4085 * <strong>[icu]</strong> Return the UProperty selector for a given property name, as 4086 * specified in the Unicode database file PropertyAliases.txt. 4087 * Short, long, and any other variants are recognized. 4088 * 4089 * In addition, this function maps the synthetic names "gcm" / 4090 * "General_Category_Mask" to the property 4091 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 4092 * PropertyAliases.txt. 4093 * 4094 * @param propertyAlias the property name to be matched. The name 4095 * is compared using "loose matching" as described in 4096 * PropertyAliases.txt. 4097 * 4098 * @return a UProperty enum. 4099 * 4100 * @exception IllegalArgumentException thrown if propertyAlias 4101 * is not recognized. 4102 * 4103 * @see UProperty 4104 */ getPropertyEnum(CharSequence propertyAlias)4105 public static int getPropertyEnum(CharSequence propertyAlias) { 4106 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 4107 if (propEnum == UProperty.UNDEFINED) { 4108 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 4109 } 4110 return propEnum; 4111 } 4112 4113 /** 4114 * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in 4115 * the Unicode database file PropertyValueAliases.txt. Most 4116 * values have more than one name. The nameChoice determines 4117 * which one is returned. 
     *
     * Note: Some of the names in PropertyValueAliases.txt can only be
     * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
     * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
     * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
     * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
     *
     * @param property UProperty selector constant.
     * UProperty.INT_START <= property < UProperty.INT_LIMIT or
     * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or
     * UProperty.MASK_START <= property < UProperty.MASK_LIMIT.
     * If out of range, null is returned.
     *
     * @param value selector for a value for the given property.  In
     * general, valid values range from 0 up to some maximum.  There
     * are a few exceptions: (1.) UProperty.BLOCK values begin at the
     * non-zero value BASIC_LATIN.getID().  (2.)
     * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
     * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
     * are mask values produced by left-shifting 1 by
     * UCharacter.getType().  This allows grouped categories such as
     * [:L:] to be represented.  Mask values are non-contiguous.
     *
     * @param nameChoice UProperty.NameChoice selector for which name
     * to get.  All values have a long name.  Most have a short name,
     * but some do not.  Unicode allows for additional names; if
     * present these will be returned by UProperty.NameChoice.LONG + i,
     * where i=1, 2,...
     *
     * @return a name, or null if Unicode explicitly defines no name
     * ("n/a") for a given property/value/nameChoice.  If a given
     * nameChoice throws an exception, then all larger values of
     * nameChoice will throw an exception.  If null is returned for a
     * given nameChoice, then other nameChoice values may return
     * non-null results.
4153 * 4154 * @exception IllegalArgumentException thrown if property, value, 4155 * or nameChoice are invalid. 4156 * 4157 * @see UProperty 4158 * @see UProperty.NameChoice 4159 */ getPropertyValueName(int property, int value, int nameChoice)4160 public static String getPropertyValueName(int property, 4161 int value, 4162 int nameChoice) 4163 { 4164 if ((property == UProperty.CANONICAL_COMBINING_CLASS 4165 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 4166 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 4167 && value >= UCharacter.getIntPropertyMinValue( 4168 UProperty.CANONICAL_COMBINING_CLASS) 4169 && value <= UCharacter.getIntPropertyMaxValue( 4170 UProperty.CANONICAL_COMBINING_CLASS) 4171 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 4172 // this is hard coded for the valid cc 4173 // because PropertyValueAliases.txt does not contain all of them 4174 try { 4175 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 4176 nameChoice); 4177 } 4178 catch (IllegalArgumentException e) { 4179 return null; 4180 } 4181 } 4182 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 4183 } 4184 4185 /** 4186 * <strong>[icu]</strong> Return the property value integer for a given value name, as 4187 * specified in the Unicode database file PropertyValueAliases.txt. 4188 * Short, long, and any other variants are recognized. 4189 * 4190 * Note: Some of the names in PropertyValueAliases.txt will only be 4191 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 4192 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4193 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4194 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4195 * 4196 * @param property UProperty selector constant. 
4197 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4198 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4199 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4200 * Only these properties can be enumerated. 4201 * 4202 * @param valueAlias the value name to be matched. The name is 4203 * compared using "loose matching" as described in 4204 * PropertyValueAliases.txt. 4205 * 4206 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 4207 * values are mask values produced by left-shifting 1 by 4208 * UCharacter.getType(). This allows grouped categories such as 4209 * [:L:] to be represented. 4210 * 4211 * @see UProperty 4212 * @throws IllegalArgumentException if property is not a valid UProperty 4213 * selector or valueAlias is not a value of this property 4214 */ getPropertyValueEnum(int property, CharSequence valueAlias)4215 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 4216 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 4217 if (propEnum == UProperty.UNDEFINED) { 4218 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 4219 } 4220 return propEnum; 4221 } 4222 4223 /** 4224 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 4225 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 4226 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 4227 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 4228 * @deprecated This API is ICU internal only. 
4229 * @hide original deprecated declaration 4230 * @hide draft / provisional / internal are hidden on Android 4231 */ 4232 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4233 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 4234 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 4235 } 4236 4237 4238 /** 4239 * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units. 4240 * 4241 * @param lead the lead char 4242 * @param trail the trail char 4243 * @return code point if surrogate characters are valid. 4244 * @exception IllegalArgumentException thrown when the code units do 4245 * not form a valid code point 4246 */ getCodePoint(char lead, char trail)4247 public static int getCodePoint(char lead, char trail) 4248 { 4249 if (Character.isSurrogatePair(lead, trail)) { 4250 return Character.toCodePoint(lead, trail); 4251 } 4252 throw new IllegalArgumentException("Illegal surrogate characters"); 4253 } 4254 4255 /** 4256 * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point. 4257 * 4258 * @param char16 the BMP code point 4259 * @return code point if argument is a valid character. 4260 * @exception IllegalArgumentException thrown when char16 is not a valid 4261 * code point 4262 */ getCodePoint(char char16)4263 public static int getCodePoint(char char16) 4264 { 4265 if (UCharacter.isLegal(char16)) { 4266 return char16; 4267 } 4268 throw new IllegalArgumentException("Illegal codepoint"); 4269 } 4270 4271 /** 4272 * Returns the uppercase version of the argument string. 4273 * Casing is dependent on the default locale and context-sensitive. 
4274 * @param str source string to be performed on 4275 * @return uppercase version of the argument string 4276 */ toUpperCase(String str)4277 public static String toUpperCase(String str) 4278 { 4279 return toUpperCase(getDefaultCaseLocale(), str); 4280 } 4281 4282 /** 4283 * Returns the lowercase version of the argument string. 4284 * Casing is dependent on the default locale and context-sensitive 4285 * @param str source string to be performed on 4286 * @return lowercase version of the argument string 4287 */ toLowerCase(String str)4288 public static String toLowerCase(String str) 4289 { 4290 return toLowerCase(getDefaultCaseLocale(), str); 4291 } 4292 4293 /** 4294 * <p>Returns the titlecase version of the argument string. 4295 * <p>Position for titlecasing is determined by the argument break 4296 * iterator, hence the user can customize his break iterator for 4297 * a specialized titlecasing. In this case only the forward iteration 4298 * needs to be implemented. 4299 * If the break iterator passed in is null, the default Unicode algorithm 4300 * will be used to determine the titlecase positions. 4301 * 4302 * <p>Only positions returned by the break iterator will be title cased, 4303 * character in between the positions will all be in lower case. 4304 * <p>Casing is dependent on the default locale and context-sensitive 4305 * @param str source string to be performed on 4306 * @param breakiter break iterator to determine the positions in which 4307 * the character should be title cased. 
4308 * @return lowercase version of the argument string 4309 */ toTitleCase(String str, BreakIterator breakiter)4310 public static String toTitleCase(String str, BreakIterator breakiter) 4311 { 4312 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 4313 } 4314 getDefaultCaseLocale()4315 private static int getDefaultCaseLocale() { 4316 return UCaseProps.getCaseLocale(Locale.getDefault()); 4317 } 4318 getCaseLocale(Locale locale)4319 private static int getCaseLocale(Locale locale) { 4320 if (locale == null) { 4321 locale = Locale.getDefault(); 4322 } 4323 return UCaseProps.getCaseLocale(locale); 4324 } 4325 getCaseLocale(ULocale locale)4326 private static int getCaseLocale(ULocale locale) { 4327 if (locale == null) { 4328 locale = ULocale.getDefault(); 4329 } 4330 return UCaseProps.getCaseLocale(locale); 4331 } 4332 toLowerCase(int caseLocale, String str)4333 private static String toLowerCase(int caseLocale, String str) { 4334 if (str.length() <= 100) { 4335 if (str.isEmpty()) { 4336 return str; 4337 } 4338 // Collect and apply only changes. 4339 // Good if no or few changes. Bad (slow) if many changes. 4340 Edits edits = new Edits(); 4341 StringBuilder replacementChars = CaseMapImpl.toLower( 4342 caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); 4343 return applyEdits(str, replacementChars, edits); 4344 } else { 4345 return CaseMapImpl.toLower(caseLocale, 0, str, 4346 new StringBuilder(str.length()), null).toString(); 4347 } 4348 } 4349 toUpperCase(int caseLocale, String str)4350 private static String toUpperCase(int caseLocale, String str) { 4351 if (str.length() <= 100) { 4352 if (str.isEmpty()) { 4353 return str; 4354 } 4355 // Collect and apply only changes. 4356 // Good if no or few changes. Bad (slow) if many changes. 
4357 Edits edits = new Edits(); 4358 StringBuilder replacementChars = CaseMapImpl.toUpper( 4359 caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); 4360 return applyEdits(str, replacementChars, edits); 4361 } else { 4362 return CaseMapImpl.toUpper(caseLocale, 0, str, 4363 new StringBuilder(str.length()), null).toString(); 4364 } 4365 } 4366 toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str)4367 private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) { 4368 if (str.length() <= 100) { 4369 if (str.isEmpty()) { 4370 return str; 4371 } 4372 // Collect and apply only changes. 4373 // Good if no or few changes. Bad (slow) if many changes. 4374 Edits edits = new Edits(); 4375 StringBuilder replacementChars = CaseMapImpl.toTitle( 4376 caseLocale, options | CaseMapImpl.OMIT_UNCHANGED_TEXT, titleIter, str, 4377 new StringBuilder(), edits); 4378 return applyEdits(str, replacementChars, edits); 4379 } else { 4380 return CaseMapImpl.toTitle(caseLocale, options, titleIter, str, 4381 new StringBuilder(str.length()), null).toString(); 4382 } 4383 } 4384 applyEdits(String str, StringBuilder replacementChars, Edits edits)4385 private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) { 4386 if (!edits.hasChanges()) { 4387 return str; 4388 } 4389 StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta()); 4390 for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { 4391 if (ei.hasChange()) { 4392 int i = ei.replacementIndex(); 4393 result.append(replacementChars, i, i + ei.newLength()); 4394 } else { 4395 int i = ei.sourceIndex(); 4396 result.append(str, i, i + ei.oldLength()); 4397 } 4398 } 4399 return result.toString(); 4400 } 4401 4402 /** 4403 * Returns the uppercase version of the argument string. 4404 * Casing is dependent on the argument locale and context-sensitive. 
4405 * @param locale which string is to be converted in 4406 * @param str source string to be performed on 4407 * @return uppercase version of the argument string 4408 */ toUpperCase(Locale locale, String str)4409 public static String toUpperCase(Locale locale, String str) 4410 { 4411 return toUpperCase(getCaseLocale(locale), str); 4412 } 4413 4414 /** 4415 * Returns the uppercase version of the argument string. 4416 * Casing is dependent on the argument locale and context-sensitive. 4417 * @param locale which string is to be converted in 4418 * @param str source string to be performed on 4419 * @return uppercase version of the argument string 4420 */ toUpperCase(ULocale locale, String str)4421 public static String toUpperCase(ULocale locale, String str) { 4422 return toUpperCase(getCaseLocale(locale), str); 4423 } 4424 4425 /** 4426 * Returns the lowercase version of the argument string. 4427 * Casing is dependent on the argument locale and context-sensitive 4428 * @param locale which string is to be converted in 4429 * @param str source string to be performed on 4430 * @return lowercase version of the argument string 4431 */ toLowerCase(Locale locale, String str)4432 public static String toLowerCase(Locale locale, String str) 4433 { 4434 return toLowerCase(getCaseLocale(locale), str); 4435 } 4436 4437 /** 4438 * Returns the lowercase version of the argument string. 4439 * Casing is dependent on the argument locale and context-sensitive 4440 * @param locale which string is to be converted in 4441 * @param str source string to be performed on 4442 * @return lowercase version of the argument string 4443 */ toLowerCase(ULocale locale, String str)4444 public static String toLowerCase(ULocale locale, String str) { 4445 return toLowerCase(getCaseLocale(locale), str); 4446 } 4447 4448 /** 4449 * <p>Returns the titlecase version of the argument string. 
4450 * <p>Position for titlecasing is determined by the argument break 4451 * iterator, hence the user can customize his break iterator for 4452 * a specialized titlecasing. In this case only the forward iteration 4453 * needs to be implemented. 4454 * If the break iterator passed in is null, the default Unicode algorithm 4455 * will be used to determine the titlecase positions. 4456 * 4457 * <p>Only positions returned by the break iterator will be title cased, 4458 * character in between the positions will all be in lower case. 4459 * <p>Casing is dependent on the argument locale and context-sensitive 4460 * @param locale which string is to be converted in 4461 * @param str source string to be performed on 4462 * @param breakiter break iterator to determine the positions in which 4463 * the character should be title cased. 4464 * @return lowercase version of the argument string 4465 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)4466 public static String toTitleCase(Locale locale, String str, 4467 BreakIterator breakiter) 4468 { 4469 return toTitleCase(locale, str, breakiter, 0); 4470 } 4471 4472 /** 4473 * <p>Returns the titlecase version of the argument string. 4474 * <p>Position for titlecasing is determined by the argument break 4475 * iterator, hence the user can customize his break iterator for 4476 * a specialized titlecasing. In this case only the forward iteration 4477 * needs to be implemented. 4478 * If the break iterator passed in is null, the default Unicode algorithm 4479 * will be used to determine the titlecase positions. 4480 * 4481 * <p>Only positions returned by the break iterator will be title cased, 4482 * character in between the positions will all be in lower case. 
4483 * <p>Casing is dependent on the argument locale and context-sensitive 4484 * @param locale which string is to be converted in 4485 * @param str source string to be performed on 4486 * @param titleIter break iterator to determine the positions in which 4487 * the character should be title cased. 4488 * @return lowercase version of the argument string 4489 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)4490 public static String toTitleCase(ULocale locale, String str, 4491 BreakIterator titleIter) { 4492 return toTitleCase(locale, str, titleIter, 0); 4493 } 4494 4495 /** 4496 * <p>Returns the titlecase version of the argument string. 4497 * <p>Position for titlecasing is determined by the argument break 4498 * iterator, hence the user can customize his break iterator for 4499 * a specialized titlecasing. In this case only the forward iteration 4500 * needs to be implemented. 4501 * If the break iterator passed in is null, the default Unicode algorithm 4502 * will be used to determine the titlecase positions. 4503 * 4504 * <p>Only positions returned by the break iterator will be title cased, 4505 * character in between the positions will all be in lower case. 4506 * <p>Casing is dependent on the argument locale and context-sensitive 4507 * @param locale which string is to be converted in 4508 * @param str source string to be performed on 4509 * @param titleIter break iterator to determine the positions in which 4510 * the character should be title cased. 
4511 * @param options bit set to modify the titlecasing operation 4512 * @return lowercase version of the argument string 4513 * @see #TITLECASE_NO_LOWERCASE 4514 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 4515 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)4516 public static String toTitleCase(ULocale locale, String str, 4517 BreakIterator titleIter, int options) { 4518 if(titleIter == null) { 4519 if (locale == null) { 4520 locale = ULocale.getDefault(); 4521 } 4522 titleIter = BreakIterator.getWordInstance(locale); 4523 } 4524 titleIter.setText(str); 4525 return toTitleCase(getCaseLocale(locale), options, titleIter, str); 4526 } 4527 4528 4529 private static final int BREAK_MASK = 4530 (1<<UCharacterCategory.DECIMAL_DIGIT_NUMBER) 4531 | (1<<UCharacterCategory.OTHER_LETTER) 4532 | (1<<UCharacterCategory.MODIFIER_LETTER); 4533 4534 /** 4535 * Return a string with just the first word titlecased, for menus and UI, etc. This does not affect most of the string, 4536 * and sometimes has no effect at all; the original string is returned whenever casing 4537 * would not be appropriate for the first word (such as for CJK characters or initial numbers). 4538 * Initial non-letters are skipped in order to find the character to change. 4539 * Characters past the first affected are left untouched: see also TITLECASE_NO_LOWERCASE. 4540 * <p>Examples: 4541 * <table border='1'><tr><th>Source</th><th>Result</th><th>Locale</th></tr> 4542 * <tr><td>anglo-American locale</td><td>Anglo-American locale</td></tr> 4543 * <tr><td>“contact us”</td><td>“Contact us”</td></tr> 4544 * <tr><td>49ers win!</td><td>49ers win!</td></tr> 4545 * <tr><td>丰(abc)</td><td>丰(abc)</td></tr> 4546 * <tr><td>«ijs»</td><td>«Ijs»</td></tr> 4547 * <tr><td>«ijs»</td><td>«IJs»</td><td>nl-BE</td></tr> 4548 * <tr><td>«ijs»</td><td>«İjs»</td><td>tr-DE</td></tr> 4549 * </table> 4550 * @param locale the locale for accessing exceptional behavior (eg for tr). 
4551 * @param str the source string to change 4552 * @return the modified string, or the original if no modifications were necessary. 4553 * @deprecated ICU internal only 4554 * @hide original deprecated declaration 4555 * @hide draft / provisional / internal are hidden on Android 4556 */ 4557 @Deprecated toTitleFirst(ULocale locale, String str)4558 public static String toTitleFirst(ULocale locale, String str) { 4559 int c = 0; 4560 for (int i = 0; i < str.length(); i += UCharacter.charCount(c)) { 4561 c = UCharacter.codePointAt(str, i); 4562 int propertyMask = UCharacter.getIntPropertyValue(c, UProperty.GENERAL_CATEGORY_MASK); 4563 if ((propertyMask & BREAK_MASK) != 0) { // handle "49ers", initial CJK 4564 break; 4565 } 4566 if (UCaseProps.INSTANCE.getType(c) == UCaseProps.NONE) { 4567 continue; 4568 } 4569 4570 // we now have the first cased character 4571 // What we really want is something like: 4572 // String titled = UCharacter.toTitleCase(locale, str, i, outputCharsTaken); 4573 // That is, just give us the titlecased string, for the locale, at i and following, 4574 // and tell us how many characters are replaced. 4575 // The following won't work completely: it needs some more substantial changes to UCaseProps 4576 4577 String substring = str.substring(i, i+UCharacter.charCount(c)); 4578 String titled = UCharacter.toTitleCase(locale, substring, BreakIterator.getSentenceInstance(locale), 0); 4579 4580 // skip if no change 4581 if (titled.codePointAt(0) == c) { 4582 // Using 0 is safe, since any change in titling will not have first initial character 4583 break; 4584 } 4585 StringBuilder result = new StringBuilder(str.length()).append(str, 0, i); 4586 int startOfSuffix; 4587 4588 // handle dutch, but check first for 'i', since that's faster. Should be built into UCaseProps. 
4589 4590 if (c == 'i' && locale.getLanguage().equals("nl") && i < str.length() && str.charAt(i+1) == 'j') { 4591 result.append("IJ"); 4592 startOfSuffix = 2; 4593 } else { 4594 result.append(titled); 4595 startOfSuffix = i + UCharacter.charCount(c); 4596 } 4597 4598 // add the remainder, and return 4599 return result.append(str, startOfSuffix, str.length()).toString(); 4600 } 4601 return str; // no change 4602 } 4603 4604 /** 4605 * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string. 4606 * <p>Position for titlecasing is determined by the argument break 4607 * iterator, hence the user can customize his break iterator for 4608 * a specialized titlecasing. In this case only the forward iteration 4609 * needs to be implemented. 4610 * If the break iterator passed in is null, the default Unicode algorithm 4611 * will be used to determine the titlecase positions. 4612 * 4613 * <p>Only positions returned by the break iterator will be title cased, 4614 * character in between the positions will all be in lower case. 4615 * <p>Casing is dependent on the argument locale and context-sensitive 4616 * @param locale which string is to be converted in 4617 * @param str source string to be performed on 4618 * @param titleIter break iterator to determine the positions in which 4619 * the character should be title cased. 
4620 * @param options bit set to modify the titlecasing operation 4621 * @return lowercase version of the argument string 4622 * @see #TITLECASE_NO_LOWERCASE 4623 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 4624 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)4625 public static String toTitleCase(Locale locale, String str, 4626 BreakIterator titleIter, 4627 int options) { 4628 if(titleIter == null) { 4629 titleIter = BreakIterator.getWordInstance(locale); 4630 } 4631 titleIter.setText(str); 4632 return toTitleCase(getCaseLocale(locale), options, titleIter, str); 4633 } 4634 4635 /** 4636 * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according 4637 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 4638 * folding equivalent, the character itself is returned. 4639 * 4640 * <p>This function only returns the simple, single-code point case mapping. 4641 * Full case mappings should be used whenever possible because they produce 4642 * better results by working on whole strings. 4643 * They can map to a result string with a different length as appropriate. 4644 * Full case mappings are applied by the case mapping functions 4645 * that take String parameters rather than code points (int). 4646 * See also the User Guide chapter on C/POSIX migration: 4647 * http://www.icu-project.org/userguide/posix.html#case_mappings 4648 * 4649 * @param ch the character to be converted 4650 * @param defaultmapping Indicates whether the default mappings defined in 4651 * CaseFolding.txt are to be used, otherwise the 4652 * mappings for dotted I and dotless i marked with 4653 * 'T' in CaseFolding.txt are included. 4654 * @return the case folding equivalent of the character, if 4655 * any; otherwise the character itself. 
4656 * @see #foldCase(String, boolean) 4657 */ foldCase(int ch, boolean defaultmapping)4658 public static int foldCase(int ch, boolean defaultmapping) { 4659 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 4660 } 4661 4662 /** 4663 * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to 4664 * UnicodeData.txt and CaseFolding.txt; if any character has no case 4665 * folding equivalent, the character itself is returned. 4666 * "Full", multiple-code point case folding mappings are returned here. 4667 * For "simple" single-code point mappings use the API 4668 * foldCase(int ch, boolean defaultmapping). 4669 * @param str the String to be converted 4670 * @param defaultmapping Indicates whether the default mappings defined in 4671 * CaseFolding.txt are to be used, otherwise the 4672 * mappings for dotted I and dotless i marked with 4673 * 'T' in CaseFolding.txt are included. 4674 * @return the case folding equivalent of the character, if 4675 * any; otherwise the character itself. 4676 * @see #foldCase(int, boolean) 4677 */ foldCase(String str, boolean defaultmapping)4678 public static String foldCase(String str, boolean defaultmapping) { 4679 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 4680 } 4681 4682 /** 4683 * <strong>[icu]</strong> Option value for case folding: use default mappings defined in 4684 * CaseFolding.txt. 4685 */ 4686 public static final int FOLD_CASE_DEFAULT = 0x0000; 4687 /** 4688 * <strong>[icu]</strong> Option value for case folding: 4689 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 4690 * and dotless i appropriately for Turkic languages (tr, az). 4691 * 4692 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 4693 * are to be included for default mappings and 4694 * excluded for the Turkic-specific mappings. 
4695 * 4696 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 4697 * are to be excluded for default mappings and 4698 * included for the Turkic-specific mappings. 4699 */ 4700 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 4701 4702 /** 4703 * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according 4704 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 4705 * folding equivalent, the character itself is returned. 4706 * 4707 * <p>This function only returns the simple, single-code point case mapping. 4708 * Full case mappings should be used whenever possible because they produce 4709 * better results by working on whole strings. 4710 * They can map to a result string with a different length as appropriate. 4711 * Full case mappings are applied by the case mapping functions 4712 * that take String parameters rather than code points (int). 4713 * See also the User Guide chapter on C/POSIX migration: 4714 * http://www.icu-project.org/userguide/posix.html#case_mappings 4715 * 4716 * @param ch the character to be converted 4717 * @param options A bit set for special processing. Currently the recognised options 4718 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 4719 * @return the case folding equivalent of the character, if any; otherwise the 4720 * character itself. 4721 * @see #foldCase(String, boolean) 4722 */ foldCase(int ch, int options)4723 public static int foldCase(int ch, int options) { 4724 return UCaseProps.INSTANCE.fold(ch, options); 4725 } 4726 4727 /** 4728 * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to 4729 * UnicodeData.txt and CaseFolding.txt; if any character has no case 4730 * folding equivalent, the character itself is returned. 4731 * "Full", multiple-code point case folding mappings are returned here. 
4732 * For "simple" single-code point mappings use the API 4733 * foldCase(int ch, boolean defaultmapping). 4734 * @param str the String to be converted 4735 * @param options A bit set for special processing. Currently the recognised options 4736 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 4737 * @return the case folding equivalent of the character, if any; otherwise the 4738 * character itself. 4739 * @see #foldCase(int, boolean) 4740 */ foldCase(String str, int options)4741 public static final String foldCase(String str, int options) { 4742 if (str.length() <= 100) { 4743 if (str.isEmpty()) { 4744 return str; 4745 } 4746 // Collect and apply only changes. 4747 // Good if no or few changes. Bad (slow) if many changes. 4748 Edits edits = new Edits(); 4749 StringBuilder replacementChars = CaseMapImpl.fold( 4750 options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); 4751 return applyEdits(str, replacementChars, edits); 4752 } else { 4753 return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString(); 4754 } 4755 } 4756 4757 /** 4758 * <strong>[icu]</strong> Returns the numeric value of a Han character. 4759 * 4760 * <p>This returns the value of Han 'numeric' code points, 4761 * including those for zero, ten, hundred, thousand, ten thousand, 4762 * and hundred million. 4763 * This includes both the standard and 'checkwriting' 4764 * characters, the 'big circle' zero character, and the standard 4765 * zero character. 4766 * 4767 * <p>Note: The Unicode Standard has numeric values for more 4768 * Han characters recognized by this method 4769 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 4770 * and a {@link android.icu.text.NumberFormat} can be used with 4771 * a Chinese {@link android.icu.text.NumberingSystem}. 4772 * 4773 * @param ch code point to query 4774 * @return value if it is a Han 'numeric character,' otherwise return -1. 
4775 */ getHanNumericValue(int ch)4776 public static int getHanNumericValue(int ch) 4777 { 4778 switch(ch) 4779 { 4780 case IDEOGRAPHIC_NUMBER_ZERO_ : 4781 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 4782 return 0; // Han Zero 4783 case CJK_IDEOGRAPH_FIRST_ : 4784 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 4785 return 1; // Han One 4786 case CJK_IDEOGRAPH_SECOND_ : 4787 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 4788 return 2; // Han Two 4789 case CJK_IDEOGRAPH_THIRD_ : 4790 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 4791 return 3; // Han Three 4792 case CJK_IDEOGRAPH_FOURTH_ : 4793 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 4794 return 4; // Han Four 4795 case CJK_IDEOGRAPH_FIFTH_ : 4796 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 4797 return 5; // Han Five 4798 case CJK_IDEOGRAPH_SIXTH_ : 4799 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 4800 return 6; // Han Six 4801 case CJK_IDEOGRAPH_SEVENTH_ : 4802 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 4803 return 7; // Han Seven 4804 case CJK_IDEOGRAPH_EIGHTH_ : 4805 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 4806 return 8; // Han Eight 4807 case CJK_IDEOGRAPH_NINETH_ : 4808 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 4809 return 9; // Han Nine 4810 case CJK_IDEOGRAPH_TEN_ : 4811 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 4812 return 10; 4813 case CJK_IDEOGRAPH_HUNDRED_ : 4814 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 4815 return 100; 4816 case CJK_IDEOGRAPH_THOUSAND_ : 4817 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 4818 return 1000; 4819 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 4820 return 10000; 4821 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 4822 return 100000000; 4823 } 4824 return -1; // no value 4825 } 4826 4827 /** 4828 * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints. 
4829 * <p>Example of use:<br> 4830 * <pre> 4831 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 4832 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 4833 * while (iterator.next(element)) { 4834 * System.out.println("Codepoint \\u" + 4835 * Integer.toHexString(element.start) + 4836 * " to codepoint \\u" + 4837 * Integer.toHexString(element.limit - 1) + 4838 * " has the character type " + 4839 * element.value); 4840 * } 4841 * </pre> 4842 * @return an iterator 4843 */ getTypeIterator()4844 public static RangeValueIterator getTypeIterator() 4845 { 4846 return new UCharacterTypeIterator(); 4847 } 4848 4849 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()4850 UCharacterTypeIterator() { 4851 reset(); 4852 } 4853 4854 // implements RangeValueIterator 4855 @Override next(Element element)4856 public boolean next(Element element) { 4857 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 4858 element.start=range.startCodePoint; 4859 element.limit=range.endCodePoint+1; 4860 element.value=range.value; 4861 return true; 4862 } else { 4863 return false; 4864 } 4865 } 4866 4867 // implements RangeValueIterator 4868 @Override reset()4869 public void reset() { 4870 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 4871 } 4872 4873 private Iterator<Trie2.Range> trieIterator; 4874 private Trie2.Range range; 4875 4876 private static final class MaskType implements Trie2.ValueMapper { 4877 // Extracts the general category ("character type") from the trie value. 4878 @Override map(int value)4879 public int map(int value) { 4880 return value & UCharacterProperty.TYPE_MASK; 4881 } 4882 } 4883 private static final MaskType MASK_TYPE=new MaskType(); 4884 } 4885 4886 /** 4887 * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints. 
4888 * <p>This API only gets the iterator for the modern, most up-to-date 4889 * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or 4890 * for extended names use getExtendedNameIterator(). 4891 * <p>Example of use:<br> 4892 * <pre> 4893 * ValueIterator iterator = UCharacter.getNameIterator(); 4894 * ValueIterator.Element element = new ValueIterator.Element(); 4895 * while (iterator.next(element)) { 4896 * System.out.println("Codepoint \\u" + 4897 * Integer.toHexString(element.codepoint) + 4898 * " has the name " + (String)element.value); 4899 * } 4900 * </pre> 4901 * <p>The maximal range which the name iterator iterates is from 4902 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 4903 * @return an iterator 4904 */ getNameIterator()4905 public static ValueIterator getNameIterator(){ 4906 return new UCharacterNameIterator(UCharacterName.INSTANCE, 4907 UCharacterNameChoice.UNICODE_CHAR_NAME); 4908 } 4909 4910 /** 4911 * <strong>[icu]</strong> Returns an empty iterator. 4912 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 4913 * @return an empty iterator 4914 * @deprecated ICU 49 4915 * @see #getName1_0(int) 4916 * @hide original deprecated declaration 4917 */ 4918 @Deprecated getName1_0Iterator()4919 public static ValueIterator getName1_0Iterator(){ 4920 return new DummyValueIterator(); 4921 } 4922 4923 private static final class DummyValueIterator implements ValueIterator { 4924 @Override next(Element element)4925 public boolean next(Element element) { return false; } 4926 @Override reset()4927 public void reset() {} 4928 @Override setRange(int start, int limit)4929 public void setRange(int start, int limit) {} 4930 } 4931 4932 /** 4933 * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints. 4934 * <p>This API only gets the iterator for the extended names. 
4935 * For modern, most up-to-date Unicode names use getNameIterator() or 4936 * for older 1.0 Unicode names use get1_0NameIterator(). 4937 * <p>Example of use:<br> 4938 * <pre> 4939 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 4940 * ValueIterator.Element element = new ValueIterator.Element(); 4941 * while (iterator.next(element)) { 4942 * System.out.println("Codepoint \\u" + 4943 * Integer.toHexString(element.codepoint) + 4944 * " has the name " + (String)element.value); 4945 * } 4946 * </pre> 4947 * <p>The maximal range which the name iterator iterates is from 4948 * @return an iterator 4949 */ getExtendedNameIterator()4950 public static ValueIterator getExtendedNameIterator(){ 4951 return new UCharacterNameIterator(UCharacterName.INSTANCE, 4952 UCharacterNameChoice.EXTENDED_CHAR_NAME); 4953 } 4954 4955 /** 4956 * <strong>[icu]</strong> Returns the "age" of the code point. 4957 * <p>The "age" is the Unicode version when the code point was first 4958 * designated (as a non-character or for Private Use) or assigned a 4959 * character. 4960 * <p>This can be useful to avoid emitting code points to receiving 4961 * processes that do not accept newer characters. 4962 * <p>The data is from the UCD file DerivedAge.txt. 4963 * @param ch The code point. 4964 * @return the Unicode version number 4965 */ getAge(int ch)4966 public static VersionInfo getAge(int ch) 4967 { 4968 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4969 throw new IllegalArgumentException("Codepoint out of bounds"); 4970 } 4971 return UCharacterProperty.INSTANCE.getAge(ch); 4972 } 4973 4974 /** 4975 * <strong>[icu]</strong> <p>Check a binary Unicode property for a code point. 4976 * <p>Unicode, especially in version 3.2, defines many more properties 4977 * than the original set in UnicodeData.txt. 4978 * <p>This API is intended to reflect Unicode properties as defined in 4979 * the Unicode Character Database (UCD) and Unicode Technical Reports 4980 * (UTR). 
4981 * <p>For details about the properties see 4982 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 4983 * <p>For names of Unicode properties see the UCD file 4984 * PropertyAliases.txt. 4985 * <p>This API does not check the validity of the codepoint. 4986 * <p>Important: If ICU is built with UCD files from Unicode versions 4987 * below 3.2, then properties marked with "new" are not or 4988 * not fully available. 4989 * @param ch code point to test. 4990 * @param property selector constant from android.icu.lang.UProperty, 4991 * identifies which binary property to check. 4992 * @return true or false according to the binary Unicode property value 4993 * for ch. Also false if property is out of bounds or if the 4994 * Unicode version does not have data for the property at all, or 4995 * not for this code point. 4996 * @see android.icu.lang.UProperty 4997 */ hasBinaryProperty(int ch, int property)4998 public static boolean hasBinaryProperty(int ch, int property) 4999 { 5000 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5001 } 5002 5003 /** 5004 * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property. 5005 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 5006 * <p>Different from UCharacter.isLetter(ch)! 5007 * @param ch codepoint to be tested 5008 */ isUAlphabetic(int ch)5009 public static boolean isUAlphabetic(int ch) 5010 { 5011 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5012 } 5013 5014 /** 5015 * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property. 5016 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 5017 * <p>This is different from UCharacter.isLowerCase(ch)! 
5018 * @param ch codepoint to be tested 5019 */ isULowercase(int ch)5020 public static boolean isULowercase(int ch) 5021 { 5022 return hasBinaryProperty(ch, UProperty.LOWERCASE); 5023 } 5024 5025 /** 5026 * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property. 5027 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 5028 * <p>This is different from UCharacter.isUpperCase(ch)! 5029 * @param ch codepoint to be tested 5030 */ isUUppercase(int ch)5031 public static boolean isUUppercase(int ch) 5032 { 5033 return hasBinaryProperty(ch, UProperty.UPPERCASE); 5034 } 5035 5036 /** 5037 * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property. 5038 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 5039 * <p>This is different from both UCharacter.isSpace(ch) and 5040 * UCharacter.isWhitespace(ch)! 5041 * @param ch codepoint to be tested 5042 */ isUWhiteSpace(int ch)5043 public static boolean isUWhiteSpace(int ch) 5044 { 5045 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 5046 } 5047 5048 /** 5049 * <strong>[icu]</strong> <p>Returns the property value for an Unicode property type of a code point. 5050 * Also returns binary and mask property values. 5051 * <p>Unicode, especially in version 3.2, defines many more properties than 5052 * the original set in UnicodeData.txt. 5053 * <p>The properties APIs are intended to reflect Unicode properties as 5054 * defined in the Unicode Character Database (UCD) and Unicode Technical 5055 * Reports (UTR). For details about the properties see 5056 * http://www.unicode.org/. 5057 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 5058 * 5059 * <pre> 5060 * Sample usage: 5061 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 5062 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 5063 * boolean b = (ideo == 1) ? true : false; 5064 * </pre> 5065 * @param ch code point to test. 
5066 * @param type UProperty selector constant, identifies which binary 5067 * property to check. Must be 5068 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5069 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5070 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 5071 * @return numeric value that is directly the property value or, 5072 * for enumerated properties, corresponds to the numeric value of 5073 * the enumerated constant of the respective property value 5074 * enumeration type (cast to enum type if necessary). 5075 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5076 * Returns a bit-mask for mask properties. 5077 * Returns 0 if 'type' is out of bounds or if the Unicode version 5078 * does not have data for the property at all, or not for this code 5079 * point. 5080 * @see UProperty 5081 * @see #hasBinaryProperty 5082 * @see #getIntPropertyMinValue 5083 * @see #getIntPropertyMaxValue 5084 * @see #getUnicodeVersion 5085 */ getIntPropertyValue(int ch, int type)5086 public static int getIntPropertyValue(int ch, int type) 5087 { 5088 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5089 } 5090 /** 5091 * <strong>[icu]</strong> Returns a string version of the property value. 5092 * @param propertyEnum The property enum value. 5093 * @param codepoint The codepoint value. 5094 * @param nameChoice The choice of the name. 5095 * @return value as string 5096 * @deprecated This API is ICU internal only. 
5097 * @hide original deprecated declaration 5098 * @hide draft / provisional / internal are hidden on Android 5099 */ 5100 @Deprecated 5101 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5102 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5103 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5104 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5105 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5106 nameChoice); 5107 } 5108 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5109 return String.valueOf(getUnicodeNumericValue(codepoint)); 5110 } 5111 // otherwise must be string property 5112 switch (propertyEnum) { 5113 case UProperty.AGE: return getAge(codepoint).toString(); 5114 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5115 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 5116 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 5117 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5118 case UProperty.NAME: return getName(codepoint); 5119 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 5120 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5121 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5122 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5123 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5124 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5125 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5126 } 5127 throw new IllegalArgumentException("Illegal Property Enum"); 5128 } 5129 ///CLOVER:ON 5130 5131 /** 5132 * <strong>[icu]</strong> Returns the minimum value for an integer/binary 
Unicode property type. 5133 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5134 * to allocate arrays of android.icu.text.UnicodeSet or similar. 5135 * @param type UProperty selector constant, identifies which binary 5136 * property to check. Must be 5137 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5138 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5139 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5140 * for a Unicode property. 0 if the property 5141 * selector 'type' is out of range. 5142 * @see UProperty 5143 * @see #hasBinaryProperty 5144 * @see #getUnicodeVersion 5145 * @see #getIntPropertyMaxValue 5146 * @see #getIntPropertyValue 5147 */ getIntPropertyMinValue(int type)5148 public static int getIntPropertyMinValue(int type){ 5149 5150 return 0; // undefined; and: all other properties have a minimum value of 0 5151 } 5152 5153 5154 /** 5155 * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property. 5156 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5157 * to allocate arrays of android.icu.text.UnicodeSet or similar. 5158 * Examples for min/max values (for Unicode 3.2): 5159 * <ul> 5160 * <li> UProperty.BIDI_CLASS: 0/18 5161 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5162 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5163 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5164 * </ul> 5165 * For undefined UProperty constant values, min/max values will be 0/-1. 5166 * @param type UProperty selector constant, identifies which binary 5167 * property to check. Must be 5168 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5169 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5170 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 5171 * property. <= 0 if the property selector 'type' is out of range. 
5172 * @see UProperty 5173 * @see #hasBinaryProperty 5174 * @see #getUnicodeVersion 5175 * @see #getIntPropertyMaxValue 5176 * @see #getIntPropertyValue 5177 */ getIntPropertyMaxValue(int type)5178 public static int getIntPropertyMaxValue(int type) 5179 { 5180 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 5181 } 5182 5183 /** 5184 * Provide the java.lang.Character forDigit API, for convenience. 5185 */ forDigit(int digit, int radix)5186 public static char forDigit(int digit, int radix) { 5187 return java.lang.Character.forDigit(digit, radix); 5188 } 5189 5190 // JDK 1.5 API coverage 5191 5192 /** 5193 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 5194 */ 5195 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 5196 5197 /** 5198 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 5199 */ 5200 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 5201 5202 /** 5203 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 5204 */ 5205 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 5206 5207 /** 5208 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 5209 */ 5210 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 5211 5212 /** 5213 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 5214 */ 5215 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 5216 5217 /** 5218 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 5219 */ 5220 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 5221 5222 /** 5223 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 5224 */ 5225 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 5226 5227 /** 5228 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 
5229 */ 5230 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 5231 5232 /** 5233 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 5234 */ 5235 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 5236 5237 /** 5238 * Equivalent to {@link Character#isValidCodePoint}. 5239 * 5240 * @param cp the code point to check 5241 * @return true if cp is a valid code point 5242 */ isValidCodePoint(int cp)5243 public static final boolean isValidCodePoint(int cp) { 5244 return cp >= 0 && cp <= MAX_CODE_POINT; 5245 } 5246 5247 /** 5248 * Same as {@link Character#isSupplementaryCodePoint}. 5249 * 5250 * @param cp the code point to check 5251 * @return true if cp is a supplementary code point 5252 */ isSupplementaryCodePoint(int cp)5253 public static final boolean isSupplementaryCodePoint(int cp) { 5254 return Character.isSupplementaryCodePoint(cp); 5255 } 5256 5257 /** 5258 * Same as {@link Character#isHighSurrogate}. 5259 * 5260 * @param ch the char to check 5261 * @return true if ch is a high (lead) surrogate 5262 */ isHighSurrogate(char ch)5263 public static boolean isHighSurrogate(char ch) { 5264 return Character.isHighSurrogate(ch); 5265 } 5266 5267 /** 5268 * Same as {@link Character#isLowSurrogate}. 5269 * 5270 * @param ch the char to check 5271 * @return true if ch is a low (trail) surrogate 5272 */ isLowSurrogate(char ch)5273 public static boolean isLowSurrogate(char ch) { 5274 return Character.isLowSurrogate(ch); 5275 } 5276 5277 /** 5278 * Same as {@link Character#isSurrogatePair}. 5279 * 5280 * @param high the high (lead) char 5281 * @param low the low (trail) char 5282 * @return true if high, low form a surrogate pair 5283 */ isSurrogatePair(char high, char low)5284 public static final boolean isSurrogatePair(char high, char low) { 5285 return Character.isSurrogatePair(high, low); 5286 } 5287 5288 /** 5289 * Same as {@link Character#charCount}. 5290 * Returns the number of chars needed to represent the code point (1 or 2). 
5291 * This does not check the code point for validity. 5292 * 5293 * @param cp the code point to check 5294 * @return the number of chars needed to represent the code point 5295 */ charCount(int cp)5296 public static int charCount(int cp) { 5297 return Character.charCount(cp); 5298 } 5299 5300 /** 5301 * Same as {@link Character#toCodePoint}. 5302 * Returns the code point represented by the two surrogate code units. 5303 * This does not check the surrogate pair for validity. 5304 * 5305 * @param high the high (lead) surrogate 5306 * @param low the low (trail) surrogate 5307 * @return the code point formed by the surrogate pair 5308 */ toCodePoint(char high, char low)5309 public static final int toCodePoint(char high, char low) { 5310 return Character.toCodePoint(high, low); 5311 } 5312 5313 /** 5314 * Same as {@link Character#codePointAt(CharSequence, int)}. 5315 * Returns the code point at index. 5316 * This examines only the characters at index and index+1. 5317 * 5318 * @param seq the characters to check 5319 * @param index the index of the first or only char forming the code point 5320 * @return the code point at the index 5321 */ codePointAt(CharSequence seq, int index)5322 public static final int codePointAt(CharSequence seq, int index) { 5323 char c1 = seq.charAt(index++); 5324 if (isHighSurrogate(c1)) { 5325 if (index < seq.length()) { 5326 char c2 = seq.charAt(index); 5327 if (isLowSurrogate(c2)) { 5328 return toCodePoint(c1, c2); 5329 } 5330 } 5331 } 5332 return c1; 5333 } 5334 5335 /** 5336 * Same as {@link Character#codePointAt(char[], int)}. 5337 * Returns the code point at index. 5338 * This examines only the characters at index and index+1. 
5339 * 5340 * @param text the characters to check 5341 * @param index the index of the first or only char forming the code point 5342 * @return the code point at the index 5343 */ codePointAt(char[] text, int index)5344 public static final int codePointAt(char[] text, int index) { 5345 char c1 = text[index++]; 5346 if (isHighSurrogate(c1)) { 5347 if (index < text.length) { 5348 char c2 = text[index]; 5349 if (isLowSurrogate(c2)) { 5350 return toCodePoint(c1, c2); 5351 } 5352 } 5353 } 5354 return c1; 5355 } 5356 5357 /** 5358 * Same as {@link Character#codePointAt(char[], int, int)}. 5359 * Returns the code point at index. 5360 * This examines only the characters at index and index+1. 5361 * 5362 * @param text the characters to check 5363 * @param index the index of the first or only char forming the code point 5364 * @param limit the limit of the valid text 5365 * @return the code point at the index 5366 */ codePointAt(char[] text, int index, int limit)5367 public static final int codePointAt(char[] text, int index, int limit) { 5368 if (index >= limit || limit > text.length) { 5369 throw new IndexOutOfBoundsException(); 5370 } 5371 char c1 = text[index++]; 5372 if (isHighSurrogate(c1)) { 5373 if (index < limit) { 5374 char c2 = text[index]; 5375 if (isLowSurrogate(c2)) { 5376 return toCodePoint(c1, c2); 5377 } 5378 } 5379 } 5380 return c1; 5381 } 5382 5383 /** 5384 * Same as {@link Character#codePointBefore(CharSequence, int)}. 5385 * Return the code point before index. 5386 * This examines only the characters at index-1 and index-2. 
5387 * 5388 * @param seq the characters to check 5389 * @param index the index after the last or only char forming the code point 5390 * @return the code point before the index 5391 */ codePointBefore(CharSequence seq, int index)5392 public static final int codePointBefore(CharSequence seq, int index) { 5393 char c2 = seq.charAt(--index); 5394 if (isLowSurrogate(c2)) { 5395 if (index > 0) { 5396 char c1 = seq.charAt(--index); 5397 if (isHighSurrogate(c1)) { 5398 return toCodePoint(c1, c2); 5399 } 5400 } 5401 } 5402 return c2; 5403 } 5404 5405 /** 5406 * Same as {@link Character#codePointBefore(char[], int)}. 5407 * Returns the code point before index. 5408 * This examines only the characters at index-1 and index-2. 5409 * 5410 * @param text the characters to check 5411 * @param index the index after the last or only char forming the code point 5412 * @return the code point before the index 5413 */ codePointBefore(char[] text, int index)5414 public static final int codePointBefore(char[] text, int index) { 5415 char c2 = text[--index]; 5416 if (isLowSurrogate(c2)) { 5417 if (index > 0) { 5418 char c1 = text[--index]; 5419 if (isHighSurrogate(c1)) { 5420 return toCodePoint(c1, c2); 5421 } 5422 } 5423 } 5424 return c2; 5425 } 5426 5427 /** 5428 * Same as {@link Character#codePointBefore(char[], int, int)}. 5429 * Return the code point before index. 5430 * This examines only the characters at index-1 and index-2. 
5431 * 5432 * @param text the characters to check 5433 * @param index the index after the last or only char forming the code point 5434 * @param limit the start of the valid text 5435 * @return the code point before the index 5436 */ codePointBefore(char[] text, int index, int limit)5437 public static final int codePointBefore(char[] text, int index, int limit) { 5438 if (index <= limit || limit < 0) { 5439 throw new IndexOutOfBoundsException(); 5440 } 5441 char c2 = text[--index]; 5442 if (isLowSurrogate(c2)) { 5443 if (index > limit) { 5444 char c1 = text[--index]; 5445 if (isHighSurrogate(c1)) { 5446 return toCodePoint(c1, c2); 5447 } 5448 } 5449 } 5450 return c2; 5451 } 5452 5453 /** 5454 * Same as {@link Character#toChars(int, char[], int)}. 5455 * Writes the chars representing the 5456 * code point into the destination at the given index. 5457 * 5458 * @param cp the code point to convert 5459 * @param dst the destination array into which to put the char(s) representing the code point 5460 * @param dstIndex the index at which to put the first (or only) char 5461 * @return the count of the number of chars written (1 or 2) 5462 * @throws IllegalArgumentException if cp is not a valid code point 5463 */ toChars(int cp, char[] dst, int dstIndex)5464 public static final int toChars(int cp, char[] dst, int dstIndex) { 5465 return Character.toChars(cp, dst, dstIndex); 5466 } 5467 5468 /** 5469 * Same as {@link Character#toChars(int)}. 5470 * Returns a char array representing the code point. 5471 * 5472 * @param cp the code point to convert 5473 * @return an array containing the char(s) representing the code point 5474 * @throws IllegalArgumentException if cp is not a valid code point 5475 */ toChars(int cp)5476 public static final char[] toChars(int cp) { 5477 return Character.toChars(cp); 5478 } 5479 5480 /** 5481 * Equivalent to the {@link Character#getDirectionality(char)} method, for 5482 * convenience. 
Returns a byte representing the directionality of the 5483 * character. 5484 * 5485 * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns 5486 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 5487 * 5488 * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link 5489 * UCharacterDirection} and its interface {@link 5490 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 5491 * defined by <code>java.lang.Character</code>. 5492 * @param cp the code point to check 5493 * @return the directionality of the code point 5494 * @see #getDirection 5495 */ getDirectionality(int cp)5496 public static byte getDirectionality(int cp) 5497 { 5498 return (byte)getDirection(cp); 5499 } 5500 5501 /** 5502 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 5503 * method, for convenience. Counts the number of code points in the range 5504 * of text. 
5505 * @param text the characters to check 5506 * @param start the start of the range 5507 * @param limit the limit of the range 5508 * @return the number of code points in the range 5509 */ codePointCount(CharSequence text, int start, int limit)5510 public static int codePointCount(CharSequence text, int start, int limit) { 5511 if (start < 0 || limit < start || limit > text.length()) { 5512 throw new IndexOutOfBoundsException("start (" + start + 5513 ") or limit (" + limit + 5514 ") invalid or out of range 0, " + text.length()); 5515 } 5516 5517 int len = limit - start; 5518 while (limit > start) { 5519 char ch = text.charAt(--limit); 5520 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 5521 ch = text.charAt(--limit); 5522 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 5523 --len; 5524 break; 5525 } 5526 } 5527 } 5528 return len; 5529 } 5530 5531 /** 5532 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 5533 * convenience. Counts the number of code points in the range of text. 
5534 * @param text the characters to check 5535 * @param start the start of the range 5536 * @param limit the limit of the range 5537 * @return the number of code points in the range 5538 */ codePointCount(char[] text, int start, int limit)5539 public static int codePointCount(char[] text, int start, int limit) { 5540 if (start < 0 || limit < start || limit > text.length) { 5541 throw new IndexOutOfBoundsException("start (" + start + 5542 ") or limit (" + limit + 5543 ") invalid or out of range 0, " + text.length); 5544 } 5545 5546 int len = limit - start; 5547 while (limit > start) { 5548 char ch = text[--limit]; 5549 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 5550 ch = text[--limit]; 5551 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 5552 --len; 5553 break; 5554 } 5555 } 5556 } 5557 return len; 5558 } 5559 5560 /** 5561 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 5562 * method, for convenience. Adjusts the char index by a code point offset. 
5563 * @param text the characters to check 5564 * @param index the index to adjust 5565 * @param codePointOffset the number of code points by which to offset the index 5566 * @return the adjusted index 5567 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)5568 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 5569 if (index < 0 || index > text.length()) { 5570 throw new IndexOutOfBoundsException("index ( " + index + 5571 ") out of range 0, " + text.length()); 5572 } 5573 5574 if (codePointOffset < 0) { 5575 while (++codePointOffset <= 0) { 5576 char ch = text.charAt(--index); 5577 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 5578 ch = text.charAt(--index); 5579 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 5580 if (++codePointOffset > 0) { 5581 return index+1; 5582 } 5583 } 5584 } 5585 } 5586 } else { 5587 int limit = text.length(); 5588 while (--codePointOffset >= 0) { 5589 char ch = text.charAt(index++); 5590 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 5591 ch = text.charAt(index++); 5592 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 5593 if (--codePointOffset < 0) { 5594 return index-1; 5595 } 5596 } 5597 } 5598 } 5599 } 5600 5601 return index; 5602 } 5603 5604 /** 5605 * Equivalent to the 5606 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 5607 * method, for convenience. Adjusts the char index by a code point offset. 
5608 * @param text the characters to check 5609 * @param start the start of the range to check 5610 * @param count the length of the range to check 5611 * @param index the index to adjust 5612 * @param codePointOffset the number of code points by which to offset the index 5613 * @return the adjusted index 5614 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)5615 public static int offsetByCodePoints(char[] text, int start, int count, int index, 5616 int codePointOffset) { 5617 int limit = start + count; 5618 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 5619 throw new IndexOutOfBoundsException("index ( " + index + 5620 ") out of range " + start + 5621 ", " + limit + 5622 " in array 0, " + text.length); 5623 } 5624 5625 if (codePointOffset < 0) { 5626 while (++codePointOffset <= 0) { 5627 char ch = text[--index]; 5628 if (index < start) { 5629 throw new IndexOutOfBoundsException("index ( " + index + 5630 ") < start (" + start + 5631 ")"); 5632 } 5633 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 5634 ch = text[--index]; 5635 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 5636 if (++codePointOffset > 0) { 5637 return index+1; 5638 } 5639 } 5640 } 5641 } 5642 } else { 5643 while (--codePointOffset >= 0) { 5644 char ch = text[index++]; 5645 if (index > limit) { 5646 throw new IndexOutOfBoundsException("index ( " + index + 5647 ") > limit (" + limit + 5648 ")"); 5649 } 5650 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 5651 ch = text[index++]; 5652 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 5653 if (--codePointOffset < 0) { 5654 return index-1; 5655 } 5656 } 5657 } 5658 } 5659 } 5660 5661 return index; 5662 } 5663 5664 // private variables ------------------------------------------------- 5665 5666 /** 5667 * To get the last character out from a data type 5668 */ 5669 private static final int 
LAST_CHAR_MASK_ = 0xFFFF; 5670 5671 // /** 5672 // * To get the last byte out from a data type 5673 // */ 5674 // private static final int LAST_BYTE_MASK_ = 0xFF; 5675 // 5676 // /** 5677 // * Shift 16 bits 5678 // */ 5679 // private static final int SHIFT_16_ = 16; 5680 // 5681 // /** 5682 // * Shift 24 bits 5683 // */ 5684 // private static final int SHIFT_24_ = 24; 5685 // 5686 // /** 5687 // * Decimal radix 5688 // */ 5689 // private static final int DECIMAL_RADIX_ = 10; 5690 5691 /** 5692 * No break space code point 5693 */ 5694 private static final int NO_BREAK_SPACE_ = 0xA0; 5695 5696 /** 5697 * Figure space code point 5698 */ 5699 private static final int FIGURE_SPACE_ = 0x2007; 5700 5701 /** 5702 * Narrow no break space code point 5703 */ 5704 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 5705 5706 /** 5707 * Ideographic number zero code point 5708 */ 5709 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 5710 5711 /** 5712 * CJK Ideograph, First code point 5713 */ 5714 private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; 5715 5716 /** 5717 * CJK Ideograph, Second code point 5718 */ 5719 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 5720 5721 /** 5722 * CJK Ideograph, Third code point 5723 */ 5724 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 5725 5726 /** 5727 * CJK Ideograph, Fourth code point 5728 */ 5729 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db; 5730 5731 /** 5732 * CJK Ideograph, FIFTH code point 5733 */ 5734 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 5735 5736 /** 5737 * CJK Ideograph, Sixth code point 5738 */ 5739 private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 5740 5741 /** 5742 * CJK Ideograph, Seventh code point 5743 */ 5744 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 5745 5746 /** 5747 * CJK Ideograph, Eighth code point 5748 */ 5749 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 5750 5751 /** 5752 * CJK Ideograph, Nineth code point 5753 */ 
private static final int CJK_IDEOGRAPH_NINETH_ = 0x4E5D;

    /**
     * Application Program command code point (U+009F)
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator code point (U+001F)
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete code point (U+007F)
     */
    private static final int DELETE_ = 0x007F;

    /**
     * Han digit characters: the "complex" (financial) forms of zero through
     * ten, and the ideographs for ten, hundred, thousand, ten thousand and
     * hundred million.
     */
    private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96F6;
    private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58F9;
    private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8CB3;
    private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53C3;
    private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
    private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4F0D;
    private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
    private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67D2;
    private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634C;
    private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
    private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
    private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62FE;
    private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767E;
    private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4F70;
    private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
    private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4EDF;
    // U+842C is the traditional ideograph for ten thousand, matching the
    // traditional form (U+5104) used for hundred million below. The value was
    // previously 0x824C, a transposition of the digits 4 and 2 that names an
    // unrelated, non-numeric ideograph.
    // NOTE(review): the code that consumes this constant is outside this part
    // of the file; confirm it is only used for Han numeric-value lookup.
    private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x842C;
    private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;

    // private constructor -----------------------------------------------
    ///CLOVER:OFF
    /**
     * Private constructor to prevent instantiation
     */
    private UCharacter()
    {
    }
    ///CLOVER:ON
}