1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package android.icu.lang; 12 13 import java.lang.ref.SoftReference; 14 import java.util.EnumSet; 15 import java.util.HashMap; 16 import java.util.Iterator; 17 import java.util.Locale; 18 import java.util.Map; 19 20 import android.icu.impl.CaseMapImpl; 21 import android.icu.impl.EmojiProps; 22 import android.icu.impl.IllegalIcuArgumentException; 23 import android.icu.impl.Trie2; 24 import android.icu.impl.UBiDiProps; 25 import android.icu.impl.UCaseProps; 26 import android.icu.impl.UCharacterName; 27 import android.icu.impl.UCharacterNameChoice; 28 import android.icu.impl.UCharacterProperty; 29 import android.icu.impl.UCharacterUtility; 30 import android.icu.impl.UPropertyAliases; 31 import android.icu.lang.UCharacterEnums.ECharacterCategory; 32 import android.icu.lang.UCharacterEnums.ECharacterDirection; 33 import android.icu.text.BreakIterator; 34 import android.icu.text.Normalizer2; 35 import android.icu.util.RangeValueIterator; 36 import android.icu.util.ULocale; 37 import android.icu.util.ValueIterator; 38 import android.icu.util.VersionInfo; 39 40 /** 41 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 42 * 43 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 44 * These extensions provide support for more Unicode properties. 45 * Each ICU release supports the latest version of Unicode available at that time. 
46 * 47 * <p>For some time before Java 5 added support for supplementary Unicode code points, 48 * the ICU UCharacter class and many other ICU classes already supported them. 49 * Some UCharacter methods and constants were widened slightly differently than 50 * how the Character class methods and constants were widened later. 51 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 52 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 53 * 54 * <p>Code points are represented in these APIs using ints. While it would be 55 * more convenient in Java to have a separate primitive datatype for them, 56 * ints suffice in the meantime. 57 * 58 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 59 * properties, the main differences between UCharacter and Character are: 60 * <ul> 61 * <li> UCharacter is not designed to be a char wrapper and does not have 62 * APIs that involve management of that single char.<br> 63 * These include: 64 * <ul> 65 * <li> char charValue(), 66 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 67 * </ul> 68 * <li> UCharacter does not include Character APIs that are deprecated, nor 69 * does it include the Java-specific character information, such as 70 * boolean isJavaIdentifierPart(char ch). 71 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 72 * values '10' - '35'. UCharacter also does this in digit and 73 * getNumericValue, to adhere to the Java semantics of these 74 * methods. The new methods unicodeDigit and 75 * getUnicodeNumericValue do not treat the above code points 76 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 77 * </ul> 78 * <p> 79 * In addition to Java compatibility functions, which calculate derived properties, 80 * this API provides low-level access to the Unicode Character Database. 
81 * <p> 82 * Unicode assigns each code point (not just assigned characters) values for 83 * many properties. 84 * Most of them are simple boolean flags, or constants from a small enumerated list. 85 * For some properties, values are strings or other relatively more complex types. 86 * <p> 87 * For more information see 88 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 89 * (http://www.unicode.org/ucd/) 90 * and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU 91 * User Guide chapter on Properties</a> 92 * (https://unicode-org.github.io/icu/userguide/strings/properties). 93 * <p> 94 * There are also functions that provide easy migration from C/POSIX functions 95 * like isblank(). Their use is generally discouraged because the C/POSIX 96 * standards do not define their semantics beyond the ASCII range, which means 97 * that different implementations exhibit very different behavior. 98 * Instead, Unicode properties should be used directly. 99 * <p> 100 * There are also only a few, broad C/POSIX character classes, and they tend 101 * to be used for conflicting purposes. For example, the "isalpha()" class 102 * is sometimes used to determine word boundaries, while a more sophisticated 103 * approach would at least distinguish initial letters from continuation 104 * characters (the latter including combining marks). 105 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 106 * Another example: There is no "istitle()" class for titlecase characters. 107 * <p> 108 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 109 * ICU implements them according to the Standard Recommendations in 110 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 111 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 
112 * <p> 113 * API access for C/POSIX character classes is as follows: 114 * <pre>{@code 115 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 116 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 117 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 118 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 119 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 120 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 121 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 122 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 123 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 124 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 125 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 126 * - cntrl: getType(c)==CONTROL 127 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 128 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 129 * <p> 130 * The C/POSIX character classes are also available in UnicodeSet patterns, 131 * using patterns like [:graph:] or \p{graph}. 132 * 133 * <p><strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions. 134 * Comparison:<ul> 135 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 136 * most of general categories "Z" (separators) + most whitespace ISO controls 137 * (including no-break spaces, but excluding IS1..IS4) 138 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 139 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 140 * 141 * <p> 142 * This class is not subclassable. 
143 * 144 * @author Syn Wee Quek 145 * @see android.icu.lang.UCharacterEnums 146 */ 147 148 public final class UCharacter implements ECharacterCategory, ECharacterDirection 149 { 150 /** 151 * Lead surrogate bitmask 152 */ 153 private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00; 154 155 /** 156 * Trail surrogate bitmask 157 */ 158 private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00; 159 160 /** 161 * Lead surrogate bits 162 */ 163 private static final int LEAD_SURROGATE_BITS = 0xD800; 164 165 /** 166 * Trail surrogate bits 167 */ 168 private static final int TRAIL_SURROGATE_BITS = 0xDC00; 169 170 private static final int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000); 171 172 // public inner classes ---------------------------------------------- 173 174 /** 175 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 176 * 177 * A family of character subsets representing the character blocks in the 178 * Unicode specification, generated from Unicode Data file Blocks.txt. 179 * Character blocks generally define characters used for a specific script 180 * or purpose. A character is contained by at most one Unicode block. 181 * 182 * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU. 
183 */ 184 public static final class UnicodeBlock extends Character.Subset 185 { 186 // block id corresponding to icu4c ----------------------------------- 187 188 /** 189 */ 190 public static final int INVALID_CODE_ID = -1; 191 /** 192 */ 193 public static final int BASIC_LATIN_ID = 1; 194 /** 195 */ 196 public static final int LATIN_1_SUPPLEMENT_ID = 2; 197 /** 198 */ 199 public static final int LATIN_EXTENDED_A_ID = 3; 200 /** 201 */ 202 public static final int LATIN_EXTENDED_B_ID = 4; 203 /** 204 */ 205 public static final int IPA_EXTENSIONS_ID = 5; 206 /** 207 */ 208 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 209 /** 210 */ 211 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 212 /** 213 * Unicode 3.2 renames this block to "Greek and Coptic". 214 */ 215 public static final int GREEK_ID = 8; 216 /** 217 */ 218 public static final int CYRILLIC_ID = 9; 219 /** 220 */ 221 public static final int ARMENIAN_ID = 10; 222 /** 223 */ 224 public static final int HEBREW_ID = 11; 225 /** 226 */ 227 public static final int ARABIC_ID = 12; 228 /** 229 */ 230 public static final int SYRIAC_ID = 13; 231 /** 232 */ 233 public static final int THAANA_ID = 14; 234 /** 235 */ 236 public static final int DEVANAGARI_ID = 15; 237 /** 238 */ 239 public static final int BENGALI_ID = 16; 240 /** 241 */ 242 public static final int GURMUKHI_ID = 17; 243 /** 244 */ 245 public static final int GUJARATI_ID = 18; 246 /** 247 */ 248 public static final int ORIYA_ID = 19; 249 /** 250 */ 251 public static final int TAMIL_ID = 20; 252 /** 253 */ 254 public static final int TELUGU_ID = 21; 255 /** 256 */ 257 public static final int KANNADA_ID = 22; 258 /** 259 */ 260 public static final int MALAYALAM_ID = 23; 261 /** 262 */ 263 public static final int SINHALA_ID = 24; 264 /** 265 */ 266 public static final int THAI_ID = 25; 267 /** 268 */ 269 public static final int LAO_ID = 26; 270 /** 271 */ 272 public static final int TIBETAN_ID = 27; 273 /** 274 */ 275 public static 
final int MYANMAR_ID = 28; 276 /** 277 */ 278 public static final int GEORGIAN_ID = 29; 279 /** 280 */ 281 public static final int HANGUL_JAMO_ID = 30; 282 /** 283 */ 284 public static final int ETHIOPIC_ID = 31; 285 /** 286 */ 287 public static final int CHEROKEE_ID = 32; 288 /** 289 */ 290 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 291 /** 292 */ 293 public static final int OGHAM_ID = 34; 294 /** 295 */ 296 public static final int RUNIC_ID = 35; 297 /** 298 */ 299 public static final int KHMER_ID = 36; 300 /** 301 */ 302 public static final int MONGOLIAN_ID = 37; 303 /** 304 */ 305 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 306 /** 307 */ 308 public static final int GREEK_EXTENDED_ID = 39; 309 /** 310 */ 311 public static final int GENERAL_PUNCTUATION_ID = 40; 312 /** 313 */ 314 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 315 /** 316 */ 317 public static final int CURRENCY_SYMBOLS_ID = 42; 318 /** 319 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 320 * Symbols". 
321 */ 322 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 323 /** 324 */ 325 public static final int LETTERLIKE_SYMBOLS_ID = 44; 326 /** 327 */ 328 public static final int NUMBER_FORMS_ID = 45; 329 /** 330 */ 331 public static final int ARROWS_ID = 46; 332 /** 333 */ 334 public static final int MATHEMATICAL_OPERATORS_ID = 47; 335 /** 336 */ 337 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 338 /** 339 */ 340 public static final int CONTROL_PICTURES_ID = 49; 341 /** 342 */ 343 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 344 /** 345 */ 346 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 347 /** 348 */ 349 public static final int BOX_DRAWING_ID = 52; 350 /** 351 */ 352 public static final int BLOCK_ELEMENTS_ID = 53; 353 /** 354 */ 355 public static final int GEOMETRIC_SHAPES_ID = 54; 356 /** 357 */ 358 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 359 /** 360 */ 361 public static final int DINGBATS_ID = 56; 362 /** 363 */ 364 public static final int BRAILLE_PATTERNS_ID = 57; 365 /** 366 */ 367 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 368 /** 369 */ 370 public static final int KANGXI_RADICALS_ID = 59; 371 /** 372 */ 373 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 374 /** 375 */ 376 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 377 /** 378 */ 379 public static final int HIRAGANA_ID = 62; 380 /** 381 */ 382 public static final int KATAKANA_ID = 63; 383 /** 384 */ 385 public static final int BOPOMOFO_ID = 64; 386 /** 387 */ 388 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 389 /** 390 */ 391 public static final int KANBUN_ID = 66; 392 /** 393 */ 394 public static final int BOPOMOFO_EXTENDED_ID = 67; 395 /** 396 */ 397 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 398 /** 399 */ 400 public static final int CJK_COMPATIBILITY_ID = 69; 401 /** 402 */ 403 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 404 /** 
405 */ 406 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 407 /** 408 */ 409 public static final int YI_SYLLABLES_ID = 72; 410 /** 411 */ 412 public static final int YI_RADICALS_ID = 73; 413 /** 414 */ 415 public static final int HANGUL_SYLLABLES_ID = 74; 416 /** 417 */ 418 public static final int HIGH_SURROGATES_ID = 75; 419 /** 420 */ 421 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 422 /** 423 */ 424 public static final int LOW_SURROGATES_ID = 77; 425 /** 426 * Same as public static final int PRIVATE_USE. 427 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 428 * and multiple code point ranges had this block. 429 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 430 * and adds separate blocks for the supplementary PUAs. 431 */ 432 public static final int PRIVATE_USE_AREA_ID = 78; 433 /** 434 * Same as public static final int PRIVATE_USE_AREA. 435 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 436 * and multiple code point ranges had this block. 437 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 438 * and adds separate blocks for the supplementary PUAs. 
439 */ 440 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 441 /** 442 */ 443 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 444 /** 445 */ 446 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 447 /** 448 */ 449 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 450 /** 451 */ 452 public static final int COMBINING_HALF_MARKS_ID = 82; 453 /** 454 */ 455 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 456 /** 457 */ 458 public static final int SMALL_FORM_VARIANTS_ID = 84; 459 /** 460 */ 461 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 462 /** 463 */ 464 public static final int SPECIALS_ID = 86; 465 /** 466 */ 467 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 468 /** 469 */ 470 public static final int OLD_ITALIC_ID = 88; 471 /** 472 */ 473 public static final int GOTHIC_ID = 89; 474 /** 475 */ 476 public static final int DESERET_ID = 90; 477 /** 478 */ 479 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 480 /** 481 */ 482 public static final int MUSICAL_SYMBOLS_ID = 92; 483 /** 484 */ 485 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 486 /** 487 */ 488 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 489 /** 490 */ 491 public static final int 492 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 493 /** 494 */ 495 public static final int TAGS_ID = 96; 496 497 // New blocks in Unicode 3.2 498 499 /** 500 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 501 */ 502 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 503 /** 504 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
505 */ 506 507 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 508 /** 509 */ 510 public static final int TAGALOG_ID = 98; 511 /** 512 */ 513 public static final int HANUNOO_ID = 99; 514 /** 515 */ 516 public static final int BUHID_ID = 100; 517 /** 518 */ 519 public static final int TAGBANWA_ID = 101; 520 /** 521 */ 522 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 523 /** 524 */ 525 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 526 /** 527 */ 528 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 529 /** 530 */ 531 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 532 /** 533 */ 534 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 535 /** 536 */ 537 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 538 /** 539 */ 540 public static final int VARIATION_SELECTORS_ID = 108; 541 /** 542 */ 543 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 544 /** 545 */ 546 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 547 548 /** 549 */ 550 public static final int LIMBU_ID = 111; /*[1900]*/ 551 /** 552 */ 553 public static final int TAI_LE_ID = 112; /*[1950]*/ 554 /** 555 */ 556 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 557 /** 558 */ 559 public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 560 /** 561 */ 562 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 563 /** 564 */ 565 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 566 /** 567 */ 568 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 569 /** 570 */ 571 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 572 /** 573 */ 574 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 575 /** 576 */ 577 public static final int UGARITIC_ID = 120; /*[10380]*/ 578 /** 579 */ 580 public static final int SHAVIAN_ID = 121; /*[10450]*/ 581 /** 582 */ 583 public static final int OSMANYA_ID = 
122; /*[10480]*/ 584 /** 585 */ 586 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 587 /** 588 */ 589 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 590 /** 591 */ 592 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 593 594 /* New blocks in Unicode 4.1 */ 595 596 /** 597 */ 598 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 599 600 /** 601 */ 602 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 603 604 /** 605 */ 606 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 607 608 /** 609 */ 610 public static final int BUGINESE_ID = 129; /*[1A00]*/ 611 612 /** 613 */ 614 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 615 616 /** 617 */ 618 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 619 620 /** 621 */ 622 public static final int COPTIC_ID = 132; /*[2C80]*/ 623 624 /** 625 */ 626 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 627 628 /** 629 */ 630 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 631 632 /** 633 */ 634 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 635 636 /** 637 */ 638 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 639 640 /** 641 */ 642 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 643 644 /** 645 */ 646 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 647 648 /** 649 */ 650 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 651 652 /** 653 */ 654 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 655 656 /** 657 */ 658 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 659 660 /** 661 */ 662 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 663 664 /** 665 */ 666 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 667 668 /** 669 */ 670 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 671 672 /** 673 */ 674 public static 
final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 675 676 /* New blocks in Unicode 5.0 */ 677 678 /** 679 */ 680 public static final int NKO_ID = 146; /*[07C0]*/ 681 /** 682 */ 683 public static final int BALINESE_ID = 147; /*[1B00]*/ 684 /** 685 */ 686 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 687 /** 688 */ 689 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 690 /** 691 */ 692 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 693 /** 694 */ 695 public static final int PHOENICIAN_ID = 151; /*[10900]*/ 696 /** 697 */ 698 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 699 /** 700 */ 701 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 702 /** 703 */ 704 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 705 706 /** 707 */ 708 public static final int SUNDANESE_ID = 155; /* [1B80] */ 709 710 /** 711 */ 712 public static final int LEPCHA_ID = 156; /* [1C00] */ 713 714 /** 715 */ 716 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 717 718 /** 719 */ 720 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 721 722 /** 723 */ 724 public static final int VAI_ID = 159; /* [A500] */ 725 726 /** 727 */ 728 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 729 730 /** 731 */ 732 public static final int SAURASHTRA_ID = 161; /* [A880] */ 733 734 /** 735 */ 736 public static final int KAYAH_LI_ID = 162; /* [A900] */ 737 738 /** 739 */ 740 public static final int REJANG_ID = 163; /* [A930] */ 741 742 /** 743 */ 744 public static final int CHAM_ID = 164; /* [AA00] */ 745 746 /** 747 */ 748 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 749 750 /** 751 */ 752 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 753 754 /** 755 */ 756 public static final int LYCIAN_ID = 167; /* [10280] */ 757 758 /** 759 */ 760 public static final int CARIAN_ID = 168; /* [102A0] */ 761 762 /** 763 */ 764 public static final int LYDIAN_ID = 169; /* 
[10920] */ 765 766 /** 767 */ 768 public static final int MAHJONG_TILES_ID = 170; /* [1F000] */ 769 770 /** 771 */ 772 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 773 774 /* New blocks in Unicode 5.2 */ 775 776 /***/ 777 public static final int SAMARITAN_ID = 172; /*[0800]*/ 778 /***/ 779 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 780 /***/ 781 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 782 /***/ 783 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 784 /***/ 785 public static final int LISU_ID = 176; /*[A4D0]*/ 786 /***/ 787 public static final int BAMUM_ID = 177; /*[A6A0]*/ 788 /***/ 789 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 790 /***/ 791 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 792 /***/ 793 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 794 /***/ 795 public static final int JAVANESE_ID = 181; /*[A980]*/ 796 /***/ 797 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 798 /***/ 799 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 800 /***/ 801 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 802 /***/ 803 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 804 /***/ 805 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 806 /***/ 807 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 808 /***/ 809 public static final int AVESTAN_ID = 188; /*[10B00]*/ 810 /***/ 811 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 812 /***/ 813 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 814 /***/ 815 public static final int OLD_TURKIC_ID = 191; /*[10C00]*/ 816 /***/ 817 public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ 818 /***/ 819 public static final int KAITHI_ID = 193; /*[11080]*/ 820 /***/ 821 public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ 822 /***/ 823 public 
static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ 824 /***/ 825 public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ 826 /***/ 827 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ 828 829 /* New blocks in Unicode 6.0 */ 830 831 /***/ 832 public static final int MANDAIC_ID = 198; /*[0840]*/ 833 /***/ 834 public static final int BATAK_ID = 199; /*[1BC0]*/ 835 /***/ 836 public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/ 837 /***/ 838 public static final int BRAHMI_ID = 201; /*[11000]*/ 839 /***/ 840 public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/ 841 /***/ 842 public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/ 843 /***/ 844 public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/ 845 /***/ 846 public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/ 847 /***/ 848 public static final int EMOTICONS_ID = 206; /*[1F600]*/ 849 /***/ 850 public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/ 851 /***/ 852 public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/ 853 /***/ 854 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/ 855 856 /* New blocks in Unicode 6.1 */ 857 858 /***/ 859 public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/ 860 /***/ 861 public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/ 862 /***/ 863 public static final int CHAKMA_ID = 212; /*[11100]*/ 864 /***/ 865 public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/ 866 /***/ 867 public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/ 868 /***/ 869 public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/ 870 /***/ 871 public static final int MIAO_ID = 216; /*[16F00]*/ 872 /***/ 873 public static final int SHARADA_ID = 217; /*[11180]*/ 874 /***/ 875 public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/ 876 /***/ 877 public static 
final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/ 878 /***/ 879 public static final int TAKRI_ID = 220; /*[11680]*/ 880 881 /* New blocks in Unicode 7.0 */ 882 883 /***/ 884 public static final int BASSA_VAH_ID = 221; /*[16AD0]*/ 885 /***/ 886 public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/ 887 /***/ 888 public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/ 889 /***/ 890 public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/ 891 /***/ 892 public static final int DUPLOYAN_ID = 225; /*[1BC00]*/ 893 /***/ 894 public static final int ELBASAN_ID = 226; /*[10500]*/ 895 /***/ 896 public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/ 897 /***/ 898 public static final int GRANTHA_ID = 228; /*[11300]*/ 899 /***/ 900 public static final int KHOJKI_ID = 229; /*[11200]*/ 901 /***/ 902 public static final int KHUDAWADI_ID = 230; /*[112B0]*/ 903 /***/ 904 public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/ 905 /***/ 906 public static final int LINEAR_A_ID = 232; /*[10600]*/ 907 /***/ 908 public static final int MAHAJANI_ID = 233; /*[11150]*/ 909 /***/ 910 public static final int MANICHAEAN_ID = 234; /*[10AC0]*/ 911 /***/ 912 public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/ 913 /***/ 914 public static final int MODI_ID = 236; /*[11600]*/ 915 /***/ 916 public static final int MRO_ID = 237; /*[16A40]*/ 917 /***/ 918 public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/ 919 /***/ 920 public static final int NABATAEAN_ID = 239; /*[10880]*/ 921 /***/ 922 public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/ 923 /***/ 924 public static final int OLD_PERMIC_ID = 241; /*[10350]*/ 925 /***/ 926 public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/ 927 /***/ 928 public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/ 929 /***/ 930 public static final int PALMYRENE_ID = 244; /*[10860]*/ 931 /***/ 932 public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/ 933 
/***/ 934 public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/ 935 /***/ 936 public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/ 937 /***/ 938 public static final int SIDDHAM_ID = 248; /*[11580]*/ 939 /***/ 940 public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/ 941 /***/ 942 public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/ 943 /***/ 944 public static final int TIRHUTA_ID = 251; /*[11480]*/ 945 /***/ 946 public static final int WARANG_CITI_ID = 252; /*[118A0]*/ 947 948 /* New blocks in Unicode 8.0 */ 949 950 /***/ 951 public static final int AHOM_ID = 253; /*[11700]*/ 952 /***/ 953 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 954 /***/ 955 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 956 /***/ 957 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 958 /***/ 959 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 960 /***/ 961 public static final int HATRAN_ID = 258; /*[108E0]*/ 962 /***/ 963 public static final int MULTANI_ID = 259; /*[11280]*/ 964 /***/ 965 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 966 /***/ 967 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 968 /***/ 969 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 970 971 /* New blocks in Unicode 9.0 */ 972 973 /***/ 974 public static final int ADLAM_ID = 263; /*[1E900]*/ 975 /***/ 976 public static final int BHAIKSUKI_ID = 264; /*[11C00]*/ 977 /***/ 978 public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/ 979 /***/ 980 public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/ 981 /***/ 982 public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/ 983 /***/ 984 public static final int MARCHEN_ID = 268; /*[11C70]*/ 985 /***/ 986 public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/ 987 /***/ 988 public static final int NEWA_ID 
= 270; /*[11400]*/ 989 /***/ 990 public static final int OSAGE_ID = 271; /*[104B0]*/ 991 /***/ 992 public static final int TANGUT_ID = 272; /*[17000]*/ 993 /***/ 994 public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/ 995 996 // New blocks in Unicode 10.0 997 998 /***/ 999 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/ 1000 /***/ 1001 public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/ 1002 /***/ 1003 public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/ 1004 /***/ 1005 public static final int NUSHU_ID = 277; /*[1B170]*/ 1006 /***/ 1007 public static final int SOYOMBO_ID = 278; /*[11A50]*/ 1008 /***/ 1009 public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/ 1010 /***/ 1011 public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/ 1012 1013 // New blocks in Unicode 11.0 1014 1015 /***/ 1016 public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/ 1017 /***/ 1018 public static final int DOGRA_ID = 282; /*[11800]*/ 1019 /***/ 1020 public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/ 1021 /***/ 1022 public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/ 1023 /***/ 1024 public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/ 1025 /***/ 1026 public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/ 1027 /***/ 1028 public static final int MAKASAR_ID = 287; /*[11EE0]*/ 1029 /***/ 1030 public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/ 1031 /***/ 1032 public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/ 1033 /***/ 1034 public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/ 1035 /***/ 1036 public static final int SOGDIAN_ID = 291; /*[10F30]*/ 1037 1038 // New blocks in Unicode 12.0 1039 1040 /***/ 1041 public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/ 1042 /***/ 1043 public static final int ELYMAIC_ID = 293; /*[10FE0]*/ 1044 /***/ 1045 public static final int NANDINAGARI_ID = 294; /*[119A0]*/ 1046 /***/ 1047 public 
static final int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/ 1048 /***/ 1049 public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/ 1050 /***/ 1051 public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/ 1052 /***/ 1053 public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/ 1054 /***/ 1055 public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/ 1056 /***/ 1057 public static final int WANCHO_ID = 300; /*[1E2C0]*/ 1058 1059 // New blocks in Unicode 13.0 1060 1061 /***/ 1062 public static final int CHORASMIAN_ID = 301; /*[10FB0]*/ 1063 /***/ 1064 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/ 1065 /***/ 1066 public static final int DIVES_AKURU_ID = 303; /*[11900]*/ 1067 /***/ 1068 public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/ 1069 /***/ 1070 public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/ 1071 /***/ 1072 public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/ 1073 /***/ 1074 public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/ 1075 /***/ 1076 public static final int YEZIDI_ID = 308; /*[10E80]*/ 1077 1078 // New blocks in Unicode 14.0 1079 1080 /***/ 1081 public static final int ARABIC_EXTENDED_B_ID = 309; /*[0870]*/ 1082 /***/ 1083 public static final int CYPRO_MINOAN_ID = 310; /*[12F90]*/ 1084 /***/ 1085 public static final int ETHIOPIC_EXTENDED_B_ID = 311; /*[1E7E0]*/ 1086 /***/ 1087 public static final int KANA_EXTENDED_B_ID = 312; /*[1AFF0]*/ 1088 /***/ 1089 public static final int LATIN_EXTENDED_F_ID = 313; /*[10780]*/ 1090 /***/ 1091 public static final int LATIN_EXTENDED_G_ID = 314; /*[1DF00]*/ 1092 /***/ 1093 public static final int OLD_UYGHUR_ID = 315; /*[10F70]*/ 1094 /***/ 1095 public static final int TANGSA_ID = 316; /*[16A70]*/ 1096 /***/ 1097 public static final int TOTO_ID = 317; /*[1E290]*/ 1098 /***/ 1099 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID = 318; /*[11AB0]*/ 
1100 /***/ 1101 public static final int VITHKUQI_ID = 319; /*[10570]*/ 1102 /***/ 1103 public static final int ZNAMENNY_MUSICAL_NOTATION_ID = 320; /*[1CF00]*/ 1104 1105 // New blocks in Unicode 15.0 1106 1107 /***/ 1108 public static final int ARABIC_EXTENDED_C_ID = 321; /*[10EC0]*/ 1109 /***/ 1110 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID = 322; /*[31350]*/ 1111 /***/ 1112 public static final int CYRILLIC_EXTENDED_D_ID = 323; /*[1E030]*/ 1113 /***/ 1114 public static final int DEVANAGARI_EXTENDED_A_ID = 324; /*[11B00]*/ 1115 /***/ 1116 public static final int KAKTOVIK_NUMERALS_ID = 325; /*[1D2C0]*/ 1117 /***/ 1118 public static final int KAWI_ID = 326; /*[11F00]*/ 1119 /***/ 1120 public static final int NAG_MUNDARI_ID = 327; /*[1E4D0]*/ 1121 1122 // New block in Unicode 15.1 1123 1124 /***/ 1125 @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API) 1126 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_ID = 328; /*[2EBF0]*/ 1127 1128 /** 1129 * One more than the highest normal UnicodeBlock value. 1130 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 1131 * 1132 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
1133 * @hide unsupported on Android 1134 */ 1135 @Deprecated 1136 public static final int COUNT = 329; 1137 1138 // blocks objects --------------------------------------------------- 1139 1140 /** 1141 * Array of UnicodeBlocks, for easy access in getInstance(int) 1142 */ 1143 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1144 1145 /** 1146 */ 1147 public static final UnicodeBlock NO_BLOCK 1148 = new UnicodeBlock("NO_BLOCK", 0); 1149 1150 /** 1151 */ 1152 public static final UnicodeBlock BASIC_LATIN 1153 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1154 /** 1155 */ 1156 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1157 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1158 /** 1159 */ 1160 public static final UnicodeBlock LATIN_EXTENDED_A 1161 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1162 /** 1163 */ 1164 public static final UnicodeBlock LATIN_EXTENDED_B 1165 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1166 /** 1167 */ 1168 public static final UnicodeBlock IPA_EXTENSIONS 1169 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1170 /** 1171 */ 1172 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1173 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1174 /** 1175 */ 1176 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1177 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1178 /** 1179 * Unicode 3.2 renames this block to "Greek and Coptic". 
1180 */ 1181 public static final UnicodeBlock GREEK 1182 = new UnicodeBlock("GREEK", GREEK_ID); 1183 /** 1184 */ 1185 public static final UnicodeBlock CYRILLIC 1186 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1187 /** 1188 */ 1189 public static final UnicodeBlock ARMENIAN 1190 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1191 /** 1192 */ 1193 public static final UnicodeBlock HEBREW 1194 = new UnicodeBlock("HEBREW", HEBREW_ID); 1195 /** 1196 */ 1197 public static final UnicodeBlock ARABIC 1198 = new UnicodeBlock("ARABIC", ARABIC_ID); 1199 /** 1200 */ 1201 public static final UnicodeBlock SYRIAC 1202 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1203 /** 1204 */ 1205 public static final UnicodeBlock THAANA 1206 = new UnicodeBlock("THAANA", THAANA_ID); 1207 /** 1208 */ 1209 public static final UnicodeBlock DEVANAGARI 1210 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1211 /** 1212 */ 1213 public static final UnicodeBlock BENGALI 1214 = new UnicodeBlock("BENGALI", BENGALI_ID); 1215 /** 1216 */ 1217 public static final UnicodeBlock GURMUKHI 1218 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1219 /** 1220 */ 1221 public static final UnicodeBlock GUJARATI 1222 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1223 /** 1224 */ 1225 public static final UnicodeBlock ORIYA 1226 = new UnicodeBlock("ORIYA", ORIYA_ID); 1227 /** 1228 */ 1229 public static final UnicodeBlock TAMIL 1230 = new UnicodeBlock("TAMIL", TAMIL_ID); 1231 /** 1232 */ 1233 public static final UnicodeBlock TELUGU 1234 = new UnicodeBlock("TELUGU", TELUGU_ID); 1235 /** 1236 */ 1237 public static final UnicodeBlock KANNADA 1238 = new UnicodeBlock("KANNADA", KANNADA_ID); 1239 /** 1240 */ 1241 public static final UnicodeBlock MALAYALAM 1242 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1243 /** 1244 */ 1245 public static final UnicodeBlock SINHALA 1246 = new UnicodeBlock("SINHALA", SINHALA_ID); 1247 /** 1248 */ 1249 public static final UnicodeBlock THAI 1250 = new UnicodeBlock("THAI", THAI_ID); 1251 /** 1252 */ 1253 
public static final UnicodeBlock LAO 1254 = new UnicodeBlock("LAO", LAO_ID); 1255 /** 1256 */ 1257 public static final UnicodeBlock TIBETAN 1258 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1259 /** 1260 */ 1261 public static final UnicodeBlock MYANMAR 1262 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1263 /** 1264 */ 1265 public static final UnicodeBlock GEORGIAN 1266 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1267 /** 1268 */ 1269 public static final UnicodeBlock HANGUL_JAMO 1270 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1271 /** 1272 */ 1273 public static final UnicodeBlock ETHIOPIC 1274 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1275 /** 1276 */ 1277 public static final UnicodeBlock CHEROKEE 1278 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1279 /** 1280 */ 1281 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1282 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1283 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1284 /** 1285 */ 1286 public static final UnicodeBlock OGHAM 1287 = new UnicodeBlock("OGHAM", OGHAM_ID); 1288 /** 1289 */ 1290 public static final UnicodeBlock RUNIC 1291 = new UnicodeBlock("RUNIC", RUNIC_ID); 1292 /** 1293 */ 1294 public static final UnicodeBlock KHMER 1295 = new UnicodeBlock("KHMER", KHMER_ID); 1296 /** 1297 */ 1298 public static final UnicodeBlock MONGOLIAN 1299 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1300 /** 1301 */ 1302 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1303 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1304 /** 1305 */ 1306 public static final UnicodeBlock GREEK_EXTENDED 1307 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1308 /** 1309 */ 1310 public static final UnicodeBlock GENERAL_PUNCTUATION 1311 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1312 /** 1313 */ 1314 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1315 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 
SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1316 /** 1317 */ 1318 public static final UnicodeBlock CURRENCY_SYMBOLS 1319 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1320 /** 1321 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1322 * Symbols". 1323 */ 1324 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1325 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1326 /** 1327 */ 1328 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1329 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1330 /** 1331 */ 1332 public static final UnicodeBlock NUMBER_FORMS 1333 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1334 /** 1335 */ 1336 public static final UnicodeBlock ARROWS 1337 = new UnicodeBlock("ARROWS", ARROWS_ID); 1338 /** 1339 */ 1340 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1341 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1342 /** 1343 */ 1344 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1345 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1346 /** 1347 */ 1348 public static final UnicodeBlock CONTROL_PICTURES 1349 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1350 /** 1351 */ 1352 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1353 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1354 /** 1355 */ 1356 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1357 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1358 /** 1359 */ 1360 public static final UnicodeBlock BOX_DRAWING 1361 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1362 /** 1363 */ 1364 public static final UnicodeBlock BLOCK_ELEMENTS 1365 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1366 /** 1367 */ 1368 public static final UnicodeBlock GEOMETRIC_SHAPES 1369 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1370 /** 
1371 */ 1372 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1373 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1374 /** 1375 */ 1376 public static final UnicodeBlock DINGBATS 1377 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1378 /** 1379 */ 1380 public static final UnicodeBlock BRAILLE_PATTERNS 1381 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1382 /** 1383 */ 1384 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1385 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1386 /** 1387 */ 1388 public static final UnicodeBlock KANGXI_RADICALS 1389 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1390 /** 1391 */ 1392 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1393 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1394 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1395 /** 1396 */ 1397 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1398 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1399 /** 1400 */ 1401 public static final UnicodeBlock HIRAGANA 1402 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1403 /** 1404 */ 1405 public static final UnicodeBlock KATAKANA 1406 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1407 /** 1408 */ 1409 public static final UnicodeBlock BOPOMOFO 1410 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1411 /** 1412 */ 1413 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1414 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1415 /** 1416 */ 1417 public static final UnicodeBlock KANBUN 1418 = new UnicodeBlock("KANBUN", KANBUN_ID); 1419 /** 1420 */ 1421 public static final UnicodeBlock BOPOMOFO_EXTENDED 1422 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1423 /** 1424 */ 1425 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1426 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1427 
ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1428 /** 1429 */ 1430 public static final UnicodeBlock CJK_COMPATIBILITY 1431 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1432 /** 1433 */ 1434 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1435 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1436 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1437 /** 1438 */ 1439 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1440 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1441 /** 1442 */ 1443 public static final UnicodeBlock YI_SYLLABLES 1444 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1445 /** 1446 */ 1447 public static final UnicodeBlock YI_RADICALS 1448 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1449 /** 1450 */ 1451 public static final UnicodeBlock HANGUL_SYLLABLES 1452 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1453 /** 1454 */ 1455 public static final UnicodeBlock HIGH_SURROGATES 1456 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1457 /** 1458 */ 1459 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1460 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1461 /** 1462 */ 1463 public static final UnicodeBlock LOW_SURROGATES 1464 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1465 /** 1466 * Same as public static final int PRIVATE_USE. 1467 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1468 * and multiple code point ranges had this block. 1469 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1470 * and adds separate blocks for the supplementary PUAs. 1471 */ 1472 public static final UnicodeBlock PRIVATE_USE_AREA 1473 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1474 /** 1475 * Same as public static final int PRIVATE_USE_AREA. 1476 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1477 * and multiple code point ranges had this block. 
1478 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1479 * and adds separate blocks for the supplementary PUAs. 1480 */ 1481 public static final UnicodeBlock PRIVATE_USE 1482 = PRIVATE_USE_AREA; 1483 /** 1484 */ 1485 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1486 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1487 /** 1488 */ 1489 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1490 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1491 /** 1492 */ 1493 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1494 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1495 /** 1496 */ 1497 public static final UnicodeBlock COMBINING_HALF_MARKS 1498 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1499 /** 1500 */ 1501 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1502 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1503 /** 1504 */ 1505 public static final UnicodeBlock SMALL_FORM_VARIANTS 1506 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1507 /** 1508 */ 1509 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1510 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1511 /** 1512 */ 1513 public static final UnicodeBlock SPECIALS 1514 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1515 /** 1516 */ 1517 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1518 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1519 /** 1520 */ 1521 public static final UnicodeBlock OLD_ITALIC 1522 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1523 /** 1524 */ 1525 public static final UnicodeBlock GOTHIC 1526 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1527 /** 1528 */ 1529 public static final UnicodeBlock DESERET 1530 = new UnicodeBlock("DESERET", DESERET_ID); 
1531 /** 1532 */ 1533 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1534 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1535 /** 1536 */ 1537 public static final UnicodeBlock MUSICAL_SYMBOLS 1538 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1539 /** 1540 */ 1541 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1542 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1543 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1544 /** 1545 */ 1546 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1547 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1548 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1549 /** 1550 */ 1551 public static final UnicodeBlock 1552 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1553 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1554 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1555 /** 1556 */ 1557 public static final UnicodeBlock TAGS 1558 = new UnicodeBlock("TAGS", TAGS_ID); 1559 1560 // New blocks in Unicode 3.2 1561 1562 /** 1563 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1564 */ 1565 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1566 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1567 /** 1568 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
1569 */ 1570 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1571 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1572 /** 1573 */ 1574 public static final UnicodeBlock TAGALOG 1575 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1576 /** 1577 */ 1578 public static final UnicodeBlock HANUNOO 1579 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1580 /** 1581 */ 1582 public static final UnicodeBlock BUHID 1583 = new UnicodeBlock("BUHID", BUHID_ID); 1584 /** 1585 */ 1586 public static final UnicodeBlock TAGBANWA 1587 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1588 /** 1589 */ 1590 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1591 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1592 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1593 /** 1594 */ 1595 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1596 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1597 /** 1598 */ 1599 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1600 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1601 /** 1602 */ 1603 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1604 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1605 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1606 /** 1607 */ 1608 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1609 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1610 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1611 /** 1612 */ 1613 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1614 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1615 /** 1616 */ 1617 public static final UnicodeBlock VARIATION_SELECTORS 1618 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1619 /** 1620 */ 1621 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1622 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1623 
SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1624 /** 1625 */ 1626 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1627 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1628 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1629 1630 /** 1631 */ 1632 public static final UnicodeBlock LIMBU 1633 = new UnicodeBlock("LIMBU", LIMBU_ID); 1634 /** 1635 */ 1636 public static final UnicodeBlock TAI_LE 1637 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1638 /** 1639 */ 1640 public static final UnicodeBlock KHMER_SYMBOLS 1641 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1642 1643 /** 1644 */ 1645 public static final UnicodeBlock PHONETIC_EXTENSIONS 1646 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1647 1648 /** 1649 */ 1650 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1651 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1652 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1653 /** 1654 */ 1655 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1656 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1657 /** 1658 */ 1659 public static final UnicodeBlock LINEAR_B_SYLLABARY 1660 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1661 /** 1662 */ 1663 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1664 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1665 /** 1666 */ 1667 public static final UnicodeBlock AEGEAN_NUMBERS 1668 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1669 /** 1670 */ 1671 public static final UnicodeBlock UGARITIC 1672 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1673 /** 1674 */ 1675 public static final UnicodeBlock SHAVIAN 1676 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1677 /** 1678 */ 1679 public static final UnicodeBlock OSMANYA 1680 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1681 /** 1682 */ 1683 public static final UnicodeBlock CYPRIOT_SYLLABARY 1684 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1685 /** 1686 */ 
1687 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1688 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1689 1690 /** 1691 */ 1692 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1693 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1694 1695 /* New blocks in Unicode 4.1 */ 1696 1697 /** 1698 */ 1699 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1700 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1701 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1702 1703 /** 1704 */ 1705 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1706 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 1707 1708 /** 1709 */ 1710 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1711 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 1712 1713 /** 1714 */ 1715 public static final UnicodeBlock BUGINESE = 1716 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 1717 1718 /** 1719 */ 1720 public static final UnicodeBlock CJK_STROKES = 1721 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 1722 1723 /** 1724 */ 1725 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1726 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1727 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 1728 1729 /** 1730 */ 1731 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 1732 1733 /** 1734 */ 1735 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1736 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 1737 1738 /** 1739 */ 1740 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1741 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 1742 1743 /** 1744 */ 1745 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1746 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 1747 1748 
/** 1749 */ 1750 public static final UnicodeBlock GLAGOLITIC = 1751 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 1752 1753 /** 1754 */ 1755 public static final UnicodeBlock KHAROSHTHI = 1756 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 1757 1758 /** 1759 */ 1760 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 1761 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 1762 1763 /** 1764 */ 1765 public static final UnicodeBlock NEW_TAI_LUE = 1766 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 1767 1768 /** 1769 */ 1770 public static final UnicodeBlock OLD_PERSIAN = 1771 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 1772 1773 /** 1774 */ 1775 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1776 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1777 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 1778 1779 /** 1780 */ 1781 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1782 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 1783 1784 /** 1785 */ 1786 public static final UnicodeBlock SYLOTI_NAGRI = 1787 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 1788 1789 /** 1790 */ 1791 public static final UnicodeBlock TIFINAGH = 1792 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 1793 1794 /** 1795 */ 1796 public static final UnicodeBlock VERTICAL_FORMS = 1797 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 1798 1799 /** 1800 */ 1801 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 1802 /** 1803 */ 1804 public static final UnicodeBlock BALINESE = 1805 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 1806 /** 1807 */ 1808 public static final UnicodeBlock LATIN_EXTENDED_C = 1809 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 1810 /** 1811 */ 1812 public static final UnicodeBlock LATIN_EXTENDED_D = 1813 new 
UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 1814 /** 1815 */ 1816 public static final UnicodeBlock PHAGS_PA = 1817 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 1818 /** 1819 */ 1820 public static final UnicodeBlock PHOENICIAN = 1821 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/ 1822 /** 1823 */ 1824 public static final UnicodeBlock CUNEIFORM = 1825 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 1826 /** 1827 */ 1828 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 1829 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 1830 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 1831 /** 1832 */ 1833 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 1834 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 1835 1836 /** 1837 */ 1838 public static final UnicodeBlock SUNDANESE = 1839 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 1840 1841 /** 1842 */ 1843 public static final UnicodeBlock LEPCHA = 1844 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 1845 1846 /** 1847 */ 1848 public static final UnicodeBlock OL_CHIKI = 1849 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 1850 1851 /** 1852 */ 1853 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1854 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 1855 1856 /** 1857 */ 1858 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 1859 1860 /** 1861 */ 1862 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1863 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 1864 1865 /** 1866 */ 1867 public static final UnicodeBlock SAURASHTRA = 1868 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 1869 1870 /** 1871 */ 1872 public static final UnicodeBlock KAYAH_LI = 1873 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 1874 1875 /** 1876 */ 1877 public static final UnicodeBlock REJANG = 
                new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */

        // NOTE(review): the bracketed hex value trailing each declaration in this run looks
        // like the first code point of the block -- confirm against the Unicode Blocks.txt
        // data file before relying on it.

        /**
         */
        public static final UnicodeBlock CHAM =
                new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */

        /**
         */
        public static final UnicodeBlock ANCIENT_SYMBOLS =
                new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */

        /**
         */
        public static final UnicodeBlock PHAISTOS_DISC =
                new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */

        /**
         */
        public static final UnicodeBlock LYCIAN =
                new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */

        /**
         */
        public static final UnicodeBlock CARIAN =
                new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */

        /**
         */
        public static final UnicodeBlock LYDIAN =
                new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */

        /**
         */
        public static final UnicodeBlock MAHJONG_TILES =
                new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */

        /**
         */
        public static final UnicodeBlock DOMINO_TILES =
                new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */

        /* New blocks in Unicode 5.2 */

        /***/
        public static final UnicodeBlock SAMARITAN =
                new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
        /***/
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
                new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
                        UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
        /***/
        public static final UnicodeBlock TAI_THAM =
                new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
        /***/
        public static final UnicodeBlock VEDIC_EXTENSIONS =
                new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
        /***/
        public static final UnicodeBlock LISU =
                new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
        /***/
        public static final UnicodeBlock BAMUM =
                new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
        /***/
        public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
                new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
        /***/
        public static final UnicodeBlock DEVANAGARI_EXTENDED =
                new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
        /***/
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
                new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
        /***/
        public static final UnicodeBlock JAVANESE =
                new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
        /***/
        public static final UnicodeBlock MYANMAR_EXTENDED_A =
                new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
        /***/
        public static final UnicodeBlock TAI_VIET =
                new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
        /***/
        public static final UnicodeBlock MEETEI_MAYEK =
                new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
        /***/
        public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
                new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
        /***/
        public static final UnicodeBlock IMPERIAL_ARAMAIC =
                new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
        /***/
        public static final UnicodeBlock OLD_SOUTH_ARABIAN =
                new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
        /***/
        public static final UnicodeBlock AVESTAN =
                new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
        /***/
        public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
                new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
        /***/
        public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
                new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
        /***/
        public static final UnicodeBlock OLD_TURKIC =
                new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
        /***/
        public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
                new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
        /***/
        public static final UnicodeBlock KAITHI =
                new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
        /***/
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
                new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
        /***/
        public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
                new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
                        ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
        /***/
        public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
                new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
                        ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/

        /* New blocks in Unicode 6.0 */

        /***/
        public static final UnicodeBlock MANDAIC =
                new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
        /***/
        public static final UnicodeBlock BATAK =
                new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
        /***/
        public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
                new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
        /***/
        public static final UnicodeBlock BRAHMI =
                new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
        /***/
        public static final UnicodeBlock BAMUM_SUPPLEMENT =
                new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
        /***/
        public static final UnicodeBlock KANA_SUPPLEMENT =
                new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
        /***/
        public static final UnicodeBlock PLAYING_CARDS =
                new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
        /***/
        public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
                new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
                        MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
        /***/
        public static final UnicodeBlock EMOTICONS =
                new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
        /***/
        public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
                new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
        /***/
        public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
                new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/

        /* New blocks in Unicode 6.1 */

        /***/
        public static final UnicodeBlock ARABIC_EXTENDED_A =
                new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
        /***/
        public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
                new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
        /***/
        public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
        /***/
        public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
                new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
        /***/
        public static final UnicodeBlock MEROITIC_CURSIVE =
                new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
        /***/
        public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
                new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
        /***/
        public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
        /***/
        public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
        /***/
        public static final UnicodeBlock SORA_SOMPENG =
                new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
        /***/
        public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
                new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
        /***/
        public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/

        /* New blocks in Unicode 7.0 */

        /***/
        public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
        /***/
        public static final UnicodeBlock CAUCASIAN_ALBANIAN =
                new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
        /***/
        public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
                new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
        /***/
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
                new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
        /***/
        public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
        /***/
        public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
        /***/
        public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
                new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
        /***/
        public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
        /***/
        public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
        /***/
        public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
        /***/
        public static final UnicodeBlock LATIN_EXTENDED_E =
                new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
        /***/
        public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
        /***/
        public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
        /***/
        public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
        /***/
        public static final UnicodeBlock MENDE_KIKAKUI =
                new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
        /***/
        public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
        /***/
        public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
        /***/
        public static final UnicodeBlock MYANMAR_EXTENDED_B =
                new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
        /***/
        public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
        /***/
        public static final UnicodeBlock OLD_NORTH_ARABIAN =
                new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
        /***/
        public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
        /***/
        public static final UnicodeBlock ORNAMENTAL_DINGBATS =
                new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
        /***/
        public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
        /***/
        public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
        /***/
        public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
        /***/
        public static final UnicodeBlock PSALTER_PAHLAVI =
                new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
        /***/
        public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
                new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
        /***/
        public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
        /***/
        public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
                new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
        /***/
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
                new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
        /***/
        public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
        /***/
        public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/

        /* New blocks in Unicode 8.0 */

        /***/
        public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
        /***/
        public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
                new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
        /***/
        public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
                new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
        /***/
        public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
                new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
        /***/
        public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
        /***/
        public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
        /***/
        public static final UnicodeBlock OLD_HUNGARIAN =
                new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
        /***/
        public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
                new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
                        SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
        /***/
        public static final UnicodeBlock SUTTON_SIGNWRITING =
                new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/

        /* New blocks in Unicode 9.0 */

        /***/
        public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
        /***/
        public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
        /***/
        public static final UnicodeBlock CYRILLIC_EXTENDED_C =
                new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
        /***/
        public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
                new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
        /***/
        public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
                new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
        /***/
        public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
        /***/
        public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
                new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
        /***/
        public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
        /***/
        public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
        /***/
        public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
        /***/
        public static final UnicodeBlock TANGUT_COMPONENTS =
                new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/

        // New blocks in Unicode 10.0

        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/
        /***/
        public static final UnicodeBlock KANA_EXTENDED_A =
                new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/
        /***/
        public static final UnicodeBlock MASARAM_GONDI =
                new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/
        /***/
        public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/
        /***/
        public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/
        /***/
        public static final UnicodeBlock SYRIAC_SUPPLEMENT =
                new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/
        /***/
        public static final UnicodeBlock ZANABAZAR_SQUARE =
                new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/

        // New blocks in Unicode 11.0

        /***/
        public static final UnicodeBlock CHESS_SYMBOLS =
                new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/
        /***/
        public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/
        /***/
        public static final UnicodeBlock GEORGIAN_EXTENDED =
                new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/
        /***/
        public static final UnicodeBlock GUNJALA_GONDI =
                new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/
        /***/
        public static final UnicodeBlock HANIFI_ROHINGYA =
                new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/
        /***/
        public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
                new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/
        /***/
        public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/
        /***/
        public static final UnicodeBlock MAYAN_NUMERALS =
                new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/
        /***/
        public static final UnicodeBlock MEDEFAIDRIN =
                new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/
        /***/
        public static final UnicodeBlock OLD_SOGDIAN =
                new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/
        /***/
        public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/

        // New blocks in Unicode 12.0

        /***/
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
                new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/
        /***/
        public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/
        /***/
        public static final UnicodeBlock NANDINAGARI =
                new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/
        /***/
        public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
                new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/
        /***/
        public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
                new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/
        /***/
        public static final UnicodeBlock SMALL_KANA_EXTENSION =
                new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/
        /***/
        public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
                new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/
        /***/
        public static final UnicodeBlock TAMIL_SUPPLEMENT =
                new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/
        /***/
        public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/

        // New blocks in Unicode 13.0

        /***/
        public static final UnicodeBlock CHORASMIAN =
                new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/
        /***/
        public static final UnicodeBlock DIVES_AKURU =
                new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/
        /***/
        public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
                new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/
        /***/
        public static final UnicodeBlock LISU_SUPPLEMENT =
                new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/
        /***/
        public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
                new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/
        /***/
        public static final UnicodeBlock TANGUT_SUPPLEMENT =
                new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/
        /***/
        public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/

        // New blocks in Unicode 14.0

        /***/
        public static final UnicodeBlock ARABIC_EXTENDED_B =
                new UnicodeBlock("ARABIC_EXTENDED_B", ARABIC_EXTENDED_B_ID); /*[0870]*/
        /***/
        public static final UnicodeBlock CYPRO_MINOAN =
                new UnicodeBlock("CYPRO_MINOAN", CYPRO_MINOAN_ID); /*[12F90]*/
        /***/
        public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
                new UnicodeBlock("ETHIOPIC_EXTENDED_B", ETHIOPIC_EXTENDED_B_ID); /*[1E7E0]*/
        /***/
        public static final UnicodeBlock KANA_EXTENDED_B =
                new UnicodeBlock("KANA_EXTENDED_B", KANA_EXTENDED_B_ID); /*[1AFF0]*/
        /***/
        public static final UnicodeBlock LATIN_EXTENDED_F =
                new UnicodeBlock("LATIN_EXTENDED_F", LATIN_EXTENDED_F_ID); /*[10780]*/
        /***/
        public static final UnicodeBlock LATIN_EXTENDED_G =
                new UnicodeBlock("LATIN_EXTENDED_G", LATIN_EXTENDED_G_ID); /*[1DF00]*/
        /***/
        public static final UnicodeBlock OLD_UYGHUR =
                new UnicodeBlock("OLD_UYGHUR", OLD_UYGHUR_ID); /*[10F70]*/
        /***/
        public static final UnicodeBlock TANGSA = new UnicodeBlock("TANGSA", TANGSA_ID); /*[16A70]*/
        /***/
        public static final UnicodeBlock TOTO = new UnicodeBlock("TOTO", TOTO_ID); /*[1E290]*/
        /***/
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
                new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
                        UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID); /*[11AB0]*/
        /***/
        public static final UnicodeBlock VITHKUQI =
                new UnicodeBlock("VITHKUQI", VITHKUQI_ID); /*[10570]*/
        /***/
        public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
                new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
                        ZNAMENNY_MUSICAL_NOTATION_ID); /*[1CF00]*/

        // New blocks in Unicode 15.0

        /***/
        public static final UnicodeBlock ARABIC_EXTENDED_C =
                new UnicodeBlock("ARABIC_EXTENDED_C", ARABIC_EXTENDED_C_ID); /*[10EC0]*/
        /***/
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID); /*[31350]*/
        /***/
        public static final UnicodeBlock CYRILLIC_EXTENDED_D =
                new UnicodeBlock("CYRILLIC_EXTENDED_D", CYRILLIC_EXTENDED_D_ID); /*[1E030]*/
        /***/
        public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
                new UnicodeBlock("DEVANAGARI_EXTENDED_A", DEVANAGARI_EXTENDED_A_ID); /*[11B00]*/
        /***/
        public static final UnicodeBlock KAKTOVIK_NUMERALS =
                new UnicodeBlock("KAKTOVIK_NUMERALS", KAKTOVIK_NUMERALS_ID); /*[1D2C0]*/
        /***/
        public static final UnicodeBlock KAWI = new UnicodeBlock("KAWI", KAWI_ID); /*[11F00]*/
        /***/
        public static final UnicodeBlock NAG_MUNDARI =
                new UnicodeBlock("NAG_MUNDARI", NAG_MUNDARI_ID); /*[1E4D0]*/

        // New block in Unicode 15.1

        /***/
@android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API) 2397 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 2398 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I", 2399 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_ID); /*[2EBF0]*/ 2400 2401 /** 2402 */ 2403 public static final UnicodeBlock INVALID_CODE 2404 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2405 2406 static { 2407 for (int blockId = 0; blockId < COUNT; ++blockId) { 2408 if (BLOCKS_[blockId] == null) { 2409 throw new java.lang.IllegalStateException( 2410 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2411 } 2412 } 2413 } 2414 2415 // public methods -------------------------------------------------- 2416 2417 /** 2418 * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID. 2419 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2420 * @param id UnicodeBlock ID 2421 * @return the only instance of the UnicodeBlock with the argument ID 2422 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2423 * returned. 2424 */ getInstance(int id)2425 public static UnicodeBlock getInstance(int id) 2426 { 2427 if (id >= 0 && id < BLOCKS_.length) { 2428 return BLOCKS_[id]; 2429 } 2430 return INVALID_CODE; 2431 } 2432 2433 /** 2434 * Returns the Unicode allocation block that contains the code point, 2435 * or null if the code point is not a member of a defined block. 2436 * @param ch code point to be tested 2437 * @return the Unicode allocation block that contains the code point 2438 */ of(int ch)2439 public static UnicodeBlock of(int ch) 2440 { 2441 if (ch > MAX_VALUE) { 2442 return INVALID_CODE; 2443 } 2444 2445 return UnicodeBlock.getInstance( 2446 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2447 } 2448 2449 /** 2450 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2451 * Returns the Unicode block with the given name. 
<strong>[icu] Note:</strong> Unlike 2452 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2453 * against the official UCD name and the Java block name 2454 * (ignoring case). 2455 * @param blockName the name of the block to match 2456 * @return the UnicodeBlock with that name 2457 * @throws IllegalArgumentException if the blockName could not be matched 2458 */ forName(String blockName)2459 public static final UnicodeBlock forName(String blockName) { 2460 Map<String, UnicodeBlock> m = null; 2461 if (mref != null) { 2462 m = mref.get(); 2463 } 2464 if (m == null) { 2465 m = new HashMap<>(BLOCKS_.length); 2466 for (int i = 0; i < BLOCKS_.length; ++i) { 2467 UnicodeBlock b = BLOCKS_[i]; 2468 String name = trimBlockName( 2469 getPropertyValueName(UProperty.BLOCK, b.getID(), 2470 UProperty.NameChoice.LONG)); 2471 m.put(name, b); 2472 } 2473 mref = new SoftReference<>(m); 2474 } 2475 UnicodeBlock b = m.get(trimBlockName(blockName)); 2476 if (b == null) { 2477 throw new IllegalArgumentException(); 2478 } 2479 return b; 2480 } 2481 private static SoftReference<Map<String, UnicodeBlock>> mref; 2482 trimBlockName(String name)2483 private static String trimBlockName(String name) { 2484 String upper = name.toUpperCase(Locale.ENGLISH); 2485 StringBuilder result = new StringBuilder(upper.length()); 2486 for (int i = 0; i < upper.length(); i++) { 2487 char c = upper.charAt(i); 2488 if (c != ' ' && c != '_' && c != '-') { 2489 result.append(c); 2490 } 2491 } 2492 return result.toString(); 2493 } 2494 2495 /** 2496 * {icu} Returns the type ID of this Unicode block 2497 * @return integer type ID of this Unicode block 2498 */ getID()2499 public int getID() 2500 { 2501 return m_id_; 2502 } 2503 2504 // private data members --------------------------------------------- 2505 2506 /** 2507 * Identification code for this UnicodeBlock 2508 */ 2509 private int m_id_; 2510 2511 // private constructor ---------------------------------------------- 2512 2513 /** 
         * UnicodeBlock constructor.
         * Stores the id and, when the id is non-negative, registers this instance
         * in {@code BLOCKS_} at index {@code id}.
         * @param name name of this UnicodeBlock
         * @param id unique id of this UnicodeBlock
         * @exception NullPointerException if name is <code>null</code>
         */
        private UnicodeBlock(String name, int id)
        {
            super(name);
            m_id_ = id;
            // Negative ids are kept out of the lookup table.
            // NOTE(review): presumably only INVALID_CODE_ID is negative -- confirm.
            if (id >= 0) {
                BLOCKS_[id] = this;
            }
        }
    }

    /**
     * East Asian Width constants.
     * Integer names for the values of the East Asian Width property.
     * @see UProperty#EAST_ASIAN_WIDTH
     * @see UCharacter#getIntPropertyValue
     */
    public static interface EastAsianWidth
    {
        /**
         */
        public static final int NEUTRAL = 0;
        /**
         */
        public static final int AMBIGUOUS = 1;
        /**
         */
        public static final int HALFWIDTH = 2;
        /**
         */
        public static final int FULLWIDTH = 3;
        /**
         */
        public static final int NARROW = 4;
        /**
         */
        public static final int WIDE = 5;
        /**
         * One more than the highest normal EastAsianWidth value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         * @hide unsupported on Android
         */
        @Deprecated
        public static final int COUNT = 6;
    }

    /**
     * Decomposition Type constants.
     * @see UProperty#DECOMPOSITION_TYPE
     * @see UCharacter#getIntPropertyValue
     */
    public static interface DecompositionType
    {
        /**
         */
        public static final int NONE = 0;
        /**
         */
        public static final int CANONICAL = 1;
        /**
         */
        public static final int COMPAT = 2;
        /**
         */
        public static final int CIRCLE = 3;
        /**
         */
        public static final int FINAL = 4;
        /**
         */
        public static final int FONT = 5;
        /**
         */
        public static final int FRACTION = 6;
        /**
         */
        public static final int INITIAL = 7;
        /**
         */
        public static final int ISOLATED = 8;
        /**
         */
        public static final int MEDIAL = 9;
        /**
         */
        public static final int NARROW = 10;
        /**
         */
        public static final int NOBREAK = 11;
        /**
         */
        public static final int SMALL = 12;
        /**
         */
        public static final int SQUARE = 13;
        /**
         */
        public static final int SUB = 14;
        /**
         */
        public static final int SUPER = 15;
        /**
         */
        public static final int VERTICAL = 16;
        /**
         */
        public static final int WIDE = 17;
        /**
         * One more than the highest normal DecompositionType value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         * @hide unsupported on Android
         */
        @Deprecated
        public static final int COUNT = 18;
    }

    /**
     * Joining Type constants.
     * @see UProperty#JOINING_TYPE
     */
    public static interface JoiningType
    {
        /**
         */
        public static final int NON_JOINING = 0;
        /**
         */
        public static final int JOIN_CAUSING = 1;
        /**
         */
        public static final int DUAL_JOINING = 2;
        /**
         */
        public static final int LEFT_JOINING = 3;
        /**
         */
        public static final int RIGHT_JOINING = 4;
        /**
         */
        public static final int TRANSPARENT = 5;
        /**
         * One more than the highest normal JoiningType value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         * @hide unsupported on Android
         */
        @Deprecated
        public static final int COUNT = 6;
    }

    /**
     * Joining Group constants.
     * @see UProperty#JOINING_GROUP
     */
    public static interface JoiningGroup
    {
        /**
         */
        public static final int NO_JOINING_GROUP = 0;
        /**
         */
        public static final int AIN = 1;
        /**
         */
        public static final int ALAPH = 2;
        /**
         */
        public static final int ALEF = 3;
        /**
         */
        public static final int BEH = 4;
        /**
         */
        public static final int BETH = 5;
        /**
         */
        public static final int DAL = 6;
        /**
         */
        public static final int DALATH_RISH = 7;
        /**
         */
        public static final int E = 8;
        /**
         */
        public static final int FEH = 9;
        /**
         */
        public static final int FINAL_SEMKATH = 10;
        /**
         */
        public static final int GAF = 11;
        /**
         */
        public static final int GAMAL = 12;
        /**
         */
        public static final int HAH = 13;
        /***/
        public static final int TEH_MARBUTA_GOAL = 14;
        /**
         * Alias with the same value as {@link #TEH_MARBUTA_GOAL}.
         */
        public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
        /**
         */
        public static final int HE = 15;
        /**
         */
        public static final int HEH = 16;
        /**
         */
        public static final int HEH_GOAL = 17;
        /**
         */
        public static final int HETH = 18;
        /**
         */
        public static final int KAF = 19;
        /**
         */
        public static final int KAPH = 20;
        /**
         */
        public static final int KNOTTED_HEH = 21;
        /**
         */
        public static final int LAM = 22;
        /**
         */
        public static final int LAMADH = 23;
        /**
         */
        public static final int MEEM = 24;
        /**
         */
        public static final int MIM = 25;
        /**
         */
        public static final int NOON = 26;
        /**
         */
        public static final int NUN = 27;
        /**
         */
        public static final int PE = 28;
        /**
         */
        public static final int QAF = 29;
        /**
         */
        public static final int QAPH = 30;
        /**
         */
        public static final int REH = 31;
        /**
         */
        public static final int REVERSED_PE = 32;
        /**
         */
        public static final int SAD = 33;
        /**
         */
        public static final int SADHE = 34;
        /**
         */
        public static final int SEEN = 35;
        /**
         */
        public static final int SEMKATH = 36;
        /**
         */
        public static final int SHIN = 37;
        /**
         */
        public static final int SWASH_KAF = 38;
        /**
         */
        public static final int SYRIAC_WAW = 39;
        /**
         */
        public static final int TAH = 40;
        /**
         */
        public static final int TAW = 41;
        /**
         */
        public static final int TEH_MARBUTA = 42;
        /**
         */
        public static final int TETH = 43;
        /**
         */
        public static final int WAW = 44;
        /**
         */
        public static final int YEH = 45;
        /**
         */
        public static final int YEH_BARREE = 46;
        /**
         */
        public static final int YEH_WITH_TAIL = 47;
        /**
         */
        public static final int YUDH = 48;
        /**
         */
        public static final int YUDH_HE = 49;
        /**
         */
        public static final int ZAIN = 50;
        /**
         */
        public static final int FE = 51;
        /**
         */
        public static final int KHAPH = 52;
        /**
         */
        public static final int ZHAIN = 53;
        /**
         */
        public static final int BURUSHASKI_YEH_BARREE = 54;
        /***/
        public static final int FARSI_YEH = 55;
        /***/
        public static final int NYA = 56;
        /***/
        public static final int ROHINGYA_YEH = 57;

        /***/
        public static final int MANICHAEAN_ALEPH = 58;
        /***/
        public static final int MANICHAEAN_AYIN = 59;
        /***/
        public static final int MANICHAEAN_BETH = 60;
        /***/
        public static final int MANICHAEAN_DALETH = 61;
        /***/
        public static final int MANICHAEAN_DHAMEDH = 62;
        /***/
        public static final int MANICHAEAN_FIVE = 63;
        /***/
        public static final int MANICHAEAN_GIMEL = 64;
        /***/
        public static final int MANICHAEAN_HETH = 65;
        /***/
        public static final int MANICHAEAN_HUNDRED = 66;
        /***/
        public static final int MANICHAEAN_KAPH = 67;
        /***/
        public static final int MANICHAEAN_LAMEDH = 68;
        /***/
        public static final int MANICHAEAN_MEM = 69;
        /***/
        public static final int MANICHAEAN_NUN = 70;
        /***/
        public static final int MANICHAEAN_ONE = 71;
        /***/
        public static final int MANICHAEAN_PE = 72;
        /***/
        public static final int MANICHAEAN_QOPH = 73;
        /***/
        public static final int MANICHAEAN_RESH = 74;
        /***/
        public static final int MANICHAEAN_SADHE = 75;
        /***/
        public static final int MANICHAEAN_SAMEKH = 76;
        /***/
        public static final int MANICHAEAN_TAW = 77;
        /***/
        public static final int MANICHAEAN_TEN = 78;
        /***/
        public static final int MANICHAEAN_TETH = 79;
        /***/
        public static final int MANICHAEAN_THAMEDH = 80;
        /***/
        public static final int MANICHAEAN_TWENTY = 81;
        /***/
        public static final int MANICHAEAN_WAW = 82;
        /***/
        public static final int MANICHAEAN_YODH = 83;
        /***/
        public static final int MANICHAEAN_ZAYIN = 84;
        /***/
        public static final int STRAIGHT_WAW = 85;

        /***/
        public static final int AFRICAN_FEH = 86;
        /***/
        public static final int AFRICAN_NOON = 87;
        /***/
        public static final int AFRICAN_QAF = 88;

        /***/
        public static final int MALAYALAM_BHA = 89;
        /***/
        public static final int MALAYALAM_JA = 90;
        /***/
        public static final int MALAYALAM_LLA = 91;
        /***/
        public static final int MALAYALAM_LLLA = 92;
        /***/
        public static final int MALAYALAM_NGA = 93;
        /***/
        public static final int MALAYALAM_NNA = 94;
        /***/
        public static final int MALAYALAM_NNNA = 95;
        /***/
        public static final int MALAYALAM_NYA = 96;
        /***/
        public static final int MALAYALAM_RA = 97;
        /***/
        public static final int MALAYALAM_SSA = 98;
        /***/
        public static final int MALAYALAM_TTA = 99;

        /***/
        public static final int HANIFI_ROHINGYA_KINNA_YA = 100;
        /***/
        public static final int HANIFI_ROHINGYA_PA = 101;

        /***/
        public static final int THIN_YEH = 102;
        /***/
        public static final int VERTICAL_TAIL = 103;

        /**
         * One more than the highest normal JoiningGroup value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         * @hide unsupported on Android
         */
        @Deprecated
        public static final int COUNT = 104;
    }

    /**
     * Grapheme Cluster Break constants.
2961 * @see UProperty#GRAPHEME_CLUSTER_BREAK 2962 */ 2963 public static interface GraphemeClusterBreak { 2964 /** 2965 */ 2966 public static final int OTHER = 0; 2967 /** 2968 */ 2969 public static final int CONTROL = 1; 2970 /** 2971 */ 2972 public static final int CR = 2; 2973 /** 2974 */ 2975 public static final int EXTEND = 3; 2976 /** 2977 */ 2978 public static final int L = 4; 2979 /** 2980 */ 2981 public static final int LF = 5; 2982 /** 2983 */ 2984 public static final int LV = 6; 2985 /** 2986 */ 2987 public static final int LVT = 7; 2988 /** 2989 */ 2990 public static final int T = 8; 2991 /** 2992 */ 2993 public static final int V = 9; 2994 /** 2995 */ 2996 public static final int SPACING_MARK = 10; 2997 /** 2998 */ 2999 public static final int PREPEND = 11; 3000 /***/ 3001 public static final int REGIONAL_INDICATOR = 12; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3002 /***/ 3003 public static final int E_BASE = 13; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3004 /***/ 3005 public static final int E_BASE_GAZ = 14; /*[EBG]*/ 3006 /***/ 3007 public static final int E_MODIFIER = 15; /*[EM]*/ 3008 /***/ 3009 public static final int GLUE_AFTER_ZWJ = 16; /*[GAZ]*/ 3010 /***/ 3011 public static final int ZWJ = 17; /*[ZWJ]*/ 3012 3013 /** 3014 * One more than the highest normal GraphemeClusterBreak value. 3015 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK). 3016 * 3017 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3018 * @hide unsupported on Android 3019 */ 3020 @Deprecated 3021 public static final int COUNT = 18; 3022 } 3023 3024 /** 3025 * Word Break constants. 
3026 * @see UProperty#WORD_BREAK 3027 */ 3028 public static interface WordBreak { 3029 /** 3030 */ 3031 public static final int OTHER = 0; 3032 /** 3033 */ 3034 public static final int ALETTER = 1; 3035 /** 3036 */ 3037 public static final int FORMAT = 2; 3038 /** 3039 */ 3040 public static final int KATAKANA = 3; 3041 /** 3042 */ 3043 public static final int MIDLETTER = 4; 3044 /** 3045 */ 3046 public static final int MIDNUM = 5; 3047 /** 3048 */ 3049 public static final int NUMERIC = 6; 3050 /** 3051 */ 3052 public static final int EXTENDNUMLET = 7; 3053 /** 3054 */ 3055 public static final int CR = 8; 3056 /** 3057 */ 3058 public static final int EXTEND = 9; 3059 /** 3060 */ 3061 public static final int LF = 10; 3062 /** 3063 */ 3064 public static final int MIDNUMLET = 11; 3065 /** 3066 */ 3067 public static final int NEWLINE = 12; 3068 /***/ 3069 public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3070 /***/ 3071 public static final int HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 3072 /***/ 3073 public static final int SINGLE_QUOTE = 15; /*[SQ]*/ 3074 /***/ 3075 public static final int DOUBLE_QUOTE = 16; /*[DQ]*/ 3076 /***/ 3077 public static final int E_BASE = 17; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3078 /***/ 3079 public static final int E_BASE_GAZ = 18; /*[EBG]*/ 3080 /***/ 3081 public static final int E_MODIFIER = 19; /*[EM]*/ 3082 /***/ 3083 public static final int GLUE_AFTER_ZWJ = 20; /*[GAZ]*/ 3084 /***/ 3085 public static final int ZWJ = 21; /*[ZWJ]*/ 3086 /***/ 3087 public static final int WSEGSPACE = 22; /*[WSEGSPACE]*/ 3088 /** 3089 * One more than the highest normal WordBreak value. 3090 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK). 3091 * 3092 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
3093 * @hide unsupported on Android 3094 */ 3095 @Deprecated 3096 public static final int COUNT = 23; 3097 } 3098 3099 /** 3100 * Sentence Break constants. 3101 * @see UProperty#SENTENCE_BREAK 3102 */ 3103 public static interface SentenceBreak { 3104 /** 3105 */ 3106 public static final int OTHER = 0; 3107 /** 3108 */ 3109 public static final int ATERM = 1; 3110 /** 3111 */ 3112 public static final int CLOSE = 2; 3113 /** 3114 */ 3115 public static final int FORMAT = 3; 3116 /** 3117 */ 3118 public static final int LOWER = 4; 3119 /** 3120 */ 3121 public static final int NUMERIC = 5; 3122 /** 3123 */ 3124 public static final int OLETTER = 6; 3125 /** 3126 */ 3127 public static final int SEP = 7; 3128 /** 3129 */ 3130 public static final int SP = 8; 3131 /** 3132 */ 3133 public static final int STERM = 9; 3134 /** 3135 */ 3136 public static final int UPPER = 10; 3137 /** 3138 */ 3139 public static final int CR = 11; 3140 /** 3141 */ 3142 public static final int EXTEND = 12; 3143 /** 3144 */ 3145 public static final int LF = 13; 3146 /** 3147 */ 3148 public static final int SCONTINUE = 14; 3149 /** 3150 * One more than the highest normal SentenceBreak value. 3151 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK). 3152 * 3153 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3154 * @hide unsupported on Android 3155 */ 3156 @Deprecated 3157 public static final int COUNT = 15; 3158 } 3159 3160 /** 3161 * Line Break constants. 
3162 * @see UProperty#LINE_BREAK 3163 */ 3164 public static interface LineBreak 3165 { 3166 /** 3167 */ 3168 public static final int UNKNOWN = 0; 3169 /** 3170 */ 3171 public static final int AMBIGUOUS = 1; 3172 /** 3173 */ 3174 public static final int ALPHABETIC = 2; 3175 /** 3176 */ 3177 public static final int BREAK_BOTH = 3; 3178 /** 3179 */ 3180 public static final int BREAK_AFTER = 4; 3181 /** 3182 */ 3183 public static final int BREAK_BEFORE = 5; 3184 /** 3185 */ 3186 public static final int MANDATORY_BREAK = 6; 3187 /** 3188 */ 3189 public static final int CONTINGENT_BREAK = 7; 3190 /** 3191 */ 3192 public static final int CLOSE_PUNCTUATION = 8; 3193 /** 3194 */ 3195 public static final int COMBINING_MARK = 9; 3196 /** 3197 */ 3198 public static final int CARRIAGE_RETURN = 10; 3199 /** 3200 */ 3201 public static final int EXCLAMATION = 11; 3202 /** 3203 */ 3204 public static final int GLUE = 12; 3205 /** 3206 */ 3207 public static final int HYPHEN = 13; 3208 /** 3209 */ 3210 public static final int IDEOGRAPHIC = 14; 3211 /** 3212 * @see #INSEPARABLE 3213 */ 3214 public static final int INSEPERABLE = 15; 3215 /** 3216 * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
3217 */ 3218 public static final int INSEPARABLE = 15; 3219 /** 3220 */ 3221 public static final int INFIX_NUMERIC = 16; 3222 /** 3223 */ 3224 public static final int LINE_FEED = 17; 3225 /** 3226 */ 3227 public static final int NONSTARTER = 18; 3228 /** 3229 */ 3230 public static final int NUMERIC = 19; 3231 /** 3232 */ 3233 public static final int OPEN_PUNCTUATION = 20; 3234 /** 3235 */ 3236 public static final int POSTFIX_NUMERIC = 21; 3237 /** 3238 */ 3239 public static final int PREFIX_NUMERIC = 22; 3240 /** 3241 */ 3242 public static final int QUOTATION = 23; 3243 /** 3244 */ 3245 public static final int COMPLEX_CONTEXT = 24; 3246 /** 3247 */ 3248 public static final int SURROGATE = 25; 3249 /** 3250 */ 3251 public static final int SPACE = 26; 3252 /** 3253 */ 3254 public static final int BREAK_SYMBOLS = 27; 3255 /** 3256 */ 3257 public static final int ZWSPACE = 28; 3258 /** 3259 */ 3260 public static final int NEXT_LINE = 29; /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 3261 /** 3262 */ 3263 public static final int WORD_JOINER = 30; /*[WJ]*/ 3264 /** 3265 */ 3266 public static final int H2 = 31; /* from here on: new in Unicode 4.1/ICU 3.4 */ 3267 /** 3268 */ 3269 public static final int H3 = 32; 3270 /** 3271 */ 3272 public static final int JL = 33; 3273 /** 3274 */ 3275 public static final int JT = 34; 3276 /** 3277 */ 3278 public static final int JV = 35; 3279 /***/ 3280 public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ 3281 /***/ 3282 public static final int CONDITIONAL_JAPANESE_STARTER = 37; /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 3283 /***/ 3284 public static final int HEBREW_LETTER = 38; /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 3285 /***/ 3286 public static final int REGIONAL_INDICATOR = 39; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3287 /***/ 3288 public static final int E_BASE = 40; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3289 /***/ 3290 public static final int E_MODIFIER = 41; /*[EM]*/ 
3291 /***/ 3292 public static final int ZWJ = 42; /*[ZWJ]*/ 3293 /***/ 3294 @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API) 3295 public static final int AKSARA = 43; /*[AK]*/ /* from here on: new in Unicode 15.1/ICU 74 */ 3296 /***/ 3297 @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API) 3298 public static final int AKSARA_PREBASE = 44; /*[AP]*/ 3299 /***/ 3300 @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API) 3301 public static final int AKSARA_START = 45; /*[AS]*/ 3302 /***/ 3303 @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API) 3304 public static final int VIRAMA_FINAL = 46; /*[VF]*/ 3305 /***/ 3306 @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API) 3307 public static final int VIRAMA = 47; /*[VI]*/ 3308 /** 3309 * One more than the highest normal LineBreak value. 3310 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK). 3311 * 3312 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3313 * @hide unsupported on Android 3314 */ 3315 @Deprecated 3316 public static final int COUNT = 48; 3317 } 3318 3319 /** 3320 * Numeric Type constants. 3321 * @see UProperty#NUMERIC_TYPE 3322 */ 3323 public static interface NumericType 3324 { 3325 /** 3326 */ 3327 public static final int NONE = 0; 3328 /** 3329 */ 3330 public static final int DECIMAL = 1; 3331 /** 3332 */ 3333 public static final int DIGIT = 2; 3334 /** 3335 */ 3336 public static final int NUMERIC = 3; 3337 /** 3338 * One more than the highest normal NumericType value. 3339 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE). 3340 * 3341 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3342 * @hide unsupported on Android 3343 */ 3344 @Deprecated 3345 public static final int COUNT = 4; 3346 } 3347 3348 /** 3349 * Hangul Syllable Type constants. 
3350 * 3351 * @see UProperty#HANGUL_SYLLABLE_TYPE 3352 */ 3353 public static interface HangulSyllableType 3354 { 3355 /** 3356 */ 3357 public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ 3358 /** 3359 */ 3360 public static final int LEADING_JAMO = 1; /*[L]*/ 3361 /** 3362 */ 3363 public static final int VOWEL_JAMO = 2; /*[V]*/ 3364 /** 3365 */ 3366 public static final int TRAILING_JAMO = 3; /*[T]*/ 3367 /** 3368 */ 3369 public static final int LV_SYLLABLE = 4; /*[LV]*/ 3370 /** 3371 */ 3372 public static final int LVT_SYLLABLE = 5; /*[LVT]*/ 3373 /** 3374 * One more than the highest normal HangulSyllableType value. 3375 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE). 3376 * 3377 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3378 * @hide unsupported on Android 3379 */ 3380 @Deprecated 3381 public static final int COUNT = 6; 3382 } 3383 3384 /** 3385 * Bidi Paired Bracket Type constants. 3386 * 3387 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3388 */ 3389 public static interface BidiPairedBracketType { 3390 /** 3391 * Not a paired bracket. 3392 */ 3393 public static final int NONE = 0; 3394 /** 3395 * Open paired bracket. 3396 */ 3397 public static final int OPEN = 1; 3398 /** 3399 * Close paired bracket. 3400 */ 3401 public static final int CLOSE = 2; 3402 /** 3403 * One more than the highest normal BidiPairedBracketType value. 3404 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE). 3405 * 3406 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3407 * @hide unsupported on Android 3408 */ 3409 @Deprecated 3410 public static final int COUNT = 3; 3411 } 3412 3413 /** 3414 * Indic Positional Category constants. 
3415 * 3416 * @see UProperty#INDIC_POSITIONAL_CATEGORY 3417 */ 3418 public static interface IndicPositionalCategory { 3419 /***/ 3420 public static final int NA = 0; 3421 /***/ 3422 public static final int BOTTOM = 1; 3423 /***/ 3424 public static final int BOTTOM_AND_LEFT = 2; 3425 /***/ 3426 public static final int BOTTOM_AND_RIGHT = 3; 3427 /***/ 3428 public static final int LEFT = 4; 3429 /***/ 3430 public static final int LEFT_AND_RIGHT = 5; 3431 /***/ 3432 public static final int OVERSTRUCK = 6; 3433 /***/ 3434 public static final int RIGHT = 7; 3435 /***/ 3436 public static final int TOP = 8; 3437 /***/ 3438 public static final int TOP_AND_BOTTOM = 9; 3439 /***/ 3440 public static final int TOP_AND_BOTTOM_AND_RIGHT = 10; 3441 /***/ 3442 public static final int TOP_AND_LEFT = 11; 3443 /***/ 3444 public static final int TOP_AND_LEFT_AND_RIGHT = 12; 3445 /***/ 3446 public static final int TOP_AND_RIGHT = 13; 3447 /***/ 3448 public static final int VISUAL_ORDER_LEFT = 14; 3449 /***/ 3450 public static final int TOP_AND_BOTTOM_AND_LEFT = 15; 3451 } 3452 3453 /** 3454 * Indic Syllabic Category constants. 
3455 * 3456 * @see UProperty#INDIC_SYLLABIC_CATEGORY 3457 */ 3458 public static interface IndicSyllabicCategory { 3459 /***/ 3460 public static final int OTHER = 0; 3461 /***/ 3462 public static final int AVAGRAHA = 1; 3463 /***/ 3464 public static final int BINDU = 2; 3465 /***/ 3466 public static final int BRAHMI_JOINING_NUMBER = 3; 3467 /***/ 3468 public static final int CANTILLATION_MARK = 4; 3469 /***/ 3470 public static final int CONSONANT = 5; 3471 /***/ 3472 public static final int CONSONANT_DEAD = 6; 3473 /***/ 3474 public static final int CONSONANT_FINAL = 7; 3475 /***/ 3476 public static final int CONSONANT_HEAD_LETTER = 8; 3477 /***/ 3478 public static final int CONSONANT_INITIAL_POSTFIXED = 9; 3479 /***/ 3480 public static final int CONSONANT_KILLER = 10; 3481 /***/ 3482 public static final int CONSONANT_MEDIAL = 11; 3483 /***/ 3484 public static final int CONSONANT_PLACEHOLDER = 12; 3485 /***/ 3486 public static final int CONSONANT_PRECEDING_REPHA = 13; 3487 /***/ 3488 public static final int CONSONANT_PREFIXED = 14; 3489 /***/ 3490 public static final int CONSONANT_SUBJOINED = 15; 3491 /***/ 3492 public static final int CONSONANT_SUCCEEDING_REPHA = 16; 3493 /***/ 3494 public static final int CONSONANT_WITH_STACKER = 17; 3495 /***/ 3496 public static final int GEMINATION_MARK = 18; 3497 /***/ 3498 public static final int INVISIBLE_STACKER = 19; 3499 /***/ 3500 public static final int JOINER = 20; 3501 /***/ 3502 public static final int MODIFYING_LETTER = 21; 3503 /***/ 3504 public static final int NON_JOINER = 22; 3505 /***/ 3506 public static final int NUKTA = 23; 3507 /***/ 3508 public static final int NUMBER = 24; 3509 /***/ 3510 public static final int NUMBER_JOINER = 25; 3511 /***/ 3512 public static final int PURE_KILLER = 26; 3513 /***/ 3514 public static final int REGISTER_SHIFTER = 27; 3515 /***/ 3516 public static final int SYLLABLE_MODIFIER = 28; 3517 /***/ 3518 public static final int TONE_LETTER = 29; 3519 /***/ 3520 public static final 
int TONE_MARK = 30; 3521 /***/ 3522 public static final int VIRAMA = 31; 3523 /***/ 3524 public static final int VISARGA = 32; 3525 /***/ 3526 public static final int VOWEL = 33; 3527 /***/ 3528 public static final int VOWEL_DEPENDENT = 34; 3529 /***/ 3530 public static final int VOWEL_INDEPENDENT = 35; 3531 } 3532 3533 /** 3534 * Vertical Orientation constants. 3535 * 3536 * @see UProperty#VERTICAL_ORIENTATION 3537 */ 3538 public static interface VerticalOrientation { 3539 /***/ 3540 public static final int ROTATED = 0; 3541 /***/ 3542 public static final int TRANSFORMED_ROTATED = 1; 3543 /***/ 3544 public static final int TRANSFORMED_UPRIGHT = 2; 3545 /***/ 3546 public static final int UPRIGHT = 3; 3547 } 3548 3549 /** 3550 * Identifier Status constants. 3551 * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type. 3552 * 3553 * @see UProperty#IDENTIFIER_STATUS 3554 * @hide Only a subset of ICU is exposed in Android 3555 * @hide draft / provisional / internal are hidden on Android 3556 */ 3557 public enum IdentifierStatus { 3558 /** @hide draft / provisional / internal are hidden on Android*/ 3559 RESTRICTED, 3560 /** @hide draft / provisional / internal are hidden on Android*/ 3561 ALLOWED, 3562 } 3563 3564 /** 3565 * Identifier Type constants. 3566 * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type. 
3567 * 3568 * @see UProperty#IDENTIFIER_TYPE 3569 * @hide Only a subset of ICU is exposed in Android 3570 * @hide draft / provisional / internal are hidden on Android 3571 */ 3572 public enum IdentifierType { 3573 /** @hide draft / provisional / internal are hidden on Android*/ 3574 NOT_CHARACTER, 3575 /** @hide draft / provisional / internal are hidden on Android*/ 3576 DEPRECATED, 3577 /** @hide draft / provisional / internal are hidden on Android*/ 3578 DEFAULT_IGNORABLE, 3579 /** @hide draft / provisional / internal are hidden on Android*/ 3580 NOT_NFKC, 3581 /** @hide draft / provisional / internal are hidden on Android*/ 3582 NOT_XID, 3583 /** @hide draft / provisional / internal are hidden on Android*/ 3584 EXCLUSION, 3585 /** @hide draft / provisional / internal are hidden on Android*/ 3586 OBSOLETE, 3587 /** @hide draft / provisional / internal are hidden on Android*/ 3588 TECHNICAL, 3589 /** @hide draft / provisional / internal are hidden on Android*/ 3590 UNCOMMON_USE, 3591 /** @hide draft / provisional / internal are hidden on Android*/ 3592 LIMITED_USE, 3593 /** @hide draft / provisional / internal are hidden on Android*/ 3594 INCLUSION, 3595 /** @hide draft / provisional / internal are hidden on Android*/ 3596 RECOMMENDED, 3597 } 3598 3599 // public data members ----------------------------------------------- 3600 3601 /** 3602 * The lowest Unicode code point value, constant 0. 3603 * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}. 3604 */ 3605 public static final int MIN_VALUE = Character.MIN_CODE_POINT; 3606 3607 /** 3608 * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits). 3609 * Same as {@link Character#MAX_CODE_POINT}. 3610 * 3611 * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE} 3612 * which is still a char with the value U+FFFF. 
3613 */ 3614 public static final int MAX_VALUE = Character.MAX_CODE_POINT; 3615 3616 /** 3617 * The minimum value for Supplementary code points, constant U+10000. 3618 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 3619 */ 3620 public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT; 3621 3622 /** 3623 * Unicode value used when translating into Unicode encoding form and there 3624 * is no existing character. 3625 */ 3626 public static final int REPLACEMENT_CHAR = '\uFFFD'; 3627 3628 /** 3629 * Special value that is returned by getUnicodeNumericValue(int) when no 3630 * numeric value is defined for a code point. 3631 * @see #getUnicodeNumericValue 3632 */ 3633 public static final double NO_NUMERIC_VALUE = -123456789; 3634 3635 /** 3636 * Compatibility constant for Java Character's MIN_RADIX. 3637 */ 3638 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 3639 3640 /** 3641 * Compatibility constant for Java Character's MAX_RADIX. 3642 */ 3643 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 3644 3645 /** 3646 * Do not lowercase non-initial parts of words when titlecasing. 3647 * Option bit for titlecasing APIs that take an options bit set. 3648 * 3649 * By default, titlecasing will titlecase the first cased character 3650 * of a word and lowercase all other characters. 3651 * With this option, the other characters will not be modified. 3652 * 3653 * @see #toTitleCase 3654 */ 3655 public static final int TITLECASE_NO_LOWERCASE = 0x100; 3656 3657 /** 3658 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; 3659 * titlecase exactly the characters at breaks from the iterator. 3660 * Option bit for titlecasing APIs that take an options bit set. 3661 * 3662 * By default, titlecasing will take each break iterator index, 3663 * adjust it by looking for the next cased character, and titlecase that one. 3664 * Other characters are lowercased. 
3665 * 3666 * This follows Unicode 4 & 5 section 3.13 Default Case Operations: 3667 * 3668 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 3669 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 3670 * cased character F. If F exists, map F to default_title(F); then map each 3671 * subsequent character C to default_lower(C). 3672 * 3673 * @see #toTitleCase 3674 * @see #TITLECASE_NO_LOWERCASE 3675 */ 3676 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 3677 3678 // public methods ---------------------------------------------------- 3679 3680 /** 3681 * Returnss the numeric value of a decimal digit code point. 3682 * <br>This method observes the semantics of 3683 * <code>java.lang.Character.digit()</code>. Note that this 3684 * will return positive values for code points for which isDigit 3685 * returns false, just like java.lang.Character. 3686 * <br><em>Semantic Change:</em> In release 1.3.1 and 3687 * prior, this did not treat the European letters as having a 3688 * digit value, and also treated numeric letters and other numbers as 3689 * digits. 3690 * This has been changed to conform to the java semantics. 3691 * <br>A code point is a valid digit if and only if: 3692 * <ul> 3693 * <li>ch is a decimal digit or one of the european letters, and 3694 * <li>the value of ch is less than the specified radix. 3695 * </ul> 3696 * @param ch the code point to query 3697 * @param radix the radix 3698 * @return the numeric value represented by the code point in the 3699 * specified radix, or -1 if the code point is not a decimal digit 3700 * or if its value is too large for the radix 3701 */ digit(int ch, int radix)3702 public static int digit(int ch, int radix) 3703 { 3704 if (2 <= radix && radix <= 36) { 3705 int value = digit(ch); 3706 if (value < 0) { 3707 // ch is not a decimal digit, try latin letters 3708 value = UCharacterProperty.getEuropeanDigit(ch); 3709 } 3710 return (value < radix) ? 
value : -1; 3711 } else { 3712 return -1; // invalid radix 3713 } 3714 } 3715 3716 /** 3717 * Returnss the numeric value of a decimal digit code point. 3718 * <br>This is a convenience overload of <code>digit(int, int)</code> 3719 * that provides a decimal radix. 3720 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 3721 * treated numeric letters and other numbers as digits. This has 3722 * been changed to conform to the java semantics. 3723 * @param ch the code point to query 3724 * @return the numeric value represented by the code point, 3725 * or -1 if the code point is not a decimal digit or if its 3726 * value is too large for a decimal radix 3727 */ digit(int ch)3728 public static int digit(int ch) 3729 { 3730 return UCharacterProperty.INSTANCE.digit(ch); 3731 } 3732 3733 /** 3734 * Returns the numeric value of the code point as a nonnegative 3735 * integer. 3736 * <br>If the code point does not have a numeric value, then -1 is returned. 3737 * <br> 3738 * If the code point has a numeric value that cannot be represented as a 3739 * nonnegative integer (for example, a fractional value), then -2 is 3740 * returned. 3741 * @param ch the code point to query 3742 * @return the numeric value of the code point, or -1 if it has no numeric 3743 * value, or -2 if it has a numeric value that cannot be represented as a 3744 * nonnegative integer 3745 */ getNumericValue(int ch)3746 public static int getNumericValue(int ch) 3747 { 3748 return UCharacterProperty.INSTANCE.getNumericValue(ch); 3749 } 3750 3751 /** 3752 * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the 3753 * Unicode Character Database. 3754 * <p>A "double" return type is necessary because some numeric values are 3755 * fractions, negative, or too large for int. 3756 * <p>For characters without any numeric values in the Unicode Character 3757 * Database, this function will return NO_NUMERIC_VALUE. 
3758 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 3759 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 3760 * return type int and returns -1 when the argument ch does not have a 3761 * corresponding numeric value. This has been changed to synch with ICU4C 3762 * 3763 * This corresponds to the ICU4C function u_getNumericValue. 3764 * @param ch Code point to get the numeric value for. 3765 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 3766 */ getUnicodeNumericValue(int ch)3767 public static double getUnicodeNumericValue(int ch) 3768 { 3769 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 3770 } 3771 3772 /** 3773 * Compatibility override of Java deprecated method. This 3774 * method will always remain deprecated. 3775 * Same as java.lang.Character.isSpace(). 3776 * @param ch the code point 3777 * @return true if the code point is a space character as 3778 * defined by java.lang.Character.isSpace. 3779 * @deprecated ICU 3.4 (Java) 3780 * @hide original deprecated declaration 3781 */ 3782 @Deprecated isSpace(int ch)3783 public static boolean isSpace(int ch) { 3784 return ch <= 0x20 && 3785 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 3786 } 3787 3788 /** 3789 * Returns a value indicating a code point's Unicode category. 3790 * Up-to-date Unicode implementation of java.lang.Character.getType() 3791 * except for the above mentioned code points that had their category 3792 * changed.<br> 3793 * Return results are constants from the interface 3794 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 3795 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 3796 * those returned by java.lang.Character.getType. UCharacterCategory values 3797 * match the ones used in ICU4C, while java.lang.Character type 3798 * values, though similar, skip the value 17. 
3799 * @param ch code point whose type is to be determined 3800 * @return category which is a value of UCharacterCategory 3801 */ getType(int ch)3802 public static int getType(int ch) 3803 { 3804 return UCharacterProperty.INSTANCE.getType(ch); 3805 } 3806 3807 /** 3808 * Determines if a code point has a defined meaning in the up-to-date 3809 * Unicode standard. 3810 * E.g. supplementary code points though allocated space are not defined in 3811 * Unicode yet.<br> 3812 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 3813 * @param ch code point to be determined if it is defined in the most 3814 * current version of Unicode 3815 * @return true if this code point is defined in unicode 3816 */ isDefined(int ch)3817 public static boolean isDefined(int ch) 3818 { 3819 return getType(ch) != 0; 3820 } 3821 3822 /** 3823 * Determines if a code point is a Java digit. 3824 * <br>This method observes the semantics of 3825 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 3826 * digits only. 3827 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 3828 * numeric letters and other numbers as digits. 3829 * This has been changed to conform to the java semantics. 3830 * @param ch code point to query 3831 * @return true if this code point is a digit 3832 */ isDigit(int ch)3833 public static boolean isDigit(int ch) 3834 { 3835 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 3836 } 3837 3838 /** 3839 * Determines if the specified code point is an ISO control character. 
3840 * A code point is considered to be an ISO control character if it is in 3841 * the range \u0000 through \u001F or in the range \u007F through 3842 * \u009F.<br> 3843 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 3844 * @param ch code point to determine if it is an ISO control character 3845 * @return true if code point is a ISO control character 3846 */ isISOControl(int ch)3847 public static boolean isISOControl(int ch) 3848 { 3849 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 3850 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 3851 } 3852 3853 /** 3854 * Determines if the specified code point is a letter. 3855 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 3856 * @param ch code point to determine if it is a letter 3857 * @return true if code point is a letter 3858 */ isLetter(int ch)3859 public static boolean isLetter(int ch) 3860 { 3861 // if props == 0, it will just fall through and return false 3862 return ((1 << getType(ch)) 3863 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3864 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3865 | (1 << UCharacterCategory.TITLECASE_LETTER) 3866 | (1 << UCharacterCategory.MODIFIER_LETTER) 3867 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 3868 } 3869 3870 /** 3871 * Determines if the specified code point is a letter or digit. 3872 * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii 3873 * characters 'A' - 'Z' and 'a' - 'z' as digits. 
3874 * @param ch code point to determine if it is a letter or a digit 3875 * @return true if code point is a letter or a digit 3876 */ isLetterOrDigit(int ch)3877 public static boolean isLetterOrDigit(int ch) 3878 { 3879 return ((1 << getType(ch)) 3880 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3881 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3882 | (1 << UCharacterCategory.TITLECASE_LETTER) 3883 | (1 << UCharacterCategory.MODIFIER_LETTER) 3884 | (1 << UCharacterCategory.OTHER_LETTER) 3885 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 3886 } 3887 3888 /** 3889 * Compatibility override of Java deprecated method. This 3890 * method will always remain deprecated. Delegates to 3891 * java.lang.Character.isJavaIdentifierStart. 3892 * @param cp the code point 3893 * @return true if the code point can start a java identifier. 3894 * @deprecated ICU 3.4 (Java) 3895 * @hide original deprecated declaration 3896 */ 3897 @Deprecated isJavaLetter(int cp)3898 public static boolean isJavaLetter(int cp) { 3899 return isJavaIdentifierStart(cp); 3900 } 3901 3902 /** 3903 * Compatibility override of Java deprecated method. This 3904 * method will always remain deprecated. Delegates to 3905 * java.lang.Character.isJavaIdentifierPart. 3906 * @param cp the code point 3907 * @return true if the code point can continue a java identifier. 3908 * @deprecated ICU 3.4 (Java) 3909 * @hide original deprecated declaration 3910 */ 3911 @Deprecated isJavaLetterOrDigit(int cp)3912 public static boolean isJavaLetterOrDigit(int cp) { 3913 return isJavaIdentifierPart(cp); 3914 } 3915 3916 /** 3917 * Compatibility override of Java method, delegates to 3918 * java.lang.Character.isJavaIdentifierStart. 3919 * @param cp the code point 3920 * @return true if the code point can start a java identifier. 
3921 */ isJavaIdentifierStart(int cp)3922 public static boolean isJavaIdentifierStart(int cp) { 3923 // note, downcast to char for jdk 1.4 compatibility 3924 return java.lang.Character.isJavaIdentifierStart((char)cp); 3925 } 3926 3927 /** 3928 * Compatibility override of Java method, delegates to 3929 * java.lang.Character.isJavaIdentifierPart. 3930 * @param cp the code point 3931 * @return true if the code point can continue a java identifier. 3932 */ isJavaIdentifierPart(int cp)3933 public static boolean isJavaIdentifierPart(int cp) { 3934 // note, downcast to char for jdk 1.4 compatibility 3935 return java.lang.Character.isJavaIdentifierPart((char)cp); 3936 } 3937 3938 /** 3939 * Determines if the specified code point is a lowercase character. 3940 * UnicodeData only contains case mappings for code points where they are 3941 * one-to-one mappings; it also omits information about context-sensitive 3942 * case mappings.<br> For more information about Unicode case mapping 3943 * please refer to the 3944 * <a href=https://www.unicode.org/reports/tr21/>Technical report 3945 * #21</a>.<br> 3946 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 3947 * @param ch code point to determine if it is in lowercase 3948 * @return true if code point is a lowercase character 3949 */ isLowerCase(int ch)3950 public static boolean isLowerCase(int ch) 3951 { 3952 // if props == 0, it will just fall through and return false 3953 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 3954 } 3955 3956 /** 3957 * Determines if the specified code point is a white space character. 3958 * A code point is considered to be an whitespace character if and only 3959 * if it satisfies one of the following criteria: 3960 * <ul> 3961 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 3962 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 3963 * <li> It is \u0009, HORIZONTAL TABULATION. 
3964 * <li> It is \u000A, LINE FEED. 3965 * <li> It is \u000B, VERTICAL TABULATION. 3966 * <li> It is \u000C, FORM FEED. 3967 * <li> It is \u000D, CARRIAGE RETURN. 3968 * <li> It is \u001C, FILE SEPARATOR. 3969 * <li> It is \u001D, GROUP SEPARATOR. 3970 * <li> It is \u001E, RECORD SEPARATOR. 3971 * <li> It is \u001F, UNIT SEPARATOR. 3972 * </ul> 3973 * 3974 * This API tries to sync with the semantics of Java's 3975 * java.lang.Character.isWhitespace(), but it may not return 3976 * the exact same results because of the Unicode version 3977 * difference. 3978 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 3979 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 3980 * See http://www.unicode.org/versions/Unicode4.0.1/ 3981 * @param ch code point to determine if it is a white space 3982 * @return true if the specified code point is a white space character 3983 */ isWhitespace(int ch)3984 public static boolean isWhitespace(int ch) 3985 { 3986 // exclude no-break spaces 3987 // if props == 0, it will just fall through and return false 3988 return ((1 << getType(ch)) & 3989 ((1 << UCharacterCategory.SPACE_SEPARATOR) 3990 | (1 << UCharacterCategory.LINE_SEPARATOR) 3991 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 3992 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 3993 // TAB VT LF FF CR FS GS RS US NL are all control characters 3994 // that are white spaces. 3995 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 3996 } 3997 3998 /** 3999 * Determines if the specified code point is a Unicode specified space 4000 * character, i.e. if code point is in the category Zs, Zl and Zp. 4001 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
4002 * @param ch code point to determine if it is a space 4003 * @return true if the specified code point is a space character 4004 */ isSpaceChar(int ch)4005 public static boolean isSpaceChar(int ch) 4006 { 4007 // if props == 0, it will just fall through and return false 4008 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 4009 | (1 << UCharacterCategory.LINE_SEPARATOR) 4010 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 4011 != 0; 4012 } 4013 4014 /** 4015 * Determines if the specified code point is a titlecase character. 4016 * UnicodeData only contains case mappings for code points where they are 4017 * one-to-one mappings; it also omits information about context-sensitive 4018 * case mappings.<br> 4019 * For more information about Unicode case mapping please refer to the 4020 * <a href=https://www.unicode.org/reports/tr21/> 4021 * Technical report #21</a>.<br> 4022 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 4023 * @param ch code point to determine if it is in title case 4024 * @return true if the specified code point is a titlecase character 4025 */ isTitleCase(int ch)4026 public static boolean isTitleCase(int ch) 4027 { 4028 // if props == 0, it will just fall through and return false 4029 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 4030 } 4031 4032 /** 4033 * Determines if the specified character is permissible as a 4034 * non-initial character of an identifier 4035 * according to UAX #31 Unicode Identifier and Pattern Syntax. 4036 * 4037 * <p>Same as Unicode ID_Continue ({@link UProperty#ID_CONTINUE}). 4038 * 4039 * <p>Note that this differs from {@link java.lang.Character#isUnicodeIdentifierPart(char)} 4040 * which implements a different identifier profile. 
4041 * 4042 * @param ch the code point to be tested 4043 * @return true if the code point may occur as a non-initial character of an identifier 4044 */ isUnicodeIdentifierPart(int ch)4045 public static boolean isUnicodeIdentifierPart(int ch) 4046 { 4047 return hasBinaryProperty(ch, UProperty.ID_CONTINUE); // single code point 4048 } 4049 4050 /** 4051 * Determines if the specified character is permissible as the first character in an identifier 4052 * according to UAX #31 Unicode Identifier and Pattern Syntax. 4053 * 4054 * <p>Same as Unicode ID_Start ({@link UProperty#ID_START}). 4055 * 4056 * <p>Note that this differs from {@link java.lang.Character#isUnicodeIdentifierStart(char)} 4057 * which implements a different identifier profile. 4058 * 4059 * @param ch the code point to be tested 4060 * @return true if the code point may start an identifier 4061 */ isUnicodeIdentifierStart(int ch)4062 public static boolean isUnicodeIdentifierStart(int ch) 4063 { 4064 return hasBinaryProperty(ch, UProperty.ID_START); // single code point 4065 } 4066 4067 /** 4068 * Does the set of Identifier_Type values code point c contain the given type? 4069 * 4070 * <p>Used for UTS #39 General Security Profile for Identifiers 4071 * (https://www.unicode.org/reports/tr39/#General_Security_Profile). 4072 * 4073 * <p>Each code point maps to a <i>set</i> of UIdentifierType values. 4074 * 4075 * @param c code point 4076 * @param type Identifier_Type to check 4077 * @return true if type is in Identifier_Type(c) 4078 * @hide draft / provisional / internal are hidden on Android 4079 */ hasIdentifierType(int c, IdentifierType type)4080 public static final boolean hasIdentifierType(int c, IdentifierType type) { 4081 return UCharacterProperty.INSTANCE.hasIDType(c, type); 4082 } 4083 4084 /** 4085 * Writes code point c's Identifier_Type as a set of IdentifierType values and 4086 * returns the number of types. 4087 * The set is cleared before c's types are added. 
4088 * 4089 * <p>Used for UTS #39 General Security Profile for Identifiers 4090 * (https://www.unicode.org/reports/tr39/#General_Security_Profile). 4091 * 4092 * <p>Each code point maps to a <i>set</i> of IdentifierType values. 4093 * There is always at least one type. 4094 * Only some of the types can be combined with others, 4095 * and usually only a small number of types occur together. 4096 * Future versions might add additional types. 4097 * See UTS #39 and its data files for details. 4098 * 4099 * @param c code point 4100 * @param types output set 4101 * @return number of values in c's Identifier_Type 4102 * @hide draft / provisional / internal are hidden on Android 4103 */ getIdentifierTypes(int c, EnumSet<IdentifierType> types)4104 public static final int getIdentifierTypes(int c, EnumSet<IdentifierType> types) { 4105 return UCharacterProperty.INSTANCE.getIDTypes(c, types); 4106 } 4107 4108 /** 4109 * Determines if the specified code point should be regarded as an 4110 * ignorable character in a Java identifier. 4111 * A character is Java-identifier-ignorable if it has the general category 4112 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 4113 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 4114 * Up-to-date Unicode implementation of 4115 * java.lang.Character.isIdentifierIgnorable().<br> 4116 * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>. 4117 * <p>Note that Unicode just recommends to ignore Cf (format controls). 4118 * @param ch code point to be determined if it can be ignored in a Unicode 4119 * identifier. 4120 * @return true if the code point is ignorable 4121 */ isIdentifierIgnorable(int ch)4122 public static boolean isIdentifierIgnorable(int ch) 4123 { 4124 // see java.lang.Character.isIdentifierIgnorable() on range of 4125 // ignorable characters. 
4126 if (ch <= 0x9f) { 4127 return isISOControl(ch) 4128 && !((ch >= 0x9 && ch <= 0xd) 4129 || (ch >= 0x1c && ch <= 0x1f)); 4130 } 4131 return getType(ch) == UCharacterCategory.FORMAT; 4132 } 4133 4134 /** 4135 * Determines if the specified code point is an uppercase character. 4136 * UnicodeData only contains case mappings for code point where they are 4137 * one-to-one mappings; it also omits information about context-sensitive 4138 * case mappings.<br> 4139 * For language specific case conversion behavior, use 4140 * toUpperCase(locale, str). <br> 4141 * For example, the case conversion for dot-less i and dotted I in Turkish, 4142 * or for final sigma in Greek. 4143 * For more information about Unicode case mapping please refer to the 4144 * <a href=https://www.unicode.org/reports/tr21/> 4145 * Technical report #21</a>.<br> 4146 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 4147 * @param ch code point to determine if it is in uppercase 4148 * @return true if the code point is an uppercase character 4149 */ isUpperCase(int ch)4150 public static boolean isUpperCase(int ch) 4151 { 4152 // if props == 0, it will just fall through and return false 4153 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 4154 } 4155 4156 /** 4157 * The given code point is mapped to its lowercase equivalent; if the code 4158 * point has no lowercase equivalent, the code point itself is returned. 4159 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 4160 * 4161 * <p>This function only returns the simple, single-code point case mapping. 4162 * Full case mappings should be used whenever possible because they produce 4163 * better results by working on whole strings. 4164 * They take into account the string context and the language and can map 4165 * to a result string with a different length as appropriate. 
4166 * Full case mappings are applied by the case mapping functions 4167 * that take String parameters rather than code points (int). 4168 * See also the User Guide chapter on C/POSIX migration: 4169 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4170 * 4171 * @param ch code point whose lowercase equivalent is to be retrieved 4172 * @return the lowercase equivalent code point 4173 */ toLowerCase(int ch)4174 public static int toLowerCase(int ch) { 4175 return UCaseProps.INSTANCE.tolower(ch); 4176 } 4177 4178 /** 4179 * Converts argument code point and returns a String object representing 4180 * the code point's value in UTF-16 format. 4181 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 4182 * 4183 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 4184 * 4185 * @param ch code point 4186 * @return string representation of the code point, null if code point is not 4187 * defined in unicode 4188 */ toString(int ch)4189 public static String toString(int ch) 4190 { 4191 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4192 return null; 4193 } 4194 4195 if (ch < SUPPLEMENTARY_MIN_VALUE) { 4196 return String.valueOf((char)ch); 4197 } 4198 4199 return new String(Character.toChars(ch)); 4200 } 4201 4202 /** 4203 * Converts the code point argument to titlecase. 4204 * If no titlecase is available, the uppercase is returned. If no uppercase 4205 * is available, the code point itself is returned. 4206 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4207 * 4208 * <p>This function only returns the simple, single-code point case mapping. 4209 * Full case mappings should be used whenever possible because they produce 4210 * better results by working on whole strings. 4211 * They take into account the string context and the language and can map 4212 * to a result string with a different length as appropriate. 
4213 * Full case mappings are applied by the case mapping functions 4214 * that take String parameters rather than code points (int). 4215 * See also the User Guide chapter on C/POSIX migration: 4216 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4217 * 4218 * @param ch code point whose title case is to be retrieved 4219 * @return titlecase code point 4220 */ toTitleCase(int ch)4221 public static int toTitleCase(int ch) { 4222 return UCaseProps.INSTANCE.totitle(ch); 4223 } 4224 4225 /** 4226 * Converts the character argument to uppercase. 4227 * If no uppercase is available, the character itself is returned. 4228 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4229 * 4230 * <p>This function only returns the simple, single-code point case mapping. 4231 * Full case mappings should be used whenever possible because they produce 4232 * better results by working on whole strings. 4233 * They take into account the string context and the language and can map 4234 * to a result string with a different length as appropriate. 4235 * Full case mappings are applied by the case mapping functions 4236 * that take String parameters rather than code points (int). 4237 * See also the User Guide chapter on C/POSIX migration: 4238 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4239 * 4240 * @param ch code point whose uppercase is to be retrieved 4241 * @return uppercase code point 4242 */ toUpperCase(int ch)4243 public static int toUpperCase(int ch) { 4244 return UCaseProps.INSTANCE.toupper(ch); 4245 } 4246 4247 // extra methods not in java.lang.Character -------------------------- 4248 4249 /** 4250 * <strong>[icu]</strong> Determines if the code point is a supplementary character. 
4251 * A code point is a supplementary character if and only if it is greater 4252 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4253 * @param ch code point to be determined if it is in the supplementary 4254 * plane 4255 * @return true if code point is a supplementary character 4256 */ isSupplementary(int ch)4257 public static boolean isSupplementary(int ch) 4258 { 4259 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4260 ch <= UCharacter.MAX_VALUE; 4261 } 4262 4263 /** 4264 * <strong>[icu]</strong> Determines if the code point is in the BMP plane. 4265 * @param ch code point to be determined if it is not a supplementary 4266 * character 4267 * @return true if code point is not a supplementary character 4268 */ isBMP(int ch)4269 public static boolean isBMP(int ch) 4270 { 4271 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4272 } 4273 4274 /** 4275 * <strong>[icu]</strong> Determines whether the specified code point is a printable character 4276 * according to the Unicode standard. 4277 * @param ch code point to be determined if it is printable 4278 * @return true if the code point is a printable character 4279 */ isPrintable(int ch)4280 public static boolean isPrintable(int ch) 4281 { 4282 int cat = getType(ch); 4283 // if props == 0, it will just fall through and return false 4284 return (cat != UCharacterCategory.UNASSIGNED && 4285 cat != UCharacterCategory.CONTROL && 4286 cat != UCharacterCategory.FORMAT && 4287 cat != UCharacterCategory.PRIVATE_USE && 4288 cat != UCharacterCategory.SURROGATE && 4289 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4290 } 4291 4292 /** 4293 * <strong>[icu]</strong> Determines whether the specified code point is of base form. 4294 * A code point of base form does not graphically combine with preceding 4295 * characters, and is neither a control nor a format character. 
4296 * @param ch code point to be determined if it is of base form 4297 * @return true if the code point is of base form 4298 */ isBaseForm(int ch)4299 public static boolean isBaseForm(int ch) 4300 { 4301 int cat = getType(ch); 4302 // if props == 0, it will just fall through and return false 4303 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4304 cat == UCharacterCategory.OTHER_NUMBER || 4305 cat == UCharacterCategory.LETTER_NUMBER || 4306 cat == UCharacterCategory.UPPERCASE_LETTER || 4307 cat == UCharacterCategory.LOWERCASE_LETTER || 4308 cat == UCharacterCategory.TITLECASE_LETTER || 4309 cat == UCharacterCategory.MODIFIER_LETTER || 4310 cat == UCharacterCategory.OTHER_LETTER || 4311 cat == UCharacterCategory.NON_SPACING_MARK || 4312 cat == UCharacterCategory.ENCLOSING_MARK || 4313 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4314 } 4315 4316 /** 4317 * <strong>[icu]</strong> Returns the Bidirection property of a code point. 4318 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4319 * property.<br> 4320 * Result returned belongs to the interface 4321 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4322 * @param ch the code point to be determined its direction 4323 * @return direction constant from UCharacterDirection. 4324 */ getDirection(int ch)4325 public static int getDirection(int ch) 4326 { 4327 return UBiDiProps.INSTANCE.getClass(ch); 4328 } 4329 4330 /** 4331 * Determines whether the code point has the "mirrored" property. 4332 * This property is set for characters that are commonly used in 4333 * Right-To-Left contexts and need to be displayed with a "mirrored" 4334 * glyph. 
4335 * @param ch code point whose mirror is to be determined 4336 * @return true if the code point has the "mirrored" property 4337 */ isMirrored(int ch)4338 public static boolean isMirrored(int ch) 4339 { 4340 return UBiDiProps.INSTANCE.isMirrored(ch); 4341 } 4342 4343 /** 4344 * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point. 4345 * For code points with the "mirrored" property, implementations sometimes 4346 * need a "poor man's" mapping to another code point such that the default 4347 * glyph may serve as the mirror-image of the default glyph of the 4348 * specified code point.<br> 4349 * This is useful for text conversion to and from codepages with visual 4350 * order, and for displays without glyph selection capabilities. 4351 * @param ch code point whose mirror is to be retrieved 4352 * @return another code point that may serve as a mirror-image substitute, 4353 * or ch itself if there is no such mapping or ch does not have the 4354 * "mirrored" property 4355 */ getMirror(int ch)4356 public static int getMirror(int ch) 4357 { 4358 return UBiDiProps.INSTANCE.getMirror(ch); 4359 } 4360 4361 /** 4362 * <strong>[icu]</strong> Maps the specified character to its paired bracket character. 4363 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4364 * Otherwise c itself is returned. 
4365 * See http://www.unicode.org/reports/tr9/ 4366 * 4367 * @param c the code point to be mapped 4368 * @return the paired bracket code point, 4369 * or c itself if there is no such mapping 4370 * (Bidi_Paired_Bracket_Type=None) 4371 * 4372 * @see UProperty#BIDI_PAIRED_BRACKET 4373 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4374 * @see #getMirror(int) 4375 */ getBidiPairedBracket(int c)4376 public static int getBidiPairedBracket(int c) { 4377 return UBiDiProps.INSTANCE.getPairedBracket(c); 4378 } 4379 4380 /** 4381 * <strong>[icu]</strong> Returns the combining class of the argument codepoint 4382 * @param ch code point whose combining is to be retrieved 4383 * @return the combining class of the codepoint 4384 */ getCombiningClass(int ch)4385 public static int getCombiningClass(int ch) 4386 { 4387 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4388 } 4389 4390 /** 4391 * <strong>[icu]</strong> A code point is illegal if and only if 4392 * <ul> 4393 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4394 * <li> A surrogate value, 0xD800 to 0xDFFF 4395 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4396 * </ul> 4397 * Note: legal does not mean that it is assigned in this version of Unicode. 4398 * @param ch code point to determine if it is a legal code point by itself 4399 * @return true if and only if legal. 4400 */ isLegal(int ch)4401 public static boolean isLegal(int ch) 4402 { 4403 if (ch < MIN_VALUE) { 4404 return false; 4405 } 4406 if (ch < Character.MIN_SURROGATE) { 4407 return true; 4408 } 4409 if (ch <= Character.MAX_SURROGATE) { 4410 return false; 4411 } 4412 if (UCharacterUtility.isNonCharacter(ch)) { 4413 return false; 4414 } 4415 return (ch <= MAX_VALUE); 4416 } 4417 4418 /** 4419 * <strong>[icu]</strong> A string is legal iff all its code points are legal. 
4420 * A code point is illegal if and only if 4421 * <ul> 4422 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4423 * <li> A surrogate value, 0xD800 to 0xDFFF 4424 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4425 * </ul> 4426 * Note: legal does not mean that it is assigned in this version of Unicode. 4427 * @param str containing code points to examin 4428 * @return true if and only if legal. 4429 */ isLegal(String str)4430 public static boolean isLegal(String str) 4431 { 4432 int size = str.length(); 4433 int codepoint; 4434 for (int i = 0; i < size; i += Character.charCount(codepoint)) 4435 { 4436 codepoint = str.codePointAt(i); 4437 if (!isLegal(codepoint)) { 4438 return false; 4439 } 4440 } 4441 return true; 4442 } 4443 4444 /** 4445 * <strong>[icu]</strong> Returns the version of Unicode data used. 4446 * @return the unicode version number used 4447 */ getUnicodeVersion()4448 public static VersionInfo getUnicodeVersion() 4449 { 4450 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 4451 } 4452 4453 /** 4454 * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or 4455 * null if the character is unassigned or outside the range 4456 * {@code UCharacter.MIN_VALUE} and {@code UCharacter.MAX_VALUE} or does not 4457 * have a name. 4458 * <br> 4459 * Note calling any methods related to code point names, e.g. {@code getName()} 4460 * incurs a one-time initialization cost to construct the name tables. 
4461 * @param ch the code point for which to get the name 4462 * @return most current Unicode name 4463 */ getName(int ch)4464 public static String getName(int ch) 4465 { 4466 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 4467 } 4468 4469 /** 4470 * <strong>[icu]</strong> Returns the names for each of the characters in a string 4471 * @param s string to format 4472 * @param separator string to go between names 4473 * @return string of names 4474 */ getName(String s, String separator)4475 public static String getName(String s, String separator) { 4476 if (s.length() == 1) { // handle common case 4477 return getName(s.charAt(0)); 4478 } 4479 int cp; 4480 StringBuilder sb = new StringBuilder(); 4481 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 4482 cp = s.codePointAt(i); 4483 if (i != 0) sb.append(separator); 4484 sb.append(UCharacter.getName(cp)); 4485 } 4486 return sb.toString(); 4487 } 4488 4489 /** 4490 * <strong>[icu]</strong> Returns null. 4491 * Used to return the Unicode_1_Name property value which was of little practical value. 4492 * @param ch the code point for which to get the name 4493 * @return null 4494 * @deprecated ICU 49 4495 * @hide original deprecated declaration 4496 */ 4497 @Deprecated getName1_0(int ch)4498 public static String getName1_0(int ch) 4499 { 4500 return null; 4501 } 4502 4503 /** 4504 * <strong>[icu]</strong> Returns a name for a valid codepoint. Unlike, getName(int) and 4505 * getName1_0(int), this method will return a name even for codepoints that 4506 * are not assigned a name in UnicodeData.txt. 4507 * 4508 * <p>The names are returned in the following order. 4509 * <ul> 4510 * <li> Most current Unicode name if there is any 4511 * <li> Unicode 1.0 name if there is any 4512 * <li> Extended name in the form of 4513 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 4514 * </ul> 4515 * Note calling any methods related to code point names, e.g. 
{@code getName()} 4516 * incurs a one-time initialization cost to construct the name tables. 4517 * @param ch the code point for which to get the name 4518 * @return a name for the argument codepoint 4519 */ getExtendedName(int ch)4520 public static String getExtendedName(int ch) { 4521 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 4522 } 4523 4524 /** 4525 * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one. 4526 * Returns null if the character is unassigned or outside the range 4527 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4528 * <br> 4529 * Note calling any methods related to code point names, e.g. {@code getName()} 4530 * incurs a one-time initialization cost to construct the name tables. 4531 * @param ch the code point for which to get the name alias 4532 * @return Unicode name alias, or null 4533 */ getNameAlias(int ch)4534 public static String getNameAlias(int ch) 4535 { 4536 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 4537 } 4538 4539 /** 4540 * <strong>[icu]</strong> Returns null. 4541 * Used to return the ISO 10646 comment for a character. 4542 * The Unicode ISO_Comment property is deprecated and has no values. 4543 * 4544 * @param ch The code point for which to get the ISO comment. 4545 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 4546 * @return null 4547 * @deprecated ICU 49 4548 * @hide original deprecated declaration 4549 */ 4550 @Deprecated getISOComment(int ch)4551 public static String getISOComment(int ch) 4552 { 4553 return null; 4554 } 4555 4556 /** 4557 * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and 4558 * return its code point value. All Unicode names are in uppercase. 4559 * Note calling any methods related to code point names, e.g. {@code getName()} 4560 * incurs a one-time initialization cost to construct the name tables. 
4561 * @param name most current Unicode character name whose code point is to 4562 * be returned 4563 * @return code point or -1 if name is not found 4564 */ getCharFromName(String name)4565 public static int getCharFromName(String name){ 4566 return UCharacterName.INSTANCE.getCharFromName( 4567 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 4568 } 4569 4570 /** 4571 * <strong>[icu]</strong> Returns -1. 4572 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 4573 * its code point value. 4574 * @param name Unicode 1.0 code point name whose code point is to be 4575 * returned 4576 * @return -1 4577 * @deprecated ICU 49 4578 * @see #getName1_0(int) 4579 * @hide original deprecated declaration 4580 */ 4581 @Deprecated getCharFromName1_0(String name)4582 public static int getCharFromName1_0(String name){ 4583 return -1; 4584 } 4585 4586 /** 4587 * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code 4588 * point value. All Unicode names are in uppercase. 4589 * Extended names are all lowercase except for numbers and are contained 4590 * within angle brackets. 4591 * The names are searched in the following order 4592 * <ul> 4593 * <li> Most current Unicode name if there is any 4594 * <li> Unicode 1.0 name if there is any 4595 * <li> Extended name in the form of 4596 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 4597 * </ul> 4598 * Note calling any methods related to code point names, e.g. {@code getName()} 4599 * incurs a one-time initialization cost to construct the name tables. 4600 * @param name codepoint name 4601 * @return code point associated with the name or -1 if the name is not 4602 * found. 
4603 */ getCharFromExtendedName(String name)4604 public static int getCharFromExtendedName(String name){ 4605 return UCharacterName.INSTANCE.getCharFromName( 4606 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 4607 } 4608 4609 /** 4610 * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return 4611 * its code point value. All Unicode names are in uppercase. 4612 * Note calling any methods related to code point names, e.g. {@code getName()} 4613 * incurs a one-time initialization cost to construct the name tables. 4614 * @param name Unicode name alias whose code point is to be returned 4615 * @return code point or -1 if name is not found 4616 */ getCharFromNameAlias(String name)4617 public static int getCharFromNameAlias(String name){ 4618 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 4619 } 4620 4621 /** 4622 * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the 4623 * Unicode database file PropertyAliases.txt. Most properties 4624 * have more than one name. The nameChoice determines which one 4625 * is returned. 4626 * 4627 * In addition, this function maps the property 4628 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 4629 * "General_Category_Mask". These names are not in 4630 * PropertyAliases.txt. 4631 * 4632 * @param property UProperty selector. 4633 * 4634 * @param nameChoice UProperty.NameChoice selector for which name 4635 * to get. All properties have a long name. Most have a short 4636 * name, but some do not. Unicode allows for additional names; if 4637 * present these will be returned by UProperty.NameChoice.LONG + i, 4638 * where i=1, 2,... 4639 * 4640 * @return a name, or null if Unicode explicitly defines no name 4641 * ("n/a") for a given property/nameChoice. If a given nameChoice 4642 * throws an exception, then all larger values of nameChoice will 4643 * throw an exception. 
If null is returned for a given 4644 * nameChoice, then other nameChoice values may return non-null 4645 * results. 4646 * 4647 * @exception IllegalArgumentException thrown if property or 4648 * nameChoice are invalid. 4649 * 4650 * @see UProperty 4651 * @see UProperty.NameChoice 4652 */ getPropertyName(int property, int nameChoice)4653 public static String getPropertyName(int property, 4654 int nameChoice) { 4655 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 4656 } 4657 4658 /** 4659 * <strong>[icu]</strong> Return the UProperty selector for a given property name, as 4660 * specified in the Unicode database file PropertyAliases.txt. 4661 * Short, long, and any other variants are recognized. 4662 * 4663 * In addition, this function maps the synthetic names "gcm" / 4664 * "General_Category_Mask" to the property 4665 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 4666 * PropertyAliases.txt. 4667 * 4668 * @param propertyAlias the property name to be matched. The name 4669 * is compared using "loose matching" as described in 4670 * PropertyAliases.txt. 4671 * 4672 * @return a UProperty enum. 4673 * 4674 * @exception IllegalArgumentException thrown if propertyAlias 4675 * is not recognized. 4676 * 4677 * @see UProperty 4678 */ getPropertyEnum(CharSequence propertyAlias)4679 public static int getPropertyEnum(CharSequence propertyAlias) { 4680 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 4681 if (propEnum == UProperty.UNDEFINED) { 4682 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 4683 } 4684 return propEnum; 4685 } 4686 4687 /** 4688 * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in 4689 * the Unicode database file PropertyValueAliases.txt. Most 4690 * values have more than one name. The nameChoice determines 4691 * which one is returned. 
     *
     * Note: Some of the names in PropertyValueAliases.txt can only be
     * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
     * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" /
     * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
     * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
     *
     * @param property UProperty selector constant.
     * UProperty.INT_START <= property < UProperty.INT_LIMIT or
     * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or
     * UProperty.MASK_START <= property < UProperty.MASK_LIMIT.
     * If out of range, null is returned.
     *
     * @param value selector for a value for the given property. In
     * general, valid values range from 0 up to some maximum. There
     * are a few exceptions: (1.) UProperty.BLOCK values begin at the
     * non-zero value BASIC_LATIN.getID(). (2.)
     * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
     * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values
     * are mask values produced by left-shifting 1 by
     * UCharacter.getType(). This allows grouped categories such as
     * [:L:] to be represented. Mask values are non-contiguous.
     *
     * @param nameChoice UProperty.NameChoice selector for which name
     * to get. All values have a long name. Most have a short name,
     * but some do not. Unicode allows for additional names; if
     * present these will be returned by UProperty.NameChoice.LONG + i,
     * where i=1, 2,...
     *
     * @return a name, or null if Unicode explicitly defines no name
     * ("n/a") for a given property/value/nameChoice. If a given
     * nameChoice throws an exception, then all larger values of
     * nameChoice will throw an exception. If null is returned for a
     * given nameChoice, then other nameChoice values may return
     * non-null results.
4727 * 4728 * @exception IllegalArgumentException thrown if property, value, 4729 * or nameChoice are invalid. 4730 * 4731 * @see UProperty 4732 * @see UProperty.NameChoice 4733 */ getPropertyValueName(int property, int value, int nameChoice)4734 public static String getPropertyValueName(int property, 4735 int value, 4736 int nameChoice) 4737 { 4738 if ((property == UProperty.CANONICAL_COMBINING_CLASS 4739 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 4740 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 4741 && value >= UCharacter.getIntPropertyMinValue( 4742 UProperty.CANONICAL_COMBINING_CLASS) 4743 && value <= UCharacter.getIntPropertyMaxValue( 4744 UProperty.CANONICAL_COMBINING_CLASS) 4745 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 4746 // this is hard coded for the valid cc 4747 // because PropertyValueAliases.txt does not contain all of them 4748 try { 4749 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 4750 nameChoice); 4751 } 4752 catch (IllegalArgumentException e) { 4753 return null; 4754 } 4755 } 4756 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 4757 } 4758 4759 /** 4760 * <strong>[icu]</strong> Return the property value integer for a given value name, as 4761 * specified in the Unicode database file PropertyValueAliases.txt. 4762 * Short, long, and any other variants are recognized. 4763 * 4764 * Note: Some of the names in PropertyValueAliases.txt will only be 4765 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 4766 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4767 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4768 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4769 * 4770 * @param property UProperty selector constant. 
4771 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4772 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4773 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4774 * Only these properties can be enumerated. 4775 * 4776 * @param valueAlias the value name to be matched. The name is 4777 * compared using "loose matching" as described in 4778 * PropertyValueAliases.txt. 4779 * 4780 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 4781 * values are mask values produced by left-shifting 1 by 4782 * UCharacter.getType(). This allows grouped categories such as 4783 * [:L:] to be represented. 4784 * 4785 * @see UProperty 4786 * @throws IllegalArgumentException if property is not a valid UProperty 4787 * selector or valueAlias is not a value of this property 4788 */ getPropertyValueEnum(int property, CharSequence valueAlias)4789 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 4790 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 4791 if (propEnum == UProperty.UNDEFINED) { 4792 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 4793 } 4794 return propEnum; 4795 } 4796 4797 /** 4798 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 4799 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 4800 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 4801 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 4802 * @deprecated This API is ICU internal only. 
4803 * @hide original deprecated declaration 4804 * @hide draft / provisional / internal are hidden on Android 4805 */ 4806 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4807 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 4808 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 4809 } 4810 4811 4812 /** 4813 * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units. 4814 * 4815 * @param lead the lead unit 4816 * (In ICU 2.1-69 the type of both parameters was <code>char</code>.) 4817 * @param trail the trail unit 4818 * @return code point if lead and trail form a valid surrogate pair. 4819 * @exception IllegalArgumentException thrown when the code units do 4820 * not form a valid surrogate pair 4821 * @see #toCodePoint(int, int) 4822 */ getCodePoint(int lead, int trail)4823 public static int getCodePoint(int lead, int trail) 4824 { 4825 if (isHighSurrogate(lead) && isLowSurrogate(trail)) { 4826 return toCodePoint(lead, trail); 4827 } 4828 throw new IllegalArgumentException("Not a valid surrogate pair"); 4829 } 4830 4831 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 4832 /** 4833 * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units. 4834 * 4835 * @param lead the lead char 4836 * @param trail the trail char 4837 * @return code point if surrogate characters are valid. 4838 * @exception IllegalArgumentException thrown when the code units do 4839 * not form a valid code point 4840 */ getCodePoint(char lead, char trail)4841 public static int getCodePoint(char lead, char trail) 4842 { 4843 return getCodePoint((int) lead, (int) trail); 4844 } 4845 // END Android patch: Keep the `char` version on Android. See ICU-21655 4846 4847 /** 4848 * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point. 
4849 * 4850 * @param char16 the BMP code point 4851 * @return code point if argument is a valid character. 4852 * @exception IllegalArgumentException thrown when char16 is not a valid 4853 * code point 4854 */ getCodePoint(char char16)4855 public static int getCodePoint(char char16) 4856 { 4857 if (UCharacter.isLegal(char16)) { 4858 return char16; 4859 } 4860 throw new IllegalArgumentException("Illegal codepoint"); 4861 } 4862 4863 /** 4864 * Returns the uppercase version of the argument string. 4865 * Casing is dependent on the default locale and context-sensitive. 4866 * @param str source string to be performed on 4867 * @return uppercase version of the argument string 4868 */ toUpperCase(String str)4869 public static String toUpperCase(String str) 4870 { 4871 return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str); 4872 } 4873 4874 /** 4875 * Returns the lowercase version of the argument string. 4876 * Casing is dependent on the default locale and context-sensitive 4877 * @param str source string to be performed on 4878 * @return lowercase version of the argument string 4879 */ toLowerCase(String str)4880 public static String toLowerCase(String str) 4881 { 4882 return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str); 4883 } 4884 4885 /** 4886 * <p>Returns the titlecase version of the argument string. 4887 * <p>Position for titlecasing is determined by the argument break 4888 * iterator, hence the user can customize his break iterator for 4889 * a specialized titlecasing. In this case only the forward iteration 4890 * needs to be implemented. 4891 * If the break iterator passed in is null, the default Unicode algorithm 4892 * will be used to determine the titlecase positions. 4893 * 4894 * <p>Only positions returned by the break iterator will be title cased, 4895 * character in between the positions will all be in lower case. 
4896 * <p>Casing is dependent on the default locale and context-sensitive 4897 * @param str source string to be performed on 4898 * @param breakiter break iterator to determine the positions in which 4899 * the character should be title cased. 4900 * @return titlecase version of the argument string 4901 */ toTitleCase(String str, BreakIterator breakiter)4902 public static String toTitleCase(String str, BreakIterator breakiter) 4903 { 4904 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 4905 } 4906 getDefaultCaseLocale()4907 private static int getDefaultCaseLocale() { 4908 return UCaseProps.getCaseLocale(Locale.getDefault()); 4909 } 4910 getCaseLocale(Locale locale)4911 private static int getCaseLocale(Locale locale) { 4912 if (locale == null) { 4913 locale = Locale.getDefault(); 4914 } 4915 return UCaseProps.getCaseLocale(locale); 4916 } 4917 getCaseLocale(ULocale locale)4918 private static int getCaseLocale(ULocale locale) { 4919 if (locale == null) { 4920 locale = ULocale.getDefault(); 4921 } 4922 return UCaseProps.getCaseLocale(locale); 4923 } 4924 4925 /** 4926 * Returns the uppercase version of the argument string. 4927 * Casing is dependent on the argument locale and context-sensitive. 4928 * @param locale which string is to be converted in 4929 * @param str source string to be performed on 4930 * @return uppercase version of the argument string 4931 */ toUpperCase(Locale locale, String str)4932 public static String toUpperCase(Locale locale, String str) 4933 { 4934 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 4935 } 4936 4937 /** 4938 * Returns the uppercase version of the argument string. 4939 * Casing is dependent on the argument locale and context-sensitive. 
4940 * @param locale which string is to be converted in 4941 * @param str source string to be performed on 4942 * @return uppercase version of the argument string 4943 */ toUpperCase(ULocale locale, String str)4944 public static String toUpperCase(ULocale locale, String str) { 4945 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 4946 } 4947 4948 /** 4949 * Returns the lowercase version of the argument string. 4950 * Casing is dependent on the argument locale and context-sensitive 4951 * @param locale which string is to be converted in 4952 * @param str source string to be performed on 4953 * @return lowercase version of the argument string 4954 */ toLowerCase(Locale locale, String str)4955 public static String toLowerCase(Locale locale, String str) 4956 { 4957 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 4958 } 4959 4960 /** 4961 * Returns the lowercase version of the argument string. 4962 * Casing is dependent on the argument locale and context-sensitive 4963 * @param locale which string is to be converted in 4964 * @param str source string to be performed on 4965 * @return lowercase version of the argument string 4966 */ toLowerCase(ULocale locale, String str)4967 public static String toLowerCase(ULocale locale, String str) { 4968 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 4969 } 4970 4971 /** 4972 * <p>Returns the titlecase version of the argument string. 4973 * <p>Position for titlecasing is determined by the argument break 4974 * iterator, hence the user can customize his break iterator for 4975 * a specialized titlecasing. In this case only the forward iteration 4976 * needs to be implemented. 4977 * If the break iterator passed in is null, the default Unicode algorithm 4978 * will be used to determine the titlecase positions. 4979 * 4980 * <p>Only positions returned by the break iterator will be title cased, 4981 * character in between the positions will all be in lower case. 
4982 * <p>Casing is dependent on the argument locale and context-sensitive 4983 * @param locale which string is to be converted in 4984 * @param str source string to be performed on 4985 * @param breakiter break iterator to determine the positions in which 4986 * the character should be title cased. 4987 * @return titlecase version of the argument string 4988 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)4989 public static String toTitleCase(Locale locale, String str, 4990 BreakIterator breakiter) 4991 { 4992 return toTitleCase(locale, str, breakiter, 0); 4993 } 4994 4995 /** 4996 * <p>Returns the titlecase version of the argument string. 4997 * <p>Position for titlecasing is determined by the argument break 4998 * iterator, hence the user can customize his break iterator for 4999 * a specialized titlecasing. In this case only the forward iteration 5000 * needs to be implemented. 5001 * If the break iterator passed in is null, the default Unicode algorithm 5002 * will be used to determine the titlecase positions. 5003 * 5004 * <p>Only positions returned by the break iterator will be title cased, 5005 * character in between the positions will all be in lower case. 5006 * <p>Casing is dependent on the argument locale and context-sensitive 5007 * @param locale which string is to be converted in 5008 * @param str source string to be performed on 5009 * @param titleIter break iterator to determine the positions in which 5010 * the character should be title cased. 5011 * @return titlecase version of the argument string 5012 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)5013 public static String toTitleCase(ULocale locale, String str, 5014 BreakIterator titleIter) { 5015 return toTitleCase(locale, str, titleIter, 0); 5016 } 5017 5018 /** 5019 * <p>Returns the titlecase version of the argument string. 
5020 * <p>Position for titlecasing is determined by the argument break 5021 * iterator, hence the user can customize his break iterator for 5022 * a specialized titlecasing. In this case only the forward iteration 5023 * needs to be implemented. 5024 * If the break iterator passed in is null, the default Unicode algorithm 5025 * will be used to determine the titlecase positions. 5026 * 5027 * <p>Only positions returned by the break iterator will be title cased, 5028 * character in between the positions will all be in lower case. 5029 * <p>Casing is dependent on the argument locale and context-sensitive 5030 * @param locale which string is to be converted in 5031 * @param str source string to be performed on 5032 * @param titleIter break iterator to determine the positions in which 5033 * the character should be title cased. 5034 * @param options bit set to modify the titlecasing operation 5035 * @return titlecase version of the argument string 5036 * @see #TITLECASE_NO_LOWERCASE 5037 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5038 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5039 public static String toTitleCase(ULocale locale, String str, 5040 BreakIterator titleIter, int options) { 5041 if (titleIter == null && locale == null) { 5042 locale = ULocale.getDefault(); 5043 } 5044 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5045 titleIter.setText(str); 5046 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5047 } 5048 5049 /** 5050 * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string. 5051 * <p>Position for titlecasing is determined by the argument break 5052 * iterator, hence the user can customize his break iterator for 5053 * a specialized titlecasing. In this case only the forward iteration 5054 * needs to be implemented. 
5055 * If the break iterator passed in is null, the default Unicode algorithm 5056 * will be used to determine the titlecase positions. 5057 * 5058 * <p>Only positions returned by the break iterator will be title cased, 5059 * character in between the positions will all be in lower case. 5060 * <p>Casing is dependent on the argument locale and context-sensitive 5061 * @param locale which string is to be converted in 5062 * @param str source string to be performed on 5063 * @param titleIter break iterator to determine the positions in which 5064 * the character should be title cased. 5065 * @param options bit set to modify the titlecasing operation 5066 * @return titlecase version of the argument string 5067 * @see #TITLECASE_NO_LOWERCASE 5068 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5069 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5070 public static String toTitleCase(Locale locale, String str, 5071 BreakIterator titleIter, 5072 int options) { 5073 if (titleIter == null && locale == null) { 5074 locale = Locale.getDefault(); 5075 } 5076 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5077 titleIter.setText(str); 5078 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5079 } 5080 5081 /** 5082 * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according 5083 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5084 * folding equivalent, the character itself is returned. 5085 * 5086 * <p>This function only returns the simple, single-code point case mapping. 5087 * Full case mappings should be used whenever possible because they produce 5088 * better results by working on whole strings. 5089 * They can map to a result string with a different length as appropriate. 5090 * Full case mappings are applied by the case mapping functions 5091 * that take String parameters rather than code points (int). 
5092 * See also the User Guide chapter on C/POSIX migration: 5093 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 5094 * 5095 * @param ch the character to be converted 5096 * @param defaultmapping Indicates whether the default mappings defined in 5097 * CaseFolding.txt are to be used, otherwise the 5098 * mappings for dotted I and dotless i marked with 5099 * 'T' in CaseFolding.txt are included. 5100 * @return the case folding equivalent of the character, if 5101 * any; otherwise the character itself. 5102 * @see #foldCase(String, boolean) 5103 */ foldCase(int ch, boolean defaultmapping)5104 public static int foldCase(int ch, boolean defaultmapping) { 5105 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5106 } 5107 5108 /** 5109 * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to 5110 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5111 * folding equivalent, the character itself is returned. 5112 * "Full", multiple-code point case folding mappings are returned here. 5113 * For "simple" single-code point mappings use the API 5114 * foldCase(int ch, boolean defaultmapping). 5115 * @param str the String to be converted 5116 * @param defaultmapping Indicates whether the default mappings defined in 5117 * CaseFolding.txt are to be used, otherwise the 5118 * mappings for dotted I and dotless i marked with 5119 * 'T' in CaseFolding.txt are included. 5120 * @return the case folding equivalent of the character, if 5121 * any; otherwise the character itself. 5122 * @see #foldCase(int, boolean) 5123 */ foldCase(String str, boolean defaultmapping)5124 public static String foldCase(String str, boolean defaultmapping) { 5125 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5126 } 5127 5128 /** 5129 * <strong>[icu]</strong> Option value for case folding: use default mappings defined in 5130 * CaseFolding.txt. 
5131 */ 5132 public static final int FOLD_CASE_DEFAULT = 0x0000; 5133 /** 5134 * <strong>[icu]</strong> Option value for case folding: 5135 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 5136 * and dotless i appropriately for Turkic languages (tr, az). 5137 * 5138 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 5139 * are to be included for default mappings and 5140 * excluded for the Turkic-specific mappings. 5141 * 5142 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 5143 * are to be excluded for default mappings and 5144 * included for the Turkic-specific mappings. 5145 */ 5146 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 5147 5148 /** 5149 * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according 5150 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5151 * folding equivalent, the character itself is returned. 5152 * 5153 * <p>This function only returns the simple, single-code point case mapping. 5154 * Full case mappings should be used whenever possible because they produce 5155 * better results by working on whole strings. 5156 * They can map to a result string with a different length as appropriate. 5157 * Full case mappings are applied by the case mapping functions 5158 * that take String parameters rather than code points (int). 5159 * See also the User Guide chapter on C/POSIX migration: 5160 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 5161 * 5162 * @param ch the character to be converted 5163 * @param options A bit set for special processing. Currently the recognised options 5164 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5165 * @return the case folding equivalent of the character, if any; otherwise the 5166 * character itself. 
5167 * @see #foldCase(String, boolean) 5168 */ foldCase(int ch, int options)5169 public static int foldCase(int ch, int options) { 5170 return UCaseProps.INSTANCE.fold(ch, options); 5171 } 5172 5173 /** 5174 * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to 5175 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5176 * folding equivalent, the character itself is returned. 5177 * "Full", multiple-code point case folding mappings are returned here. 5178 * For "simple" single-code point mappings use the API 5179 * foldCase(int ch, boolean defaultmapping). 5180 * @param str the String to be converted 5181 * @param options A bit set for special processing. Currently the recognised options 5182 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5183 * @return the case folding equivalent of the character, if any; otherwise the 5184 * character itself. 5185 * @see #foldCase(int, boolean) 5186 */ foldCase(String str, int options)5187 public static final String foldCase(String str, int options) { 5188 return CaseMapImpl.fold(options, str); 5189 } 5190 5191 /** 5192 * <strong>[icu]</strong> Returns the numeric value of a Han character. 5193 * 5194 * <p>This returns the value of Han 'numeric' code points, 5195 * including those for zero, ten, hundred, thousand, ten thousand, 5196 * and hundred million. 5197 * This includes both the standard and 'checkwriting' 5198 * characters, the 'big circle' zero character, and the standard 5199 * zero character. 5200 * 5201 * <p>Note: The Unicode Standard has numeric values for more 5202 * Han characters recognized by this method 5203 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 5204 * and a {@link android.icu.text.NumberFormat} can be used with 5205 * a Chinese {@link android.icu.text.NumberingSystem}. 5206 * 5207 * @param ch code point to query 5208 * @return value if it is a Han 'numeric character,' otherwise return -1. 
5209 */ getHanNumericValue(int ch)5210 public static int getHanNumericValue(int ch) 5211 { 5212 switch(ch) 5213 { 5214 case IDEOGRAPHIC_NUMBER_ZERO_ : 5215 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 5216 return 0; // Han Zero 5217 case CJK_IDEOGRAPH_FIRST_ : 5218 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5219 return 1; // Han One 5220 case CJK_IDEOGRAPH_SECOND_ : 5221 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5222 return 2; // Han Two 5223 case CJK_IDEOGRAPH_THIRD_ : 5224 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5225 return 3; // Han Three 5226 case CJK_IDEOGRAPH_FOURTH_ : 5227 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5228 return 4; // Han Four 5229 case CJK_IDEOGRAPH_FIFTH_ : 5230 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5231 return 5; // Han Five 5232 case CJK_IDEOGRAPH_SIXTH_ : 5233 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5234 return 6; // Han Six 5235 case CJK_IDEOGRAPH_SEVENTH_ : 5236 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5237 return 7; // Han Seven 5238 case CJK_IDEOGRAPH_EIGHTH_ : 5239 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5240 return 8; // Han Eight 5241 case CJK_IDEOGRAPH_NINETH_ : 5242 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5243 return 9; // Han Nine 5244 case CJK_IDEOGRAPH_TEN_ : 5245 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5246 return 10; 5247 case CJK_IDEOGRAPH_HUNDRED_ : 5248 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5249 return 100; 5250 case CJK_IDEOGRAPH_THOUSAND_ : 5251 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5252 return 1000; 5253 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5254 return 10000; 5255 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5256 return 100000000; 5257 } 5258 return -1; // no value 5259 } 5260 5261 /** 5262 * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints. 
5263 * <p>Example of use:<br> 5264 * <pre> 5265 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5266 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5267 * while (iterator.next(element)) { 5268 * System.out.println("Codepoint \\u" + 5269 * Integer.toHexString(element.start) + 5270 * " to codepoint \\u" + 5271 * Integer.toHexString(element.limit - 1) + 5272 * " has the character type " + 5273 * element.value); 5274 * } 5275 * </pre> 5276 * @return an iterator 5277 */ getTypeIterator()5278 public static RangeValueIterator getTypeIterator() 5279 { 5280 return new UCharacterTypeIterator(); 5281 } 5282 5283 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()5284 UCharacterTypeIterator() { 5285 reset(); 5286 } 5287 5288 // implements RangeValueIterator 5289 @Override next(Element element)5290 public boolean next(Element element) { 5291 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5292 element.start=range.startCodePoint; 5293 element.limit=range.endCodePoint+1; 5294 element.value=range.value; 5295 return true; 5296 } else { 5297 return false; 5298 } 5299 } 5300 5301 // implements RangeValueIterator 5302 @Override reset()5303 public void reset() { 5304 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5305 } 5306 5307 private Iterator<Trie2.Range> trieIterator; 5308 private Trie2.Range range; 5309 5310 private static final class MaskType implements Trie2.ValueMapper { 5311 // Extracts the general category ("character type") from the trie value. 5312 @Override map(int value)5313 public int map(int value) { 5314 return value & UCharacterProperty.TYPE_MASK; 5315 } 5316 } 5317 private static final MaskType MASK_TYPE=new MaskType(); 5318 } 5319 5320 /** 5321 * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints. 
5322 * <p>This API only gets the iterator for the modern, most up-to-date 5323 * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or 5324 * for extended names use getExtendedNameIterator(). 5325 * <p>Example of use:<br> 5326 * <pre> 5327 * ValueIterator iterator = UCharacter.getNameIterator(); 5328 * ValueIterator.Element element = new ValueIterator.Element(); 5329 * while (iterator.next(element)) { 5330 * System.out.println("Codepoint \\u" + 5331 * Integer.toHexString(element.codepoint) + 5332 * " has the name " + (String)element.value); 5333 * } 5334 * </pre> 5335 * <p>The maximal range which the name iterator iterates is from 5336 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 5337 * @return an iterator 5338 */ getNameIterator()5339 public static ValueIterator getNameIterator(){ 5340 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5341 UCharacterNameChoice.UNICODE_CHAR_NAME); 5342 } 5343 5344 /** 5345 * <strong>[icu]</strong> Returns an empty iterator. 5346 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 5347 * @return an empty iterator 5348 * @deprecated ICU 49 5349 * @see #getName1_0(int) 5350 * @hide original deprecated declaration 5351 */ 5352 @Deprecated getName1_0Iterator()5353 public static ValueIterator getName1_0Iterator(){ 5354 return new DummyValueIterator(); 5355 } 5356 5357 private static final class DummyValueIterator implements ValueIterator { 5358 @Override next(Element element)5359 public boolean next(Element element) { return false; } 5360 @Override reset()5361 public void reset() {} 5362 @Override setRange(int start, int limit)5363 public void setRange(int start, int limit) {} 5364 } 5365 5366 /** 5367 * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints. 5368 * <p>This API only gets the iterator for the extended names. 
5369 * For modern, most up-to-date Unicode names use getNameIterator() or 5370 * for older 1.0 Unicode names use get1_0NameIterator(). 5371 * <p>Example of use:<br> 5372 * <pre> 5373 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5374 * ValueIterator.Element element = new ValueIterator.Element(); 5375 * while (iterator.next(element)) { 5376 * System.out.println("Codepoint \\u" + 5377 * Integer.toHexString(element.codepoint) + 5378 * " has the name " + (String)element.value); 5379 * } 5380 * </pre> 5381 * <p>The maximal range which the name iterator iterates is from 5382 * @return an iterator 5383 */ getExtendedNameIterator()5384 public static ValueIterator getExtendedNameIterator(){ 5385 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5386 UCharacterNameChoice.EXTENDED_CHAR_NAME); 5387 } 5388 5389 /** 5390 * <strong>[icu]</strong> Returns the "age" of the code point. 5391 * <p>The "age" is the Unicode version when the code point was first 5392 * designated (as a non-character or for Private Use) or assigned a 5393 * character. 5394 * <p>This can be useful to avoid emitting code points to receiving 5395 * processes that do not accept newer characters. 5396 * <p>The data is from the UCD file DerivedAge.txt. 5397 * @param ch The code point. 5398 * @return the Unicode version number 5399 */ getAge(int ch)5400 public static VersionInfo getAge(int ch) 5401 { 5402 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5403 throw new IllegalArgumentException("Codepoint out of bounds"); 5404 } 5405 return UCharacterProperty.INSTANCE.getAge(ch); 5406 } 5407 5408 /** 5409 * <strong>[icu]</strong> Check a binary Unicode property for a code point. 5410 * <p>Unicode, especially in version 3.2, defines many more properties 5411 * than the original set in UnicodeData.txt. 5412 * <p>This API is intended to reflect Unicode properties as defined in 5413 * the Unicode Character Database (UCD) and Unicode Technical Reports 5414 * (UTR). 
5415 * <p>For details about the properties see 5416 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 5417 * <p>For names of Unicode properties see the UCD file 5418 * PropertyAliases.txt. 5419 * <p>This API does not check the validity of the codepoint. 5420 * <p>Important: If ICU is built with UCD files from Unicode versions 5421 * below 3.2, then properties marked with "new" are not or 5422 * not fully available. 5423 * @param ch code point to test. 5424 * @param property selector constant from android.icu.lang.UProperty, 5425 * identifies which binary property to check. 5426 * @return true or false according to the binary Unicode property value 5427 * for ch. Also false if property is out of bounds or if the 5428 * Unicode version does not have data for the property at all, or 5429 * not for this code point. 5430 * @see android.icu.lang.UProperty 5431 */ hasBinaryProperty(int ch, int property)5432 public static boolean hasBinaryProperty(int ch, int property) 5433 { 5434 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5435 } 5436 5437 /** 5438 * <strong>[icu]</strong> Returns true if the property is true for the string. 5439 * Same as {@link #hasBinaryProperty(int, int)} 5440 * if the string contains exactly one code point. 5441 * 5442 * <p>Most properties apply only to single code points. 5443 * <a href="https://www.unicode.org/reports/tr51/#Emoji_Sets">UTS #51 Unicode Emoji</a> 5444 * defines several properties of strings. 5445 * 5446 * @param s String to test. 5447 * @param property UProperty selector constant, identifies which binary property to check. 5448 * Must be BINARY_START<=which<BINARY_LIMIT. 5449 * @return true or false according to the binary Unicode property value for the string. 5450 * Also false if <code>property</code> is out of bounds or if the Unicode version 5451 * does not have data for the property at all. 
5452 * 5453 * @see android.icu.lang.UProperty 5454 */ hasBinaryProperty(CharSequence s, int property)5455 public static boolean hasBinaryProperty(CharSequence s, int property) { 5456 int length = s.length(); 5457 if (length == 1) { 5458 return hasBinaryProperty(s.charAt(0), property); // single code point 5459 } else if (length == 2) { 5460 // first code point 5461 int c = Character.codePointAt(s, 0); 5462 if (Character.charCount(c) == length) { 5463 return hasBinaryProperty(c, property); // single code point 5464 } 5465 } 5466 // Only call into EmojiProps for a relevant property, 5467 // so that we not unnecessarily try to load its data file. 5468 return UProperty.BASIC_EMOJI <= property && property <= UProperty.RGI_EMOJI && 5469 EmojiProps.INSTANCE.hasBinaryProperty(s, property); 5470 } 5471 5472 /** 5473 * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property. 5474 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 5475 * <p>Different from UCharacter.isLetter(ch)! 5476 * @param ch codepoint to be tested 5477 */ isUAlphabetic(int ch)5478 public static boolean isUAlphabetic(int ch) 5479 { 5480 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5481 } 5482 5483 /** 5484 * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property. 5485 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 5486 * <p>This is different from UCharacter.isLowerCase(ch)! 5487 * @param ch codepoint to be tested 5488 */ isULowercase(int ch)5489 public static boolean isULowercase(int ch) 5490 { 5491 return hasBinaryProperty(ch, UProperty.LOWERCASE); 5492 } 5493 5494 /** 5495 * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property. 5496 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 5497 * <p>This is different from UCharacter.isUpperCase(ch)! 
5498 * @param ch codepoint to be tested 5499 */ isUUppercase(int ch)5500 public static boolean isUUppercase(int ch) 5501 { 5502 return hasBinaryProperty(ch, UProperty.UPPERCASE); 5503 } 5504 5505 /** 5506 * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property. 5507 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 5508 * <p>This is different from both UCharacter.isSpace(ch) and 5509 * UCharacter.isWhitespace(ch)! 5510 * @param ch codepoint to be tested 5511 */ isUWhiteSpace(int ch)5512 public static boolean isUWhiteSpace(int ch) 5513 { 5514 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 5515 } 5516 5517 /** 5518 * <strong>[icu]</strong> Returns the property value for a Unicode property type of a code point. 5519 * Also returns binary and mask property values. 5520 * <p>Unicode, especially in version 3.2, defines many more properties than 5521 * the original set in UnicodeData.txt. 5522 * <p>The properties APIs are intended to reflect Unicode properties as 5523 * defined in the Unicode Character Database (UCD) and Unicode Technical 5524 * Reports (UTR). For details about the properties see 5525 * http://www.unicode.org/. 5526 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 5527 * 5528 * <pre> 5529 * Sample usage: 5530 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 5531 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 5532 * boolean b = (ideo == 1) ? true : false; 5533 * </pre> 5534 * @param ch code point to test. 5535 * @param type UProperty selector constant, identifies which binary 5536 * property to check. Must be 5537 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5538 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5539 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 
5540 * @return numeric value that is directly the property value or, 5541 * for enumerated properties, corresponds to the numeric value of 5542 * the enumerated constant of the respective property value type 5543 * ({@link ECharacterCategory}, {@link ECharacterDirection}, 5544 * {@link DecompositionType}, etc.). 5545 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5546 * Returns a bit-mask for mask properties. 5547 * Returns 0 if 'type' is out of bounds or if the Unicode version 5548 * does not have data for the property at all, or not for this code 5549 * point. 5550 * @see UProperty 5551 * @see #hasBinaryProperty 5552 * @see #getIntPropertyMinValue 5553 * @see #getIntPropertyMaxValue 5554 * @see #getUnicodeVersion 5555 */ getIntPropertyValue(int ch, int type)5556 public static int getIntPropertyValue(int ch, int type) 5557 { 5558 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5559 } 5560 /** 5561 * <strong>[icu]</strong> Returns a string version of the property value. 5562 * @param propertyEnum The property enum value. 5563 * @param codepoint The codepoint value. 5564 * @param nameChoice The choice of the name. 5565 * @return value as string 5566 * @deprecated This API is ICU internal only. 
5567 * @hide original deprecated declaration 5568 * @hide draft / provisional / internal are hidden on Android 5569 */ 5570 @Deprecated 5571 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5572 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5573 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5574 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5575 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5576 nameChoice); 5577 } 5578 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5579 return String.valueOf(getUnicodeNumericValue(codepoint)); 5580 } 5581 // otherwise must be string property 5582 switch (propertyEnum) { 5583 case UProperty.AGE: return getAge(codepoint).toString(); 5584 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5585 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 5586 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 5587 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5588 case UProperty.NAME: return getName(codepoint); 5589 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 5590 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5591 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5592 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5593 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5594 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5595 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5596 } 5597 throw new IllegalArgumentException("Illegal Property Enum"); 5598 } 5599 ///CLOVER:ON 5600 5601 /** 5602 * <strong>[icu]</strong> Returns the minimum value for an integer/binary 
Unicode property type. 5603 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5604 * to allocate arrays of android.icu.text.UnicodeSet or similar. 5605 * @param type UProperty selector constant, identifies which binary 5606 * property to check. Must be 5607 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5608 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5609 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5610 * for a Unicode property. 0 if the property 5611 * selector 'type' is out of range. 5612 * @see UProperty 5613 * @see #hasBinaryProperty 5614 * @see #getUnicodeVersion 5615 * @see #getIntPropertyMaxValue 5616 * @see #getIntPropertyValue 5617 */ getIntPropertyMinValue(int type)5618 public static int getIntPropertyMinValue(int type){ 5619 5620 return 0; // undefined; and: all other properties have a minimum value of 0 5621 } 5622 5623 5624 /** 5625 * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property. 5626 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5627 * to allocate arrays of android.icu.text.UnicodeSet or similar. 5628 * Examples for min/max values (for Unicode 3.2): 5629 * <ul> 5630 * <li> UProperty.BIDI_CLASS: 0/18 5631 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5632 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5633 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5634 * </ul> 5635 * For undefined UProperty constant values, min/max values will be 0/-1. 5636 * @param type UProperty selector constant, identifies which binary 5637 * property to check. Must be 5638 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5639 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5640 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 5641 * property. <= 0 if the property selector 'type' is out of range. 
5642 * @see UProperty 5643 * @see #hasBinaryProperty 5644 * @see #getUnicodeVersion 5645 * @see #getIntPropertyMaxValue 5646 * @see #getIntPropertyValue 5647 */ getIntPropertyMaxValue(int type)5648 public static int getIntPropertyMaxValue(int type) 5649 { 5650 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 5651 } 5652 5653 /** 5654 * Provide the java.lang.Character forDigit API, for convenience. 5655 */ forDigit(int digit, int radix)5656 public static char forDigit(int digit, int radix) { 5657 return java.lang.Character.forDigit(digit, radix); 5658 } 5659 5660 // JDK 1.5 API coverage 5661 5662 /** 5663 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 5664 */ 5665 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 5666 5667 /** 5668 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 5669 */ 5670 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 5671 5672 /** 5673 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 5674 */ 5675 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 5676 5677 /** 5678 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 5679 */ 5680 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 5681 5682 /** 5683 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 5684 */ 5685 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 5686 5687 /** 5688 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 5689 */ 5690 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 5691 5692 /** 5693 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 5694 */ 5695 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 5696 5697 /** 5698 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 
5699 */ 5700 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 5701 5702 /** 5703 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 5704 */ 5705 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 5706 5707 /** 5708 * Equivalent to {@link Character#isValidCodePoint}. 5709 * 5710 * @param cp the code point to check 5711 * @return true if cp is a valid code point 5712 */ isValidCodePoint(int cp)5713 public static final boolean isValidCodePoint(int cp) { 5714 return cp >= 0 && cp <= MAX_CODE_POINT; 5715 } 5716 5717 /** 5718 * Same as {@link Character#isSupplementaryCodePoint}. 5719 * 5720 * @param cp the code point to check 5721 * @return true if cp is a supplementary code point 5722 */ isSupplementaryCodePoint(int cp)5723 public static final boolean isSupplementaryCodePoint(int cp) { 5724 return Character.isSupplementaryCodePoint(cp); 5725 } 5726 5727 /** 5728 * Same as {@link Character#isHighSurrogate}, 5729 * except that the ICU version accepts <code>int</code> for code points. 5730 * 5731 * @param codePoint the code point to check 5732 * (In ICU 3.0-69 the type of this parameter was <code>char</code>.) 5733 * @return true if codePoint is a high (lead) surrogate 5734 */ isHighSurrogate(int codePoint)5735 public static boolean isHighSurrogate(int codePoint) { 5736 return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS; 5737 } 5738 5739 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 5740 /** 5741 * Same as {@link Character#isHighSurrogate}, 5742 * 5743 * @param ch the char to check 5744 * @return true if ch is a high (lead) surrogate 5745 */ isHighSurrogate(char ch)5746 public static boolean isHighSurrogate(char ch) { 5747 return isHighSurrogate((int) ch); 5748 } 5749 // END Android patch: Keep the `char` version on Android. See ICU-21655 5750 5751 /** 5752 * Same as {@link Character#isLowSurrogate}, 5753 * except that the ICU version accepts <code>int</code> for code points. 
5754 * 5755 * @param codePoint the code point to check 5756 * (In ICU 3.0-69 the type of this parameter was <code>char</code>.) 5757 * @return true if codePoint is a low (trail) surrogate 5758 */ isLowSurrogate(int codePoint)5759 public static boolean isLowSurrogate(int codePoint) { 5760 return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS; 5761 } 5762 5763 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 5764 /** 5765 * Same as {@link Character#isLowSurrogate}, 5766 * 5767 * @param ch the char to check 5768 * @return true if ch is a low (trail) surrogate 5769 */ isLowSurrogate(char ch)5770 public static boolean isLowSurrogate(char ch) { 5771 return isLowSurrogate((int) ch); 5772 } 5773 // END Android patch: Keep the `char` version on Android. See ICU-21655 5774 5775 /** 5776 * Same as {@link Character#isSurrogatePair}, 5777 * except that the ICU version accepts <code>int</code> for code points. 5778 * 5779 * @param high the high (lead) unit 5780 * (In ICU 3.0-69 the type of both parameters was <code>char</code>.) 5781 * @param low the low (trail) unit 5782 * @return true if high, low form a surrogate pair 5783 */ isSurrogatePair(int high, int low)5784 public static final boolean isSurrogatePair(int high, int low) { 5785 return isHighSurrogate(high) && isLowSurrogate(low); 5786 } 5787 5788 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 5789 /** 5790 * Same as {@link Character#isSurrogatePair}. 5791 * 5792 * @param high the high (lead) char 5793 * @param low the low (trail) char 5794 * @return true if high, low form a surrogate pair 5795 */ isSurrogatePair(char high, char low)5796 public static final boolean isSurrogatePair(char high, char low) { 5797 return isSurrogatePair((int) high, (int) low); 5798 } 5799 // END Android patch: Keep the `char` version on Android. See ICU-21655 5800 5801 /** 5802 * Same as {@link Character#charCount}. 
5803 * Returns the number of chars needed to represent the code point (1 or 2). 5804 * This does not check the code point for validity. 5805 * 5806 * @param cp the code point to check 5807 * @return the number of chars needed to represent the code point 5808 */ charCount(int cp)5809 public static int charCount(int cp) { 5810 return Character.charCount(cp); 5811 } 5812 5813 /** 5814 * Same as {@link Character#toCodePoint}, 5815 * except that the ICU version accepts <code>int</code> for code points. 5816 * Returns the code point represented by the two surrogate code units. 5817 * This does not check the surrogate pair for validity. 5818 * 5819 * @param high the high (lead) surrogate 5820 * (In ICU 3.0-69 the type of both parameters was <code>char</code>.) 5821 * @param low the low (trail) surrogate 5822 * @return the code point formed by the surrogate pair 5823 * @see #getCodePoint(int, int) 5824 */ toCodePoint(int high, int low)5825 public static final int toCodePoint(int high, int low) { 5826 // see ICU4C U16_GET_SUPPLEMENTARY() 5827 return (high << 10) + low - U16_SURROGATE_OFFSET; 5828 } 5829 5830 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 5831 /** 5832 * Same as {@link Character#toCodePoint}. 5833 * Returns the code point represented by the two surrogate code units. 5834 * This does not check the surrogate pair for validity. 5835 * 5836 * @param high the high (lead) surrogate 5837 * @param low the low (trail) surrogate 5838 * @return the code point formed by the surrogate pair 5839 */ toCodePoint(char high, char low)5840 public static final int toCodePoint(char high, char low) { 5841 return toCodePoint((int) high, (int) low); 5842 } 5843 // END Android patch: Keep the `char` version on Android. See ICU-21655 5844 5845 /** 5846 * Same as {@link Character#codePointAt(CharSequence, int)}. 5847 * Returns the code point at index. 5848 * This examines only the characters at index and index+1. 
5849 * 5850 * @param seq the characters to check 5851 * @param index the index of the first or only char forming the code point 5852 * @return the code point at the index 5853 */ codePointAt(CharSequence seq, int index)5854 public static final int codePointAt(CharSequence seq, int index) { 5855 char c1 = seq.charAt(index++); 5856 if (isHighSurrogate(c1)) { 5857 if (index < seq.length()) { 5858 char c2 = seq.charAt(index); 5859 if (isLowSurrogate(c2)) { 5860 return toCodePoint(c1, c2); 5861 } 5862 } 5863 } 5864 return c1; 5865 } 5866 5867 /** 5868 * Same as {@link Character#codePointAt(char[], int)}. 5869 * Returns the code point at index. 5870 * This examines only the characters at index and index+1. 5871 * 5872 * @param text the characters to check 5873 * @param index the index of the first or only char forming the code point 5874 * @return the code point at the index 5875 */ codePointAt(char[] text, int index)5876 public static final int codePointAt(char[] text, int index) { 5877 char c1 = text[index++]; 5878 if (isHighSurrogate(c1)) { 5879 if (index < text.length) { 5880 char c2 = text[index]; 5881 if (isLowSurrogate(c2)) { 5882 return toCodePoint(c1, c2); 5883 } 5884 } 5885 } 5886 return c1; 5887 } 5888 5889 /** 5890 * Same as {@link Character#codePointAt(char[], int, int)}. 5891 * Returns the code point at index. 5892 * This examines only the characters at index and index+1. 
5893 * 5894 * @param text the characters to check 5895 * @param index the index of the first or only char forming the code point 5896 * @param limit the limit of the valid text 5897 * @return the code point at the index 5898 */ codePointAt(char[] text, int index, int limit)5899 public static final int codePointAt(char[] text, int index, int limit) { 5900 if (index >= limit || limit > text.length) { 5901 throw new IndexOutOfBoundsException(); 5902 } 5903 char c1 = text[index++]; 5904 if (isHighSurrogate(c1)) { 5905 if (index < limit) { 5906 char c2 = text[index]; 5907 if (isLowSurrogate(c2)) { 5908 return toCodePoint(c1, c2); 5909 } 5910 } 5911 } 5912 return c1; 5913 } 5914 5915 /** 5916 * Same as {@link Character#codePointBefore(CharSequence, int)}. 5917 * Return the code point before index. 5918 * This examines only the characters at index-1 and index-2. 5919 * 5920 * @param seq the characters to check 5921 * @param index the index after the last or only char forming the code point 5922 * @return the code point before the index 5923 */ codePointBefore(CharSequence seq, int index)5924 public static final int codePointBefore(CharSequence seq, int index) { 5925 char c2 = seq.charAt(--index); 5926 if (isLowSurrogate(c2)) { 5927 if (index > 0) { 5928 char c1 = seq.charAt(--index); 5929 if (isHighSurrogate(c1)) { 5930 return toCodePoint(c1, c2); 5931 } 5932 } 5933 } 5934 return c2; 5935 } 5936 5937 /** 5938 * Same as {@link Character#codePointBefore(char[], int)}. 5939 * Returns the code point before index. 5940 * This examines only the characters at index-1 and index-2. 
5941 * 5942 * @param text the characters to check 5943 * @param index the index after the last or only char forming the code point 5944 * @return the code point before the index 5945 */ codePointBefore(char[] text, int index)5946 public static final int codePointBefore(char[] text, int index) { 5947 char c2 = text[--index]; 5948 if (isLowSurrogate(c2)) { 5949 if (index > 0) { 5950 char c1 = text[--index]; 5951 if (isHighSurrogate(c1)) { 5952 return toCodePoint(c1, c2); 5953 } 5954 } 5955 } 5956 return c2; 5957 } 5958 5959 /** 5960 * Same as {@link Character#codePointBefore(char[], int, int)}. 5961 * Return the code point before index. 5962 * This examines only the characters at index-1 and index-2. 5963 * 5964 * @param text the characters to check 5965 * @param index the index after the last or only char forming the code point 5966 * @param limit the start of the valid text 5967 * @return the code point before the index 5968 */ codePointBefore(char[] text, int index, int limit)5969 public static final int codePointBefore(char[] text, int index, int limit) { 5970 if (index <= limit || limit < 0) { 5971 throw new IndexOutOfBoundsException(); 5972 } 5973 char c2 = text[--index]; 5974 if (isLowSurrogate(c2)) { 5975 if (index > limit) { 5976 char c1 = text[--index]; 5977 if (isHighSurrogate(c1)) { 5978 return toCodePoint(c1, c2); 5979 } 5980 } 5981 } 5982 return c2; 5983 } 5984 5985 /** 5986 * Same as {@link Character#toChars(int, char[], int)}. 5987 * Writes the chars representing the 5988 * code point into the destination at the given index. 
5989 * 5990 * @param cp the code point to convert 5991 * @param dst the destination array into which to put the char(s) representing the code point 5992 * @param dstIndex the index at which to put the first (or only) char 5993 * @return the count of the number of chars written (1 or 2) 5994 * @throws IllegalArgumentException if cp is not a valid code point 5995 */ toChars(int cp, char[] dst, int dstIndex)5996 public static final int toChars(int cp, char[] dst, int dstIndex) { 5997 return Character.toChars(cp, dst, dstIndex); 5998 } 5999 6000 /** 6001 * Same as {@link Character#toChars(int)}. 6002 * Returns a char array representing the code point. 6003 * 6004 * @param cp the code point to convert 6005 * @return an array containing the char(s) representing the code point 6006 * @throws IllegalArgumentException if cp is not a valid code point 6007 */ toChars(int cp)6008 public static final char[] toChars(int cp) { 6009 return Character.toChars(cp); 6010 } 6011 6012 /** 6013 * Equivalent to the {@link Character#getDirectionality(char)} method, for 6014 * convenience. Returns a byte representing the directionality of the 6015 * character. 6016 * 6017 * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns 6018 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 6019 * 6020 * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link 6021 * UCharacterDirection} and its interface {@link 6022 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 6023 * defined by <code>java.lang.Character</code>. 
6024 * @param cp the code point to check 6025 * @return the directionality of the code point 6026 * @see #getDirection 6027 */ getDirectionality(int cp)6028 public static byte getDirectionality(int cp) 6029 { 6030 return (byte)getDirection(cp); 6031 } 6032 6033 /** 6034 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 6035 * method, for convenience. Counts the number of code points in the range 6036 * of text. 6037 * @param text the characters to check 6038 * @param start the start of the range 6039 * @param limit the limit of the range 6040 * @return the number of code points in the range 6041 */ codePointCount(CharSequence text, int start, int limit)6042 public static int codePointCount(CharSequence text, int start, int limit) { 6043 if (start < 0 || limit < start || limit > text.length()) { 6044 throw new IndexOutOfBoundsException("start (" + start + 6045 ") or limit (" + limit + 6046 ") invalid or out of range 0, " + text.length()); 6047 } 6048 6049 int len = limit - start; 6050 while (limit > start) { 6051 char ch = text.charAt(--limit); 6052 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6053 ch = text.charAt(--limit); 6054 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6055 --len; 6056 break; 6057 } 6058 } 6059 } 6060 return len; 6061 } 6062 6063 /** 6064 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 6065 * convenience. Counts the number of code points in the range of text. 
6066 * @param text the characters to check 6067 * @param start the start of the range 6068 * @param limit the limit of the range 6069 * @return the number of code points in the range 6070 */ codePointCount(char[] text, int start, int limit)6071 public static int codePointCount(char[] text, int start, int limit) { 6072 if (start < 0 || limit < start || limit > text.length) { 6073 throw new IndexOutOfBoundsException("start (" + start + 6074 ") or limit (" + limit + 6075 ") invalid or out of range 0, " + text.length); 6076 } 6077 6078 int len = limit - start; 6079 while (limit > start) { 6080 char ch = text[--limit]; 6081 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6082 ch = text[--limit]; 6083 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6084 --len; 6085 break; 6086 } 6087 } 6088 } 6089 return len; 6090 } 6091 6092 /** 6093 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 6094 * method, for convenience. Adjusts the char index by a code point offset. 
6095 * @param text the characters to check 6096 * @param index the index to adjust 6097 * @param codePointOffset the number of code points by which to offset the index 6098 * @return the adjusted index 6099 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)6100 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 6101 if (index < 0 || index > text.length()) { 6102 throw new IndexOutOfBoundsException("index ( " + index + 6103 ") out of range 0, " + text.length()); 6104 } 6105 6106 if (codePointOffset < 0) { 6107 while (++codePointOffset <= 0) { 6108 char ch = text.charAt(--index); 6109 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 6110 ch = text.charAt(--index); 6111 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6112 if (++codePointOffset > 0) { 6113 return index+1; 6114 } 6115 } 6116 } 6117 } 6118 } else { 6119 int limit = text.length(); 6120 while (--codePointOffset >= 0) { 6121 char ch = text.charAt(index++); 6122 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6123 ch = text.charAt(index++); 6124 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6125 if (--codePointOffset < 0) { 6126 return index-1; 6127 } 6128 } 6129 } 6130 } 6131 } 6132 6133 return index; 6134 } 6135 6136 /** 6137 * Equivalent to the 6138 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 6139 * method, for convenience. Adjusts the char index by a code point offset. 
6140 * @param text the characters to check 6141 * @param start the start of the range to check 6142 * @param count the length of the range to check 6143 * @param index the index to adjust 6144 * @param codePointOffset the number of code points by which to offset the index 6145 * @return the adjusted index 6146 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6147 public static int offsetByCodePoints(char[] text, int start, int count, int index, 6148 int codePointOffset) { 6149 int limit = start + count; 6150 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 6151 throw new IndexOutOfBoundsException("index ( " + index + 6152 ") out of range " + start + 6153 ", " + limit + 6154 " in array 0, " + text.length); 6155 } 6156 6157 if (codePointOffset < 0) { 6158 while (++codePointOffset <= 0) { 6159 char ch = text[--index]; 6160 if (index < start) { 6161 throw new IndexOutOfBoundsException("index ( " + index + 6162 ") < start (" + start + 6163 ")"); 6164 } 6165 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 6166 ch = text[--index]; 6167 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6168 if (++codePointOffset > 0) { 6169 return index+1; 6170 } 6171 } 6172 } 6173 } 6174 } else { 6175 while (--codePointOffset >= 0) { 6176 char ch = text[index++]; 6177 if (index > limit) { 6178 throw new IndexOutOfBoundsException("index ( " + index + 6179 ") > limit (" + limit + 6180 ")"); 6181 } 6182 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6183 ch = text[index++]; 6184 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6185 if (--codePointOffset < 0) { 6186 return index-1; 6187 } 6188 } 6189 } 6190 } 6191 } 6192 6193 return index; 6194 } 6195 6196 // private variables ------------------------------------------------- 6197 6198 /** 6199 * To get the last character out from a data type 6200 */ 6201 private static final int 
LAST_CHAR_MASK_ = 0xFFFF; 6202 6203 // /** 6204 // * To get the last byte out from a data type 6205 // */ 6206 // private static final int LAST_BYTE_MASK_ = 0xFF; 6207 // 6208 // /** 6209 // * Shift 16 bits 6210 // */ 6211 // private static final int SHIFT_16_ = 16; 6212 // 6213 // /** 6214 // * Shift 24 bits 6215 // */ 6216 // private static final int SHIFT_24_ = 24; 6217 // 6218 // /** 6219 // * Decimal radix 6220 // */ 6221 // private static final int DECIMAL_RADIX_ = 10; 6222 6223 /** 6224 * No break space code point 6225 */ 6226 private static final int NO_BREAK_SPACE_ = 0xA0; 6227 6228 /** 6229 * Figure space code point 6230 */ 6231 private static final int FIGURE_SPACE_ = 0x2007; 6232 6233 /** 6234 * Narrow no break space code point 6235 */ 6236 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 6237 6238 /** 6239 * Ideographic number zero code point 6240 */ 6241 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 6242 6243 /** 6244 * CJK Ideograph, First code point 6245 */ 6246 private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; 6247 6248 /** 6249 * CJK Ideograph, Second code point 6250 */ 6251 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 6252 6253 /** 6254 * CJK Ideograph, Third code point 6255 */ 6256 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 6257 6258 /** 6259 * CJK Ideograph, Fourth code point 6260 */ 6261 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db; 6262 6263 /** 6264 * CJK Ideograph, FIFTH code point 6265 */ 6266 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 6267 6268 /** 6269 * CJK Ideograph, Sixth code point 6270 */ 6271 private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 6272 6273 /** 6274 * CJK Ideograph, Seventh code point 6275 */ 6276 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 6277 6278 /** 6279 * CJK Ideograph, Eighth code point 6280 */ 6281 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 6282 6283 /** 6284 * CJK Ideograph, Nineth code point 6285 */ 
private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

    /**
     * Application Program command code point
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator code point
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete code point
     */
    private static final int DELETE_ = 0x007F;

    /**
     * Han digit characters
     */
    private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
    private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
    private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
    private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
    private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
    private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
    private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
    private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
    private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
    private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
    private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
    private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
    private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
    private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
    private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
    private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
    // NOTE(review): 0x824c looks like a transposition of 0x842c (the usual
    // "ten thousand" ideograph) -- confirm against the Unihan data before
    // relying on this constant. The value is left unchanged here.
    private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
    private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;

    // private constructor -----------------------------------------------
    ///CLOVER:OFF
    /**
     * Private constructor to prevent instantiation
     */
    private UCharacter() {
    }
    ///CLOVER:ON
}