1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package ohos.global.icu.lang; 12 13 import java.lang.ref.SoftReference; 14 import java.util.HashMap; 15 import java.util.Iterator; 16 import java.util.Locale; 17 import java.util.Map; 18 19 import ohos.global.icu.impl.CaseMapImpl; 20 import ohos.global.icu.impl.IllegalIcuArgumentException; 21 import ohos.global.icu.impl.Trie2; 22 import ohos.global.icu.impl.UBiDiProps; 23 import ohos.global.icu.impl.UCaseProps; 24 import ohos.global.icu.impl.UCharacterName; 25 import ohos.global.icu.impl.UCharacterNameChoice; 26 import ohos.global.icu.impl.UCharacterProperty; 27 import ohos.global.icu.impl.UCharacterUtility; 28 import ohos.global.icu.impl.UPropertyAliases; 29 import ohos.global.icu.lang.UCharacterEnums.ECharacterCategory; 30 import ohos.global.icu.lang.UCharacterEnums.ECharacterDirection; 31 import ohos.global.icu.text.BreakIterator; 32 import ohos.global.icu.text.Normalizer2; 33 import ohos.global.icu.util.RangeValueIterator; 34 import ohos.global.icu.util.ULocale; 35 import ohos.global.icu.util.ValueIterator; 36 import ohos.global.icu.util.VersionInfo; 37 38 /** 39 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 40 * 41 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 42 * These extensions provide support for more Unicode properties. 43 * Each ICU release supports the latest version of Unicode available at that time. 
44 * 45 * <p>For some time before Java 5 added support for supplementary Unicode code points, 46 * the ICU UCharacter class and many other ICU classes already supported them. 47 * Some UCharacter methods and constants were widened slightly differently than 48 * how the Character class methods and constants were widened later. 49 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 50 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 51 * 52 * <p>Code points are represented in these APIs using ints. While it would be 53 * more convenient in Java to have a separate primitive datatype for them, 54 * ints suffice in the meantime. 55 * 56 * <p>To use this class please add the jar file name icu4j.jar to the 57 * class path, since it contains data files which supply the information used 58 * by this file.<br> 59 * E.g. In Windows <br> 60 * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/ucharacter.jar</code>.<br> 61 * Otherwise, another method would be to copy the files uprops.dat and 62 * unames.icu from the icu4j source subdirectory 63 * <i>$ICU4J_SRC/src/ohos.global.icu.impl.data</i> to your class directory 64 * <i>$ICU4J_CLASS/ohos.global.icu.impl.data</i>. 65 * 66 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 67 * properties, the main differences between UCharacter and Character are: 68 * <ul> 69 * <li> UCharacter is not designed to be a char wrapper and does not have 70 * APIs that involve management of that single char.<br> 71 * These include: 72 * <ul> 73 * <li> char charValue(), 74 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 75 * </ul> 76 * <li> UCharacter does not include Character APIs that are deprecated, nor 77 * does it include the Java-specific character information, such as 78 * boolean isJavaIdentifierPart(char ch). 79 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 80 * values '10' - '35'. 
UCharacter also does this in digit and 81 * getNumericValue, to adhere to the Java semantics of these 82 * methods. New methods unicodeDigit and 83 * getUnicodeNumericValue do not treat the above code points 84 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 85 * </ul> 86 * <p> 87 * Further detail on differences can be determined using the program 88 * <a href= 89 * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java"> 90 * ohos.global.icu.dev.test.lang.UCharacterCompare</a> 91 * <p> 92 * In addition to Java compatibility functions, which calculate derived properties, 93 * this API provides low-level access to the Unicode Character Database. 94 * <p> 95 * Unicode assigns each code point (not just assigned characters) values for 96 * many properties. 97 * Most of them are simple boolean flags, or constants from a small enumerated list. 98 * For some properties, values are strings or other relatively more complex types. 99 * <p> 100 * For more information see 101 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 102 * (http://www.unicode.org/ucd/) 103 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU 104 * User Guide chapter on Properties</a> 105 * (http://www.icu-project.org/userguide/properties.html). 106 * <p> 107 * There are also functions that provide easy migration from C/POSIX functions 108 * like isblank(). Their use is generally discouraged because the C/POSIX 109 * standards do not define their semantics beyond the ASCII range, which means 110 * that different implementations exhibit very different behavior. 111 * Instead, Unicode properties should be used directly. 112 * <p> 113 * There are also only a few, broad C/POSIX character classes, and they tend 114 * to be used for conflicting purposes. 
For example, the "isalpha()" class 115 * is sometimes used to determine word boundaries, while a more sophisticated 116 * approach would at least distinguish initial letters from continuation 117 * characters (the latter including combining marks). 118 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 119 * Another example: There is no "istitle()" class for titlecase characters. 120 * <p> 121 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 122 * ICU implements them according to the Standard Recommendations in 123 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 124 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 125 * <p> 126 * API access for C/POSIX character classes is as follows: 127 * <pre>{@code 128 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 129 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 130 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 131 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 132 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 133 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 134 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 135 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 136 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 137 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 138 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 139 * - cntrl: getType(c)==CONTROL 140 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 141 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 142 * <p> 143 * The C/POSIX character classes are also available in UnicodeSet patterns, 144 * using patterns like [:graph:] or \p{graph}. 145 * 146 * <p><strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions. 
147 * Comparison:<ul> 148 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 149 * most of general categories "Z" (separators) + most whitespace ISO controls 150 * (including no-break spaces, but excluding IS1..IS4) 151 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 152 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 153 * 154 * <p> 155 * This class is not subclassable. 156 * 157 * @author Syn Wee Quek 158 * @see ohos.global.icu.lang.UCharacterEnums 159 */ 160 161 public final class UCharacter implements ECharacterCategory, ECharacterDirection 162 { 163 // public inner classes ---------------------------------------------- 164 165 /** 166 * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}. Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'. 167 * 168 * A family of character subsets representing the character blocks in the 169 * Unicode specification, generated from Unicode Data file Blocks.txt. 170 * Character blocks generally define characters used for a specific script 171 * or purpose. A character is contained by at most one Unicode block. 172 * 173 * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU. 
174 */ 175 public static final class UnicodeBlock extends Character.Subset 176 { 177 // block id corresponding to icu4c ----------------------------------- 178 179 /** 180 */ 181 public static final int INVALID_CODE_ID = -1; 182 /** 183 */ 184 public static final int BASIC_LATIN_ID = 1; 185 /** 186 */ 187 public static final int LATIN_1_SUPPLEMENT_ID = 2; 188 /** 189 */ 190 public static final int LATIN_EXTENDED_A_ID = 3; 191 /** 192 */ 193 public static final int LATIN_EXTENDED_B_ID = 4; 194 /** 195 */ 196 public static final int IPA_EXTENSIONS_ID = 5; 197 /** 198 */ 199 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 200 /** 201 */ 202 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 203 /** 204 * Unicode 3.2 renames this block to "Greek and Coptic". 205 */ 206 public static final int GREEK_ID = 8; 207 /** 208 */ 209 public static final int CYRILLIC_ID = 9; 210 /** 211 */ 212 public static final int ARMENIAN_ID = 10; 213 /** 214 */ 215 public static final int HEBREW_ID = 11; 216 /** 217 */ 218 public static final int ARABIC_ID = 12; 219 /** 220 */ 221 public static final int SYRIAC_ID = 13; 222 /** 223 */ 224 public static final int THAANA_ID = 14; 225 /** 226 */ 227 public static final int DEVANAGARI_ID = 15; 228 /** 229 */ 230 public static final int BENGALI_ID = 16; 231 /** 232 */ 233 public static final int GURMUKHI_ID = 17; 234 /** 235 */ 236 public static final int GUJARATI_ID = 18; 237 /** 238 */ 239 public static final int ORIYA_ID = 19; 240 /** 241 */ 242 public static final int TAMIL_ID = 20; 243 /** 244 */ 245 public static final int TELUGU_ID = 21; 246 /** 247 */ 248 public static final int KANNADA_ID = 22; 249 /** 250 */ 251 public static final int MALAYALAM_ID = 23; 252 /** 253 */ 254 public static final int SINHALA_ID = 24; 255 /** 256 */ 257 public static final int THAI_ID = 25; 258 /** 259 */ 260 public static final int LAO_ID = 26; 261 /** 262 */ 263 public static final int TIBETAN_ID = 27; 264 /** 265 */ 266 public static 
final int MYANMAR_ID = 28; 267 /** 268 */ 269 public static final int GEORGIAN_ID = 29; 270 /** 271 */ 272 public static final int HANGUL_JAMO_ID = 30; 273 /** 274 */ 275 public static final int ETHIOPIC_ID = 31; 276 /** 277 */ 278 public static final int CHEROKEE_ID = 32; 279 /** 280 */ 281 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 282 /** 283 */ 284 public static final int OGHAM_ID = 34; 285 /** 286 */ 287 public static final int RUNIC_ID = 35; 288 /** 289 */ 290 public static final int KHMER_ID = 36; 291 /** 292 */ 293 public static final int MONGOLIAN_ID = 37; 294 /** 295 */ 296 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 297 /** 298 */ 299 public static final int GREEK_EXTENDED_ID = 39; 300 /** 301 */ 302 public static final int GENERAL_PUNCTUATION_ID = 40; 303 /** 304 */ 305 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 306 /** 307 */ 308 public static final int CURRENCY_SYMBOLS_ID = 42; 309 /** 310 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 311 * Symbols". 
312 */ 313 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 314 /** 315 */ 316 public static final int LETTERLIKE_SYMBOLS_ID = 44; 317 /** 318 */ 319 public static final int NUMBER_FORMS_ID = 45; 320 /** 321 */ 322 public static final int ARROWS_ID = 46; 323 /** 324 */ 325 public static final int MATHEMATICAL_OPERATORS_ID = 47; 326 /** 327 */ 328 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 329 /** 330 */ 331 public static final int CONTROL_PICTURES_ID = 49; 332 /** 333 */ 334 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 335 /** 336 */ 337 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 338 /** 339 */ 340 public static final int BOX_DRAWING_ID = 52; 341 /** 342 */ 343 public static final int BLOCK_ELEMENTS_ID = 53; 344 /** 345 */ 346 public static final int GEOMETRIC_SHAPES_ID = 54; 347 /** 348 */ 349 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 350 /** 351 */ 352 public static final int DINGBATS_ID = 56; 353 /** 354 */ 355 public static final int BRAILLE_PATTERNS_ID = 57; 356 /** 357 */ 358 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 359 /** 360 */ 361 public static final int KANGXI_RADICALS_ID = 59; 362 /** 363 */ 364 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 365 /** 366 */ 367 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 368 /** 369 */ 370 public static final int HIRAGANA_ID = 62; 371 /** 372 */ 373 public static final int KATAKANA_ID = 63; 374 /** 375 */ 376 public static final int BOPOMOFO_ID = 64; 377 /** 378 */ 379 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 380 /** 381 */ 382 public static final int KANBUN_ID = 66; 383 /** 384 */ 385 public static final int BOPOMOFO_EXTENDED_ID = 67; 386 /** 387 */ 388 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 389 /** 390 */ 391 public static final int CJK_COMPATIBILITY_ID = 69; 392 /** 393 */ 394 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 395 /** 
396 */ 397 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 398 /** 399 */ 400 public static final int YI_SYLLABLES_ID = 72; 401 /** 402 */ 403 public static final int YI_RADICALS_ID = 73; 404 /** 405 */ 406 public static final int HANGUL_SYLLABLES_ID = 74; 407 /** 408 */ 409 public static final int HIGH_SURROGATES_ID = 75; 410 /** 411 */ 412 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 413 /** 414 */ 415 public static final int LOW_SURROGATES_ID = 77; 416 /** 417 * Same as public static final int PRIVATE_USE. 418 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 419 * and multiple code point ranges had this block. 420 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 421 * and adds separate blocks for the supplementary PUAs. 422 */ 423 public static final int PRIVATE_USE_AREA_ID = 78; 424 /** 425 * Same as public static final int PRIVATE_USE_AREA. 426 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 427 * and multiple code point ranges had this block. 428 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 429 * and adds separate blocks for the supplementary PUAs. 
430 */ 431 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 432 /** 433 */ 434 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 435 /** 436 */ 437 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 438 /** 439 */ 440 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 441 /** 442 */ 443 public static final int COMBINING_HALF_MARKS_ID = 82; 444 /** 445 */ 446 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 447 /** 448 */ 449 public static final int SMALL_FORM_VARIANTS_ID = 84; 450 /** 451 */ 452 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 453 /** 454 */ 455 public static final int SPECIALS_ID = 86; 456 /** 457 */ 458 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 459 /** 460 */ 461 public static final int OLD_ITALIC_ID = 88; 462 /** 463 */ 464 public static final int GOTHIC_ID = 89; 465 /** 466 */ 467 public static final int DESERET_ID = 90; 468 /** 469 */ 470 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 471 /** 472 */ 473 public static final int MUSICAL_SYMBOLS_ID = 92; 474 /** 475 */ 476 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 477 /** 478 */ 479 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 480 /** 481 */ 482 public static final int 483 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 484 /** 485 */ 486 public static final int TAGS_ID = 96; 487 488 // New blocks in Unicode 3.2 489 490 /** 491 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 492 */ 493 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 494 /** 495 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
496 */ 497 498 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 499 /** 500 */ 501 public static final int TAGALOG_ID = 98; 502 /** 503 */ 504 public static final int HANUNOO_ID = 99; 505 /** 506 */ 507 public static final int BUHID_ID = 100; 508 /** 509 */ 510 public static final int TAGBANWA_ID = 101; 511 /** 512 */ 513 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 514 /** 515 */ 516 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 517 /** 518 */ 519 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 520 /** 521 */ 522 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 523 /** 524 */ 525 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 526 /** 527 */ 528 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 529 /** 530 */ 531 public static final int VARIATION_SELECTORS_ID = 108; 532 /** 533 */ 534 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 535 /** 536 */ 537 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 538 539 /** 540 */ 541 public static final int LIMBU_ID = 111; /*[1900]*/ 542 /** 543 */ 544 public static final int TAI_LE_ID = 112; /*[1950]*/ 545 /** 546 */ 547 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 548 /** 549 */ 550 public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 551 /** 552 */ 553 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 554 /** 555 */ 556 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 557 /** 558 */ 559 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 560 /** 561 */ 562 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 563 /** 564 */ 565 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 566 /** 567 */ 568 public static final int UGARITIC_ID = 120; /*[10380]*/ 569 /** 570 */ 571 public static final int SHAVIAN_ID = 121; /*[10450]*/ 572 /** 573 */ 574 public static final int OSMANYA_ID = 
122; /*[10480]*/ 575 /** 576 */ 577 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 578 /** 579 */ 580 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 581 /** 582 */ 583 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 584 585 /* New blocks in Unicode 4.1 */ 586 587 /** 588 */ 589 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 590 591 /** 592 */ 593 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 594 595 /** 596 */ 597 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 598 599 /** 600 */ 601 public static final int BUGINESE_ID = 129; /*[1A00]*/ 602 603 /** 604 */ 605 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 606 607 /** 608 */ 609 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 610 611 /** 612 */ 613 public static final int COPTIC_ID = 132; /*[2C80]*/ 614 615 /** 616 */ 617 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 618 619 /** 620 */ 621 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 622 623 /** 624 */ 625 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 626 627 /** 628 */ 629 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 630 631 /** 632 */ 633 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 634 635 /** 636 */ 637 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 638 639 /** 640 */ 641 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 642 643 /** 644 */ 645 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 646 647 /** 648 */ 649 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 650 651 /** 652 */ 653 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 654 655 /** 656 */ 657 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 658 659 /** 660 */ 661 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 662 663 /** 664 */ 665 public static 
final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 666 667 /* New blocks in Unicode 5.0 */ 668 669 /** 670 */ 671 public static final int NKO_ID = 146; /*[07C0]*/ 672 /** 673 */ 674 public static final int BALINESE_ID = 147; /*[1B00]*/ 675 /** 676 */ 677 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 678 /** 679 */ 680 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 681 /** 682 */ 683 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 684 /** 685 */ 686 public static final int PHOENICIAN_ID = 151; /*[10900]*/ 687 /** 688 */ 689 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 690 /** 691 */ 692 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 693 /** 694 */ 695 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 696 697 /** 698 */ 699 public static final int SUNDANESE_ID = 155; /* [1B80] */ 700 701 /** 702 */ 703 public static final int LEPCHA_ID = 156; /* [1C00] */ 704 705 /** 706 */ 707 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 708 709 /** 710 */ 711 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 712 713 /** 714 */ 715 public static final int VAI_ID = 159; /* [A500] */ 716 717 /** 718 */ 719 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 720 721 /** 722 */ 723 public static final int SAURASHTRA_ID = 161; /* [A880] */ 724 725 /** 726 */ 727 public static final int KAYAH_LI_ID = 162; /* [A900] */ 728 729 /** 730 */ 731 public static final int REJANG_ID = 163; /* [A930] */ 732 733 /** 734 */ 735 public static final int CHAM_ID = 164; /* [AA00] */ 736 737 /** 738 */ 739 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 740 741 /** 742 */ 743 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 744 745 /** 746 */ 747 public static final int LYCIAN_ID = 167; /* [10280] */ 748 749 /** 750 */ 751 public static final int CARIAN_ID = 168; /* [102A0] */ 752 753 /** 754 */ 755 public static final int LYDIAN_ID = 169; /* 
[10920] */ 756 757 /** 758 */ 759 public static final int MAHJONG_TILES_ID = 170; /* [1F000] */ 760 761 /** 762 */ 763 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 764 765 /* New blocks in Unicode 5.2 */ 766 767 /***/ 768 public static final int SAMARITAN_ID = 172; /*[0800]*/ 769 /***/ 770 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 771 /***/ 772 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 773 /***/ 774 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 775 /***/ 776 public static final int LISU_ID = 176; /*[A4D0]*/ 777 /***/ 778 public static final int BAMUM_ID = 177; /*[A6A0]*/ 779 /***/ 780 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 781 /***/ 782 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 783 /***/ 784 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 785 /***/ 786 public static final int JAVANESE_ID = 181; /*[A980]*/ 787 /***/ 788 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 789 /***/ 790 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 791 /***/ 792 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 793 /***/ 794 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 795 /***/ 796 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 797 /***/ 798 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 799 /***/ 800 public static final int AVESTAN_ID = 188; /*[10B00]*/ 801 /***/ 802 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 803 /***/ 804 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 805 /***/ 806 public static final int OLD_TURKIC_ID = 191; /*[10C00]*/ 807 /***/ 808 public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ 809 /***/ 810 public static final int KAITHI_ID = 193; /*[11080]*/ 811 /***/ 812 public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ 813 /***/ 814 public 
static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ 815 /***/ 816 public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ 817 /***/ 818 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ 819 820 /* New blocks in Unicode 6.0 */ 821 822 /***/ 823 public static final int MANDAIC_ID = 198; /*[0840]*/ 824 /***/ 825 public static final int BATAK_ID = 199; /*[1BC0]*/ 826 /***/ 827 public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/ 828 /***/ 829 public static final int BRAHMI_ID = 201; /*[11000]*/ 830 /***/ 831 public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/ 832 /***/ 833 public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/ 834 /***/ 835 public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/ 836 /***/ 837 public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/ 838 /***/ 839 public static final int EMOTICONS_ID = 206; /*[1F600]*/ 840 /***/ 841 public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/ 842 /***/ 843 public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/ 844 /***/ 845 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/ 846 847 /* New blocks in Unicode 6.1 */ 848 849 /***/ 850 public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/ 851 /***/ 852 public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/ 853 /***/ 854 public static final int CHAKMA_ID = 212; /*[11100]*/ 855 /***/ 856 public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/ 857 /***/ 858 public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/ 859 /***/ 860 public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/ 861 /***/ 862 public static final int MIAO_ID = 216; /*[16F00]*/ 863 /***/ 864 public static final int SHARADA_ID = 217; /*[11180]*/ 865 /***/ 866 public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/ 867 /***/ 868 public static 
final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/ 869 /***/ 870 public static final int TAKRI_ID = 220; /*[11680]*/ 871 872 /* New blocks in Unicode 7.0 */ 873 874 /***/ 875 public static final int BASSA_VAH_ID = 221; /*[16AD0]*/ 876 /***/ 877 public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/ 878 /***/ 879 public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/ 880 /***/ 881 public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/ 882 /***/ 883 public static final int DUPLOYAN_ID = 225; /*[1BC00]*/ 884 /***/ 885 public static final int ELBASAN_ID = 226; /*[10500]*/ 886 /***/ 887 public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/ 888 /***/ 889 public static final int GRANTHA_ID = 228; /*[11300]*/ 890 /***/ 891 public static final int KHOJKI_ID = 229; /*[11200]*/ 892 /***/ 893 public static final int KHUDAWADI_ID = 230; /*[112B0]*/ 894 /***/ 895 public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/ 896 /***/ 897 public static final int LINEAR_A_ID = 232; /*[10600]*/ 898 /***/ 899 public static final int MAHAJANI_ID = 233; /*[11150]*/ 900 /***/ 901 public static final int MANICHAEAN_ID = 234; /*[10AC0]*/ 902 /***/ 903 public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/ 904 /***/ 905 public static final int MODI_ID = 236; /*[11600]*/ 906 /***/ 907 public static final int MRO_ID = 237; /*[16A40]*/ 908 /***/ 909 public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/ 910 /***/ 911 public static final int NABATAEAN_ID = 239; /*[10880]*/ 912 /***/ 913 public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/ 914 /***/ 915 public static final int OLD_PERMIC_ID = 241; /*[10350]*/ 916 /***/ 917 public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/ 918 /***/ 919 public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/ 920 /***/ 921 public static final int PALMYRENE_ID = 244; /*[10860]*/ 922 /***/ 923 public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/ 924 
/***/ 925 public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/ 926 /***/ 927 public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/ 928 /***/ 929 public static final int SIDDHAM_ID = 248; /*[11580]*/ 930 /***/ 931 public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/ 932 /***/ 933 public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/ 934 /***/ 935 public static final int TIRHUTA_ID = 251; /*[11480]*/ 936 /***/ 937 public static final int WARANG_CITI_ID = 252; /*[118A0]*/ 938 939 /* New blocks in Unicode 8.0 */ 940 941 /***/ 942 public static final int AHOM_ID = 253; /*[11700]*/ 943 /***/ 944 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 945 /***/ 946 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 947 /***/ 948 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 949 /***/ 950 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 951 /***/ 952 public static final int HATRAN_ID = 258; /*[108E0]*/ 953 /***/ 954 public static final int MULTANI_ID = 259; /*[11280]*/ 955 /***/ 956 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 957 /***/ 958 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 959 /***/ 960 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 961 962 /* New blocks in Unicode 9.0 */ 963 964 /***/ 965 public static final int ADLAM_ID = 263; /*[1E900]*/ 966 /***/ 967 public static final int BHAIKSUKI_ID = 264; /*[11C00]*/ 968 /***/ 969 public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/ 970 /***/ 971 public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/ 972 /***/ 973 public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/ 974 /***/ 975 public static final int MARCHEN_ID = 268; /*[11C70]*/ 976 /***/ 977 public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/ 978 /***/ 979 public static final int NEWA_ID 
= 270; /*[11400]*/ 980 /***/ 981 public static final int OSAGE_ID = 271; /*[104B0]*/ 982 /***/ 983 public static final int TANGUT_ID = 272; /*[17000]*/ 984 /***/ 985 public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/ 986 987 // New blocks in Unicode 10.0 988 989 /***/ 990 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/ 991 /***/ 992 public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/ 993 /***/ 994 public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/ 995 /***/ 996 public static final int NUSHU_ID = 277; /*[1B170]*/ 997 /***/ 998 public static final int SOYOMBO_ID = 278; /*[11A50]*/ 999 /***/ 1000 public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/ 1001 /***/ 1002 public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/ 1003 1004 // New blocks in Unicode 11.0 1005 1006 /***/ 1007 public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/ 1008 /***/ 1009 public static final int DOGRA_ID = 282; /*[11800]*/ 1010 /***/ 1011 public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/ 1012 /***/ 1013 public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/ 1014 /***/ 1015 public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/ 1016 /***/ 1017 public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/ 1018 /***/ 1019 public static final int MAKASAR_ID = 287; /*[11EE0]*/ 1020 /***/ 1021 public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/ 1022 /***/ 1023 public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/ 1024 /***/ 1025 public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/ 1026 /***/ 1027 public static final int SOGDIAN_ID = 291; /*[10F30]*/ 1028 1029 // New blocks in Unicode 12.0 1030 1031 /***/ 1032 public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/ 1033 /***/ 1034 public static final int ELYMAIC_ID = 293; /*[10FE0]*/ 1035 /***/ 1036 public static final int NANDINAGARI_ID = 294; /*[119A0]*/ 1037 /***/ 1038 public static final 
int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/ 1039 /***/ 1040 public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/ 1041 /***/ 1042 public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/ 1043 /***/ 1044 public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/ 1045 /***/ 1046 public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/ 1047 /***/ 1048 public static final int WANCHO_ID = 300; /*[1E2C0]*/ 1049 1050 // New blocks in Unicode 13.0 1051 1052 /***/ 1053 public static final int CHORASMIAN_ID = 301; /*[10FB0]*/ 1054 /***/ 1055 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/ 1056 /***/ 1057 public static final int DIVES_AKURU_ID = 303; /*[11900]*/ 1058 /***/ 1059 public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/ 1060 /***/ 1061 public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/ 1062 /***/ 1063 public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/ 1064 /***/ 1065 public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/ 1066 /***/ 1067 public static final int YEZIDI_ID = 308; /*[10E80]*/ 1068 1069 /** 1070 * One more than the highest normal UnicodeBlock value. 1071 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 1072 * 1073 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
1074 * @hide unsupported on OHOS 1075 */ 1076 @Deprecated 1077 public static final int COUNT = 309; 1078 1079 // blocks objects --------------------------------------------------- 1080 1081 /** 1082 * Array of UnicodeBlocks, for easy access in getInstance(int) 1083 */ 1084 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1085 1086 /** 1087 */ 1088 public static final UnicodeBlock NO_BLOCK 1089 = new UnicodeBlock("NO_BLOCK", 0); 1090 1091 /** 1092 */ 1093 public static final UnicodeBlock BASIC_LATIN 1094 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1095 /** 1096 */ 1097 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1098 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1099 /** 1100 */ 1101 public static final UnicodeBlock LATIN_EXTENDED_A 1102 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1103 /** 1104 */ 1105 public static final UnicodeBlock LATIN_EXTENDED_B 1106 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1107 /** 1108 */ 1109 public static final UnicodeBlock IPA_EXTENSIONS 1110 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1111 /** 1112 */ 1113 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1114 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1115 /** 1116 */ 1117 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1118 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1119 /** 1120 * Unicode 3.2 renames this block to "Greek and Coptic". 
1121 */ 1122 public static final UnicodeBlock GREEK 1123 = new UnicodeBlock("GREEK", GREEK_ID); 1124 /** 1125 */ 1126 public static final UnicodeBlock CYRILLIC 1127 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1128 /** 1129 */ 1130 public static final UnicodeBlock ARMENIAN 1131 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1132 /** 1133 */ 1134 public static final UnicodeBlock HEBREW 1135 = new UnicodeBlock("HEBREW", HEBREW_ID); 1136 /** 1137 */ 1138 public static final UnicodeBlock ARABIC 1139 = new UnicodeBlock("ARABIC", ARABIC_ID); 1140 /** 1141 */ 1142 public static final UnicodeBlock SYRIAC 1143 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1144 /** 1145 */ 1146 public static final UnicodeBlock THAANA 1147 = new UnicodeBlock("THAANA", THAANA_ID); 1148 /** 1149 */ 1150 public static final UnicodeBlock DEVANAGARI 1151 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1152 /** 1153 */ 1154 public static final UnicodeBlock BENGALI 1155 = new UnicodeBlock("BENGALI", BENGALI_ID); 1156 /** 1157 */ 1158 public static final UnicodeBlock GURMUKHI 1159 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1160 /** 1161 */ 1162 public static final UnicodeBlock GUJARATI 1163 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1164 /** 1165 */ 1166 public static final UnicodeBlock ORIYA 1167 = new UnicodeBlock("ORIYA", ORIYA_ID); 1168 /** 1169 */ 1170 public static final UnicodeBlock TAMIL 1171 = new UnicodeBlock("TAMIL", TAMIL_ID); 1172 /** 1173 */ 1174 public static final UnicodeBlock TELUGU 1175 = new UnicodeBlock("TELUGU", TELUGU_ID); 1176 /** 1177 */ 1178 public static final UnicodeBlock KANNADA 1179 = new UnicodeBlock("KANNADA", KANNADA_ID); 1180 /** 1181 */ 1182 public static final UnicodeBlock MALAYALAM 1183 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1184 /** 1185 */ 1186 public static final UnicodeBlock SINHALA 1187 = new UnicodeBlock("SINHALA", SINHALA_ID); 1188 /** 1189 */ 1190 public static final UnicodeBlock THAI 1191 = new UnicodeBlock("THAI", THAI_ID); 1192 /** 1193 */ 1194 
public static final UnicodeBlock LAO 1195 = new UnicodeBlock("LAO", LAO_ID); 1196 /** 1197 */ 1198 public static final UnicodeBlock TIBETAN 1199 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1200 /** 1201 */ 1202 public static final UnicodeBlock MYANMAR 1203 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1204 /** 1205 */ 1206 public static final UnicodeBlock GEORGIAN 1207 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1208 /** 1209 */ 1210 public static final UnicodeBlock HANGUL_JAMO 1211 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1212 /** 1213 */ 1214 public static final UnicodeBlock ETHIOPIC 1215 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1216 /** 1217 */ 1218 public static final UnicodeBlock CHEROKEE 1219 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1220 /** 1221 */ 1222 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1223 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1224 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1225 /** 1226 */ 1227 public static final UnicodeBlock OGHAM 1228 = new UnicodeBlock("OGHAM", OGHAM_ID); 1229 /** 1230 */ 1231 public static final UnicodeBlock RUNIC 1232 = new UnicodeBlock("RUNIC", RUNIC_ID); 1233 /** 1234 */ 1235 public static final UnicodeBlock KHMER 1236 = new UnicodeBlock("KHMER", KHMER_ID); 1237 /** 1238 */ 1239 public static final UnicodeBlock MONGOLIAN 1240 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1241 /** 1242 */ 1243 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1244 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1245 /** 1246 */ 1247 public static final UnicodeBlock GREEK_EXTENDED 1248 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1249 /** 1250 */ 1251 public static final UnicodeBlock GENERAL_PUNCTUATION 1252 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1253 /** 1254 */ 1255 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1256 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 
SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1257 /** 1258 */ 1259 public static final UnicodeBlock CURRENCY_SYMBOLS 1260 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1261 /** 1262 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1263 * Symbols". 1264 */ 1265 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1266 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1267 /** 1268 */ 1269 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1270 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1271 /** 1272 */ 1273 public static final UnicodeBlock NUMBER_FORMS 1274 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1275 /** 1276 */ 1277 public static final UnicodeBlock ARROWS 1278 = new UnicodeBlock("ARROWS", ARROWS_ID); 1279 /** 1280 */ 1281 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1282 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1283 /** 1284 */ 1285 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1286 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1287 /** 1288 */ 1289 public static final UnicodeBlock CONTROL_PICTURES 1290 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1291 /** 1292 */ 1293 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1294 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1295 /** 1296 */ 1297 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1298 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1299 /** 1300 */ 1301 public static final UnicodeBlock BOX_DRAWING 1302 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1303 /** 1304 */ 1305 public static final UnicodeBlock BLOCK_ELEMENTS 1306 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1307 /** 1308 */ 1309 public static final UnicodeBlock GEOMETRIC_SHAPES 1310 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1311 /** 
1312 */ 1313 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1314 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1315 /** 1316 */ 1317 public static final UnicodeBlock DINGBATS 1318 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1319 /** 1320 */ 1321 public static final UnicodeBlock BRAILLE_PATTERNS 1322 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1323 /** 1324 */ 1325 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1326 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1327 /** 1328 */ 1329 public static final UnicodeBlock KANGXI_RADICALS 1330 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1331 /** 1332 */ 1333 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1334 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1335 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1336 /** 1337 */ 1338 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1339 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1340 /** 1341 */ 1342 public static final UnicodeBlock HIRAGANA 1343 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1344 /** 1345 */ 1346 public static final UnicodeBlock KATAKANA 1347 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1348 /** 1349 */ 1350 public static final UnicodeBlock BOPOMOFO 1351 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1352 /** 1353 */ 1354 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1355 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1356 /** 1357 */ 1358 public static final UnicodeBlock KANBUN 1359 = new UnicodeBlock("KANBUN", KANBUN_ID); 1360 /** 1361 */ 1362 public static final UnicodeBlock BOPOMOFO_EXTENDED 1363 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1364 /** 1365 */ 1366 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1367 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1368 
ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1369 /** 1370 */ 1371 public static final UnicodeBlock CJK_COMPATIBILITY 1372 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1373 /** 1374 */ 1375 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1376 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1377 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1378 /** 1379 */ 1380 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1381 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1382 /** 1383 */ 1384 public static final UnicodeBlock YI_SYLLABLES 1385 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1386 /** 1387 */ 1388 public static final UnicodeBlock YI_RADICALS 1389 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1390 /** 1391 */ 1392 public static final UnicodeBlock HANGUL_SYLLABLES 1393 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1394 /** 1395 */ 1396 public static final UnicodeBlock HIGH_SURROGATES 1397 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1398 /** 1399 */ 1400 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1401 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1402 /** 1403 */ 1404 public static final UnicodeBlock LOW_SURROGATES 1405 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1406 /** 1407 * Same as public static final int PRIVATE_USE. 1408 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1409 * and multiple code point ranges had this block. 1410 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1411 * and adds separate blocks for the supplementary PUAs. 1412 */ 1413 public static final UnicodeBlock PRIVATE_USE_AREA 1414 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1415 /** 1416 * Same as public static final int PRIVATE_USE_AREA. 1417 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1418 * and multiple code point ranges had this block. 
1419 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1420 * and adds separate blocks for the supplementary PUAs. 1421 */ 1422 public static final UnicodeBlock PRIVATE_USE 1423 = PRIVATE_USE_AREA; 1424 /** 1425 */ 1426 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1427 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1428 /** 1429 */ 1430 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1431 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1432 /** 1433 */ 1434 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1435 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1436 /** 1437 */ 1438 public static final UnicodeBlock COMBINING_HALF_MARKS 1439 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1440 /** 1441 */ 1442 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1443 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1444 /** 1445 */ 1446 public static final UnicodeBlock SMALL_FORM_VARIANTS 1447 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1448 /** 1449 */ 1450 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1451 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1452 /** 1453 */ 1454 public static final UnicodeBlock SPECIALS 1455 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1456 /** 1457 */ 1458 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1459 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1460 /** 1461 */ 1462 public static final UnicodeBlock OLD_ITALIC 1463 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1464 /** 1465 */ 1466 public static final UnicodeBlock GOTHIC 1467 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1468 /** 1469 */ 1470 public static final UnicodeBlock DESERET 1471 = new UnicodeBlock("DESERET", DESERET_ID); 
1472 /** 1473 */ 1474 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1475 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1476 /** 1477 */ 1478 public static final UnicodeBlock MUSICAL_SYMBOLS 1479 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1480 /** 1481 */ 1482 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1483 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1484 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1485 /** 1486 */ 1487 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1488 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1489 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1490 /** 1491 */ 1492 public static final UnicodeBlock 1493 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1494 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1495 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1496 /** 1497 */ 1498 public static final UnicodeBlock TAGS 1499 = new UnicodeBlock("TAGS", TAGS_ID); 1500 1501 // New blocks in Unicode 3.2 1502 1503 /** 1504 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1505 */ 1506 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1507 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1508 /** 1509 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 
1510 */ 1511 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1512 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1513 /** 1514 */ 1515 public static final UnicodeBlock TAGALOG 1516 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1517 /** 1518 */ 1519 public static final UnicodeBlock HANUNOO 1520 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1521 /** 1522 */ 1523 public static final UnicodeBlock BUHID 1524 = new UnicodeBlock("BUHID", BUHID_ID); 1525 /** 1526 */ 1527 public static final UnicodeBlock TAGBANWA 1528 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1529 /** 1530 */ 1531 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1532 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1533 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1534 /** 1535 */ 1536 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1537 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1538 /** 1539 */ 1540 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1541 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1542 /** 1543 */ 1544 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1545 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1546 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1547 /** 1548 */ 1549 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1550 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1551 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1552 /** 1553 */ 1554 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1555 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1556 /** 1557 */ 1558 public static final UnicodeBlock VARIATION_SELECTORS 1559 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1560 /** 1561 */ 1562 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1563 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1564 
SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1565 /** 1566 */ 1567 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1568 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1569 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1570 1571 /** 1572 */ 1573 public static final UnicodeBlock LIMBU 1574 = new UnicodeBlock("LIMBU", LIMBU_ID); 1575 /** 1576 */ 1577 public static final UnicodeBlock TAI_LE 1578 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1579 /** 1580 */ 1581 public static final UnicodeBlock KHMER_SYMBOLS 1582 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1583 1584 /** 1585 */ 1586 public static final UnicodeBlock PHONETIC_EXTENSIONS 1587 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1588 1589 /** 1590 */ 1591 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1592 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1593 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1594 /** 1595 */ 1596 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1597 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1598 /** 1599 */ 1600 public static final UnicodeBlock LINEAR_B_SYLLABARY 1601 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1602 /** 1603 */ 1604 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1605 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1606 /** 1607 */ 1608 public static final UnicodeBlock AEGEAN_NUMBERS 1609 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1610 /** 1611 */ 1612 public static final UnicodeBlock UGARITIC 1613 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1614 /** 1615 */ 1616 public static final UnicodeBlock SHAVIAN 1617 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1618 /** 1619 */ 1620 public static final UnicodeBlock OSMANYA 1621 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1622 /** 1623 */ 1624 public static final UnicodeBlock CYPRIOT_SYLLABARY 1625 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1626 /** 1627 */ 
1628 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1629 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1630 1631 /** 1632 */ 1633 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1634 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1635 1636 /* New blocks in Unicode 4.1 */ 1637 1638 /** 1639 */ 1640 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1641 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1642 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1643 1644 /** 1645 */ 1646 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1647 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 1648 1649 /** 1650 */ 1651 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1652 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 1653 1654 /** 1655 */ 1656 public static final UnicodeBlock BUGINESE = 1657 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 1658 1659 /** 1660 */ 1661 public static final UnicodeBlock CJK_STROKES = 1662 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 1663 1664 /** 1665 */ 1666 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1667 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1668 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 1669 1670 /** 1671 */ 1672 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 1673 1674 /** 1675 */ 1676 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1677 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 1678 1679 /** 1680 */ 1681 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1682 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 1683 1684 /** 1685 */ 1686 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 1687 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 1688 1689 
/** 1690 */ 1691 public static final UnicodeBlock GLAGOLITIC = 1692 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 1693 1694 /** 1695 */ 1696 public static final UnicodeBlock KHAROSHTHI = 1697 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 1698 1699 /** 1700 */ 1701 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 1702 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 1703 1704 /** 1705 */ 1706 public static final UnicodeBlock NEW_TAI_LUE = 1707 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 1708 1709 /** 1710 */ 1711 public static final UnicodeBlock OLD_PERSIAN = 1712 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 1713 1714 /** 1715 */ 1716 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 1717 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 1718 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 1719 1720 /** 1721 */ 1722 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 1723 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 1724 1725 /** 1726 */ 1727 public static final UnicodeBlock SYLOTI_NAGRI = 1728 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 1729 1730 /** 1731 */ 1732 public static final UnicodeBlock TIFINAGH = 1733 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 1734 1735 /** 1736 */ 1737 public static final UnicodeBlock VERTICAL_FORMS = 1738 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 1739 1740 /** 1741 */ 1742 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 1743 /** 1744 */ 1745 public static final UnicodeBlock BALINESE = 1746 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 1747 /** 1748 */ 1749 public static final UnicodeBlock LATIN_EXTENDED_C = 1750 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 1751 /** 1752 */ 1753 public static final UnicodeBlock LATIN_EXTENDED_D = 1754 new 
UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 1755 /** 1756 */ 1757 public static final UnicodeBlock PHAGS_PA = 1758 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 1759 /** 1760 */ 1761 public static final UnicodeBlock PHOENICIAN = 1762 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/ 1763 /** 1764 */ 1765 public static final UnicodeBlock CUNEIFORM = 1766 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 1767 /** 1768 */ 1769 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 1770 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 1771 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 1772 /** 1773 */ 1774 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 1775 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 1776 1777 /** 1778 */ 1779 public static final UnicodeBlock SUNDANESE = 1780 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 1781 1782 /** 1783 */ 1784 public static final UnicodeBlock LEPCHA = 1785 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 1786 1787 /** 1788 */ 1789 public static final UnicodeBlock OL_CHIKI = 1790 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 1791 1792 /** 1793 */ 1794 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 1795 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 1796 1797 /** 1798 */ 1799 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 1800 1801 /** 1802 */ 1803 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 1804 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 1805 1806 /** 1807 */ 1808 public static final UnicodeBlock SAURASHTRA = 1809 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 1810 1811 /** 1812 */ 1813 public static final UnicodeBlock KAYAH_LI = 1814 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 1815 1816 /** 1817 */ 1818 public static final UnicodeBlock REJANG = 
1819 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */ 1820 1821 /** 1822 */ 1823 public static final UnicodeBlock CHAM = 1824 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 1825 1826 /** 1827 */ 1828 public static final UnicodeBlock ANCIENT_SYMBOLS = 1829 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 1830 1831 /** 1832 */ 1833 public static final UnicodeBlock PHAISTOS_DISC = 1834 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 1835 1836 /** 1837 */ 1838 public static final UnicodeBlock LYCIAN = 1839 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 1840 1841 /** 1842 */ 1843 public static final UnicodeBlock CARIAN = 1844 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 1845 1846 /** 1847 */ 1848 public static final UnicodeBlock LYDIAN = 1849 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 1850 1851 /** 1852 */ 1853 public static final UnicodeBlock MAHJONG_TILES = 1854 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 1855 1856 /** 1857 */ 1858 public static final UnicodeBlock DOMINO_TILES = 1859 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 1860 1861 /* New blocks in Unicode 5.2 */ 1862 1863 /***/ 1864 public static final UnicodeBlock SAMARITAN = 1865 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 1866 /***/ 1867 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1868 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1869 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 1870 /***/ 1871 public static final UnicodeBlock TAI_THAM = 1872 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 1873 /***/ 1874 public static final UnicodeBlock VEDIC_EXTENSIONS = 1875 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 1876 /***/ 1877 public static final UnicodeBlock LISU = 1878 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 1879 /***/ 1880 public static final UnicodeBlock BAMUM = 1881 new 
UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 1882 /***/ 1883 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 1884 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 1885 /***/ 1886 public static final UnicodeBlock DEVANAGARI_EXTENDED = 1887 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 1888 /***/ 1889 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 1890 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 1891 /***/ 1892 public static final UnicodeBlock JAVANESE = 1893 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 1894 /***/ 1895 public static final UnicodeBlock MYANMAR_EXTENDED_A = 1896 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 1897 /***/ 1898 public static final UnicodeBlock TAI_VIET = 1899 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 1900 /***/ 1901 public static final UnicodeBlock MEETEI_MAYEK = 1902 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 1903 /***/ 1904 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 1905 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 1906 /***/ 1907 public static final UnicodeBlock IMPERIAL_ARAMAIC = 1908 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 1909 /***/ 1910 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 1911 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 1912 /***/ 1913 public static final UnicodeBlock AVESTAN = 1914 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 1915 /***/ 1916 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 1917 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 1918 /***/ 1919 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 1920 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 1921 /***/ 1922 public static final UnicodeBlock OLD_TURKIC = 
1923 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 1924 /***/ 1925 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 1926 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 1927 /***/ 1928 public static final UnicodeBlock KAITHI = 1929 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 1930 /***/ 1931 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 1932 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 1933 /***/ 1934 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 1935 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 1936 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 1937 /***/ 1938 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 1939 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 1940 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 1941 /***/ 1942 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 1943 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 1944 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 1945 1946 /* New blocks in Unicode 6.0 */ 1947 1948 /***/ 1949 public static final UnicodeBlock MANDAIC = 1950 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 1951 /***/ 1952 public static final UnicodeBlock BATAK = 1953 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/ 1954 /***/ 1955 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 1956 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 1957 /***/ 1958 public static final UnicodeBlock BRAHMI = 1959 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 1960 /***/ 1961 public static final UnicodeBlock BAMUM_SUPPLEMENT = 1962 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 1963 /***/ 1964 public static final UnicodeBlock KANA_SUPPLEMENT = 1965 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 1966 /***/ 1967 public static final UnicodeBlock PLAYING_CARDS = 1968 new 
UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 1969 /***/ 1970 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 1971 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 1972 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 1973 /***/ 1974 public static final UnicodeBlock EMOTICONS = 1975 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 1976 /***/ 1977 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 1978 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 1979 /***/ 1980 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 1981 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 1982 /***/ 1983 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 1984 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 1985 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 1986 1987 /* New blocks in Unicode 6.1 */ 1988 1989 /***/ 1990 public static final UnicodeBlock ARABIC_EXTENDED_A = 1991 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 1992 /***/ 1993 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 1994 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 1995 /***/ 1996 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 1997 /***/ 1998 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 1999 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 2000 /***/ 2001 public static final UnicodeBlock MEROITIC_CURSIVE = 2002 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 2003 /***/ 2004 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2005 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 2006 /***/ 2007 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 
2008 /***/ 2009 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 2010 /***/ 2011 public static final UnicodeBlock SORA_SOMPENG = 2012 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 2013 /***/ 2014 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2015 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 2016 /***/ 2017 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 2018 2019 /* New blocks in Unicode 7.0 */ 2020 2021 /***/ 2022 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 2023 /***/ 2024 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2025 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 2026 /***/ 2027 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2028 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/ 2029 /***/ 2030 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2031 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 2032 /***/ 2033 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 2034 /***/ 2035 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 2036 /***/ 2037 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2038 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 2039 /***/ 2040 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 2041 /***/ 2042 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 2043 /***/ 2044 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 2045 /***/ 2046 public static final UnicodeBlock LATIN_EXTENDED_E = 2047 new 
UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 2048 /***/ 2049 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 2050 /***/ 2051 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 2052 /***/ 2053 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 2054 /***/ 2055 public static final UnicodeBlock MENDE_KIKAKUI = 2056 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 2057 /***/ 2058 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 2059 /***/ 2060 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/ 2061 /***/ 2062 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2063 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 2064 /***/ 2065 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 2066 /***/ 2067 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2068 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 2069 /***/ 2070 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 2071 /***/ 2072 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2073 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 2074 /***/ 2075 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 2076 /***/ 2077 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 2078 /***/ 2079 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 2080 /***/ 2081 public static final UnicodeBlock PSALTER_PAHLAVI = 2082 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 2083 /***/ 2084 public static final 
UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2085 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 2086 /***/ 2087 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 2088 /***/ 2089 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2090 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 2091 /***/ 2092 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2093 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 2094 /***/ 2095 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 2096 /***/ 2097 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 2098 2099 /* New blocks in Unicode 8.0 */ 2100 2101 /***/ 2102 public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/ 2103 /***/ 2104 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2105 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/ 2106 /***/ 2107 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2108 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/ 2109 /***/ 2110 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2111 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2112 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/ 2113 /***/ 2114 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2115 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/ 2116 /***/ 2117 public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/ 2118 /***/ 2119 public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/ 2120 /***/ 2121 public static final UnicodeBlock OLD_HUNGARIAN = 2122 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/ 2123 
/***/ 2124 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2125 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2126 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2127 /***/ 2128 public static final UnicodeBlock SUTTON_SIGNWRITING = 2129 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2130 2131 /* New blocks in Unicode 9.0 */ 2132 2133 /***/ 2134 public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/ 2135 /***/ 2136 public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/ 2137 /***/ 2138 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2139 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/ 2140 /***/ 2141 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 2142 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/ 2143 /***/ 2144 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 2145 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/ 2146 /***/ 2147 public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/ 2148 /***/ 2149 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 2150 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/ 2151 /***/ 2152 public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/ 2153 /***/ 2154 public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/ 2155 /***/ 2156 public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/ 2157 /***/ 2158 public static final UnicodeBlock TANGUT_COMPONENTS = 2159 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/ 2160 2161 // New blocks in Unicode 10.0 2162 2163 /***/ 2164 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F 
= 2165 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/ 2166 /***/ 2167 public static final UnicodeBlock KANA_EXTENDED_A = 2168 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/ 2169 /***/ 2170 public static final UnicodeBlock MASARAM_GONDI = 2171 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/ 2172 /***/ 2173 public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/ 2174 /***/ 2175 public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/ 2176 /***/ 2177 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 2178 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/ 2179 /***/ 2180 public static final UnicodeBlock ZANABAZAR_SQUARE = 2181 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/ 2182 2183 // New blocks in Unicode 11.0 2184 2185 /***/ 2186 public static final UnicodeBlock CHESS_SYMBOLS = 2187 new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/ 2188 /***/ 2189 public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/ 2190 /***/ 2191 public static final UnicodeBlock GEORGIAN_EXTENDED = 2192 new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/ 2193 /***/ 2194 public static final UnicodeBlock GUNJALA_GONDI = 2195 new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/ 2196 /***/ 2197 public static final UnicodeBlock HANIFI_ROHINGYA = 2198 new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/ 2199 /***/ 2200 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 2201 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/ 2202 /***/ 2203 public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/ 2204 /***/ 2205 public static final UnicodeBlock MAYAN_NUMERALS = 2206 new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); 
/*[1D2E0]*/ 2207 /***/ 2208 public static final UnicodeBlock MEDEFAIDRIN = 2209 new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/ 2210 /***/ 2211 public static final UnicodeBlock OLD_SOGDIAN = 2212 new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/ 2213 /***/ 2214 public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/ 2215 2216 // New blocks in Unicode 12.0 2217 2218 /***/ 2219 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 2220 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/ 2221 /***/ 2222 public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/ 2223 /***/ 2224 public static final UnicodeBlock NANDINAGARI = 2225 new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/ 2226 /***/ 2227 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 2228 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/ 2229 /***/ 2230 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 2231 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/ 2232 /***/ 2233 public static final UnicodeBlock SMALL_KANA_EXTENSION = 2234 new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/ 2235 /***/ 2236 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 2237 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/ 2238 /***/ 2239 public static final UnicodeBlock TAMIL_SUPPLEMENT = 2240 new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/ 2241 /***/ 2242 public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/ 2243 2244 // New blocks in Unicode 13.0 2245 2246 /***/ 2247 public static final UnicodeBlock CHORASMIAN = 2248 new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/ 2249 /***/ 2250 public static 
final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 2251 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G", 2252 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/ 2253 /***/ 2254 public static final UnicodeBlock DIVES_AKURU = 2255 new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/ 2256 /***/ 2257 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 2258 new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/ 2259 /***/ 2260 public static final UnicodeBlock LISU_SUPPLEMENT = 2261 new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/ 2262 /***/ 2263 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING = 2264 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/ 2265 /***/ 2266 public static final UnicodeBlock TANGUT_SUPPLEMENT = 2267 new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/ 2268 /***/ 2269 public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/ 2270 2271 /** 2272 */ 2273 public static final UnicodeBlock INVALID_CODE 2274 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2275 2276 static { 2277 for (int blockId = 0; blockId < COUNT; ++blockId) { 2278 if (BLOCKS_[blockId] == null) { 2279 throw new java.lang.IllegalStateException( 2280 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2281 } 2282 } 2283 } 2284 2285 // public methods -------------------------------------------------- 2286 2287 /** 2288 * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID. 2289 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2290 * @param id UnicodeBlock ID 2291 * @return the only instance of the UnicodeBlock with the argument ID 2292 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2293 * returned. 
2294 */ getInstance(int id)2295 public static UnicodeBlock getInstance(int id) 2296 { 2297 if (id >= 0 && id < BLOCKS_.length) { 2298 return BLOCKS_[id]; 2299 } 2300 return INVALID_CODE; 2301 } 2302 2303 /** 2304 * Returns the Unicode allocation block that contains the code point, 2305 * or null if the code point is not a member of a defined block. 2306 * @param ch code point to be tested 2307 * @return the Unicode allocation block that contains the code point 2308 */ of(int ch)2309 public static UnicodeBlock of(int ch) 2310 { 2311 if (ch > MAX_VALUE) { 2312 return INVALID_CODE; 2313 } 2314 2315 return UnicodeBlock.getInstance( 2316 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2317 } 2318 2319 /** 2320 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2321 * Returns the Unicode block with the given name. <strong>[icu] Note:</strong> Unlike 2322 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2323 * against the official UCD name and the Java block name 2324 * (ignoring case). 
2325 * @param blockName the name of the block to match 2326 * @return the UnicodeBlock with that name 2327 * @throws IllegalArgumentException if the blockName could not be matched 2328 */ forName(String blockName)2329 public static final UnicodeBlock forName(String blockName) { 2330 Map<String, UnicodeBlock> m = null; 2331 if (mref != null) { 2332 m = mref.get(); 2333 } 2334 if (m == null) { 2335 m = new HashMap<>(BLOCKS_.length); 2336 for (int i = 0; i < BLOCKS_.length; ++i) { 2337 UnicodeBlock b = BLOCKS_[i]; 2338 String name = trimBlockName( 2339 getPropertyValueName(UProperty.BLOCK, b.getID(), 2340 UProperty.NameChoice.LONG)); 2341 m.put(name, b); 2342 } 2343 mref = new SoftReference<>(m); 2344 } 2345 UnicodeBlock b = m.get(trimBlockName(blockName)); 2346 if (b == null) { 2347 throw new IllegalArgumentException(); 2348 } 2349 return b; 2350 } 2351 private static SoftReference<Map<String, UnicodeBlock>> mref; 2352 trimBlockName(String name)2353 private static String trimBlockName(String name) { 2354 String upper = name.toUpperCase(Locale.ENGLISH); 2355 StringBuilder result = new StringBuilder(upper.length()); 2356 for (int i = 0; i < upper.length(); i++) { 2357 char c = upper.charAt(i); 2358 if (c != ' ' && c != '_' && c != '-') { 2359 result.append(c); 2360 } 2361 } 2362 return result.toString(); 2363 } 2364 2365 /** 2366 * {icu} Returns the type ID of this Unicode block 2367 * @return integer type ID of this Unicode block 2368 */ getID()2369 public int getID() 2370 { 2371 return m_id_; 2372 } 2373 2374 // private data members --------------------------------------------- 2375 2376 /** 2377 * Identification code for this UnicodeBlock 2378 */ 2379 private int m_id_; 2380 2381 // private constructor ---------------------------------------------- 2382 2383 /** 2384 * UnicodeBlock constructor 2385 * @param name name of this UnicodeBlock 2386 * @param id unique id of this UnicodeBlock 2387 * @exception NullPointerException if name is <code>null</code> 2388 */ 
UnicodeBlock(String name, int id)2389 private UnicodeBlock(String name, int id) 2390 { 2391 super(name); 2392 m_id_ = id; 2393 if (id >= 0) { 2394 BLOCKS_[id] = this; 2395 } 2396 } 2397 } 2398 2399 /** 2400 * East Asian Width constants. 2401 * @see UProperty#EAST_ASIAN_WIDTH 2402 * @see UCharacter#getIntPropertyValue 2403 */ 2404 public static interface EastAsianWidth 2405 { 2406 /** 2407 */ 2408 public static final int NEUTRAL = 0; 2409 /** 2410 */ 2411 public static final int AMBIGUOUS = 1; 2412 /** 2413 */ 2414 public static final int HALFWIDTH = 2; 2415 /** 2416 */ 2417 public static final int FULLWIDTH = 3; 2418 /** 2419 */ 2420 public static final int NARROW = 4; 2421 /** 2422 */ 2423 public static final int WIDE = 5; 2424 /** 2425 * One more than the highest normal EastAsianWidth value. 2426 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH). 2427 * 2428 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2429 * @hide unsupported on OHOS 2430 */ 2431 @Deprecated 2432 public static final int COUNT = 6; 2433 } 2434 2435 /** 2436 * Decomposition Type constants. 
2437 * @see UProperty#DECOMPOSITION_TYPE 2438 */ 2439 public static interface DecompositionType 2440 { 2441 /** 2442 */ 2443 public static final int NONE = 0; 2444 /** 2445 */ 2446 public static final int CANONICAL = 1; 2447 /** 2448 */ 2449 public static final int COMPAT = 2; 2450 /** 2451 */ 2452 public static final int CIRCLE = 3; 2453 /** 2454 */ 2455 public static final int FINAL = 4; 2456 /** 2457 */ 2458 public static final int FONT = 5; 2459 /** 2460 */ 2461 public static final int FRACTION = 6; 2462 /** 2463 */ 2464 public static final int INITIAL = 7; 2465 /** 2466 */ 2467 public static final int ISOLATED = 8; 2468 /** 2469 */ 2470 public static final int MEDIAL = 9; 2471 /** 2472 */ 2473 public static final int NARROW = 10; 2474 /** 2475 */ 2476 public static final int NOBREAK = 11; 2477 /** 2478 */ 2479 public static final int SMALL = 12; 2480 /** 2481 */ 2482 public static final int SQUARE = 13; 2483 /** 2484 */ 2485 public static final int SUB = 14; 2486 /** 2487 */ 2488 public static final int SUPER = 15; 2489 /** 2490 */ 2491 public static final int VERTICAL = 16; 2492 /** 2493 */ 2494 public static final int WIDE = 17; 2495 /** 2496 * One more than the highest normal DecompositionType value. 2497 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE). 2498 * 2499 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2500 * @hide unsupported on OHOS 2501 */ 2502 @Deprecated 2503 public static final int COUNT = 18; 2504 } 2505 2506 /** 2507 * Joining Type constants. 
2508 * @see UProperty#JOINING_TYPE 2509 */ 2510 public static interface JoiningType 2511 { 2512 /** 2513 */ 2514 public static final int NON_JOINING = 0; 2515 /** 2516 */ 2517 public static final int JOIN_CAUSING = 1; 2518 /** 2519 */ 2520 public static final int DUAL_JOINING = 2; 2521 /** 2522 */ 2523 public static final int LEFT_JOINING = 3; 2524 /** 2525 */ 2526 public static final int RIGHT_JOINING = 4; 2527 /** 2528 */ 2529 public static final int TRANSPARENT = 5; 2530 /** 2531 * One more than the highest normal JoiningType value. 2532 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE). 2533 * 2534 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2535 * @hide unsupported on OHOS 2536 */ 2537 @Deprecated 2538 public static final int COUNT = 6; 2539 } 2540 2541 /** 2542 * Joining Group constants. 2543 * @see UProperty#JOINING_GROUP 2544 */ 2545 public static interface JoiningGroup 2546 { 2547 /** 2548 */ 2549 public static final int NO_JOINING_GROUP = 0; 2550 /** 2551 */ 2552 public static final int AIN = 1; 2553 /** 2554 */ 2555 public static final int ALAPH = 2; 2556 /** 2557 */ 2558 public static final int ALEF = 3; 2559 /** 2560 */ 2561 public static final int BEH = 4; 2562 /** 2563 */ 2564 public static final int BETH = 5; 2565 /** 2566 */ 2567 public static final int DAL = 6; 2568 /** 2569 */ 2570 public static final int DALATH_RISH = 7; 2571 /** 2572 */ 2573 public static final int E = 8; 2574 /** 2575 */ 2576 public static final int FEH = 9; 2577 /** 2578 */ 2579 public static final int FINAL_SEMKATH = 10; 2580 /** 2581 */ 2582 public static final int GAF = 11; 2583 /** 2584 */ 2585 public static final int GAMAL = 12; 2586 /** 2587 */ 2588 public static final int HAH = 13; 2589 /***/ 2590 public static final int TEH_MARBUTA_GOAL = 14; 2591 /** 2592 */ 2593 public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL; 2594 /** 2595 */ 2596 public static final int HE = 15; 2597 
/** 2598 */ 2599 public static final int HEH = 16; 2600 /** 2601 */ 2602 public static final int HEH_GOAL = 17; 2603 /** 2604 */ 2605 public static final int HETH = 18; 2606 /** 2607 */ 2608 public static final int KAF = 19; 2609 /** 2610 */ 2611 public static final int KAPH = 20; 2612 /** 2613 */ 2614 public static final int KNOTTED_HEH = 21; 2615 /** 2616 */ 2617 public static final int LAM = 22; 2618 /** 2619 */ 2620 public static final int LAMADH = 23; 2621 /** 2622 */ 2623 public static final int MEEM = 24; 2624 /** 2625 */ 2626 public static final int MIM = 25; 2627 /** 2628 */ 2629 public static final int NOON = 26; 2630 /** 2631 */ 2632 public static final int NUN = 27; 2633 /** 2634 */ 2635 public static final int PE = 28; 2636 /** 2637 */ 2638 public static final int QAF = 29; 2639 /** 2640 */ 2641 public static final int QAPH = 30; 2642 /** 2643 */ 2644 public static final int REH = 31; 2645 /** 2646 */ 2647 public static final int REVERSED_PE = 32; 2648 /** 2649 */ 2650 public static final int SAD = 33; 2651 /** 2652 */ 2653 public static final int SADHE = 34; 2654 /** 2655 */ 2656 public static final int SEEN = 35; 2657 /** 2658 */ 2659 public static final int SEMKATH = 36; 2660 /** 2661 */ 2662 public static final int SHIN = 37; 2663 /** 2664 */ 2665 public static final int SWASH_KAF = 38; 2666 /** 2667 */ 2668 public static final int SYRIAC_WAW = 39; 2669 /** 2670 */ 2671 public static final int TAH = 40; 2672 /** 2673 */ 2674 public static final int TAW = 41; 2675 /** 2676 */ 2677 public static final int TEH_MARBUTA = 42; 2678 /** 2679 */ 2680 public static final int TETH = 43; 2681 /** 2682 */ 2683 public static final int WAW = 44; 2684 /** 2685 */ 2686 public static final int YEH = 45; 2687 /** 2688 */ 2689 public static final int YEH_BARREE = 46; 2690 /** 2691 */ 2692 public static final int YEH_WITH_TAIL = 47; 2693 /** 2694 */ 2695 public static final int YUDH = 48; 2696 /** 2697 */ 2698 public static final int YUDH_HE = 49; 2699 /** 2700 */ 
2701 public static final int ZAIN = 50; 2702 /** 2703 */ 2704 public static final int FE = 51; 2705 /** 2706 */ 2707 public static final int KHAPH = 52; 2708 /** 2709 */ 2710 public static final int ZHAIN = 53; 2711 /** 2712 */ 2713 public static final int BURUSHASKI_YEH_BARREE = 54; 2714 /***/ 2715 public static final int FARSI_YEH = 55; 2716 /***/ 2717 public static final int NYA = 56; 2718 /***/ 2719 public static final int ROHINGYA_YEH = 57; 2720 2721 /***/ 2722 public static final int MANICHAEAN_ALEPH = 58; 2723 /***/ 2724 public static final int MANICHAEAN_AYIN = 59; 2725 /***/ 2726 public static final int MANICHAEAN_BETH = 60; 2727 /***/ 2728 public static final int MANICHAEAN_DALETH = 61; 2729 /***/ 2730 public static final int MANICHAEAN_DHAMEDH = 62; 2731 /***/ 2732 public static final int MANICHAEAN_FIVE = 63; 2733 /***/ 2734 public static final int MANICHAEAN_GIMEL = 64; 2735 /***/ 2736 public static final int MANICHAEAN_HETH = 65; 2737 /***/ 2738 public static final int MANICHAEAN_HUNDRED = 66; 2739 /***/ 2740 public static final int MANICHAEAN_KAPH = 67; 2741 /***/ 2742 public static final int MANICHAEAN_LAMEDH = 68; 2743 /***/ 2744 public static final int MANICHAEAN_MEM = 69; 2745 /***/ 2746 public static final int MANICHAEAN_NUN = 70; 2747 /***/ 2748 public static final int MANICHAEAN_ONE = 71; 2749 /***/ 2750 public static final int MANICHAEAN_PE = 72; 2751 /***/ 2752 public static final int MANICHAEAN_QOPH = 73; 2753 /***/ 2754 public static final int MANICHAEAN_RESH = 74; 2755 /***/ 2756 public static final int MANICHAEAN_SADHE = 75; 2757 /***/ 2758 public static final int MANICHAEAN_SAMEKH = 76; 2759 /***/ 2760 public static final int MANICHAEAN_TAW = 77; 2761 /***/ 2762 public static final int MANICHAEAN_TEN = 78; 2763 /***/ 2764 public static final int MANICHAEAN_TETH = 79; 2765 /***/ 2766 public static final int MANICHAEAN_THAMEDH = 80; 2767 /***/ 2768 public static final int MANICHAEAN_TWENTY = 81; 2769 /***/ 2770 public static final int 
MANICHAEAN_WAW = 82; 2771 /***/ 2772 public static final int MANICHAEAN_YODH = 83; 2773 /***/ 2774 public static final int MANICHAEAN_ZAYIN = 84; 2775 /***/ 2776 public static final int STRAIGHT_WAW = 85; 2777 2778 /***/ 2779 public static final int AFRICAN_FEH = 86; 2780 /***/ 2781 public static final int AFRICAN_NOON = 87; 2782 /***/ 2783 public static final int AFRICAN_QAF = 88; 2784 2785 /***/ 2786 public static final int MALAYALAM_BHA = 89; 2787 /***/ 2788 public static final int MALAYALAM_JA = 90; 2789 /***/ 2790 public static final int MALAYALAM_LLA = 91; 2791 /***/ 2792 public static final int MALAYALAM_LLLA = 92; 2793 /***/ 2794 public static final int MALAYALAM_NGA = 93; 2795 /***/ 2796 public static final int MALAYALAM_NNA = 94; 2797 /***/ 2798 public static final int MALAYALAM_NNNA = 95; 2799 /***/ 2800 public static final int MALAYALAM_NYA = 96; 2801 /***/ 2802 public static final int MALAYALAM_RA = 97; 2803 /***/ 2804 public static final int MALAYALAM_SSA = 98; 2805 /***/ 2806 public static final int MALAYALAM_TTA = 99; 2807 2808 /***/ 2809 public static final int HANIFI_ROHINGYA_KINNA_YA = 100; 2810 /***/ 2811 public static final int HANIFI_ROHINGYA_PA = 101; 2812 2813 /** 2814 * One more than the highest normal JoiningGroup value. 2815 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup). 2816 * 2817 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2818 * @hide unsupported on OHOS 2819 */ 2820 @Deprecated 2821 public static final int COUNT = 102; 2822 } 2823 2824 /** 2825 * Grapheme Cluster Break constants. 
2826 * @see UProperty#GRAPHEME_CLUSTER_BREAK 2827 */ 2828 public static interface GraphemeClusterBreak { 2829 /** 2830 */ 2831 public static final int OTHER = 0; 2832 /** 2833 */ 2834 public static final int CONTROL = 1; 2835 /** 2836 */ 2837 public static final int CR = 2; 2838 /** 2839 */ 2840 public static final int EXTEND = 3; 2841 /** 2842 */ 2843 public static final int L = 4; 2844 /** 2845 */ 2846 public static final int LF = 5; 2847 /** 2848 */ 2849 public static final int LV = 6; 2850 /** 2851 */ 2852 public static final int LVT = 7; 2853 /** 2854 */ 2855 public static final int T = 8; 2856 /** 2857 */ 2858 public static final int V = 9; 2859 /** 2860 */ 2861 public static final int SPACING_MARK = 10; 2862 /** 2863 */ 2864 public static final int PREPEND = 11; 2865 /***/ 2866 public static final int REGIONAL_INDICATOR = 12; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 2867 /***/ 2868 public static final int E_BASE = 13; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 2869 /***/ 2870 public static final int E_BASE_GAZ = 14; /*[EBG]*/ 2871 /***/ 2872 public static final int E_MODIFIER = 15; /*[EM]*/ 2873 /***/ 2874 public static final int GLUE_AFTER_ZWJ = 16; /*[GAZ]*/ 2875 /***/ 2876 public static final int ZWJ = 17; /*[ZWJ]*/ 2877 2878 /** 2879 * One more than the highest normal GraphemeClusterBreak value. 2880 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK). 2881 * 2882 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2883 * @hide unsupported on OHOS 2884 */ 2885 @Deprecated 2886 public static final int COUNT = 18; 2887 } 2888 2889 /** 2890 * Word Break constants. 
2891 * @see UProperty#WORD_BREAK 2892 */ 2893 public static interface WordBreak { 2894 /** 2895 */ 2896 public static final int OTHER = 0; 2897 /** 2898 */ 2899 public static final int ALETTER = 1; 2900 /** 2901 */ 2902 public static final int FORMAT = 2; 2903 /** 2904 */ 2905 public static final int KATAKANA = 3; 2906 /** 2907 */ 2908 public static final int MIDLETTER = 4; 2909 /** 2910 */ 2911 public static final int MIDNUM = 5; 2912 /** 2913 */ 2914 public static final int NUMERIC = 6; 2915 /** 2916 */ 2917 public static final int EXTENDNUMLET = 7; 2918 /** 2919 */ 2920 public static final int CR = 8; 2921 /** 2922 */ 2923 public static final int EXTEND = 9; 2924 /** 2925 */ 2926 public static final int LF = 10; 2927 /** 2928 */ 2929 public static final int MIDNUMLET = 11; 2930 /** 2931 */ 2932 public static final int NEWLINE = 12; 2933 /***/ 2934 public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 2935 /***/ 2936 public static final int HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 2937 /***/ 2938 public static final int SINGLE_QUOTE = 15; /*[SQ]*/ 2939 /***/ 2940 public static final int DOUBLE_QUOTE = 16; /*[DQ]*/ 2941 /***/ 2942 public static final int E_BASE = 17; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 2943 /***/ 2944 public static final int E_BASE_GAZ = 18; /*[EBG]*/ 2945 /***/ 2946 public static final int E_MODIFIER = 19; /*[EM]*/ 2947 /***/ 2948 public static final int GLUE_AFTER_ZWJ = 20; /*[GAZ]*/ 2949 /***/ 2950 public static final int ZWJ = 21; /*[ZWJ]*/ 2951 /***/ 2952 public static final int WSEGSPACE = 22; /*[WSEGSPACE]*/ 2953 /** 2954 * One more than the highest normal WordBreak value. 2955 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK). 2956 * 2957 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
2958 * @hide unsupported on OHOS 2959 */ 2960 @Deprecated 2961 public static final int COUNT = 23; 2962 } 2963 2964 /** 2965 * Sentence Break constants. 2966 * @see UProperty#SENTENCE_BREAK 2967 */ 2968 public static interface SentenceBreak { 2969 /** 2970 */ 2971 public static final int OTHER = 0; 2972 /** 2973 */ 2974 public static final int ATERM = 1; 2975 /** 2976 */ 2977 public static final int CLOSE = 2; 2978 /** 2979 */ 2980 public static final int FORMAT = 3; 2981 /** 2982 */ 2983 public static final int LOWER = 4; 2984 /** 2985 */ 2986 public static final int NUMERIC = 5; 2987 /** 2988 */ 2989 public static final int OLETTER = 6; 2990 /** 2991 */ 2992 public static final int SEP = 7; 2993 /** 2994 */ 2995 public static final int SP = 8; 2996 /** 2997 */ 2998 public static final int STERM = 9; 2999 /** 3000 */ 3001 public static final int UPPER = 10; 3002 /** 3003 */ 3004 public static final int CR = 11; 3005 /** 3006 */ 3007 public static final int EXTEND = 12; 3008 /** 3009 */ 3010 public static final int LF = 13; 3011 /** 3012 */ 3013 public static final int SCONTINUE = 14; 3014 /** 3015 * One more than the highest normal SentenceBreak value. 3016 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK). 3017 * 3018 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3019 * @hide unsupported on OHOS 3020 */ 3021 @Deprecated 3022 public static final int COUNT = 15; 3023 } 3024 3025 /** 3026 * Line Break constants. 
3027 * @see UProperty#LINE_BREAK 3028 */ 3029 public static interface LineBreak 3030 { 3031 /** 3032 */ 3033 public static final int UNKNOWN = 0; 3034 /** 3035 */ 3036 public static final int AMBIGUOUS = 1; 3037 /** 3038 */ 3039 public static final int ALPHABETIC = 2; 3040 /** 3041 */ 3042 public static final int BREAK_BOTH = 3; 3043 /** 3044 */ 3045 public static final int BREAK_AFTER = 4; 3046 /** 3047 */ 3048 public static final int BREAK_BEFORE = 5; 3049 /** 3050 */ 3051 public static final int MANDATORY_BREAK = 6; 3052 /** 3053 */ 3054 public static final int CONTINGENT_BREAK = 7; 3055 /** 3056 */ 3057 public static final int CLOSE_PUNCTUATION = 8; 3058 /** 3059 */ 3060 public static final int COMBINING_MARK = 9; 3061 /** 3062 */ 3063 public static final int CARRIAGE_RETURN = 10; 3064 /** 3065 */ 3066 public static final int EXCLAMATION = 11; 3067 /** 3068 */ 3069 public static final int GLUE = 12; 3070 /** 3071 */ 3072 public static final int HYPHEN = 13; 3073 /** 3074 */ 3075 public static final int IDEOGRAPHIC = 14; 3076 /** 3077 * @see #INSEPARABLE 3078 */ 3079 public static final int INSEPERABLE = 15; 3080 /** 3081 * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
3082 */ 3083 public static final int INSEPARABLE = 15; 3084 /** 3085 */ 3086 public static final int INFIX_NUMERIC = 16; 3087 /** 3088 */ 3089 public static final int LINE_FEED = 17; 3090 /** 3091 */ 3092 public static final int NONSTARTER = 18; 3093 /** 3094 */ 3095 public static final int NUMERIC = 19; 3096 /** 3097 */ 3098 public static final int OPEN_PUNCTUATION = 20; 3099 /** 3100 */ 3101 public static final int POSTFIX_NUMERIC = 21; 3102 /** 3103 */ 3104 public static final int PREFIX_NUMERIC = 22; 3105 /** 3106 */ 3107 public static final int QUOTATION = 23; 3108 /** 3109 */ 3110 public static final int COMPLEX_CONTEXT = 24; 3111 /** 3112 */ 3113 public static final int SURROGATE = 25; 3114 /** 3115 */ 3116 public static final int SPACE = 26; 3117 /** 3118 */ 3119 public static final int BREAK_SYMBOLS = 27; 3120 /** 3121 */ 3122 public static final int ZWSPACE = 28; 3123 /** 3124 */ 3125 public static final int NEXT_LINE = 29; /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 3126 /** 3127 */ 3128 public static final int WORD_JOINER = 30; /*[WJ]*/ 3129 /** 3130 */ 3131 public static final int H2 = 31; /* from here on: new in Unicode 4.1/ICU 3.4 */ 3132 /** 3133 */ 3134 public static final int H3 = 32; 3135 /** 3136 */ 3137 public static final int JL = 33; 3138 /** 3139 */ 3140 public static final int JT = 34; 3141 /** 3142 */ 3143 public static final int JV = 35; 3144 /***/ 3145 public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */ 3146 /***/ 3147 public static final int CONDITIONAL_JAPANESE_STARTER = 37; /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 3148 /***/ 3149 public static final int HEBREW_LETTER = 38; /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 3150 /***/ 3151 public static final int REGIONAL_INDICATOR = 39; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3152 /***/ 3153 public static final int E_BASE = 40; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3154 /***/ 3155 public static final int E_MODIFIER = 41; /*[EM]*/ 
3156 /***/ 3157 public static final int ZWJ = 42; /*[ZWJ]*/ 3158 /** 3159 * One more than the highest normal LineBreak value. 3160 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK). 3161 * 3162 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3163 * @hide unsupported on OHOS 3164 */ 3165 @Deprecated 3166 public static final int COUNT = 43; 3167 } 3168 3169 /** 3170 * Numeric Type constants. 3171 * @see UProperty#NUMERIC_TYPE 3172 */ 3173 public static interface NumericType 3174 { 3175 /** 3176 */ 3177 public static final int NONE = 0; 3178 /** 3179 */ 3180 public static final int DECIMAL = 1; 3181 /** 3182 */ 3183 public static final int DIGIT = 2; 3184 /** 3185 */ 3186 public static final int NUMERIC = 3; 3187 /** 3188 * One more than the highest normal NumericType value. 3189 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE). 3190 * 3191 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3192 * @hide unsupported on OHOS 3193 */ 3194 @Deprecated 3195 public static final int COUNT = 4; 3196 } 3197 3198 /** 3199 * Hangul Syllable Type constants. 3200 * 3201 * @see UProperty#HANGUL_SYLLABLE_TYPE 3202 */ 3203 public static interface HangulSyllableType 3204 { 3205 /** 3206 */ 3207 public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ 3208 /** 3209 */ 3210 public static final int LEADING_JAMO = 1; /*[L]*/ 3211 /** 3212 */ 3213 public static final int VOWEL_JAMO = 2; /*[V]*/ 3214 /** 3215 */ 3216 public static final int TRAILING_JAMO = 3; /*[T]*/ 3217 /** 3218 */ 3219 public static final int LV_SYLLABLE = 4; /*[LV]*/ 3220 /** 3221 */ 3222 public static final int LVT_SYLLABLE = 5; /*[LVT]*/ 3223 /** 3224 * One more than the highest normal HangulSyllableType value. 3225 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE). 
3226 * 3227 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3228 * @hide unsupported on OHOS 3229 */ 3230 @Deprecated 3231 public static final int COUNT = 6; 3232 } 3233 3234 /** 3235 * Bidi Paired Bracket Type constants. 3236 * 3237 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3238 */ 3239 public static interface BidiPairedBracketType { 3240 /** 3241 * Not a paired bracket. 3242 */ 3243 public static final int NONE = 0; 3244 /** 3245 * Open paired bracket. 3246 */ 3247 public static final int OPEN = 1; 3248 /** 3249 * Close paired bracket. 3250 */ 3251 public static final int CLOSE = 2; 3252 /** 3253 * One more than the highest normal BidiPairedBracketType value. 3254 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE). 3255 * 3256 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3257 * @hide unsupported on OHOS 3258 */ 3259 @Deprecated 3260 public static final int COUNT = 3; 3261 } 3262 3263 /** 3264 * Indic Positional Category constants. 
3265 * 3266 * @see UProperty#INDIC_POSITIONAL_CATEGORY 3267 */ 3268 public static interface IndicPositionalCategory { 3269 /***/ 3270 public static final int NA = 0; 3271 /***/ 3272 public static final int BOTTOM = 1; 3273 /***/ 3274 public static final int BOTTOM_AND_LEFT = 2; 3275 /***/ 3276 public static final int BOTTOM_AND_RIGHT = 3; 3277 /***/ 3278 public static final int LEFT = 4; 3279 /***/ 3280 public static final int LEFT_AND_RIGHT = 5; 3281 /***/ 3282 public static final int OVERSTRUCK = 6; 3283 /***/ 3284 public static final int RIGHT = 7; 3285 /***/ 3286 public static final int TOP = 8; 3287 /***/ 3288 public static final int TOP_AND_BOTTOM = 9; 3289 /***/ 3290 public static final int TOP_AND_BOTTOM_AND_RIGHT = 10; 3291 /***/ 3292 public static final int TOP_AND_LEFT = 11; 3293 /***/ 3294 public static final int TOP_AND_LEFT_AND_RIGHT = 12; 3295 /***/ 3296 public static final int TOP_AND_RIGHT = 13; 3297 /***/ 3298 public static final int VISUAL_ORDER_LEFT = 14; 3299 /***/ 3300 public static final int TOP_AND_BOTTOM_AND_LEFT = 15; 3301 } 3302 3303 /** 3304 * Indic Syllabic Category constants. 
3305 * 3306 * @see UProperty#INDIC_SYLLABIC_CATEGORY 3307 */ 3308 public static interface IndicSyllabicCategory { 3309 /***/ 3310 public static final int OTHER = 0; 3311 /***/ 3312 public static final int AVAGRAHA = 1; 3313 /***/ 3314 public static final int BINDU = 2; 3315 /***/ 3316 public static final int BRAHMI_JOINING_NUMBER = 3; 3317 /***/ 3318 public static final int CANTILLATION_MARK = 4; 3319 /***/ 3320 public static final int CONSONANT = 5; 3321 /***/ 3322 public static final int CONSONANT_DEAD = 6; 3323 /***/ 3324 public static final int CONSONANT_FINAL = 7; 3325 /***/ 3326 public static final int CONSONANT_HEAD_LETTER = 8; 3327 /***/ 3328 public static final int CONSONANT_INITIAL_POSTFIXED = 9; 3329 /***/ 3330 public static final int CONSONANT_KILLER = 10; 3331 /***/ 3332 public static final int CONSONANT_MEDIAL = 11; 3333 /***/ 3334 public static final int CONSONANT_PLACEHOLDER = 12; 3335 /***/ 3336 public static final int CONSONANT_PRECEDING_REPHA = 13; 3337 /***/ 3338 public static final int CONSONANT_PREFIXED = 14; 3339 /***/ 3340 public static final int CONSONANT_SUBJOINED = 15; 3341 /***/ 3342 public static final int CONSONANT_SUCCEEDING_REPHA = 16; 3343 /***/ 3344 public static final int CONSONANT_WITH_STACKER = 17; 3345 /***/ 3346 public static final int GEMINATION_MARK = 18; 3347 /***/ 3348 public static final int INVISIBLE_STACKER = 19; 3349 /***/ 3350 public static final int JOINER = 20; 3351 /***/ 3352 public static final int MODIFYING_LETTER = 21; 3353 /***/ 3354 public static final int NON_JOINER = 22; 3355 /***/ 3356 public static final int NUKTA = 23; 3357 /***/ 3358 public static final int NUMBER = 24; 3359 /***/ 3360 public static final int NUMBER_JOINER = 25; 3361 /***/ 3362 public static final int PURE_KILLER = 26; 3363 /***/ 3364 public static final int REGISTER_SHIFTER = 27; 3365 /***/ 3366 public static final int SYLLABLE_MODIFIER = 28; 3367 /***/ 3368 public static final int TONE_LETTER = 29; 3369 /***/ 3370 public static final 
int TONE_MARK = 30; 3371 /***/ 3372 public static final int VIRAMA = 31; 3373 /***/ 3374 public static final int VISARGA = 32; 3375 /***/ 3376 public static final int VOWEL = 33; 3377 /***/ 3378 public static final int VOWEL_DEPENDENT = 34; 3379 /***/ 3380 public static final int VOWEL_INDEPENDENT = 35; 3381 } 3382 3383 /** 3384 * Vertical Orientation constants. 3385 * 3386 * @see UProperty#VERTICAL_ORIENTATION 3387 */ 3388 public static interface VerticalOrientation { 3389 /***/ 3390 public static final int ROTATED = 0; 3391 /***/ 3392 public static final int TRANSFORMED_ROTATED = 1; 3393 /***/ 3394 public static final int TRANSFORMED_UPRIGHT = 2; 3395 /***/ 3396 public static final int UPRIGHT = 3; 3397 } 3398 3399 // public data members ----------------------------------------------- 3400 3401 /** 3402 * The lowest Unicode code point value, constant 0. 3403 * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}. 3404 */ 3405 public static final int MIN_VALUE = Character.MIN_CODE_POINT; 3406 3407 /** 3408 * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits). 3409 * Same as {@link Character#MAX_CODE_POINT}. 3410 * 3411 * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE} 3412 * which is still a char with the value U+FFFF. 3413 */ 3414 public static final int MAX_VALUE = Character.MAX_CODE_POINT; 3415 3416 /** 3417 * The minimum value for Supplementary code points, constant U+10000. 3418 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 3419 */ 3420 public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT; 3421 3422 /** 3423 * Unicode value used when translating into Unicode encoding form and there 3424 * is no existing character. 3425 */ 3426 public static final int REPLACEMENT_CHAR = '\uFFFD'; 3427 3428 /** 3429 * Special value that is returned by getUnicodeNumericValue(int) when no 3430 * numeric value is defined for a code point. 
3431 * @see #getUnicodeNumericValue 3432 */ 3433 public static final double NO_NUMERIC_VALUE = -123456789; 3434 3435 /** 3436 * Compatibility constant for Java Character's MIN_RADIX. 3437 */ 3438 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 3439 3440 /** 3441 * Compatibility constant for Java Character's MAX_RADIX. 3442 */ 3443 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 3444 3445 /** 3446 * Do not lowercase non-initial parts of words when titlecasing. 3447 * Option bit for titlecasing APIs that take an options bit set. 3448 * 3449 * By default, titlecasing will titlecase the first cased character 3450 * of a word and lowercase all other characters. 3451 * With this option, the other characters will not be modified. 3452 * 3453 * @see #toTitleCase 3454 */ 3455 public static final int TITLECASE_NO_LOWERCASE = 0x100; 3456 3457 /** 3458 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; 3459 * titlecase exactly the characters at breaks from the iterator. 3460 * Option bit for titlecasing APIs that take an options bit set. 3461 * 3462 * By default, titlecasing will take each break iterator index, 3463 * adjust it by looking for the next cased character, and titlecase that one. 3464 * Other characters are lowercased. 3465 * 3466 * This follows Unicode 4 & 5 section 3.13 Default Case Operations: 3467 * 3468 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 3469 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 3470 * cased character F. If F exists, map F to default_title(F); then map each 3471 * subsequent character C to default_lower(C). 3472 * 3473 * @see #toTitleCase 3474 * @see #TITLECASE_NO_LOWERCASE 3475 */ 3476 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 3477 3478 // public methods ---------------------------------------------------- 3479 3480 /** 3481 * Returnss the numeric value of a decimal digit code point. 
3482 * <br>This method observes the semantics of 3483 * <code>java.lang.Character.digit()</code>. Note that this 3484 * will return positive values for code points for which isDigit 3485 * returns false, just like java.lang.Character. 3486 * <br><em>Semantic Change:</em> In release 1.3.1 and 3487 * prior, this did not treat the European letters as having a 3488 * digit value, and also treated numeric letters and other numbers as 3489 * digits. 3490 * This has been changed to conform to the java semantics. 3491 * <br>A code point is a valid digit if and only if: 3492 * <ul> 3493 * <li>ch is a decimal digit or one of the european letters, and 3494 * <li>the value of ch is less than the specified radix. 3495 * </ul> 3496 * @param ch the code point to query 3497 * @param radix the radix 3498 * @return the numeric value represented by the code point in the 3499 * specified radix, or -1 if the code point is not a decimal digit 3500 * or if its value is too large for the radix 3501 */ digit(int ch, int radix)3502 public static int digit(int ch, int radix) 3503 { 3504 if (2 <= radix && radix <= 36) { 3505 int value = digit(ch); 3506 if (value < 0) { 3507 // ch is not a decimal digit, try latin letters 3508 value = UCharacterProperty.getEuropeanDigit(ch); 3509 } 3510 return (value < radix) ? value : -1; 3511 } else { 3512 return -1; // invalid radix 3513 } 3514 } 3515 3516 /** 3517 * Returnss the numeric value of a decimal digit code point. 3518 * <br>This is a convenience overload of <code>digit(int, int)</code> 3519 * that provides a decimal radix. 3520 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 3521 * treated numeric letters and other numbers as digits. This has 3522 * been changed to conform to the java semantics. 
3523 * @param ch the code point to query 3524 * @return the numeric value represented by the code point, 3525 * or -1 if the code point is not a decimal digit or if its 3526 * value is too large for a decimal radix 3527 */ digit(int ch)3528 public static int digit(int ch) 3529 { 3530 return UCharacterProperty.INSTANCE.digit(ch); 3531 } 3532 3533 /** 3534 * Returns the numeric value of the code point as a nonnegative 3535 * integer. 3536 * <br>If the code point does not have a numeric value, then -1 is returned. 3537 * <br> 3538 * If the code point has a numeric value that cannot be represented as a 3539 * nonnegative integer (for example, a fractional value), then -2 is 3540 * returned. 3541 * @param ch the code point to query 3542 * @return the numeric value of the code point, or -1 if it has no numeric 3543 * value, or -2 if it has a numeric value that cannot be represented as a 3544 * nonnegative integer 3545 */ getNumericValue(int ch)3546 public static int getNumericValue(int ch) 3547 { 3548 return UCharacterProperty.INSTANCE.getNumericValue(ch); 3549 } 3550 3551 /** 3552 * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the 3553 * Unicode Character Database. 3554 * <p>A "double" return type is necessary because some numeric values are 3555 * fractions, negative, or too large for int. 3556 * <p>For characters without any numeric values in the Unicode Character 3557 * Database, this function will return NO_NUMERIC_VALUE. 3558 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 3559 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 3560 * return type int and returns -1 when the argument ch does not have a 3561 * corresponding numeric value. This has been changed to synch with ICU4C 3562 * 3563 * This corresponds to the ICU4C function u_getNumericValue. 3564 * @param ch Code point to get the numeric value for. 
3565 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 3566 */ getUnicodeNumericValue(int ch)3567 public static double getUnicodeNumericValue(int ch) 3568 { 3569 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 3570 } 3571 3572 /** 3573 * Compatibility override of Java deprecated method. This 3574 * method will always remain deprecated. 3575 * Same as java.lang.Character.isSpace(). 3576 * @param ch the code point 3577 * @return true if the code point is a space character as 3578 * defined by java.lang.Character.isSpace. 3579 * @deprecated ICU 3.4 (Java) 3580 * @hide deprecated on icu4j-org 3581 */ 3582 @Deprecated isSpace(int ch)3583 public static boolean isSpace(int ch) { 3584 return ch <= 0x20 && 3585 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 3586 } 3587 3588 /** 3589 * Returns a value indicating a code point's Unicode category. 3590 * Up-to-date Unicode implementation of java.lang.Character.getType() 3591 * except for the above mentioned code points that had their category 3592 * changed.<br> 3593 * Return results are constants from the interface 3594 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 3595 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 3596 * those returned by java.lang.Character.getType. UCharacterCategory values 3597 * match the ones used in ICU4C, while java.lang.Character type 3598 * values, though similar, skip the value 17. 3599 * @param ch code point whose type is to be determined 3600 * @return category which is a value of UCharacterCategory 3601 */ getType(int ch)3602 public static int getType(int ch) 3603 { 3604 return UCharacterProperty.INSTANCE.getType(ch); 3605 } 3606 3607 /** 3608 * Determines if a code point has a defined meaning in the up-to-date 3609 * Unicode standard. 3610 * E.g. 
supplementary code points though allocated space are not defined in 3611 * Unicode yet.<br> 3612 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 3613 * @param ch code point to be determined if it is defined in the most 3614 * current version of Unicode 3615 * @return true if this code point is defined in unicode 3616 */ isDefined(int ch)3617 public static boolean isDefined(int ch) 3618 { 3619 return getType(ch) != 0; 3620 } 3621 3622 /** 3623 * Determines if a code point is a Java digit. 3624 * <br>This method observes the semantics of 3625 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 3626 * digits only. 3627 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 3628 * numeric letters and other numbers as digits. 3629 * This has been changed to conform to the java semantics. 3630 * @param ch code point to query 3631 * @return true if this code point is a digit 3632 */ isDigit(int ch)3633 public static boolean isDigit(int ch) 3634 { 3635 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 3636 } 3637 3638 /** 3639 * Determines if the specified code point is an ISO control character. 3640 * A code point is considered to be an ISO control character if it is in 3641 * the range \u0000 through \u001F or in the range \u007F through 3642 * \u009F.<br> 3643 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 3644 * @param ch code point to determine if it is an ISO control character 3645 * @return true if code point is a ISO control character 3646 */ isISOControl(int ch)3647 public static boolean isISOControl(int ch) 3648 { 3649 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 3650 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 3651 } 3652 3653 /** 3654 * Determines if the specified code point is a letter. 
3655 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 3656 * @param ch code point to determine if it is a letter 3657 * @return true if code point is a letter 3658 */ isLetter(int ch)3659 public static boolean isLetter(int ch) 3660 { 3661 // if props == 0, it will just fall through and return false 3662 return ((1 << getType(ch)) 3663 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3664 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3665 | (1 << UCharacterCategory.TITLECASE_LETTER) 3666 | (1 << UCharacterCategory.MODIFIER_LETTER) 3667 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 3668 } 3669 3670 /** 3671 * Determines if the specified code point is a letter or digit. 3672 * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii 3673 * characters 'A' - 'Z' and 'a' - 'z' as digits. 3674 * @param ch code point to determine if it is a letter or a digit 3675 * @return true if code point is a letter or a digit 3676 */ isLetterOrDigit(int ch)3677 public static boolean isLetterOrDigit(int ch) 3678 { 3679 return ((1 << getType(ch)) 3680 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3681 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3682 | (1 << UCharacterCategory.TITLECASE_LETTER) 3683 | (1 << UCharacterCategory.MODIFIER_LETTER) 3684 | (1 << UCharacterCategory.OTHER_LETTER) 3685 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 3686 } 3687 3688 /** 3689 * Compatibility override of Java deprecated method. This 3690 * method will always remain deprecated. Delegates to 3691 * java.lang.Character.isJavaIdentifierStart. 3692 * @param cp the code point 3693 * @return true if the code point can start a java identifier. 3694 * @deprecated ICU 3.4 (Java) 3695 * @hide deprecated on icu4j-org 3696 */ 3697 @Deprecated isJavaLetter(int cp)3698 public static boolean isJavaLetter(int cp) { 3699 return isJavaIdentifierStart(cp); 3700 } 3701 3702 /** 3703 * Compatibility override of Java deprecated method. 
This 3704 * method will always remain deprecated. Delegates to 3705 * java.lang.Character.isJavaIdentifierPart. 3706 * @param cp the code point 3707 * @return true if the code point can continue a java identifier. 3708 * @deprecated ICU 3.4 (Java) 3709 * @hide deprecated on icu4j-org 3710 */ 3711 @Deprecated isJavaLetterOrDigit(int cp)3712 public static boolean isJavaLetterOrDigit(int cp) { 3713 return isJavaIdentifierPart(cp); 3714 } 3715 3716 /** 3717 * Compatibility override of Java method, delegates to 3718 * java.lang.Character.isJavaIdentifierStart. 3719 * @param cp the code point 3720 * @return true if the code point can start a java identifier. 3721 */ isJavaIdentifierStart(int cp)3722 public static boolean isJavaIdentifierStart(int cp) { 3723 // note, downcast to char for jdk 1.4 compatibility 3724 return java.lang.Character.isJavaIdentifierStart((char)cp); 3725 } 3726 3727 /** 3728 * Compatibility override of Java method, delegates to 3729 * java.lang.Character.isJavaIdentifierPart. 3730 * @param cp the code point 3731 * @return true if the code point can continue a java identifier. 3732 */ isJavaIdentifierPart(int cp)3733 public static boolean isJavaIdentifierPart(int cp) { 3734 // note, downcast to char for jdk 1.4 compatibility 3735 return java.lang.Character.isJavaIdentifierPart((char)cp); 3736 } 3737 3738 /** 3739 * Determines if the specified code point is a lowercase character. 
3740 * UnicodeData only contains case mappings for code points where they are 3741 * one-to-one mappings; it also omits information about context-sensitive 3742 * case mappings.<br> For more information about Unicode case mapping 3743 * please refer to the 3744 * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 3745 * #21</a>.<br> 3746 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 3747 * @param ch code point to determine if it is in lowercase 3748 * @return true if code point is a lowercase character 3749 */ isLowerCase(int ch)3750 public static boolean isLowerCase(int ch) 3751 { 3752 // if props == 0, it will just fall through and return false 3753 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 3754 } 3755 3756 /** 3757 * Determines if the specified code point is a white space character. 3758 * A code point is considered to be an whitespace character if and only 3759 * if it satisfies one of the following criteria: 3760 * <ul> 3761 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 3762 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 3763 * <li> It is \u0009, HORIZONTAL TABULATION. 3764 * <li> It is \u000A, LINE FEED. 3765 * <li> It is \u000B, VERTICAL TABULATION. 3766 * <li> It is \u000C, FORM FEED. 3767 * <li> It is \u000D, CARRIAGE RETURN. 3768 * <li> It is \u001C, FILE SEPARATOR. 3769 * <li> It is \u001D, GROUP SEPARATOR. 3770 * <li> It is \u001E, RECORD SEPARATOR. 3771 * <li> It is \u001F, UNIT SEPARATOR. 3772 * </ul> 3773 * 3774 * This API tries to sync with the semantics of Java's 3775 * java.lang.Character.isWhitespace(), but it may not return 3776 * the exact same results because of the Unicode version 3777 * difference. 3778 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 3779 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 
3780 * See http://www.unicode.org/versions/Unicode4.0.1/ 3781 * @param ch code point to determine if it is a white space 3782 * @return true if the specified code point is a white space character 3783 */ isWhitespace(int ch)3784 public static boolean isWhitespace(int ch) 3785 { 3786 // exclude no-break spaces 3787 // if props == 0, it will just fall through and return false 3788 return ((1 << getType(ch)) & 3789 ((1 << UCharacterCategory.SPACE_SEPARATOR) 3790 | (1 << UCharacterCategory.LINE_SEPARATOR) 3791 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 3792 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 3793 // TAB VT LF FF CR FS GS RS US NL are all control characters 3794 // that are white spaces. 3795 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 3796 } 3797 3798 /** 3799 * Determines if the specified code point is a Unicode specified space 3800 * character, i.e. if code point is in the category Zs, Zl and Zp. 3801 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 3802 * @param ch code point to determine if it is a space 3803 * @return true if the specified code point is a space character 3804 */ isSpaceChar(int ch)3805 public static boolean isSpaceChar(int ch) 3806 { 3807 // if props == 0, it will just fall through and return false 3808 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 3809 | (1 << UCharacterCategory.LINE_SEPARATOR) 3810 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 3811 != 0; 3812 } 3813 3814 /** 3815 * Determines if the specified code point is a titlecase character. 
3816 * UnicodeData only contains case mappings for code points where they are 3817 * one-to-one mappings; it also omits information about context-sensitive 3818 * case mappings.<br> 3819 * For more information about Unicode case mapping please refer to the 3820 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3821 * Technical report #21</a>.<br> 3822 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 3823 * @param ch code point to determine if it is in title case 3824 * @return true if the specified code point is a titlecase character 3825 */ isTitleCase(int ch)3826 public static boolean isTitleCase(int ch) 3827 { 3828 // if props == 0, it will just fall through and return false 3829 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 3830 } 3831 3832 /** 3833 * Determines if the specified code point may be any part of a Unicode 3834 * identifier other than the starting character. 3835 * A code point may be part of a Unicode identifier if and only if it is 3836 * one of the following: 3837 * <ul> 3838 * <li> Lu Uppercase letter 3839 * <li> Ll Lowercase letter 3840 * <li> Lt Titlecase letter 3841 * <li> Lm Modifier letter 3842 * <li> Lo Other letter 3843 * <li> Nl Letter number 3844 * <li> Pc Connecting punctuation character 3845 * <li> Nd decimal number 3846 * <li> Mc Spacing combining mark 3847 * <li> Mn Non-spacing mark 3848 * <li> Cf formatting code 3849 * </ul> 3850 * Up-to-date Unicode implementation of 3851 * java.lang.Character.isUnicodeIdentifierPart().<br> 3852 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 
3853 * @param ch code point to determine if is can be part of a Unicode 3854 * identifier 3855 * @return true if code point is any character belonging a unicode 3856 * identifier suffix after the first character 3857 */ isUnicodeIdentifierPart(int ch)3858 public static boolean isUnicodeIdentifierPart(int ch) 3859 { 3860 // if props == 0, it will just fall through and return false 3861 // cat == format 3862 return ((1 << getType(ch)) 3863 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3864 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3865 | (1 << UCharacterCategory.TITLECASE_LETTER) 3866 | (1 << UCharacterCategory.MODIFIER_LETTER) 3867 | (1 << UCharacterCategory.OTHER_LETTER) 3868 | (1 << UCharacterCategory.LETTER_NUMBER) 3869 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 3870 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 3871 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 3872 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 3873 || isIdentifierIgnorable(ch); 3874 } 3875 3876 /** 3877 * Determines if the specified code point is permissible as the first 3878 * character in a Unicode identifier. 3879 * A code point may start a Unicode identifier if it is of type either 3880 * <ul> 3881 * <li> Lu Uppercase letter 3882 * <li> Ll Lowercase letter 3883 * <li> Lt Titlecase letter 3884 * <li> Lm Modifier letter 3885 * <li> Lo Other letter 3886 * <li> Nl Letter number 3887 * </ul> 3888 * Up-to-date Unicode implementation of 3889 * java.lang.Character.isUnicodeIdentifierStart().<br> 3890 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 
3891 * @param ch code point to determine if it can start a Unicode identifier 3892 * @return true if code point is the first character belonging a unicode 3893 * identifier 3894 */ isUnicodeIdentifierStart(int ch)3895 public static boolean isUnicodeIdentifierStart(int ch) 3896 { 3897 /*int cat = getType(ch);*/ 3898 // if props == 0, it will just fall through and return false 3899 return ((1 << getType(ch)) 3900 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 3901 | (1 << UCharacterCategory.LOWERCASE_LETTER) 3902 | (1 << UCharacterCategory.TITLECASE_LETTER) 3903 | (1 << UCharacterCategory.MODIFIER_LETTER) 3904 | (1 << UCharacterCategory.OTHER_LETTER) 3905 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 3906 } 3907 3908 /** 3909 * Determines if the specified code point should be regarded as an 3910 * ignorable character in a Java identifier. 3911 * A character is Java-identifier-ignorable if it has the general category 3912 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 3913 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 3914 * Up-to-date Unicode implementation of 3915 * java.lang.Character.isIdentifierIgnorable().<br> 3916 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 3917 * <p>Note that Unicode just recommends to ignore Cf (format controls). 3918 * @param ch code point to be determined if it can be ignored in a Unicode 3919 * identifier. 3920 * @return true if the code point is ignorable 3921 */ isIdentifierIgnorable(int ch)3922 public static boolean isIdentifierIgnorable(int ch) 3923 { 3924 // see java.lang.Character.isIdentifierIgnorable() on range of 3925 // ignorable characters. 3926 if (ch <= 0x9f) { 3927 return isISOControl(ch) 3928 && !((ch >= 0x9 && ch <= 0xd) 3929 || (ch >= 0x1c && ch <= 0x1f)); 3930 } 3931 return getType(ch) == UCharacterCategory.FORMAT; 3932 } 3933 3934 /** 3935 * Determines if the specified code point is an uppercase character. 
3936 * UnicodeData only contains case mappings for code point where they are 3937 * one-to-one mappings; it also omits information about context-sensitive 3938 * case mappings.<br> 3939 * For language specific case conversion behavior, use 3940 * toUpperCase(locale, str). <br> 3941 * For example, the case conversion for dot-less i and dotted I in Turkish, 3942 * or for final sigma in Greek. 3943 * For more information about Unicode case mapping please refer to the 3944 * <a href=http://www.unicode.org/unicode/reports/tr21/> 3945 * Technical report #21</a>.<br> 3946 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 3947 * @param ch code point to determine if it is in uppercase 3948 * @return true if the code point is an uppercase character 3949 */ isUpperCase(int ch)3950 public static boolean isUpperCase(int ch) 3951 { 3952 // if props == 0, it will just fall through and return false 3953 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 3954 } 3955 3956 /** 3957 * The given code point is mapped to its lowercase equivalent; if the code 3958 * point has no lowercase equivalent, the code point itself is returned. 3959 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 3960 * 3961 * <p>This function only returns the simple, single-code point case mapping. 3962 * Full case mappings should be used whenever possible because they produce 3963 * better results by working on whole strings. 3964 * They take into account the string context and the language and can map 3965 * to a result string with a different length as appropriate. 3966 * Full case mappings are applied by the case mapping functions 3967 * that take String parameters rather than code points (int). 
3968 * See also the User Guide chapter on C/POSIX migration: 3969 * http://www.icu-project.org/userguide/posix.html#case_mappings 3970 * 3971 * @param ch code point whose lowercase equivalent is to be retrieved 3972 * @return the lowercase equivalent code point 3973 */ toLowerCase(int ch)3974 public static int toLowerCase(int ch) { 3975 return UCaseProps.INSTANCE.tolower(ch); 3976 } 3977 3978 /** 3979 * Converts argument code point and returns a String object representing 3980 * the code point's value in UTF-16 format. 3981 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 3982 * 3983 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 3984 * 3985 * @param ch code point 3986 * @return string representation of the code point, null if code point is not 3987 * defined in unicode 3988 */ toString(int ch)3989 public static String toString(int ch) 3990 { 3991 if (ch < MIN_VALUE || ch > MAX_VALUE) { 3992 return null; 3993 } 3994 3995 if (ch < SUPPLEMENTARY_MIN_VALUE) { 3996 return String.valueOf((char)ch); 3997 } 3998 3999 return new String(Character.toChars(ch)); 4000 } 4001 4002 /** 4003 * Converts the code point argument to titlecase. 4004 * If no titlecase is available, the uppercase is returned. If no uppercase 4005 * is available, the code point itself is returned. 4006 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4007 * 4008 * <p>This function only returns the simple, single-code point case mapping. 4009 * Full case mappings should be used whenever possible because they produce 4010 * better results by working on whole strings. 4011 * They take into account the string context and the language and can map 4012 * to a result string with a different length as appropriate. 4013 * Full case mappings are applied by the case mapping functions 4014 * that take String parameters rather than code points (int). 
4015 * See also the User Guide chapter on C/POSIX migration: 4016 * http://www.icu-project.org/userguide/posix.html#case_mappings 4017 * 4018 * @param ch code point whose title case is to be retrieved 4019 * @return titlecase code point 4020 */ toTitleCase(int ch)4021 public static int toTitleCase(int ch) { 4022 return UCaseProps.INSTANCE.totitle(ch); 4023 } 4024 4025 /** 4026 * Converts the character argument to uppercase. 4027 * If no uppercase is available, the character itself is returned. 4028 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4029 * 4030 * <p>This function only returns the simple, single-code point case mapping. 4031 * Full case mappings should be used whenever possible because they produce 4032 * better results by working on whole strings. 4033 * They take into account the string context and the language and can map 4034 * to a result string with a different length as appropriate. 4035 * Full case mappings are applied by the case mapping functions 4036 * that take String parameters rather than code points (int). 4037 * See also the User Guide chapter on C/POSIX migration: 4038 * http://www.icu-project.org/userguide/posix.html#case_mappings 4039 * 4040 * @param ch code point whose uppercase is to be retrieved 4041 * @return uppercase code point 4042 */ toUpperCase(int ch)4043 public static int toUpperCase(int ch) { 4044 return UCaseProps.INSTANCE.toupper(ch); 4045 } 4046 4047 // extra methods not in java.lang.Character -------------------------- 4048 4049 /** 4050 * <strong>[icu]</strong> Determines if the code point is a supplementary character. 
4051 * A code point is a supplementary character if and only if it is greater 4052 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4053 * @param ch code point to be determined if it is in the supplementary 4054 * plane 4055 * @return true if code point is a supplementary character 4056 */ isSupplementary(int ch)4057 public static boolean isSupplementary(int ch) 4058 { 4059 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4060 ch <= UCharacter.MAX_VALUE; 4061 } 4062 4063 /** 4064 * <strong>[icu]</strong> Determines if the code point is in the BMP plane. 4065 * @param ch code point to be determined if it is not a supplementary 4066 * character 4067 * @return true if code point is not a supplementary character 4068 */ isBMP(int ch)4069 public static boolean isBMP(int ch) 4070 { 4071 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4072 } 4073 4074 /** 4075 * <strong>[icu]</strong> Determines whether the specified code point is a printable character 4076 * according to the Unicode standard. 4077 * @param ch code point to be determined if it is printable 4078 * @return true if the code point is a printable character 4079 */ isPrintable(int ch)4080 public static boolean isPrintable(int ch) 4081 { 4082 int cat = getType(ch); 4083 // if props == 0, it will just fall through and return false 4084 return (cat != UCharacterCategory.UNASSIGNED && 4085 cat != UCharacterCategory.CONTROL && 4086 cat != UCharacterCategory.FORMAT && 4087 cat != UCharacterCategory.PRIVATE_USE && 4088 cat != UCharacterCategory.SURROGATE && 4089 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4090 } 4091 4092 /** 4093 * <strong>[icu]</strong> Determines whether the specified code point is of base form. 4094 * A code point of base form does not graphically combine with preceding 4095 * characters, and is neither a control nor a format character. 
4096 * @param ch code point to be determined if it is of base form 4097 * @return true if the code point is of base form 4098 */ isBaseForm(int ch)4099 public static boolean isBaseForm(int ch) 4100 { 4101 int cat = getType(ch); 4102 // if props == 0, it will just fall through and return false 4103 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4104 cat == UCharacterCategory.OTHER_NUMBER || 4105 cat == UCharacterCategory.LETTER_NUMBER || 4106 cat == UCharacterCategory.UPPERCASE_LETTER || 4107 cat == UCharacterCategory.LOWERCASE_LETTER || 4108 cat == UCharacterCategory.TITLECASE_LETTER || 4109 cat == UCharacterCategory.MODIFIER_LETTER || 4110 cat == UCharacterCategory.OTHER_LETTER || 4111 cat == UCharacterCategory.NON_SPACING_MARK || 4112 cat == UCharacterCategory.ENCLOSING_MARK || 4113 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4114 } 4115 4116 /** 4117 * <strong>[icu]</strong> Returns the Bidirection property of a code point. 4118 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4119 * property.<br> 4120 * Result returned belongs to the interface 4121 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4122 * @param ch the code point to be determined its direction 4123 * @return direction constant from UCharacterDirection. 4124 */ getDirection(int ch)4125 public static int getDirection(int ch) 4126 { 4127 return UBiDiProps.INSTANCE.getClass(ch); 4128 } 4129 4130 /** 4131 * Determines whether the code point has the "mirrored" property. 4132 * This property is set for characters that are commonly used in 4133 * Right-To-Left contexts and need to be displayed with a "mirrored" 4134 * glyph. 
4135 * @param ch code point whose mirror is to be determined 4136 * @return true if the code point has the "mirrored" property 4137 */ isMirrored(int ch)4138 public static boolean isMirrored(int ch) 4139 { 4140 return UBiDiProps.INSTANCE.isMirrored(ch); 4141 } 4142 4143 /** 4144 * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point. 4145 * For code points with the "mirrored" property, implementations sometimes 4146 * need a "poor man's" mapping to another code point such that the default 4147 * glyph may serve as the mirror-image of the default glyph of the 4148 * specified code point.<br> 4149 * This is useful for text conversion to and from codepages with visual 4150 * order, and for displays without glyph selection capabilities. 4151 * @param ch code point whose mirror is to be retrieved 4152 * @return another code point that may serve as a mirror-image substitute, 4153 * or ch itself if there is no such mapping or ch does not have the 4154 * "mirrored" property 4155 */ getMirror(int ch)4156 public static int getMirror(int ch) 4157 { 4158 return UBiDiProps.INSTANCE.getMirror(ch); 4159 } 4160 4161 /** 4162 * <strong>[icu]</strong> Maps the specified character to its paired bracket character. 4163 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4164 * Otherwise c itself is returned. 
4165 * See http://www.unicode.org/reports/tr9/ 4166 * 4167 * @param c the code point to be mapped 4168 * @return the paired bracket code point, 4169 * or c itself if there is no such mapping 4170 * (Bidi_Paired_Bracket_Type=None) 4171 * 4172 * @see UProperty#BIDI_PAIRED_BRACKET 4173 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4174 * @see #getMirror(int) 4175 */ getBidiPairedBracket(int c)4176 public static int getBidiPairedBracket(int c) { 4177 return UBiDiProps.INSTANCE.getPairedBracket(c); 4178 } 4179 4180 /** 4181 * <strong>[icu]</strong> Returns the combining class of the argument codepoint 4182 * @param ch code point whose combining is to be retrieved 4183 * @return the combining class of the codepoint 4184 */ getCombiningClass(int ch)4185 public static int getCombiningClass(int ch) 4186 { 4187 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4188 } 4189 4190 /** 4191 * <strong>[icu]</strong> A code point is illegal if and only if 4192 * <ul> 4193 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4194 * <li> A surrogate value, 0xD800 to 0xDFFF 4195 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4196 * </ul> 4197 * Note: legal does not mean that it is assigned in this version of Unicode. 4198 * @param ch code point to determine if it is a legal code point by itself 4199 * @return true if and only if legal. 4200 */ isLegal(int ch)4201 public static boolean isLegal(int ch) 4202 { 4203 if (ch < MIN_VALUE) { 4204 return false; 4205 } 4206 if (ch < Character.MIN_SURROGATE) { 4207 return true; 4208 } 4209 if (ch <= Character.MAX_SURROGATE) { 4210 return false; 4211 } 4212 if (UCharacterUtility.isNonCharacter(ch)) { 4213 return false; 4214 } 4215 return (ch <= MAX_VALUE); 4216 } 4217 4218 /** 4219 * <strong>[icu]</strong> A string is legal iff all its code points are legal. 
4220 * A code point is illegal if and only if 4221 * <ul> 4222 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4223 * <li> A surrogate value, 0xD800 to 0xDFFF 4224 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4225 * </ul> 4226 * Note: legal does not mean that it is assigned in this version of Unicode. 4227 * @param str containing code points to examin 4228 * @return true if and only if legal. 4229 */ isLegal(String str)4230 public static boolean isLegal(String str) 4231 { 4232 int size = str.length(); 4233 int codepoint; 4234 for (int i = 0; i < size; i += Character.charCount(codepoint)) 4235 { 4236 codepoint = str.codePointAt(i); 4237 if (!isLegal(codepoint)) { 4238 return false; 4239 } 4240 } 4241 return true; 4242 } 4243 4244 /** 4245 * <strong>[icu]</strong> Returns the version of Unicode data used. 4246 * @return the unicode version number used 4247 */ getUnicodeVersion()4248 public static VersionInfo getUnicodeVersion() 4249 { 4250 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 4251 } 4252 4253 /** 4254 * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or 4255 * null if the character is unassigned or outside the range 4256 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4257 * <br> 4258 * Note calling any methods related to code point names, e.g. get*Name*() 4259 * incurs a one-time initialisation cost to construct the name tables. 
4260 * @param ch the code point for which to get the name 4261 * @return most current Unicode name 4262 */ getName(int ch)4263 public static String getName(int ch) 4264 { 4265 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 4266 } 4267 4268 /** 4269 * <strong>[icu]</strong> Returns the names for each of the characters in a string 4270 * @param s string to format 4271 * @param separator string to go between names 4272 * @return string of names 4273 */ getName(String s, String separator)4274 public static String getName(String s, String separator) { 4275 if (s.length() == 1) { // handle common case 4276 return getName(s.charAt(0)); 4277 } 4278 int cp; 4279 StringBuilder sb = new StringBuilder(); 4280 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 4281 cp = s.codePointAt(i); 4282 if (i != 0) sb.append(separator); 4283 sb.append(UCharacter.getName(cp)); 4284 } 4285 return sb.toString(); 4286 } 4287 4288 /** 4289 * <strong>[icu]</strong> Returns null. 4290 * Used to return the Unicode_1_Name property value which was of little practical value. 4291 * @param ch the code point for which to get the name 4292 * @return null 4293 * @deprecated ICU 49 4294 * @hide deprecated on icu4j-org 4295 */ 4296 @Deprecated getName1_0(int ch)4297 public static String getName1_0(int ch) 4298 { 4299 return null; 4300 } 4301 4302 /** 4303 * <strong>[icu]</strong> Returns a name for a valid codepoint. Unlike, getName(int) and 4304 * getName1_0(int), this method will return a name even for codepoints that 4305 * are not assigned a name in UnicodeData.txt. 4306 * 4307 * <p>The names are returned in the following order. 4308 * <ul> 4309 * <li> Most current Unicode name if there is any 4310 * <li> Unicode 1.0 name if there is any 4311 * <li> Extended name in the form of 4312 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 4313 * </ul> 4314 * Note calling any methods related to code point names, e.g. 
get*Name*() 4315 * incurs a one-time initialisation cost to construct the name tables. 4316 * @param ch the code point for which to get the name 4317 * @return a name for the argument codepoint 4318 */ getExtendedName(int ch)4319 public static String getExtendedName(int ch) { 4320 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 4321 } 4322 4323 /** 4324 * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one. 4325 * Returns null if the character is unassigned or outside the range 4326 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4327 * <br> 4328 * Note calling any methods related to code point names, e.g. get*Name*() 4329 * incurs a one-time initialisation cost to construct the name tables. 4330 * @param ch the code point for which to get the name alias 4331 * @return Unicode name alias, or null 4332 */ getNameAlias(int ch)4333 public static String getNameAlias(int ch) 4334 { 4335 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 4336 } 4337 4338 /** 4339 * <strong>[icu]</strong> Returns null. 4340 * Used to return the ISO 10646 comment for a character. 4341 * The Unicode ISO_Comment property is deprecated and has no values. 4342 * 4343 * @param ch The code point for which to get the ISO comment. 4344 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 4345 * @return null 4346 * @deprecated ICU 49 4347 * @hide deprecated on icu4j-org 4348 */ 4349 @Deprecated getISOComment(int ch)4350 public static String getISOComment(int ch) 4351 { 4352 return null; 4353 } 4354 4355 /** 4356 * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and 4357 * return its code point value. All Unicode names are in uppercase. 4358 * Note calling any methods related to code point names, e.g. get*Name*() 4359 * incurs a one-time initialisation cost to construct the name tables. 
4360 * @param name most current Unicode character name whose code point is to 4361 * be returned 4362 * @return code point or -1 if name is not found 4363 */ getCharFromName(String name)4364 public static int getCharFromName(String name){ 4365 return UCharacterName.INSTANCE.getCharFromName( 4366 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 4367 } 4368 4369 /** 4370 * <strong>[icu]</strong> Returns -1. 4371 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 4372 * its code point value. 4373 * @param name Unicode 1.0 code point name whose code point is to be 4374 * returned 4375 * @return -1 4376 * @deprecated ICU 49 4377 * @see #getName1_0(int) 4378 * @hide deprecated on icu4j-org 4379 */ 4380 @Deprecated getCharFromName1_0(String name)4381 public static int getCharFromName1_0(String name){ 4382 return -1; 4383 } 4384 4385 /** 4386 * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code 4387 * point value. All Unicode names are in uppercase. 4388 * Extended names are all lowercase except for numbers and are contained 4389 * within angle brackets. 4390 * The names are searched in the following order 4391 * <ul> 4392 * <li> Most current Unicode name if there is any 4393 * <li> Unicode 1.0 name if there is any 4394 * <li> Extended name in the form of 4395 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 4396 * </ul> 4397 * Note calling any methods related to code point names, e.g. get*Name*() 4398 * incurs a one-time initialisation cost to construct the name tables. 4399 * @param name codepoint name 4400 * @return code point associated with the name or -1 if the name is not 4401 * found. 
4402 */ getCharFromExtendedName(String name)4403 public static int getCharFromExtendedName(String name){ 4404 return UCharacterName.INSTANCE.getCharFromName( 4405 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 4406 } 4407 4408 /** 4409 * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return 4410 * its code point value. All Unicode names are in uppercase. 4411 * Note calling any methods related to code point names, e.g. get*Name*() 4412 * incurs a one-time initialisation cost to construct the name tables. 4413 * @param name Unicode name alias whose code point is to be returned 4414 * @return code point or -1 if name is not found 4415 */ getCharFromNameAlias(String name)4416 public static int getCharFromNameAlias(String name){ 4417 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 4418 } 4419 4420 /** 4421 * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the 4422 * Unicode database file PropertyAliases.txt. Most properties 4423 * have more than one name. The nameChoice determines which one 4424 * is returned. 4425 * 4426 * In addition, this function maps the property 4427 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 4428 * "General_Category_Mask". These names are not in 4429 * PropertyAliases.txt. 4430 * 4431 * @param property UProperty selector. 4432 * 4433 * @param nameChoice UProperty.NameChoice selector for which name 4434 * to get. All properties have a long name. Most have a short 4435 * name, but some do not. Unicode allows for additional names; if 4436 * present these will be returned by UProperty.NameChoice.LONG + i, 4437 * where i=1, 2,... 4438 * 4439 * @return a name, or null if Unicode explicitly defines no name 4440 * ("n/a") for a given property/nameChoice. If a given nameChoice 4441 * throws an exception, then all larger values of nameChoice will 4442 * throw an exception. 
If null is returned for a given 4443 * nameChoice, then other nameChoice values may return non-null 4444 * results. 4445 * 4446 * @exception IllegalArgumentException thrown if property or 4447 * nameChoice are invalid. 4448 * 4449 * @see UProperty 4450 * @see UProperty.NameChoice 4451 */ getPropertyName(int property, int nameChoice)4452 public static String getPropertyName(int property, 4453 int nameChoice) { 4454 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 4455 } 4456 4457 /** 4458 * <strong>[icu]</strong> Return the UProperty selector for a given property name, as 4459 * specified in the Unicode database file PropertyAliases.txt. 4460 * Short, long, and any other variants are recognized. 4461 * 4462 * In addition, this function maps the synthetic names "gcm" / 4463 * "General_Category_Mask" to the property 4464 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 4465 * PropertyAliases.txt. 4466 * 4467 * @param propertyAlias the property name to be matched. The name 4468 * is compared using "loose matching" as described in 4469 * PropertyAliases.txt. 4470 * 4471 * @return a UProperty enum. 4472 * 4473 * @exception IllegalArgumentException thrown if propertyAlias 4474 * is not recognized. 4475 * 4476 * @see UProperty 4477 */ getPropertyEnum(CharSequence propertyAlias)4478 public static int getPropertyEnum(CharSequence propertyAlias) { 4479 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 4480 if (propEnum == UProperty.UNDEFINED) { 4481 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 4482 } 4483 return propEnum; 4484 } 4485 4486 /** 4487 * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in 4488 * the Unicode database file PropertyValueAliases.txt. Most 4489 * values have more than one name. The nameChoice determines 4490 * which one is returned. 
4491 * 4492 * Note: Some of the names in PropertyValueAliases.txt can only be 4493 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 4494 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4495 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4496 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4497 * 4498 * @param property UProperty selector constant. 4499 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4500 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4501 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4502 * If out of range, null is returned. 4503 * 4504 * @param value selector for a value for the given property. In 4505 * general, valid values range from 0 up to some maximum. There 4506 * are a few exceptions: (1.) UProperty.BLOCK values begin at the 4507 * non-zero value BASIC_LATIN.getID(). (2.) 4508 * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous 4509 * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values 4510 * are mask values produced by left-shifting 1 by 4511 * UCharacter.getType(). This allows grouped categories such as 4512 * [:L:] to be represented. Mask values are non-contiguous. 4513 * 4514 * @param nameChoice UProperty.NameChoice selector for which name 4515 * to get. All values have a long name. Most have a short name, 4516 * but some do not. Unicode allows for additional names; if 4517 * present these will be returned by UProperty.NameChoice.LONG + i, 4518 * where i=1, 2,... 4519 * 4520 * @return a name, or null if Unicode explicitly defines no name 4521 * ("n/a") for a given property/value/nameChoice. If a given 4522 * nameChoice throws an exception, then all larger values of 4523 * nameChoice will throw an exception. If null is returned for a 4524 * given nameChoice, then other nameChoice values may return 4525 * non-null results. 
4526 * 4527 * @exception IllegalArgumentException thrown if property, value, 4528 * or nameChoice are invalid. 4529 * 4530 * @see UProperty 4531 * @see UProperty.NameChoice 4532 */ getPropertyValueName(int property, int value, int nameChoice)4533 public static String getPropertyValueName(int property, 4534 int value, 4535 int nameChoice) 4536 { 4537 if ((property == UProperty.CANONICAL_COMBINING_CLASS 4538 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 4539 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 4540 && value >= UCharacter.getIntPropertyMinValue( 4541 UProperty.CANONICAL_COMBINING_CLASS) 4542 && value <= UCharacter.getIntPropertyMaxValue( 4543 UProperty.CANONICAL_COMBINING_CLASS) 4544 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 4545 // this is hard coded for the valid cc 4546 // because PropertyValueAliases.txt does not contain all of them 4547 try { 4548 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 4549 nameChoice); 4550 } 4551 catch (IllegalArgumentException e) { 4552 return null; 4553 } 4554 } 4555 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 4556 } 4557 4558 /** 4559 * <strong>[icu]</strong> Return the property value integer for a given value name, as 4560 * specified in the Unicode database file PropertyValueAliases.txt. 4561 * Short, long, and any other variants are recognized. 4562 * 4563 * Note: Some of the names in PropertyValueAliases.txt will only be 4564 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 4565 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 4566 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 4567 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 4568 * 4569 * @param property UProperty selector constant. 
4570 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 4571 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 4572 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 4573 * Only these properties can be enumerated. 4574 * 4575 * @param valueAlias the value name to be matched. The name is 4576 * compared using "loose matching" as described in 4577 * PropertyValueAliases.txt. 4578 * 4579 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 4580 * values are mask values produced by left-shifting 1 by 4581 * UCharacter.getType(). This allows grouped categories such as 4582 * [:L:] to be represented. 4583 * 4584 * @see UProperty 4585 * @throws IllegalArgumentException if property is not a valid UProperty 4586 * selector or valueAlias is not a value of this property 4587 */ getPropertyValueEnum(int property, CharSequence valueAlias)4588 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 4589 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 4590 if (propEnum == UProperty.UNDEFINED) { 4591 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 4592 } 4593 return propEnum; 4594 } 4595 4596 /** 4597 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 4598 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 4599 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 4600 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 4601 * @deprecated This API is ICU internal only. 
4602 * @hide deprecated on icu4j-org 4603 * @hide draft / provisional / internal are hidden on OHOS 4604 */ 4605 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4606 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 4607 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 4608 } 4609 4610 4611 /** 4612 * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units. 4613 * 4614 * @param lead the lead char 4615 * @param trail the trail char 4616 * @return code point if surrogate characters are valid. 4617 * @exception IllegalArgumentException thrown when the code units do 4618 * not form a valid code point 4619 */ getCodePoint(char lead, char trail)4620 public static int getCodePoint(char lead, char trail) 4621 { 4622 if (Character.isSurrogatePair(lead, trail)) { 4623 return Character.toCodePoint(lead, trail); 4624 } 4625 throw new IllegalArgumentException("Illegal surrogate characters"); 4626 } 4627 4628 /** 4629 * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point. 4630 * 4631 * @param char16 the BMP code point 4632 * @return code point if argument is a valid character. 4633 * @exception IllegalArgumentException thrown when char16 is not a valid 4634 * code point 4635 */ getCodePoint(char char16)4636 public static int getCodePoint(char char16) 4637 { 4638 if (UCharacter.isLegal(char16)) { 4639 return char16; 4640 } 4641 throw new IllegalArgumentException("Illegal codepoint"); 4642 } 4643 4644 /** 4645 * Returns the uppercase version of the argument string. 4646 * Casing is dependent on the default locale and context-sensitive. 
4647 * @param str source string to be performed on 4648 * @return uppercase version of the argument string 4649 */ toUpperCase(String str)4650 public static String toUpperCase(String str) 4651 { 4652 return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str); 4653 } 4654 4655 /** 4656 * Returns the lowercase version of the argument string. 4657 * Casing is dependent on the default locale and context-sensitive 4658 * @param str source string to be performed on 4659 * @return lowercase version of the argument string 4660 */ toLowerCase(String str)4661 public static String toLowerCase(String str) 4662 { 4663 return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str); 4664 } 4665 4666 /** 4667 * <p>Returns the titlecase version of the argument string. 4668 * <p>Position for titlecasing is determined by the argument break 4669 * iterator, hence the user can customize his break iterator for 4670 * a specialized titlecasing. In this case only the forward iteration 4671 * needs to be implemented. 4672 * If the break iterator passed in is null, the default Unicode algorithm 4673 * will be used to determine the titlecase positions. 4674 * 4675 * <p>Only positions returned by the break iterator will be title cased, 4676 * character in between the positions will all be in lower case. 4677 * <p>Casing is dependent on the default locale and context-sensitive 4678 * @param str source string to be performed on 4679 * @param breakiter break iterator to determine the positions in which 4680 * the character should be title cased. 
4681 * @return titlecase version of the argument string 4682 */ toTitleCase(String str, BreakIterator breakiter)4683 public static String toTitleCase(String str, BreakIterator breakiter) 4684 { 4685 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 4686 } 4687 getDefaultCaseLocale()4688 private static int getDefaultCaseLocale() { 4689 return UCaseProps.getCaseLocale(Locale.getDefault()); 4690 } 4691 getCaseLocale(Locale locale)4692 private static int getCaseLocale(Locale locale) { 4693 if (locale == null) { 4694 locale = Locale.getDefault(); 4695 } 4696 return UCaseProps.getCaseLocale(locale); 4697 } 4698 getCaseLocale(ULocale locale)4699 private static int getCaseLocale(ULocale locale) { 4700 if (locale == null) { 4701 locale = ULocale.getDefault(); 4702 } 4703 return UCaseProps.getCaseLocale(locale); 4704 } 4705 4706 /** 4707 * Returns the uppercase version of the argument string. 4708 * Casing is dependent on the argument locale and context-sensitive. 4709 * @param locale which string is to be converted in 4710 * @param str source string to be performed on 4711 * @return uppercase version of the argument string 4712 */ toUpperCase(Locale locale, String str)4713 public static String toUpperCase(Locale locale, String str) 4714 { 4715 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 4716 } 4717 4718 /** 4719 * Returns the uppercase version of the argument string. 4720 * Casing is dependent on the argument locale and context-sensitive. 4721 * @param locale which string is to be converted in 4722 * @param str source string to be performed on 4723 * @return uppercase version of the argument string 4724 */ toUpperCase(ULocale locale, String str)4725 public static String toUpperCase(ULocale locale, String str) { 4726 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 4727 } 4728 4729 /** 4730 * Returns the lowercase version of the argument string. 
4731 * Casing is dependent on the argument locale and context-sensitive 4732 * @param locale which string is to be converted in 4733 * @param str source string to be performed on 4734 * @return lowercase version of the argument string 4735 */ toLowerCase(Locale locale, String str)4736 public static String toLowerCase(Locale locale, String str) 4737 { 4738 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 4739 } 4740 4741 /** 4742 * Returns the lowercase version of the argument string. 4743 * Casing is dependent on the argument locale and context-sensitive 4744 * @param locale which string is to be converted in 4745 * @param str source string to be performed on 4746 * @return lowercase version of the argument string 4747 */ toLowerCase(ULocale locale, String str)4748 public static String toLowerCase(ULocale locale, String str) { 4749 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 4750 } 4751 4752 /** 4753 * <p>Returns the titlecase version of the argument string. 4754 * <p>Position for titlecasing is determined by the argument break 4755 * iterator, hence the user can customize his break iterator for 4756 * a specialized titlecasing. In this case only the forward iteration 4757 * needs to be implemented. 4758 * If the break iterator passed in is null, the default Unicode algorithm 4759 * will be used to determine the titlecase positions. 4760 * 4761 * <p>Only positions returned by the break iterator will be title cased, 4762 * character in between the positions will all be in lower case. 4763 * <p>Casing is dependent on the argument locale and context-sensitive 4764 * @param locale which string is to be converted in 4765 * @param str source string to be performed on 4766 * @param breakiter break iterator to determine the positions in which 4767 * the character should be title cased. 
4768 * @return titlecase version of the argument string 4769 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)4770 public static String toTitleCase(Locale locale, String str, 4771 BreakIterator breakiter) 4772 { 4773 return toTitleCase(locale, str, breakiter, 0); 4774 } 4775 4776 /** 4777 * <p>Returns the titlecase version of the argument string. 4778 * <p>Position for titlecasing is determined by the argument break 4779 * iterator, hence the user can customize his break iterator for 4780 * a specialized titlecasing. In this case only the forward iteration 4781 * needs to be implemented. 4782 * If the break iterator passed in is null, the default Unicode algorithm 4783 * will be used to determine the titlecase positions. 4784 * 4785 * <p>Only positions returned by the break iterator will be title cased, 4786 * character in between the positions will all be in lower case. 4787 * <p>Casing is dependent on the argument locale and context-sensitive 4788 * @param locale which string is to be converted in 4789 * @param str source string to be performed on 4790 * @param titleIter break iterator to determine the positions in which 4791 * the character should be title cased. 4792 * @return titlecase version of the argument string 4793 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)4794 public static String toTitleCase(ULocale locale, String str, 4795 BreakIterator titleIter) { 4796 return toTitleCase(locale, str, titleIter, 0); 4797 } 4798 4799 /** 4800 * <p>Returns the titlecase version of the argument string. 4801 * <p>Position for titlecasing is determined by the argument break 4802 * iterator, hence the user can customize his break iterator for 4803 * a specialized titlecasing. In this case only the forward iteration 4804 * needs to be implemented. 4805 * If the break iterator passed in is null, the default Unicode algorithm 4806 * will be used to determine the titlecase positions. 
4807 * 4808 * <p>Only positions returned by the break iterator will be title cased, 4809 * character in between the positions will all be in lower case. 4810 * <p>Casing is dependent on the argument locale and context-sensitive 4811 * @param locale which string is to be converted in 4812 * @param str source string to be performed on 4813 * @param titleIter break iterator to determine the positions in which 4814 * the character should be title cased. 4815 * @param options bit set to modify the titlecasing operation 4816 * @return titlecase version of the argument string 4817 * @see #TITLECASE_NO_LOWERCASE 4818 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 4819 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)4820 public static String toTitleCase(ULocale locale, String str, 4821 BreakIterator titleIter, int options) { 4822 if (titleIter == null && locale == null) { 4823 locale = ULocale.getDefault(); 4824 } 4825 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 4826 titleIter.setText(str); 4827 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 4828 } 4829 4830 /** 4831 * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string. 4832 * <p>Position for titlecasing is determined by the argument break 4833 * iterator, hence the user can customize his break iterator for 4834 * a specialized titlecasing. In this case only the forward iteration 4835 * needs to be implemented. 4836 * If the break iterator passed in is null, the default Unicode algorithm 4837 * will be used to determine the titlecase positions. 4838 * 4839 * <p>Only positions returned by the break iterator will be title cased, 4840 * character in between the positions will all be in lower case. 
4841 * <p>Casing is dependent on the argument locale and context-sensitive 4842 * @param locale which string is to be converted in 4843 * @param str source string to be performed on 4844 * @param titleIter break iterator to determine the positions in which 4845 * the character should be title cased. 4846 * @param options bit set to modify the titlecasing operation 4847 * @return titlecase version of the argument string 4848 * @see #TITLECASE_NO_LOWERCASE 4849 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 4850 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)4851 public static String toTitleCase(Locale locale, String str, 4852 BreakIterator titleIter, 4853 int options) { 4854 if (titleIter == null && locale == null) { 4855 locale = Locale.getDefault(); 4856 } 4857 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 4858 titleIter.setText(str); 4859 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 4860 } 4861 4862 /** 4863 * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according 4864 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 4865 * folding equivalent, the character itself is returned. 4866 * 4867 * <p>This function only returns the simple, single-code point case mapping. 4868 * Full case mappings should be used whenever possible because they produce 4869 * better results by working on whole strings. 4870 * They can map to a result string with a different length as appropriate. 4871 * Full case mappings are applied by the case mapping functions 4872 * that take String parameters rather than code points (int). 
4873 * See also the User Guide chapter on C/POSIX migration: 4874 * http://www.icu-project.org/userguide/posix.html#case_mappings 4875 * 4876 * @param ch the character to be converted 4877 * @param defaultmapping Indicates whether the default mappings defined in 4878 * CaseFolding.txt are to be used, otherwise the 4879 * mappings for dotted I and dotless i marked with 4880 * 'T' in CaseFolding.txt are included. 4881 * @return the case folding equivalent of the character, if 4882 * any; otherwise the character itself. 4883 * @see #foldCase(String, boolean) 4884 */ foldCase(int ch, boolean defaultmapping)4885 public static int foldCase(int ch, boolean defaultmapping) { 4886 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 4887 } 4888 4889 /** 4890 * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to 4891 * UnicodeData.txt and CaseFolding.txt; if any character has no case 4892 * folding equivalent, the character itself is returned. 4893 * "Full", multiple-code point case folding mappings are returned here. 4894 * For "simple" single-code point mappings use the API 4895 * foldCase(int ch, boolean defaultmapping). 4896 * @param str the String to be converted 4897 * @param defaultmapping Indicates whether the default mappings defined in 4898 * CaseFolding.txt are to be used, otherwise the 4899 * mappings for dotted I and dotless i marked with 4900 * 'T' in CaseFolding.txt are included. 4901 * @return the case folding equivalent of the character, if 4902 * any; otherwise the character itself. 4903 * @see #foldCase(int, boolean) 4904 */ foldCase(String str, boolean defaultmapping)4905 public static String foldCase(String str, boolean defaultmapping) { 4906 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 4907 } 4908 4909 /** 4910 * <strong>[icu]</strong> Option value for case folding: use default mappings defined in 4911 * CaseFolding.txt. 
4912 */ 4913 public static final int FOLD_CASE_DEFAULT = 0x0000; 4914 /** 4915 * <strong>[icu]</strong> Option value for case folding: 4916 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 4917 * and dotless i appropriately for Turkic languages (tr, az). 4918 * 4919 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 4920 * are to be included for default mappings and 4921 * excluded for the Turkic-specific mappings. 4922 * 4923 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 4924 * are to be excluded for default mappings and 4925 * included for the Turkic-specific mappings. 4926 */ 4927 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 4928 4929 /** 4930 * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according 4931 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 4932 * folding equivalent, the character itself is returned. 4933 * 4934 * <p>This function only returns the simple, single-code point case mapping. 4935 * Full case mappings should be used whenever possible because they produce 4936 * better results by working on whole strings. 4937 * They can map to a result string with a different length as appropriate. 4938 * Full case mappings are applied by the case mapping functions 4939 * that take String parameters rather than code points (int). 4940 * See also the User Guide chapter on C/POSIX migration: 4941 * http://www.icu-project.org/userguide/posix.html#case_mappings 4942 * 4943 * @param ch the character to be converted 4944 * @param options A bit set for special processing. Currently the recognised options 4945 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 4946 * @return the case folding equivalent of the character, if any; otherwise the 4947 * character itself. 
4948 * @see #foldCase(String, boolean) 4949 */ foldCase(int ch, int options)4950 public static int foldCase(int ch, int options) { 4951 return UCaseProps.INSTANCE.fold(ch, options); 4952 } 4953 4954 /** 4955 * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to 4956 * UnicodeData.txt and CaseFolding.txt; if any character has no case 4957 * folding equivalent, the character itself is returned. 4958 * "Full", multiple-code point case folding mappings are returned here. 4959 * For "simple" single-code point mappings use the API 4960 * foldCase(int ch, boolean defaultmapping). 4961 * @param str the String to be converted 4962 * @param options A bit set for special processing. Currently the recognised options 4963 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 4964 * @return the case folding equivalent of the character, if any; otherwise the 4965 * character itself. 4966 * @see #foldCase(int, boolean) 4967 */ foldCase(String str, int options)4968 public static final String foldCase(String str, int options) { 4969 return CaseMapImpl.fold(options, str); 4970 } 4971 4972 /** 4973 * <strong>[icu]</strong> Returns the numeric value of a Han character. 4974 * 4975 * <p>This returns the value of Han 'numeric' code points, 4976 * including those for zero, ten, hundred, thousand, ten thousand, 4977 * and hundred million. 4978 * This includes both the standard and 'checkwriting' 4979 * characters, the 'big circle' zero character, and the standard 4980 * zero character. 4981 * 4982 * <p>Note: The Unicode Standard has numeric values for more 4983 * Han characters recognized by this method 4984 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 4985 * and a {@link ohos.global.icu.text.NumberFormat} can be used with 4986 * a Chinese {@link ohos.global.icu.text.NumberingSystem}. 4987 * 4988 * @param ch code point to query 4989 * @return value if it is a Han 'numeric character,' otherwise return -1. 
4990 */ getHanNumericValue(int ch)4991 public static int getHanNumericValue(int ch) 4992 { 4993 switch(ch) 4994 { 4995 case IDEOGRAPHIC_NUMBER_ZERO_ : 4996 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 4997 return 0; // Han Zero 4998 case CJK_IDEOGRAPH_FIRST_ : 4999 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5000 return 1; // Han One 5001 case CJK_IDEOGRAPH_SECOND_ : 5002 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5003 return 2; // Han Two 5004 case CJK_IDEOGRAPH_THIRD_ : 5005 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5006 return 3; // Han Three 5007 case CJK_IDEOGRAPH_FOURTH_ : 5008 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5009 return 4; // Han Four 5010 case CJK_IDEOGRAPH_FIFTH_ : 5011 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5012 return 5; // Han Five 5013 case CJK_IDEOGRAPH_SIXTH_ : 5014 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5015 return 6; // Han Six 5016 case CJK_IDEOGRAPH_SEVENTH_ : 5017 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5018 return 7; // Han Seven 5019 case CJK_IDEOGRAPH_EIGHTH_ : 5020 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5021 return 8; // Han Eight 5022 case CJK_IDEOGRAPH_NINETH_ : 5023 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5024 return 9; // Han Nine 5025 case CJK_IDEOGRAPH_TEN_ : 5026 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5027 return 10; 5028 case CJK_IDEOGRAPH_HUNDRED_ : 5029 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5030 return 100; 5031 case CJK_IDEOGRAPH_THOUSAND_ : 5032 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5033 return 1000; 5034 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5035 return 10000; 5036 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5037 return 100000000; 5038 } 5039 return -1; // no value 5040 } 5041 5042 /** 5043 * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints. 
5044 * <p>Example of use:<br> 5045 * <pre> 5046 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5047 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5048 * while (iterator.next(element)) { 5049 * System.out.println("Codepoint \\u" + 5050 * Integer.toHexString(element.start) + 5051 * " to codepoint \\u" + 5052 * Integer.toHexString(element.limit - 1) + 5053 * " has the character type " + 5054 * element.value); 5055 * } 5056 * </pre> 5057 * @return an iterator 5058 */ getTypeIterator()5059 public static RangeValueIterator getTypeIterator() 5060 { 5061 return new UCharacterTypeIterator(); 5062 } 5063 5064 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()5065 UCharacterTypeIterator() { 5066 reset(); 5067 } 5068 5069 // implements RangeValueIterator 5070 @Override next(Element element)5071 public boolean next(Element element) { 5072 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5073 element.start=range.startCodePoint; 5074 element.limit=range.endCodePoint+1; 5075 element.value=range.value; 5076 return true; 5077 } else { 5078 return false; 5079 } 5080 } 5081 5082 // implements RangeValueIterator 5083 @Override reset()5084 public void reset() { 5085 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5086 } 5087 5088 private Iterator<Trie2.Range> trieIterator; 5089 private Trie2.Range range; 5090 5091 private static final class MaskType implements Trie2.ValueMapper { 5092 // Extracts the general category ("character type") from the trie value. 5093 @Override map(int value)5094 public int map(int value) { 5095 return value & UCharacterProperty.TYPE_MASK; 5096 } 5097 } 5098 private static final MaskType MASK_TYPE=new MaskType(); 5099 } 5100 5101 /** 5102 * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints. 
5103 * <p>This API only gets the iterator for the modern, most up-to-date 5104 * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or 5105 * for extended names use getExtendedNameIterator(). 5106 * <p>Example of use:<br> 5107 * <pre> 5108 * ValueIterator iterator = UCharacter.getNameIterator(); 5109 * ValueIterator.Element element = new ValueIterator.Element(); 5110 * while (iterator.next(element)) { 5111 * System.out.println("Codepoint \\u" + 5112 * Integer.toHexString(element.codepoint) + 5113 * " has the name " + (String)element.value); 5114 * } 5115 * </pre> 5116 * <p>The maximal range which the name iterator iterates is from 5117 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 5118 * @return an iterator 5119 */ getNameIterator()5120 public static ValueIterator getNameIterator(){ 5121 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5122 UCharacterNameChoice.UNICODE_CHAR_NAME); 5123 } 5124 5125 /** 5126 * <strong>[icu]</strong> Returns an empty iterator. 5127 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 5128 * @return an empty iterator 5129 * @deprecated ICU 49 5130 * @see #getName1_0(int) 5131 * @hide deprecated on icu4j-org 5132 */ 5133 @Deprecated getName1_0Iterator()5134 public static ValueIterator getName1_0Iterator(){ 5135 return new DummyValueIterator(); 5136 } 5137 5138 private static final class DummyValueIterator implements ValueIterator { 5139 @Override next(Element element)5140 public boolean next(Element element) { return false; } 5141 @Override reset()5142 public void reset() {} 5143 @Override setRange(int start, int limit)5144 public void setRange(int start, int limit) {} 5145 } 5146 5147 /** 5148 * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints. 5149 * <p>This API only gets the iterator for the extended names. 
5150 * For modern, most up-to-date Unicode names use getNameIterator() or 5151 * for older 1.0 Unicode names use get1_0NameIterator(). 5152 * <p>Example of use:<br> 5153 * <pre> 5154 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5155 * ValueIterator.Element element = new ValueIterator.Element(); 5156 * while (iterator.next(element)) { 5157 * System.out.println("Codepoint \\u" + 5158 * Integer.toHexString(element.codepoint) + 5159 * " has the name " + (String)element.value); 5160 * } 5161 * </pre> 5162 * <p>The maximal range which the name iterator iterates is from 5163 * @return an iterator 5164 */ getExtendedNameIterator()5165 public static ValueIterator getExtendedNameIterator(){ 5166 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5167 UCharacterNameChoice.EXTENDED_CHAR_NAME); 5168 } 5169 5170 /** 5171 * <strong>[icu]</strong> Returns the "age" of the code point. 5172 * <p>The "age" is the Unicode version when the code point was first 5173 * designated (as a non-character or for Private Use) or assigned a 5174 * character. 5175 * <p>This can be useful to avoid emitting code points to receiving 5176 * processes that do not accept newer characters. 5177 * <p>The data is from the UCD file DerivedAge.txt. 5178 * @param ch The code point. 5179 * @return the Unicode version number 5180 */ getAge(int ch)5181 public static VersionInfo getAge(int ch) 5182 { 5183 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5184 throw new IllegalArgumentException("Codepoint out of bounds"); 5185 } 5186 return UCharacterProperty.INSTANCE.getAge(ch); 5187 } 5188 5189 /** 5190 * <strong>[icu]</strong> Check a binary Unicode property for a code point. 5191 * <p>Unicode, especially in version 3.2, defines many more properties 5192 * than the original set in UnicodeData.txt. 5193 * <p>This API is intended to reflect Unicode properties as defined in 5194 * the Unicode Character Database (UCD) and Unicode Technical Reports 5195 * (UTR). 
5196 * <p>For details about the properties see 5197 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 5198 * <p>For names of Unicode properties see the UCD file 5199 * PropertyAliases.txt. 5200 * <p>This API does not check the validity of the codepoint. 5201 * <p>Important: If ICU is built with UCD files from Unicode versions 5202 * below 3.2, then properties marked with "new" are not or 5203 * not fully available. 5204 * @param ch code point to test. 5205 * @param property selector constant from ohos.global.icu.lang.UProperty, 5206 * identifies which binary property to check. 5207 * @return true or false according to the binary Unicode property value 5208 * for ch. Also false if property is out of bounds or if the 5209 * Unicode version does not have data for the property at all, or 5210 * not for this code point. 5211 * @see ohos.global.icu.lang.UProperty 5212 * @see CharacterProperties#getBinaryPropertySet(int) 5213 */ hasBinaryProperty(int ch, int property)5214 public static boolean hasBinaryProperty(int ch, int property) 5215 { 5216 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5217 } 5218 5219 /** 5220 * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property. 5221 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 5222 * <p>Different from UCharacter.isLetter(ch)! 5223 * @param ch codepoint to be tested 5224 */ isUAlphabetic(int ch)5225 public static boolean isUAlphabetic(int ch) 5226 { 5227 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5228 } 5229 5230 /** 5231 * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property. 5232 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 5233 * <p>This is different from UCharacter.isLowerCase(ch)! 
5234 * @param ch codepoint to be tested 5235 */ isULowercase(int ch)5236 public static boolean isULowercase(int ch) 5237 { 5238 return hasBinaryProperty(ch, UProperty.LOWERCASE); 5239 } 5240 5241 /** 5242 * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property. 5243 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 5244 * <p>This is different from UCharacter.isUpperCase(ch)! 5245 * @param ch codepoint to be tested 5246 */ isUUppercase(int ch)5247 public static boolean isUUppercase(int ch) 5248 { 5249 return hasBinaryProperty(ch, UProperty.UPPERCASE); 5250 } 5251 5252 /** 5253 * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property. 5254 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 5255 * <p>This is different from both UCharacter.isSpace(ch) and 5256 * UCharacter.isWhitespace(ch)! 5257 * @param ch codepoint to be tested 5258 */ isUWhiteSpace(int ch)5259 public static boolean isUWhiteSpace(int ch) 5260 { 5261 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 5262 } 5263 5264 /** 5265 * <strong>[icu]</strong> Returns the property value for a Unicode property type of a code point. 5266 * Also returns binary and mask property values. 5267 * <p>Unicode, especially in version 3.2, defines many more properties than 5268 * the original set in UnicodeData.txt. 5269 * <p>The properties APIs are intended to reflect Unicode properties as 5270 * defined in the Unicode Character Database (UCD) and Unicode Technical 5271 * Reports (UTR). For details about the properties see 5272 * http://www.unicode.org/. 5273 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 5274 * 5275 * <pre> 5276 * Sample usage: 5277 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 5278 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 5279 * boolean b = (ideo == 1) ? true : false; 5280 * </pre> 5281 * @param ch code point to test. 
5282 * @param type UProperty selector constant, identifies which binary 5283 * property to check. Must be 5284 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5285 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5286 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 5287 * @return numeric value that is directly the property value or, 5288 * for enumerated properties, corresponds to the numeric value of 5289 * the enumerated constant of the respective property value type 5290 * ({@link ECharacterCategory}, {@link ECharacterDirection}, 5291 * {@link DecompositionType}, etc.). 5292 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5293 * Returns a bit-mask for mask properties. 5294 * Returns 0 if 'type' is out of bounds or if the Unicode version 5295 * does not have data for the property at all, or not for this code 5296 * point. 5297 * @see UProperty 5298 * @see #hasBinaryProperty 5299 * @see #getIntPropertyMinValue 5300 * @see #getIntPropertyMaxValue 5301 * @see CharacterProperties#getIntPropertyMap(int) 5302 * @see #getUnicodeVersion 5303 */ getIntPropertyValue(int ch, int type)5304 public static int getIntPropertyValue(int ch, int type) 5305 { 5306 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5307 } 5308 /** 5309 * <strong>[icu]</strong> Returns a string version of the property value. 5310 * @param propertyEnum The property enum value. 5311 * @param codepoint The codepoint value. 5312 * @param nameChoice The choice of the name. 5313 * @return value as string 5314 * @deprecated This API is ICU internal only. 
5315 * @hide deprecated on icu4j-org 5316 * @hide draft / provisional / internal are hidden on OHOS 5317 */ 5318 @Deprecated 5319 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5320 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5321 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5322 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5323 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5324 nameChoice); 5325 } 5326 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5327 return String.valueOf(getUnicodeNumericValue(codepoint)); 5328 } 5329 // otherwise must be string property 5330 switch (propertyEnum) { 5331 case UProperty.AGE: return getAge(codepoint).toString(); 5332 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5333 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 5334 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 5335 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5336 case UProperty.NAME: return getName(codepoint); 5337 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 5338 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5339 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5340 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5341 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5342 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5343 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5344 } 5345 throw new IllegalArgumentException("Illegal Property Enum"); 5346 } 5347 ///CLOVER:ON 5348 5349 /** 5350 * <strong>[icu]</strong> Returns the minimum value for an integer/binary Unicode 
property type. 5351 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5352 * to allocate arrays of ohos.global.icu.text.UnicodeSet or similar. 5353 * @param type UProperty selector constant, identifies which binary 5354 * property to check. Must be 5355 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5356 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5357 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5358 * for a Unicode property. 0 if the property 5359 * selector 'type' is out of range. 5360 * @see UProperty 5361 * @see #hasBinaryProperty 5362 * @see #getUnicodeVersion 5363 * @see #getIntPropertyMaxValue 5364 * @see #getIntPropertyValue 5365 */ getIntPropertyMinValue(int type)5366 public static int getIntPropertyMinValue(int type){ 5367 5368 return 0; // undefined; and: all other properties have a minimum value of 0 5369 } 5370 5371 5372 /** 5373 * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property. 5374 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5375 * to allocate arrays of ohos.global.icu.text.UnicodeSet or similar. 5376 * Examples for min/max values (for Unicode 3.2): 5377 * <ul> 5378 * <li> UProperty.BIDI_CLASS: 0/18 5379 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5380 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5381 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5382 * </ul> 5383 * For undefined UProperty constant values, min/max values will be 0/-1. 5384 * @param type UProperty selector constant, identifies which binary 5385 * property to check. Must be 5386 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5387 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5388 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 5389 * property. <= 0 if the property selector 'type' is out of range. 
5390 * @see UProperty 5391 * @see #hasBinaryProperty 5392 * @see #getUnicodeVersion 5393 * @see #getIntPropertyMaxValue 5394 * @see #getIntPropertyValue 5395 */ getIntPropertyMaxValue(int type)5396 public static int getIntPropertyMaxValue(int type) 5397 { 5398 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 5399 } 5400 5401 /** 5402 * Provide the java.lang.Character forDigit API, for convenience. 5403 */ forDigit(int digit, int radix)5404 public static char forDigit(int digit, int radix) { 5405 return java.lang.Character.forDigit(digit, radix); 5406 } 5407 5408 // JDK 1.5 API coverage 5409 5410 /** 5411 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 5412 */ 5413 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 5414 5415 /** 5416 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 5417 */ 5418 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 5419 5420 /** 5421 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 5422 */ 5423 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 5424 5425 /** 5426 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 5427 */ 5428 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 5429 5430 /** 5431 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 5432 */ 5433 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 5434 5435 /** 5436 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 5437 */ 5438 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 5439 5440 /** 5441 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 5442 */ 5443 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 5444 5445 /** 5446 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 
5447 */ 5448 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 5449 5450 /** 5451 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 5452 */ 5453 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 5454 5455 /** 5456 * Equivalent to {@link Character#isValidCodePoint}. 5457 * 5458 * @param cp the code point to check 5459 * @return true if cp is a valid code point 5460 */ isValidCodePoint(int cp)5461 public static final boolean isValidCodePoint(int cp) { 5462 return cp >= 0 && cp <= MAX_CODE_POINT; 5463 } 5464 5465 /** 5466 * Same as {@link Character#isSupplementaryCodePoint}. 5467 * 5468 * @param cp the code point to check 5469 * @return true if cp is a supplementary code point 5470 */ isSupplementaryCodePoint(int cp)5471 public static final boolean isSupplementaryCodePoint(int cp) { 5472 return Character.isSupplementaryCodePoint(cp); 5473 } 5474 5475 /** 5476 * Same as {@link Character#isHighSurrogate}. 5477 * 5478 * @param ch the char to check 5479 * @return true if ch is a high (lead) surrogate 5480 */ isHighSurrogate(char ch)5481 public static boolean isHighSurrogate(char ch) { 5482 return Character.isHighSurrogate(ch); 5483 } 5484 5485 /** 5486 * Same as {@link Character#isLowSurrogate}. 5487 * 5488 * @param ch the char to check 5489 * @return true if ch is a low (trail) surrogate 5490 */ isLowSurrogate(char ch)5491 public static boolean isLowSurrogate(char ch) { 5492 return Character.isLowSurrogate(ch); 5493 } 5494 5495 /** 5496 * Same as {@link Character#isSurrogatePair}. 5497 * 5498 * @param high the high (lead) char 5499 * @param low the low (trail) char 5500 * @return true if high, low form a surrogate pair 5501 */ isSurrogatePair(char high, char low)5502 public static final boolean isSurrogatePair(char high, char low) { 5503 return Character.isSurrogatePair(high, low); 5504 } 5505 5506 /** 5507 * Same as {@link Character#charCount}. 5508 * Returns the number of chars needed to represent the code point (1 or 2). 
5509 * This does not check the code point for validity. 5510 * 5511 * @param cp the code point to check 5512 * @return the number of chars needed to represent the code point 5513 */ charCount(int cp)5514 public static int charCount(int cp) { 5515 return Character.charCount(cp); 5516 } 5517 5518 /** 5519 * Same as {@link Character#toCodePoint}. 5520 * Returns the code point represented by the two surrogate code units. 5521 * This does not check the surrogate pair for validity. 5522 * 5523 * @param high the high (lead) surrogate 5524 * @param low the low (trail) surrogate 5525 * @return the code point formed by the surrogate pair 5526 */ toCodePoint(char high, char low)5527 public static final int toCodePoint(char high, char low) { 5528 return Character.toCodePoint(high, low); 5529 } 5530 5531 /** 5532 * Same as {@link Character#codePointAt(CharSequence, int)}. 5533 * Returns the code point at index. 5534 * This examines only the characters at index and index+1. 5535 * 5536 * @param seq the characters to check 5537 * @param index the index of the first or only char forming the code point 5538 * @return the code point at the index 5539 */ codePointAt(CharSequence seq, int index)5540 public static final int codePointAt(CharSequence seq, int index) { 5541 char c1 = seq.charAt(index++); 5542 if (isHighSurrogate(c1)) { 5543 if (index < seq.length()) { 5544 char c2 = seq.charAt(index); 5545 if (isLowSurrogate(c2)) { 5546 return toCodePoint(c1, c2); 5547 } 5548 } 5549 } 5550 return c1; 5551 } 5552 5553 /** 5554 * Same as {@link Character#codePointAt(char[], int)}. 5555 * Returns the code point at index. 5556 * This examines only the characters at index and index+1. 
5557 * 5558 * @param text the characters to check 5559 * @param index the index of the first or only char forming the code point 5560 * @return the code point at the index 5561 */ codePointAt(char[] text, int index)5562 public static final int codePointAt(char[] text, int index) { 5563 char c1 = text[index++]; 5564 if (isHighSurrogate(c1)) { 5565 if (index < text.length) { 5566 char c2 = text[index]; 5567 if (isLowSurrogate(c2)) { 5568 return toCodePoint(c1, c2); 5569 } 5570 } 5571 } 5572 return c1; 5573 } 5574 5575 /** 5576 * Same as {@link Character#codePointAt(char[], int, int)}. 5577 * Returns the code point at index. 5578 * This examines only the characters at index and index+1. 5579 * 5580 * @param text the characters to check 5581 * @param index the index of the first or only char forming the code point 5582 * @param limit the limit of the valid text 5583 * @return the code point at the index 5584 */ codePointAt(char[] text, int index, int limit)5585 public static final int codePointAt(char[] text, int index, int limit) { 5586 if (index >= limit || limit > text.length) { 5587 throw new IndexOutOfBoundsException(); 5588 } 5589 char c1 = text[index++]; 5590 if (isHighSurrogate(c1)) { 5591 if (index < limit) { 5592 char c2 = text[index]; 5593 if (isLowSurrogate(c2)) { 5594 return toCodePoint(c1, c2); 5595 } 5596 } 5597 } 5598 return c1; 5599 } 5600 5601 /** 5602 * Same as {@link Character#codePointBefore(CharSequence, int)}. 5603 * Return the code point before index. 5604 * This examines only the characters at index-1 and index-2. 
5605 * 5606 * @param seq the characters to check 5607 * @param index the index after the last or only char forming the code point 5608 * @return the code point before the index 5609 */ codePointBefore(CharSequence seq, int index)5610 public static final int codePointBefore(CharSequence seq, int index) { 5611 char c2 = seq.charAt(--index); 5612 if (isLowSurrogate(c2)) { 5613 if (index > 0) { 5614 char c1 = seq.charAt(--index); 5615 if (isHighSurrogate(c1)) { 5616 return toCodePoint(c1, c2); 5617 } 5618 } 5619 } 5620 return c2; 5621 } 5622 5623 /** 5624 * Same as {@link Character#codePointBefore(char[], int)}. 5625 * Returns the code point before index. 5626 * This examines only the characters at index-1 and index-2. 5627 * 5628 * @param text the characters to check 5629 * @param index the index after the last or only char forming the code point 5630 * @return the code point before the index 5631 */ codePointBefore(char[] text, int index)5632 public static final int codePointBefore(char[] text, int index) { 5633 char c2 = text[--index]; 5634 if (isLowSurrogate(c2)) { 5635 if (index > 0) { 5636 char c1 = text[--index]; 5637 if (isHighSurrogate(c1)) { 5638 return toCodePoint(c1, c2); 5639 } 5640 } 5641 } 5642 return c2; 5643 } 5644 5645 /** 5646 * Same as {@link Character#codePointBefore(char[], int, int)}. 5647 * Return the code point before index. 5648 * This examines only the characters at index-1 and index-2. 
5649 * 5650 * @param text the characters to check 5651 * @param index the index after the last or only char forming the code point 5652 * @param limit the start of the valid text 5653 * @return the code point before the index 5654 */ codePointBefore(char[] text, int index, int limit)5655 public static final int codePointBefore(char[] text, int index, int limit) { 5656 if (index <= limit || limit < 0) { 5657 throw new IndexOutOfBoundsException(); 5658 } 5659 char c2 = text[--index]; 5660 if (isLowSurrogate(c2)) { 5661 if (index > limit) { 5662 char c1 = text[--index]; 5663 if (isHighSurrogate(c1)) { 5664 return toCodePoint(c1, c2); 5665 } 5666 } 5667 } 5668 return c2; 5669 } 5670 5671 /** 5672 * Same as {@link Character#toChars(int, char[], int)}. 5673 * Writes the chars representing the 5674 * code point into the destination at the given index. 5675 * 5676 * @param cp the code point to convert 5677 * @param dst the destination array into which to put the char(s) representing the code point 5678 * @param dstIndex the index at which to put the first (or only) char 5679 * @return the count of the number of chars written (1 or 2) 5680 * @throws IllegalArgumentException if cp is not a valid code point 5681 */ toChars(int cp, char[] dst, int dstIndex)5682 public static final int toChars(int cp, char[] dst, int dstIndex) { 5683 return Character.toChars(cp, dst, dstIndex); 5684 } 5685 5686 /** 5687 * Same as {@link Character#toChars(int)}. 5688 * Returns a char array representing the code point. 5689 * 5690 * @param cp the code point to convert 5691 * @return an array containing the char(s) representing the code point 5692 * @throws IllegalArgumentException if cp is not a valid code point 5693 */ toChars(int cp)5694 public static final char[] toChars(int cp) { 5695 return Character.toChars(cp); 5696 } 5697 5698 /** 5699 * Equivalent to the {@link Character#getDirectionality(char)} method, for 5700 * convenience. 
Returns a byte representing the directionality of the 5701 * character. 5702 * 5703 * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns 5704 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 5705 * 5706 * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link 5707 * UCharacterDirection} and its interface {@link 5708 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 5709 * defined by <code>java.lang.Character</code>. 5710 * @param cp the code point to check 5711 * @return the directionality of the code point 5712 * @see #getDirection 5713 */ getDirectionality(int cp)5714 public static byte getDirectionality(int cp) 5715 { 5716 return (byte)getDirection(cp); 5717 } 5718 5719 /** 5720 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 5721 * method, for convenience. Counts the number of code points in the range 5722 * of text. 
5723 * @param text the characters to check 5724 * @param start the start of the range 5725 * @param limit the limit of the range 5726 * @return the number of code points in the range 5727 */ codePointCount(CharSequence text, int start, int limit)5728 public static int codePointCount(CharSequence text, int start, int limit) { 5729 if (start < 0 || limit < start || limit > text.length()) { 5730 throw new IndexOutOfBoundsException("start (" + start + 5731 ") or limit (" + limit + 5732 ") invalid or out of range 0, " + text.length()); 5733 } 5734 5735 int len = limit - start; 5736 while (limit > start) { 5737 char ch = text.charAt(--limit); 5738 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 5739 ch = text.charAt(--limit); 5740 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 5741 --len; 5742 break; 5743 } 5744 } 5745 } 5746 return len; 5747 } 5748 5749 /** 5750 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 5751 * convenience. Counts the number of code points in the range of text. 
5752 * @param text the characters to check 5753 * @param start the start of the range 5754 * @param limit the limit of the range 5755 * @return the number of code points in the range 5756 */ codePointCount(char[] text, int start, int limit)5757 public static int codePointCount(char[] text, int start, int limit) { 5758 if (start < 0 || limit < start || limit > text.length) { 5759 throw new IndexOutOfBoundsException("start (" + start + 5760 ") or limit (" + limit + 5761 ") invalid or out of range 0, " + text.length); 5762 } 5763 5764 int len = limit - start; 5765 while (limit > start) { 5766 char ch = text[--limit]; 5767 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 5768 ch = text[--limit]; 5769 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 5770 --len; 5771 break; 5772 } 5773 } 5774 } 5775 return len; 5776 } 5777 5778 /** 5779 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 5780 * method, for convenience. Adjusts the char index by a code point offset. 
5781 * @param text the characters to check 5782 * @param index the index to adjust 5783 * @param codePointOffset the number of code points by which to offset the index 5784 * @return the adjusted index 5785 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)5786 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 5787 if (index < 0 || index > text.length()) { 5788 throw new IndexOutOfBoundsException("index ( " + index + 5789 ") out of range 0, " + text.length()); 5790 } 5791 5792 if (codePointOffset < 0) { 5793 while (++codePointOffset <= 0) { 5794 char ch = text.charAt(--index); 5795 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 5796 ch = text.charAt(--index); 5797 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 5798 if (++codePointOffset > 0) { 5799 return index+1; 5800 } 5801 } 5802 } 5803 } 5804 } else { 5805 int limit = text.length(); 5806 while (--codePointOffset >= 0) { 5807 char ch = text.charAt(index++); 5808 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 5809 ch = text.charAt(index++); 5810 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 5811 if (--codePointOffset < 0) { 5812 return index-1; 5813 } 5814 } 5815 } 5816 } 5817 } 5818 5819 return index; 5820 } 5821 5822 /** 5823 * Equivalent to the 5824 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 5825 * method, for convenience. Adjusts the char index by a code point offset. 
5826 * @param text the characters to check 5827 * @param start the start of the range to check 5828 * @param count the length of the range to check 5829 * @param index the index to adjust 5830 * @param codePointOffset the number of code points by which to offset the index 5831 * @return the adjusted index 5832 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)5833 public static int offsetByCodePoints(char[] text, int start, int count, int index, 5834 int codePointOffset) { 5835 int limit = start + count; 5836 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 5837 throw new IndexOutOfBoundsException("index ( " + index + 5838 ") out of range " + start + 5839 ", " + limit + 5840 " in array 0, " + text.length); 5841 } 5842 5843 if (codePointOffset < 0) { 5844 while (++codePointOffset <= 0) { 5845 char ch = text[--index]; 5846 if (index < start) { 5847 throw new IndexOutOfBoundsException("index ( " + index + 5848 ") < start (" + start + 5849 ")"); 5850 } 5851 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 5852 ch = text[--index]; 5853 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 5854 if (++codePointOffset > 0) { 5855 return index+1; 5856 } 5857 } 5858 } 5859 } 5860 } else { 5861 while (--codePointOffset >= 0) { 5862 char ch = text[index++]; 5863 if (index > limit) { 5864 throw new IndexOutOfBoundsException("index ( " + index + 5865 ") > limit (" + limit + 5866 ")"); 5867 } 5868 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 5869 ch = text[index++]; 5870 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 5871 if (--codePointOffset < 0) { 5872 return index-1; 5873 } 5874 } 5875 } 5876 } 5877 } 5878 5879 return index; 5880 } 5881 5882 // private variables ------------------------------------------------- 5883 5884 /** 5885 * To get the last character out from a data type 5886 */ 5887 private static final int 
LAST_CHAR_MASK_ = 0xFFFF;

    //    /**
    //     * To get the last byte out from a data type
    //     */
    //    private static final int LAST_BYTE_MASK_ = 0xFF;
    //
    //    /**
    //     * Shift 16 bits
    //     */
    //    private static final int SHIFT_16_ = 16;
    //
    //    /**
    //     * Shift 24 bits
    //     */
    //    private static final int SHIFT_24_ = 24;
    //
    //    /**
    //     * Decimal radix
    //     */
    //    private static final int DECIMAL_RADIX_ = 10;

    /**
     * No break space code point (U+00A0)
     */
    private static final int NO_BREAK_SPACE_ = 0xA0;

    /**
     * Figure space code point (U+2007)
     */
    private static final int FIGURE_SPACE_ = 0x2007;

    /**
     * Narrow no break space code point (U+202F)
     */
    private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

    /**
     * Ideographic number zero code point (U+3007)
     */
    private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;

    /**
     * CJK ideograph for the number one (U+4E00)
     */
    private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;

    /**
     * CJK ideograph for the number two (U+4E8C)
     */
    private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;

    /**
     * CJK ideograph for the number three (U+4E09)
     */
    private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;

    /**
     * CJK ideograph for the number four (U+56DB)
     */
    private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;

    /**
     * CJK ideograph for the number five (U+4E94)
     */
    private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;

    /**
     * CJK ideograph for the number six (U+516D)
     */
    private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;

    /**
     * CJK ideograph for the number seven (U+4E03)
     */
    private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;

    /**
     * CJK ideograph for the number eight (U+516B)
     */
    private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;

    /**
     * CJK ideograph for the number nine (U+4E5D).
     * (The identifier keeps its historical "NINETH" spelling because it is
     * referenced elsewhere in this class.)
     */
    private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

    /**
     * Application Program command code point (U+009F)
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator code point (U+001F)
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete code point (U+007F)
     */
    private static final int DELETE_ = 0x007F;

    /**
     * Han digit characters: the "complex" (financial) digit forms plus the
     * ideographs for ten, hundred, thousand, ten thousand and hundred million.
     */
    private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
    private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
    private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
    private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
    private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
    private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
    private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
    private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
    private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
    private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
    private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
    private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
    private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
    private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
    private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
    private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
    // The "ten thousand" ideograph is U+842C. The previous value 0x824c had
    // two hex digits transposed and named U+824C, an unrelated ideograph with
    // no numeric meaning.
    private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x842c;
    private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;

    // private constructor -----------------------------------------------
    ///CLOVER:OFF
    /**
     * Private constructor to prevent instantiation
     */
    private UCharacter() {
    }
    ///CLOVER:ON
}