1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 ******************************************************************************* 5 * Copyright (C) 1996-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.lang; 11 12 import java.lang.ref.SoftReference; 13 import java.util.EnumSet; 14 import java.util.HashMap; 15 import java.util.Iterator; 16 import java.util.Locale; 17 import java.util.Map; 18 19 import com.ibm.icu.impl.CaseMapImpl; 20 import com.ibm.icu.impl.EmojiProps; 21 import com.ibm.icu.impl.IllegalIcuArgumentException; 22 import com.ibm.icu.impl.Trie2; 23 import com.ibm.icu.impl.UBiDiProps; 24 import com.ibm.icu.impl.UCaseProps; 25 import com.ibm.icu.impl.UCharacterName; 26 import com.ibm.icu.impl.UCharacterNameChoice; 27 import com.ibm.icu.impl.UCharacterProperty; 28 import com.ibm.icu.impl.UCharacterUtility; 29 import com.ibm.icu.impl.UPropertyAliases; 30 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 31 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; 32 import com.ibm.icu.text.BreakIterator; 33 import com.ibm.icu.text.Normalizer2; 34 import com.ibm.icu.util.RangeValueIterator; 35 import com.ibm.icu.util.ULocale; 36 import com.ibm.icu.util.ValueIterator; 37 import com.ibm.icu.util.VersionInfo; 38 39 /** 40 * {@icuenhanced java.lang.Character}.{@icu _usage_} 41 * 42 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 43 * These extensions provide support for more Unicode properties. 44 * Each ICU release supports the latest version of Unicode available at that time. 45 * 46 * <p>For some time before Java 5 added support for supplementary Unicode code points, 47 * The ICU UCharacter class and many other ICU classes already supported them. 
48 * Some UCharacter methods and constants were widened slightly differently than 49 * how the Character class methods and constants were widened later. 50 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 51 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 52 * 53 * <p>Code points are represented in these APIs using ints. While it would be 54 * more convenient in Java to have a separate primitive datatype for them, 55 * ints suffice in the meantime. 56 * 57 * <p>To use this class please add the jar file name icu4j.jar to the 58 * class path, since it contains data files which supply the information used 59 * by this file.<br> 60 * E.g. In Windows <br> 61 * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br> 62 * Otherwise, another method would be to copy the files uprops.dat and 63 * unames.icu from the icu4j source subdirectory 64 * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 65 * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>. 66 * 67 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 68 * properties, the main differences between UCharacter and Character are: 69 * <ul> 70 * <li> UCharacter is not designed to be a char wrapper and does not have 71 * APIs which involve management of that single char.<br> 72 * These include: 73 * <ul> 74 * <li> char charValue(), 75 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 76 * </ul> 77 * <li> UCharacter does not include Character APIs that are deprecated, nor 78 * does it include the Java-specific character information, such as 79 * boolean isJavaIdentifierPart(char ch). 80 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 81 * values '10' - '35'. UCharacter also does this in digit and 82 * getNumericValue, to adhere to the Java semantics of these 83 * methods. 
New methods unicodeDigit and 84 * getUnicodeNumericValue do not treat the above code points 85 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 86 * </ul> 87 * <p> 88 * Further detail on differences can be determined using the program 89 * <a href= 90 * "https://github.com/unicode-org/icu/blob/main/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/lang/UCharacterCompare.java"> 91 * com.ibm.icu.dev.test.lang.UCharacterCompare</a> 92 * <p> 93 * In addition to Java compatibility functions, which calculate derived properties, 94 * this API provides low-level access to the Unicode Character Database. 95 * <p> 96 * Unicode assigns each code point (not just assigned characters) values for 97 * many properties. 98 * Most of them are simple boolean flags, or constants from a small enumerated list. 99 * For some properties, values are strings or other relatively more complex types. 100 * <p> 101 * For more information see 102 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 103 * (http://www.unicode.org/ucd/) 104 * and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU 105 * User Guide chapter on Properties</a> 106 * (https://unicode-org.github.io/icu/userguide/strings/properties). 107 * <p> 108 * There are also functions that provide easy migration from C/POSIX functions 109 * like isblank(). Their use is generally discouraged because the C/POSIX 110 * standards do not define their semantics beyond the ASCII range, which means 111 * that different implementations exhibit very different behavior. 112 * Instead, Unicode properties should be used directly. 113 * <p> 114 * There are also only a few, broad C/POSIX character classes, and they tend 115 * to be used for conflicting purposes. 
For example, the "isalpha()" class 116 * is sometimes used to determine word boundaries, while a more sophisticated 117 * approach would at least distinguish initial letters from continuation 118 * characters (the latter including combining marks). 119 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 120 * Another example: There is no "istitle()" class for titlecase characters. 121 * <p> 122 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 123 * ICU implements them according to the Standard Recommendations in 124 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 125 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 126 * <p> 127 * API access for C/POSIX character classes is as follows: 128 * <pre>{@code 129 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 130 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 131 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 132 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 133 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 134 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 135 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 136 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 137 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 138 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 139 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 140 * - cntrl: getType(c)==CONTROL 141 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 142 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 143 * <p> 144 * The C/POSIX character classes are also available in UnicodeSet patterns, 145 * using patterns like [:graph:] or \p{graph}. 146 * 147 * <p>{@icunote} There are several ICU (and Java) whitespace functions. 
148 * Comparison:<ul> 149 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 150 * most of general categories "Z" (separators) + most whitespace ISO controls 151 * (including no-break spaces, but excluding IS1..IS4) 152 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 153 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 154 * 155 * <p> 156 * This class is not subclassable. 157 * 158 * @author Syn Wee Quek 159 * @stable ICU 2.1 160 * @see com.ibm.icu.lang.UCharacterEnums 161 */ 162 163 public final class UCharacter implements ECharacterCategory, ECharacterDirection 164 { 165 /** 166 * Lead surrogate bitmask 167 */ 168 private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00; 169 170 /** 171 * Trail surrogate bitmask 172 */ 173 private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00; 174 175 /** 176 * Lead surrogate bits 177 */ 178 private static final int LEAD_SURROGATE_BITS = 0xD800; 179 180 /** 181 * Trail surrogate bits 182 */ 183 private static final int TRAIL_SURROGATE_BITS = 0xDC00; 184 185 private static final int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000); 186 187 // public inner classes ---------------------------------------------- 188 189 /** 190 * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_} 191 * 192 * A family of character subsets representing the character blocks in the 193 * Unicode specification, generated from Unicode Data file Blocks.txt. 194 * Character blocks generally define characters used for a specific script 195 * or purpose. A character is contained by at most one Unicode block. 196 * 197 * {@icunote} All fields named XXX_ID are specific to ICU. 
198 * 199 * @stable ICU 2.4 200 */ 201 public static final class UnicodeBlock extends Character.Subset 202 { 203 // block id corresponding to icu4c ----------------------------------- 204 205 /** 206 * @stable ICU 2.4 207 */ 208 public static final int INVALID_CODE_ID = -1; 209 /** 210 * @stable ICU 2.4 211 */ 212 public static final int BASIC_LATIN_ID = 1; 213 /** 214 * @stable ICU 2.4 215 */ 216 public static final int LATIN_1_SUPPLEMENT_ID = 2; 217 /** 218 * @stable ICU 2.4 219 */ 220 public static final int LATIN_EXTENDED_A_ID = 3; 221 /** 222 * @stable ICU 2.4 223 */ 224 public static final int LATIN_EXTENDED_B_ID = 4; 225 /** 226 * @stable ICU 2.4 227 */ 228 public static final int IPA_EXTENSIONS_ID = 5; 229 /** 230 * @stable ICU 2.4 231 */ 232 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 233 /** 234 * @stable ICU 2.4 235 */ 236 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 237 /** 238 * Unicode 3.2 renames this block to "Greek and Coptic". 239 * @stable ICU 2.4 240 */ 241 public static final int GREEK_ID = 8; 242 /** 243 * @stable ICU 2.4 244 */ 245 public static final int CYRILLIC_ID = 9; 246 /** 247 * @stable ICU 2.4 248 */ 249 public static final int ARMENIAN_ID = 10; 250 /** 251 * @stable ICU 2.4 252 */ 253 public static final int HEBREW_ID = 11; 254 /** 255 * @stable ICU 2.4 256 */ 257 public static final int ARABIC_ID = 12; 258 /** 259 * @stable ICU 2.4 260 */ 261 public static final int SYRIAC_ID = 13; 262 /** 263 * @stable ICU 2.4 264 */ 265 public static final int THAANA_ID = 14; 266 /** 267 * @stable ICU 2.4 268 */ 269 public static final int DEVANAGARI_ID = 15; 270 /** 271 * @stable ICU 2.4 272 */ 273 public static final int BENGALI_ID = 16; 274 /** 275 * @stable ICU 2.4 276 */ 277 public static final int GURMUKHI_ID = 17; 278 /** 279 * @stable ICU 2.4 280 */ 281 public static final int GUJARATI_ID = 18; 282 /** 283 * @stable ICU 2.4 284 */ 285 public static final int ORIYA_ID = 19; 286 /** 287 * @stable ICU 2.4 288 */ 
289 public static final int TAMIL_ID = 20; 290 /** 291 * @stable ICU 2.4 292 */ 293 public static final int TELUGU_ID = 21; 294 /** 295 * @stable ICU 2.4 296 */ 297 public static final int KANNADA_ID = 22; 298 /** 299 * @stable ICU 2.4 300 */ 301 public static final int MALAYALAM_ID = 23; 302 /** 303 * @stable ICU 2.4 304 */ 305 public static final int SINHALA_ID = 24; 306 /** 307 * @stable ICU 2.4 308 */ 309 public static final int THAI_ID = 25; 310 /** 311 * @stable ICU 2.4 312 */ 313 public static final int LAO_ID = 26; 314 /** 315 * @stable ICU 2.4 316 */ 317 public static final int TIBETAN_ID = 27; 318 /** 319 * @stable ICU 2.4 320 */ 321 public static final int MYANMAR_ID = 28; 322 /** 323 * @stable ICU 2.4 324 */ 325 public static final int GEORGIAN_ID = 29; 326 /** 327 * @stable ICU 2.4 328 */ 329 public static final int HANGUL_JAMO_ID = 30; 330 /** 331 * @stable ICU 2.4 332 */ 333 public static final int ETHIOPIC_ID = 31; 334 /** 335 * @stable ICU 2.4 336 */ 337 public static final int CHEROKEE_ID = 32; 338 /** 339 * @stable ICU 2.4 340 */ 341 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 342 /** 343 * @stable ICU 2.4 344 */ 345 public static final int OGHAM_ID = 34; 346 /** 347 * @stable ICU 2.4 348 */ 349 public static final int RUNIC_ID = 35; 350 /** 351 * @stable ICU 2.4 352 */ 353 public static final int KHMER_ID = 36; 354 /** 355 * @stable ICU 2.4 356 */ 357 public static final int MONGOLIAN_ID = 37; 358 /** 359 * @stable ICU 2.4 360 */ 361 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 362 /** 363 * @stable ICU 2.4 364 */ 365 public static final int GREEK_EXTENDED_ID = 39; 366 /** 367 * @stable ICU 2.4 368 */ 369 public static final int GENERAL_PUNCTUATION_ID = 40; 370 /** 371 * @stable ICU 2.4 372 */ 373 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 374 /** 375 * @stable ICU 2.4 376 */ 377 public static final int CURRENCY_SYMBOLS_ID = 42; 378 /** 379 * Unicode 3.2 renames this block to "Combining 
Diacritical Marks for 380 * Symbols". 381 * @stable ICU 2.4 382 */ 383 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 384 /** 385 * @stable ICU 2.4 386 */ 387 public static final int LETTERLIKE_SYMBOLS_ID = 44; 388 /** 389 * @stable ICU 2.4 390 */ 391 public static final int NUMBER_FORMS_ID = 45; 392 /** 393 * @stable ICU 2.4 394 */ 395 public static final int ARROWS_ID = 46; 396 /** 397 * @stable ICU 2.4 398 */ 399 public static final int MATHEMATICAL_OPERATORS_ID = 47; 400 /** 401 * @stable ICU 2.4 402 */ 403 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 404 /** 405 * @stable ICU 2.4 406 */ 407 public static final int CONTROL_PICTURES_ID = 49; 408 /** 409 * @stable ICU 2.4 410 */ 411 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 412 /** 413 * @stable ICU 2.4 414 */ 415 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 416 /** 417 * @stable ICU 2.4 418 */ 419 public static final int BOX_DRAWING_ID = 52; 420 /** 421 * @stable ICU 2.4 422 */ 423 public static final int BLOCK_ELEMENTS_ID = 53; 424 /** 425 * @stable ICU 2.4 426 */ 427 public static final int GEOMETRIC_SHAPES_ID = 54; 428 /** 429 * @stable ICU 2.4 430 */ 431 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 432 /** 433 * @stable ICU 2.4 434 */ 435 public static final int DINGBATS_ID = 56; 436 /** 437 * @stable ICU 2.4 438 */ 439 public static final int BRAILLE_PATTERNS_ID = 57; 440 /** 441 * @stable ICU 2.4 442 */ 443 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 444 /** 445 * @stable ICU 2.4 446 */ 447 public static final int KANGXI_RADICALS_ID = 59; 448 /** 449 * @stable ICU 2.4 450 */ 451 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 452 /** 453 * @stable ICU 2.4 454 */ 455 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 456 /** 457 * @stable ICU 2.4 458 */ 459 public static final int HIRAGANA_ID = 62; 460 /** 461 * @stable ICU 2.4 462 */ 463 public static final int KATAKANA_ID = 63; 464 /** 465 * 
@stable ICU 2.4 466 */ 467 public static final int BOPOMOFO_ID = 64; 468 /** 469 * @stable ICU 2.4 470 */ 471 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 472 /** 473 * @stable ICU 2.4 474 */ 475 public static final int KANBUN_ID = 66; 476 /** 477 * @stable ICU 2.4 478 */ 479 public static final int BOPOMOFO_EXTENDED_ID = 67; 480 /** 481 * @stable ICU 2.4 482 */ 483 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 484 /** 485 * @stable ICU 2.4 486 */ 487 public static final int CJK_COMPATIBILITY_ID = 69; 488 /** 489 * @stable ICU 2.4 490 */ 491 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 492 /** 493 * @stable ICU 2.4 494 */ 495 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 496 /** 497 * @stable ICU 2.4 498 */ 499 public static final int YI_SYLLABLES_ID = 72; 500 /** 501 * @stable ICU 2.4 502 */ 503 public static final int YI_RADICALS_ID = 73; 504 /** 505 * @stable ICU 2.4 506 */ 507 public static final int HANGUL_SYLLABLES_ID = 74; 508 /** 509 * @stable ICU 2.4 510 */ 511 public static final int HIGH_SURROGATES_ID = 75; 512 /** 513 * @stable ICU 2.4 514 */ 515 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 516 /** 517 * @stable ICU 2.4 518 */ 519 public static final int LOW_SURROGATES_ID = 77; 520 /** 521 * Same as public static final int PRIVATE_USE. 522 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 523 * and multiple code point ranges had this block. 524 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 525 * and adds separate blocks for the supplementary PUAs. 526 * @stable ICU 2.4 527 */ 528 public static final int PRIVATE_USE_AREA_ID = 78; 529 /** 530 * Same as public static final int PRIVATE_USE_AREA. 531 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 532 * and multiple code point ranges had this block. 
533 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 534 * and adds separate blocks for the supplementary PUAs. 535 * @stable ICU 2.4 536 */ 537 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 538 /** 539 * @stable ICU 2.4 540 */ 541 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 542 /** 543 * @stable ICU 2.4 544 */ 545 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 546 /** 547 * @stable ICU 2.4 548 */ 549 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 550 /** 551 * @stable ICU 2.4 552 */ 553 public static final int COMBINING_HALF_MARKS_ID = 82; 554 /** 555 * @stable ICU 2.4 556 */ 557 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 558 /** 559 * @stable ICU 2.4 560 */ 561 public static final int SMALL_FORM_VARIANTS_ID = 84; 562 /** 563 * @stable ICU 2.4 564 */ 565 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 566 /** 567 * @stable ICU 2.4 568 */ 569 public static final int SPECIALS_ID = 86; 570 /** 571 * @stable ICU 2.4 572 */ 573 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 574 /** 575 * @stable ICU 2.4 576 */ 577 public static final int OLD_ITALIC_ID = 88; 578 /** 579 * @stable ICU 2.4 580 */ 581 public static final int GOTHIC_ID = 89; 582 /** 583 * @stable ICU 2.4 584 */ 585 public static final int DESERET_ID = 90; 586 /** 587 * @stable ICU 2.4 588 */ 589 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 590 /** 591 * @stable ICU 2.4 592 */ 593 public static final int MUSICAL_SYMBOLS_ID = 92; 594 /** 595 * @stable ICU 2.4 596 */ 597 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 598 /** 599 * @stable ICU 2.4 600 */ 601 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 602 /** 603 * @stable ICU 2.4 604 */ 605 public static final int 606 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 607 /** 608 * @stable ICU 2.4 609 */ 610 public static final int TAGS_ID = 96; 611 612 // New blocks in Unicode 
3.2 613 614 /** 615 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 616 * @stable ICU 2.4 617 */ 618 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 619 /** 620 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 621 * @stable ICU 3.0 622 */ 623 624 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 625 /** 626 * @stable ICU 2.4 627 */ 628 public static final int TAGALOG_ID = 98; 629 /** 630 * @stable ICU 2.4 631 */ 632 public static final int HANUNOO_ID = 99; 633 /** 634 * @stable ICU 2.4 635 */ 636 public static final int BUHID_ID = 100; 637 /** 638 * @stable ICU 2.4 639 */ 640 public static final int TAGBANWA_ID = 101; 641 /** 642 * @stable ICU 2.4 643 */ 644 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 645 /** 646 * @stable ICU 2.4 647 */ 648 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 649 /** 650 * @stable ICU 2.4 651 */ 652 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 653 /** 654 * @stable ICU 2.4 655 */ 656 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 657 /** 658 * @stable ICU 2.4 659 */ 660 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 661 /** 662 * @stable ICU 2.4 663 */ 664 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 665 /** 666 * @stable ICU 2.4 667 */ 668 public static final int VARIATION_SELECTORS_ID = 108; 669 /** 670 * @stable ICU 2.4 671 */ 672 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 673 /** 674 * @stable ICU 2.4 675 */ 676 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 677 678 /** 679 * @stable ICU 2.6 680 */ 681 public static final int LIMBU_ID = 111; /*[1900]*/ 682 /** 683 * @stable ICU 2.6 684 */ 685 public static final int TAI_LE_ID = 112; /*[1950]*/ 686 /** 687 * @stable ICU 2.6 688 */ 689 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 690 /** 691 * @stable ICU 2.6 692 */ 693 public static 
final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 694 /** 695 * @stable ICU 2.6 696 */ 697 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 698 /** 699 * @stable ICU 2.6 700 */ 701 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 702 /** 703 * @stable ICU 2.6 704 */ 705 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 706 /** 707 * @stable ICU 2.6 708 */ 709 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 710 /** 711 * @stable ICU 2.6 712 */ 713 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 714 /** 715 * @stable ICU 2.6 716 */ 717 public static final int UGARITIC_ID = 120; /*[10380]*/ 718 /** 719 * @stable ICU 2.6 720 */ 721 public static final int SHAVIAN_ID = 121; /*[10450]*/ 722 /** 723 * @stable ICU 2.6 724 */ 725 public static final int OSMANYA_ID = 122; /*[10480]*/ 726 /** 727 * @stable ICU 2.6 728 */ 729 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 730 /** 731 * @stable ICU 2.6 732 */ 733 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 734 /** 735 * @stable ICU 2.6 736 */ 737 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 738 739 /* New blocks in Unicode 4.1 */ 740 741 /** 742 * @stable ICU 3.4 743 */ 744 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 745 746 /** 747 * @stable ICU 3.4 748 */ 749 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 750 751 /** 752 * @stable ICU 3.4 753 */ 754 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 755 756 /** 757 * @stable ICU 3.4 758 */ 759 public static final int BUGINESE_ID = 129; /*[1A00]*/ 760 761 /** 762 * @stable ICU 3.4 763 */ 764 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 765 766 /** 767 * @stable ICU 3.4 768 */ 769 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 770 771 /** 772 * @stable ICU 3.4 773 */ 774 public static final int 
COPTIC_ID = 132; /*[2C80]*/ 775 776 /** 777 * @stable ICU 3.4 778 */ 779 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 780 781 /** 782 * @stable ICU 3.4 783 */ 784 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 785 786 /** 787 * @stable ICU 3.4 788 */ 789 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 790 791 /** 792 * @stable ICU 3.4 793 */ 794 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 795 796 /** 797 * @stable ICU 3.4 798 */ 799 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 800 801 /** 802 * @stable ICU 3.4 803 */ 804 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 805 806 /** 807 * @stable ICU 3.4 808 */ 809 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 810 811 /** 812 * @stable ICU 3.4 813 */ 814 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 815 816 /** 817 * @stable ICU 3.4 818 */ 819 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 820 821 /** 822 * @stable ICU 3.4 823 */ 824 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 825 826 /** 827 * @stable ICU 3.4 828 */ 829 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 830 831 /** 832 * @stable ICU 3.4 833 */ 834 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 835 836 /** 837 * @stable ICU 3.4 838 */ 839 public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 840 841 /* New blocks in Unicode 5.0 */ 842 843 /** 844 * @stable ICU 3.6 845 */ 846 public static final int NKO_ID = 146; /*[07C0]*/ 847 /** 848 * @stable ICU 3.6 849 */ 850 public static final int BALINESE_ID = 147; /*[1B00]*/ 851 /** 852 * @stable ICU 3.6 853 */ 854 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 855 /** 856 * @stable ICU 3.6 857 */ 858 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 859 /** 860 * @stable ICU 3.6 861 */ 862 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 863 /** 864 * @stable ICU 3.6 865 */ 866 public 
static final int PHOENICIAN_ID = 151; /*[10900]*/ 867 /** 868 * @stable ICU 3.6 869 */ 870 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 871 /** 872 * @stable ICU 3.6 873 */ 874 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 875 /** 876 * @stable ICU 3.6 877 */ 878 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 879 880 /** 881 * @stable ICU 4.0 882 */ 883 public static final int SUNDANESE_ID = 155; /* [1B80] */ 884 885 /** 886 * @stable ICU 4.0 887 */ 888 public static final int LEPCHA_ID = 156; /* [1C00] */ 889 890 /** 891 * @stable ICU 4.0 892 */ 893 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 894 895 /** 896 * @stable ICU 4.0 897 */ 898 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 899 900 /** 901 * @stable ICU 4.0 902 */ 903 public static final int VAI_ID = 159; /* [A500] */ 904 905 /** 906 * @stable ICU 4.0 907 */ 908 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 909 910 /** 911 * @stable ICU 4.0 912 */ 913 public static final int SAURASHTRA_ID = 161; /* [A880] */ 914 915 /** 916 * @stable ICU 4.0 917 */ 918 public static final int KAYAH_LI_ID = 162; /* [A900] */ 919 920 /** 921 * @stable ICU 4.0 922 */ 923 public static final int REJANG_ID = 163; /* [A930] */ 924 925 /** 926 * @stable ICU 4.0 927 */ 928 public static final int CHAM_ID = 164; /* [AA00] */ 929 930 /** 931 * @stable ICU 4.0 932 */ 933 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 934 935 /** 936 * @stable ICU 4.0 937 */ 938 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 939 940 /** 941 * @stable ICU 4.0 942 */ 943 public static final int LYCIAN_ID = 167; /* [10280] */ 944 945 /** 946 * @stable ICU 4.0 947 */ 948 public static final int CARIAN_ID = 168; /* [102A0] */ 949 950 /** 951 * @stable ICU 4.0 952 */ 953 public static final int LYDIAN_ID = 169; /* [10920] */ 954 955 /** 956 * @stable ICU 4.0 957 */ 958 public static final int 
MAHJONG_TILES_ID = 170; /* [1F000] */ 959 960 /** 961 * @stable ICU 4.0 962 */ 963 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 964 965 /* New blocks in Unicode 5.2 */ 966 967 /** @stable ICU 4.4 */ 968 public static final int SAMARITAN_ID = 172; /*[0800]*/ 969 /** @stable ICU 4.4 */ 970 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 971 /** @stable ICU 4.4 */ 972 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 973 /** @stable ICU 4.4 */ 974 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 975 /** @stable ICU 4.4 */ 976 public static final int LISU_ID = 176; /*[A4D0]*/ 977 /** @stable ICU 4.4 */ 978 public static final int BAMUM_ID = 177; /*[A6A0]*/ 979 /** @stable ICU 4.4 */ 980 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 981 /** @stable ICU 4.4 */ 982 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 983 /** @stable ICU 4.4 */ 984 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 985 /** @stable ICU 4.4 */ 986 public static final int JAVANESE_ID = 181; /*[A980]*/ 987 /** @stable ICU 4.4 */ 988 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 989 /** @stable ICU 4.4 */ 990 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 991 /** @stable ICU 4.4 */ 992 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 993 /** @stable ICU 4.4 */ 994 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 995 /** @stable ICU 4.4 */ 996 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 997 /** @stable ICU 4.4 */ 998 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 999 /** @stable ICU 4.4 */ 1000 public static final int AVESTAN_ID = 188; /*[10B00]*/ 1001 /** @stable ICU 4.4 */ 1002 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 1003 /** @stable ICU 4.4 */ 1004 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 1005 /** @stable ICU 4.4 */ 1006 
public static final int OLD_TURKIC_ID = 191; /*[10C00]*/ 1007 /** @stable ICU 4.4 */ 1008 public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ 1009 /** @stable ICU 4.4 */ 1010 public static final int KAITHI_ID = 193; /*[11080]*/ 1011 /** @stable ICU 4.4 */ 1012 public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ 1013 /** @stable ICU 4.4 */ 1014 public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ 1015 /** @stable ICU 4.4 */ 1016 public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ 1017 /** @stable ICU 4.4 */ 1018 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ 1019 1020 /* New blocks in Unicode 6.0 */ 1021 1022 /** @stable ICU 4.6 */ 1023 public static final int MANDAIC_ID = 198; /*[0840]*/ 1024 /** @stable ICU 4.6 */ 1025 public static final int BATAK_ID = 199; /*[1BC0]*/ 1026 /** @stable ICU 4.6 */ 1027 public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/ 1028 /** @stable ICU 4.6 */ 1029 public static final int BRAHMI_ID = 201; /*[11000]*/ 1030 /** @stable ICU 4.6 */ 1031 public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/ 1032 /** @stable ICU 4.6 */ 1033 public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/ 1034 /** @stable ICU 4.6 */ 1035 public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/ 1036 /** @stable ICU 4.6 */ 1037 public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/ 1038 /** @stable ICU 4.6 */ 1039 public static final int EMOTICONS_ID = 206; /*[1F600]*/ 1040 /** @stable ICU 4.6 */ 1041 public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/ 1042 /** @stable ICU 4.6 */ 1043 public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/ 1044 /** @stable ICU 4.6 */ 1045 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/ 1046 1047 /* New blocks in Unicode 6.1 */ 1048 1049 /** @stable ICU 49 */ 1050 public static final int 
ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/ 1051 /** @stable ICU 49 */ 1052 public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/ 1053 /** @stable ICU 49 */ 1054 public static final int CHAKMA_ID = 212; /*[11100]*/ 1055 /** @stable ICU 49 */ 1056 public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/ 1057 /** @stable ICU 49 */ 1058 public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/ 1059 /** @stable ICU 49 */ 1060 public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/ 1061 /** @stable ICU 49 */ 1062 public static final int MIAO_ID = 216; /*[16F00]*/ 1063 /** @stable ICU 49 */ 1064 public static final int SHARADA_ID = 217; /*[11180]*/ 1065 /** @stable ICU 49 */ 1066 public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/ 1067 /** @stable ICU 49 */ 1068 public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/ 1069 /** @stable ICU 49 */ 1070 public static final int TAKRI_ID = 220; /*[11680]*/ 1071 1072 /* New blocks in Unicode 7.0 */ 1073 1074 /** @stable ICU 54 */ 1075 public static final int BASSA_VAH_ID = 221; /*[16AD0]*/ 1076 /** @stable ICU 54 */ 1077 public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/ 1078 /** @stable ICU 54 */ 1079 public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/ 1080 /** @stable ICU 54 */ 1081 public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/ 1082 /** @stable ICU 54 */ 1083 public static final int DUPLOYAN_ID = 225; /*[1BC00]*/ 1084 /** @stable ICU 54 */ 1085 public static final int ELBASAN_ID = 226; /*[10500]*/ 1086 /** @stable ICU 54 */ 1087 public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/ 1088 /** @stable ICU 54 */ 1089 public static final int GRANTHA_ID = 228; /*[11300]*/ 1090 /** @stable ICU 54 */ 1091 public static final int KHOJKI_ID = 229; /*[11200]*/ 1092 /** @stable ICU 54 */ 1093 public static final int KHUDAWADI_ID = 230; /*[112B0]*/ 1094 /** @stable ICU 54 */ 1095 public 
static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/ 1096 /** @stable ICU 54 */ 1097 public static final int LINEAR_A_ID = 232; /*[10600]*/ 1098 /** @stable ICU 54 */ 1099 public static final int MAHAJANI_ID = 233; /*[11150]*/ 1100 /** @stable ICU 54 */ 1101 public static final int MANICHAEAN_ID = 234; /*[10AC0]*/ 1102 /** @stable ICU 54 */ 1103 public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/ 1104 /** @stable ICU 54 */ 1105 public static final int MODI_ID = 236; /*[11600]*/ 1106 /** @stable ICU 54 */ 1107 public static final int MRO_ID = 237; /*[16A40]*/ 1108 /** @stable ICU 54 */ 1109 public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/ 1110 /** @stable ICU 54 */ 1111 public static final int NABATAEAN_ID = 239; /*[10880]*/ 1112 /** @stable ICU 54 */ 1113 public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/ 1114 /** @stable ICU 54 */ 1115 public static final int OLD_PERMIC_ID = 241; /*[10350]*/ 1116 /** @stable ICU 54 */ 1117 public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/ 1118 /** @stable ICU 54 */ 1119 public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/ 1120 /** @stable ICU 54 */ 1121 public static final int PALMYRENE_ID = 244; /*[10860]*/ 1122 /** @stable ICU 54 */ 1123 public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/ 1124 /** @stable ICU 54 */ 1125 public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/ 1126 /** @stable ICU 54 */ 1127 public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/ 1128 /** @stable ICU 54 */ 1129 public static final int SIDDHAM_ID = 248; /*[11580]*/ 1130 /** @stable ICU 54 */ 1131 public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/ 1132 /** @stable ICU 54 */ 1133 public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/ 1134 /** @stable ICU 54 */ 1135 public static final int TIRHUTA_ID = 251; /*[11480]*/ 1136 /** @stable ICU 54 */ 1137 public static final int WARANG_CITI_ID = 252; /*[118A0]*/ 1138 1139 /* New blocks in 
Unicode 8.0 */ 1140 1141 /** @stable ICU 56 */ 1142 public static final int AHOM_ID = 253; /*[11700]*/ 1143 /** @stable ICU 56 */ 1144 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 1145 /** @stable ICU 56 */ 1146 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 1147 /** @stable ICU 56 */ 1148 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 1149 /** @stable ICU 56 */ 1150 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 1151 /** @stable ICU 56 */ 1152 public static final int HATRAN_ID = 258; /*[108E0]*/ 1153 /** @stable ICU 56 */ 1154 public static final int MULTANI_ID = 259; /*[11280]*/ 1155 /** @stable ICU 56 */ 1156 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 1157 /** @stable ICU 56 */ 1158 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 1159 /** @stable ICU 56 */ 1160 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 1161 1162 /* New blocks in Unicode 9.0 */ 1163 1164 /** @stable ICU 58 */ 1165 public static final int ADLAM_ID = 263; /*[1E900]*/ 1166 /** @stable ICU 58 */ 1167 public static final int BHAIKSUKI_ID = 264; /*[11C00]*/ 1168 /** @stable ICU 58 */ 1169 public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/ 1170 /** @stable ICU 58 */ 1171 public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/ 1172 /** @stable ICU 58 */ 1173 public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/ 1174 /** @stable ICU 58 */ 1175 public static final int MARCHEN_ID = 268; /*[11C70]*/ 1176 /** @stable ICU 58 */ 1177 public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/ 1178 /** @stable ICU 58 */ 1179 public static final int NEWA_ID = 270; /*[11400]*/ 1180 /** @stable ICU 58 */ 1181 public static final int OSAGE_ID = 271; /*[104B0]*/ 1182 /** @stable ICU 58 */ 1183 public static final int TANGUT_ID = 272; /*[17000]*/ 1184 /** @stable ICU 58 */ 1185 public 
static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/ 1186 1187 // New blocks in Unicode 10.0 1188 1189 /** @stable ICU 60 */ 1190 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/ 1191 /** @stable ICU 60 */ 1192 public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/ 1193 /** @stable ICU 60 */ 1194 public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/ 1195 /** @stable ICU 60 */ 1196 public static final int NUSHU_ID = 277; /*[1B170]*/ 1197 /** @stable ICU 60 */ 1198 public static final int SOYOMBO_ID = 278; /*[11A50]*/ 1199 /** @stable ICU 60 */ 1200 public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/ 1201 /** @stable ICU 60 */ 1202 public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/ 1203 1204 // New blocks in Unicode 11.0 1205 1206 /** @stable ICU 62 */ 1207 public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/ 1208 /** @stable ICU 62 */ 1209 public static final int DOGRA_ID = 282; /*[11800]*/ 1210 /** @stable ICU 62 */ 1211 public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/ 1212 /** @stable ICU 62 */ 1213 public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/ 1214 /** @stable ICU 62 */ 1215 public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/ 1216 /** @stable ICU 62 */ 1217 public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/ 1218 /** @stable ICU 62 */ 1219 public static final int MAKASAR_ID = 287; /*[11EE0]*/ 1220 /** @stable ICU 62 */ 1221 public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/ 1222 /** @stable ICU 62 */ 1223 public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/ 1224 /** @stable ICU 62 */ 1225 public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/ 1226 /** @stable ICU 62 */ 1227 public static final int SOGDIAN_ID = 291; /*[10F30]*/ 1228 1229 // New blocks in Unicode 12.0 1230 1231 /** @stable ICU 64 */ 1232 public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/ 1233 /** @stable ICU 64 */ 1234 
public static final int ELYMAIC_ID = 293; /*[10FE0]*/ 1235 /** @stable ICU 64 */ 1236 public static final int NANDINAGARI_ID = 294; /*[119A0]*/ 1237 /** @stable ICU 64 */ 1238 public static final int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/ 1239 /** @stable ICU 64 */ 1240 public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/ 1241 /** @stable ICU 64 */ 1242 public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/ 1243 /** @stable ICU 64 */ 1244 public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/ 1245 /** @stable ICU 64 */ 1246 public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/ 1247 /** @stable ICU 64 */ 1248 public static final int WANCHO_ID = 300; /*[1E2C0]*/ 1249 1250 // New blocks in Unicode 13.0 1251 1252 /** @stable ICU 66 */ 1253 public static final int CHORASMIAN_ID = 301; /*[10FB0]*/ 1254 /** @stable ICU 66 */ 1255 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/ 1256 /** @stable ICU 66 */ 1257 public static final int DIVES_AKURU_ID = 303; /*[11900]*/ 1258 /** @stable ICU 66 */ 1259 public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/ 1260 /** @stable ICU 66 */ 1261 public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/ 1262 /** @stable ICU 66 */ 1263 public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/ 1264 /** @stable ICU 66 */ 1265 public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/ 1266 /** @stable ICU 66 */ 1267 public static final int YEZIDI_ID = 308; /*[10E80]*/ 1268 1269 // New blocks in Unicode 14.0 1270 1271 /** @stable ICU 70 */ 1272 public static final int ARABIC_EXTENDED_B_ID = 309; /*[0870]*/ 1273 /** @stable ICU 70 */ 1274 public static final int CYPRO_MINOAN_ID = 310; /*[12F90]*/ 1275 /** @stable ICU 70 */ 1276 public static final int ETHIOPIC_EXTENDED_B_ID = 311; /*[1E7E0]*/ 1277 /** @stable ICU 70 */ 1278 public static final int KANA_EXTENDED_B_ID = 312; /*[1AFF0]*/ 1279 /** @stable ICU 
70 */ 1280 public static final int LATIN_EXTENDED_F_ID = 313; /*[10780]*/ 1281 /** @stable ICU 70 */ 1282 public static final int LATIN_EXTENDED_G_ID = 314; /*[1DF00]*/ 1283 /** @stable ICU 70 */ 1284 public static final int OLD_UYGHUR_ID = 315; /*[10F70]*/ 1285 /** @stable ICU 70 */ 1286 public static final int TANGSA_ID = 316; /*[16A70]*/ 1287 /** @stable ICU 70 */ 1288 public static final int TOTO_ID = 317; /*[1E290]*/ 1289 /** @stable ICU 70 */ 1290 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID = 318; /*[11AB0]*/ 1291 /** @stable ICU 70 */ 1292 public static final int VITHKUQI_ID = 319; /*[10570]*/ 1293 /** @stable ICU 70 */ 1294 public static final int ZNAMENNY_MUSICAL_NOTATION_ID = 320; /*[1CF00]*/ 1295 1296 // New blocks in Unicode 15.0 1297 1298 /** @stable ICU 72 */ 1299 public static final int ARABIC_EXTENDED_C_ID = 321; /*[10EC0]*/ 1300 /** @stable ICU 72 */ 1301 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID = 322; /*[31350]*/ 1302 /** @stable ICU 72 */ 1303 public static final int CYRILLIC_EXTENDED_D_ID = 323; /*[1E030]*/ 1304 /** @stable ICU 72 */ 1305 public static final int DEVANAGARI_EXTENDED_A_ID = 324; /*[11B00]*/ 1306 /** @stable ICU 72 */ 1307 public static final int KAKTOVIK_NUMERALS_ID = 325; /*[1D2C0]*/ 1308 /** @stable ICU 72 */ 1309 public static final int KAWI_ID = 326; /*[11F00]*/ 1310 /** @stable ICU 72 */ 1311 public static final int NAG_MUNDARI_ID = 327; /*[1E4D0]*/ 1312 1313 // New block in Unicode 15.1 1314 1315 /** @stable ICU 74 */ 1316 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_ID = 328; /*[2EBF0]*/ 1317 1318 /** 1319 * One more than the highest normal UnicodeBlock value. 1320 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 1321 * 1322 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
1323 */ 1324 @Deprecated 1325 public static final int COUNT = 329; 1326 1327 // blocks objects --------------------------------------------------- 1328 1329 /** 1330 * Array of UnicodeBlocks, for easy access in getInstance(int) 1331 */ 1332 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1333 1334 /** 1335 * @stable ICU 2.6 1336 */ 1337 public static final UnicodeBlock NO_BLOCK 1338 = new UnicodeBlock("NO_BLOCK", 0); 1339 1340 /** 1341 * @stable ICU 2.4 1342 */ 1343 public static final UnicodeBlock BASIC_LATIN 1344 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1345 /** 1346 * @stable ICU 2.4 1347 */ 1348 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1349 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1350 /** 1351 * @stable ICU 2.4 1352 */ 1353 public static final UnicodeBlock LATIN_EXTENDED_A 1354 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1355 /** 1356 * @stable ICU 2.4 1357 */ 1358 public static final UnicodeBlock LATIN_EXTENDED_B 1359 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1360 /** 1361 * @stable ICU 2.4 1362 */ 1363 public static final UnicodeBlock IPA_EXTENSIONS 1364 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1365 /** 1366 * @stable ICU 2.4 1367 */ 1368 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1369 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1370 /** 1371 * @stable ICU 2.4 1372 */ 1373 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1374 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1375 /** 1376 * Unicode 3.2 renames this block to "Greek and Coptic". 
1377 * @stable ICU 2.4 1378 */ 1379 public static final UnicodeBlock GREEK 1380 = new UnicodeBlock("GREEK", GREEK_ID); 1381 /** 1382 * @stable ICU 2.4 1383 */ 1384 public static final UnicodeBlock CYRILLIC 1385 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1386 /** 1387 * @stable ICU 2.4 1388 */ 1389 public static final UnicodeBlock ARMENIAN 1390 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1391 /** 1392 * @stable ICU 2.4 1393 */ 1394 public static final UnicodeBlock HEBREW 1395 = new UnicodeBlock("HEBREW", HEBREW_ID); 1396 /** 1397 * @stable ICU 2.4 1398 */ 1399 public static final UnicodeBlock ARABIC 1400 = new UnicodeBlock("ARABIC", ARABIC_ID); 1401 /** 1402 * @stable ICU 2.4 1403 */ 1404 public static final UnicodeBlock SYRIAC 1405 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1406 /** 1407 * @stable ICU 2.4 1408 */ 1409 public static final UnicodeBlock THAANA 1410 = new UnicodeBlock("THAANA", THAANA_ID); 1411 /** 1412 * @stable ICU 2.4 1413 */ 1414 public static final UnicodeBlock DEVANAGARI 1415 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1416 /** 1417 * @stable ICU 2.4 1418 */ 1419 public static final UnicodeBlock BENGALI 1420 = new UnicodeBlock("BENGALI", BENGALI_ID); 1421 /** 1422 * @stable ICU 2.4 1423 */ 1424 public static final UnicodeBlock GURMUKHI 1425 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1426 /** 1427 * @stable ICU 2.4 1428 */ 1429 public static final UnicodeBlock GUJARATI 1430 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1431 /** 1432 * @stable ICU 2.4 1433 */ 1434 public static final UnicodeBlock ORIYA 1435 = new UnicodeBlock("ORIYA", ORIYA_ID); 1436 /** 1437 * @stable ICU 2.4 1438 */ 1439 public static final UnicodeBlock TAMIL 1440 = new UnicodeBlock("TAMIL", TAMIL_ID); 1441 /** 1442 * @stable ICU 2.4 1443 */ 1444 public static final UnicodeBlock TELUGU 1445 = new UnicodeBlock("TELUGU", TELUGU_ID); 1446 /** 1447 * @stable ICU 2.4 1448 */ 1449 public static final UnicodeBlock KANNADA 1450 = new UnicodeBlock("KANNADA", KANNADA_ID); 1451 /** 
1452 * @stable ICU 2.4 1453 */ 1454 public static final UnicodeBlock MALAYALAM 1455 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1456 /** 1457 * @stable ICU 2.4 1458 */ 1459 public static final UnicodeBlock SINHALA 1460 = new UnicodeBlock("SINHALA", SINHALA_ID); 1461 /** 1462 * @stable ICU 2.4 1463 */ 1464 public static final UnicodeBlock THAI 1465 = new UnicodeBlock("THAI", THAI_ID); 1466 /** 1467 * @stable ICU 2.4 1468 */ 1469 public static final UnicodeBlock LAO 1470 = new UnicodeBlock("LAO", LAO_ID); 1471 /** 1472 * @stable ICU 2.4 1473 */ 1474 public static final UnicodeBlock TIBETAN 1475 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1476 /** 1477 * @stable ICU 2.4 1478 */ 1479 public static final UnicodeBlock MYANMAR 1480 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1481 /** 1482 * @stable ICU 2.4 1483 */ 1484 public static final UnicodeBlock GEORGIAN 1485 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1486 /** 1487 * @stable ICU 2.4 1488 */ 1489 public static final UnicodeBlock HANGUL_JAMO 1490 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1491 /** 1492 * @stable ICU 2.4 1493 */ 1494 public static final UnicodeBlock ETHIOPIC 1495 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1496 /** 1497 * @stable ICU 2.4 1498 */ 1499 public static final UnicodeBlock CHEROKEE 1500 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1501 /** 1502 * @stable ICU 2.4 1503 */ 1504 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1505 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1506 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1507 /** 1508 * @stable ICU 2.4 1509 */ 1510 public static final UnicodeBlock OGHAM 1511 = new UnicodeBlock("OGHAM", OGHAM_ID); 1512 /** 1513 * @stable ICU 2.4 1514 */ 1515 public static final UnicodeBlock RUNIC 1516 = new UnicodeBlock("RUNIC", RUNIC_ID); 1517 /** 1518 * @stable ICU 2.4 1519 */ 1520 public static final UnicodeBlock KHMER 1521 = new UnicodeBlock("KHMER", KHMER_ID); 1522 /** 1523 * @stable ICU 2.4 1524 */ 1525 
public static final UnicodeBlock MONGOLIAN 1526 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1527 /** 1528 * @stable ICU 2.4 1529 */ 1530 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1531 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1532 /** 1533 * @stable ICU 2.4 1534 */ 1535 public static final UnicodeBlock GREEK_EXTENDED 1536 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1537 /** 1538 * @stable ICU 2.4 1539 */ 1540 public static final UnicodeBlock GENERAL_PUNCTUATION 1541 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1542 /** 1543 * @stable ICU 2.4 1544 */ 1545 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1546 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1547 /** 1548 * @stable ICU 2.4 1549 */ 1550 public static final UnicodeBlock CURRENCY_SYMBOLS 1551 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1552 /** 1553 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1554 * Symbols". 
1555 * @stable ICU 2.4 1556 */ 1557 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1558 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1559 /** 1560 * @stable ICU 2.4 1561 */ 1562 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1563 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1564 /** 1565 * @stable ICU 2.4 1566 */ 1567 public static final UnicodeBlock NUMBER_FORMS 1568 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1569 /** 1570 * @stable ICU 2.4 1571 */ 1572 public static final UnicodeBlock ARROWS 1573 = new UnicodeBlock("ARROWS", ARROWS_ID); 1574 /** 1575 * @stable ICU 2.4 1576 */ 1577 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1578 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1579 /** 1580 * @stable ICU 2.4 1581 */ 1582 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1583 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1584 /** 1585 * @stable ICU 2.4 1586 */ 1587 public static final UnicodeBlock CONTROL_PICTURES 1588 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1589 /** 1590 * @stable ICU 2.4 1591 */ 1592 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1593 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1594 /** 1595 * @stable ICU 2.4 1596 */ 1597 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1598 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1599 /** 1600 * @stable ICU 2.4 1601 */ 1602 public static final UnicodeBlock BOX_DRAWING 1603 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1604 /** 1605 * @stable ICU 2.4 1606 */ 1607 public static final UnicodeBlock BLOCK_ELEMENTS 1608 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1609 /** 1610 * @stable ICU 2.4 1611 */ 1612 public static final UnicodeBlock GEOMETRIC_SHAPES 1613 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1614 
/** 1615 * @stable ICU 2.4 1616 */ 1617 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1618 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1619 /** 1620 * @stable ICU 2.4 1621 */ 1622 public static final UnicodeBlock DINGBATS 1623 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1624 /** 1625 * @stable ICU 2.4 1626 */ 1627 public static final UnicodeBlock BRAILLE_PATTERNS 1628 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1629 /** 1630 * @stable ICU 2.4 1631 */ 1632 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1633 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1634 /** 1635 * @stable ICU 2.4 1636 */ 1637 public static final UnicodeBlock KANGXI_RADICALS 1638 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1639 /** 1640 * @stable ICU 2.4 1641 */ 1642 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1643 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1644 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1645 /** 1646 * @stable ICU 2.4 1647 */ 1648 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1649 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1650 /** 1651 * @stable ICU 2.4 1652 */ 1653 public static final UnicodeBlock HIRAGANA 1654 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1655 /** 1656 * @stable ICU 2.4 1657 */ 1658 public static final UnicodeBlock KATAKANA 1659 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1660 /** 1661 * @stable ICU 2.4 1662 */ 1663 public static final UnicodeBlock BOPOMOFO 1664 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1665 /** 1666 * @stable ICU 2.4 1667 */ 1668 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1669 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1670 /** 1671 * @stable ICU 2.4 1672 */ 1673 public static final UnicodeBlock KANBUN 1674 = new UnicodeBlock("KANBUN", KANBUN_ID); 1675 /** 1676 * @stable ICU 2.4 1677 */ 1678 
public static final UnicodeBlock BOPOMOFO_EXTENDED 1679 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1680 /** 1681 * @stable ICU 2.4 1682 */ 1683 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1684 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1685 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1686 /** 1687 * @stable ICU 2.4 1688 */ 1689 public static final UnicodeBlock CJK_COMPATIBILITY 1690 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1691 /** 1692 * @stable ICU 2.4 1693 */ 1694 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1695 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1696 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1697 /** 1698 * @stable ICU 2.4 1699 */ 1700 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1701 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1702 /** 1703 * @stable ICU 2.4 1704 */ 1705 public static final UnicodeBlock YI_SYLLABLES 1706 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1707 /** 1708 * @stable ICU 2.4 1709 */ 1710 public static final UnicodeBlock YI_RADICALS 1711 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1712 /** 1713 * @stable ICU 2.4 1714 */ 1715 public static final UnicodeBlock HANGUL_SYLLABLES 1716 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1717 /** 1718 * @stable ICU 2.4 1719 */ 1720 public static final UnicodeBlock HIGH_SURROGATES 1721 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1722 /** 1723 * @stable ICU 2.4 1724 */ 1725 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1726 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1727 /** 1728 * @stable ICU 2.4 1729 */ 1730 public static final UnicodeBlock LOW_SURROGATES 1731 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1732 /** 1733 * Same as public static final int PRIVATE_USE. 
1734 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1735 * and multiple code point ranges had this block. 1736 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1737 * and adds separate blocks for the supplementary PUAs. 1738 * @stable ICU 2.4 1739 */ 1740 public static final UnicodeBlock PRIVATE_USE_AREA 1741 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1742 /** 1743 * Same as public static final int PRIVATE_USE_AREA. 1744 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1745 * and multiple code point ranges had this block. 1746 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1747 * and adds separate blocks for the supplementary PUAs. 1748 * @stable ICU 2.4 1749 */ 1750 public static final UnicodeBlock PRIVATE_USE 1751 = PRIVATE_USE_AREA; 1752 /** 1753 * @stable ICU 2.4 1754 */ 1755 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1756 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1757 /** 1758 * @stable ICU 2.4 1759 */ 1760 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1761 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1762 /** 1763 * @stable ICU 2.4 1764 */ 1765 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1766 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1767 /** 1768 * @stable ICU 2.4 1769 */ 1770 public static final UnicodeBlock COMBINING_HALF_MARKS 1771 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1772 /** 1773 * @stable ICU 2.4 1774 */ 1775 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1776 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1777 /** 1778 * @stable ICU 2.4 1779 */ 1780 public static final UnicodeBlock SMALL_FORM_VARIANTS 1781 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1782 /** 1783 * @stable ICU 2.4 1784 */ 1785 
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1786 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1787 /** 1788 * @stable ICU 2.4 1789 */ 1790 public static final UnicodeBlock SPECIALS 1791 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1792 /** 1793 * @stable ICU 2.4 1794 */ 1795 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1796 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1797 /** 1798 * @stable ICU 2.4 1799 */ 1800 public static final UnicodeBlock OLD_ITALIC 1801 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1802 /** 1803 * @stable ICU 2.4 1804 */ 1805 public static final UnicodeBlock GOTHIC 1806 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1807 /** 1808 * @stable ICU 2.4 1809 */ 1810 public static final UnicodeBlock DESERET 1811 = new UnicodeBlock("DESERET", DESERET_ID); 1812 /** 1813 * @stable ICU 2.4 1814 */ 1815 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1816 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1817 /** 1818 * @stable ICU 2.4 1819 */ 1820 public static final UnicodeBlock MUSICAL_SYMBOLS 1821 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1822 /** 1823 * @stable ICU 2.4 1824 */ 1825 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1826 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1827 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1828 /** 1829 * @stable ICU 2.4 1830 */ 1831 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1832 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1833 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1834 /** 1835 * @stable ICU 2.4 1836 */ 1837 public static final UnicodeBlock 1838 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1839 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1840 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1841 /** 1842 * @stable ICU 2.4 1843 */ 1844 public static final UnicodeBlock TAGS 
1845 = new UnicodeBlock("TAGS", TAGS_ID); 1846 1847 // New blocks in Unicode 3.2 1848 1849 /** 1850 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1851 * @stable ICU 2.4 1852 */ 1853 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1854 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1855 /** 1856 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1857 * @stable ICU 3.0 1858 */ 1859 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1860 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1861 /** 1862 * @stable ICU 2.4 1863 */ 1864 public static final UnicodeBlock TAGALOG 1865 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1866 /** 1867 * @stable ICU 2.4 1868 */ 1869 public static final UnicodeBlock HANUNOO 1870 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1871 /** 1872 * @stable ICU 2.4 1873 */ 1874 public static final UnicodeBlock BUHID 1875 = new UnicodeBlock("BUHID", BUHID_ID); 1876 /** 1877 * @stable ICU 2.4 1878 */ 1879 public static final UnicodeBlock TAGBANWA 1880 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1881 /** 1882 * @stable ICU 2.4 1883 */ 1884 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1885 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1886 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1887 /** 1888 * @stable ICU 2.4 1889 */ 1890 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1891 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1892 /** 1893 * @stable ICU 2.4 1894 */ 1895 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1896 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1897 /** 1898 * @stable ICU 2.4 1899 */ 1900 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1901 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1902 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1903 /** 1904 * @stable ICU 2.4 1905 */ 1906 
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1907 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1908 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1909 /** 1910 * @stable ICU 2.4 1911 */ 1912 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1913 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1914 /** 1915 * @stable ICU 2.4 1916 */ 1917 public static final UnicodeBlock VARIATION_SELECTORS 1918 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1919 /** 1920 * @stable ICU 2.4 1921 */ 1922 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1923 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1924 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1925 /** 1926 * @stable ICU 2.4 1927 */ 1928 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1929 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1930 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1931 1932 /** 1933 * @stable ICU 2.6 1934 */ 1935 public static final UnicodeBlock LIMBU 1936 = new UnicodeBlock("LIMBU", LIMBU_ID); 1937 /** 1938 * @stable ICU 2.6 1939 */ 1940 public static final UnicodeBlock TAI_LE 1941 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1942 /** 1943 * @stable ICU 2.6 1944 */ 1945 public static final UnicodeBlock KHMER_SYMBOLS 1946 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1947 1948 /** 1949 * @stable ICU 2.6 1950 */ 1951 public static final UnicodeBlock PHONETIC_EXTENSIONS 1952 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1953 1954 /** 1955 * @stable ICU 2.6 1956 */ 1957 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1958 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1959 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1960 /** 1961 * @stable ICU 2.6 1962 */ 1963 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1964 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1965 /** 1966 * @stable ICU 2.6 
1967 */ 1968 public static final UnicodeBlock LINEAR_B_SYLLABARY 1969 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1970 /** 1971 * @stable ICU 2.6 1972 */ 1973 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1974 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1975 /** 1976 * @stable ICU 2.6 1977 */ 1978 public static final UnicodeBlock AEGEAN_NUMBERS 1979 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1980 /** 1981 * @stable ICU 2.6 1982 */ 1983 public static final UnicodeBlock UGARITIC 1984 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1985 /** 1986 * @stable ICU 2.6 1987 */ 1988 public static final UnicodeBlock SHAVIAN 1989 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1990 /** 1991 * @stable ICU 2.6 1992 */ 1993 public static final UnicodeBlock OSMANYA 1994 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1995 /** 1996 * @stable ICU 2.6 1997 */ 1998 public static final UnicodeBlock CYPRIOT_SYLLABARY 1999 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 2000 /** 2001 * @stable ICU 2.6 2002 */ 2003 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 2004 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 2005 2006 /** 2007 * @stable ICU 2.6 2008 */ 2009 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 2010 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 2011 2012 /* New blocks in Unicode 4.1 */ 2013 2014 /** 2015 * @stable ICU 3.4 2016 */ 2017 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2018 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2019 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 2020 2021 /** 2022 * @stable ICU 3.4 2023 */ 2024 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2025 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 2026 2027 /** 2028 * @stable ICU 3.4 2029 */ 2030 public static final UnicodeBlock ARABIC_SUPPLEMENT = 2031 new 
UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 2032 2033 /** 2034 * @stable ICU 3.4 2035 */ 2036 public static final UnicodeBlock BUGINESE = 2037 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 2038 2039 /** 2040 * @stable ICU 3.4 2041 */ 2042 public static final UnicodeBlock CJK_STROKES = 2043 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 2044 2045 /** 2046 * @stable ICU 3.4 2047 */ 2048 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 2049 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 2050 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 2051 2052 /** 2053 * @stable ICU 3.4 2054 */ 2055 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 2056 2057 /** 2058 * @stable ICU 3.4 2059 */ 2060 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2061 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 2062 2063 /** 2064 * @stable ICU 3.4 2065 */ 2066 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 2067 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 2068 2069 /** 2070 * @stable ICU 3.4 2071 */ 2072 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2073 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 2074 2075 /** 2076 * @stable ICU 3.4 2077 */ 2078 public static final UnicodeBlock GLAGOLITIC = 2079 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 2080 2081 /** 2082 * @stable ICU 3.4 2083 */ 2084 public static final UnicodeBlock KHAROSHTHI = 2085 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 2086 2087 /** 2088 * @stable ICU 3.4 2089 */ 2090 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2091 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 2092 2093 /** 2094 * @stable ICU 3.4 2095 */ 2096 public static final UnicodeBlock NEW_TAI_LUE = 2097 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 
2098 2099 /** 2100 * @stable ICU 3.4 2101 */ 2102 public static final UnicodeBlock OLD_PERSIAN = 2103 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 2104 2105 /** 2106 * @stable ICU 3.4 2107 */ 2108 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 2109 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 2110 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 2111 2112 /** 2113 * @stable ICU 3.4 2114 */ 2115 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2116 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 2117 2118 /** 2119 * @stable ICU 3.4 2120 */ 2121 public static final UnicodeBlock SYLOTI_NAGRI = 2122 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 2123 2124 /** 2125 * @stable ICU 3.4 2126 */ 2127 public static final UnicodeBlock TIFINAGH = 2128 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 2129 2130 /** 2131 * @stable ICU 3.4 2132 */ 2133 public static final UnicodeBlock VERTICAL_FORMS = 2134 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 2135 2136 /** 2137 * @stable ICU 3.6 2138 */ 2139 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 2140 /** 2141 * @stable ICU 3.6 2142 */ 2143 public static final UnicodeBlock BALINESE = 2144 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 2145 /** 2146 * @stable ICU 3.6 2147 */ 2148 public static final UnicodeBlock LATIN_EXTENDED_C = 2149 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 2150 /** 2151 * @stable ICU 3.6 2152 */ 2153 public static final UnicodeBlock LATIN_EXTENDED_D = 2154 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 2155 /** 2156 * @stable ICU 3.6 2157 */ 2158 public static final UnicodeBlock PHAGS_PA = 2159 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 2160 /** 2161 * @stable ICU 3.6 2162 */ 2163 public static final UnicodeBlock PHOENICIAN = 2164 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); 
/*[10900]*/ 2165 /** 2166 * @stable ICU 3.6 2167 */ 2168 public static final UnicodeBlock CUNEIFORM = 2169 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 2170 /** 2171 * @stable ICU 3.6 2172 */ 2173 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2174 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2175 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 2176 /** 2177 * @stable ICU 3.6 2178 */ 2179 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2180 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 2181 2182 /** 2183 * @stable ICU 4.0 2184 */ 2185 public static final UnicodeBlock SUNDANESE = 2186 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 2187 2188 /** 2189 * @stable ICU 4.0 2190 */ 2191 public static final UnicodeBlock LEPCHA = 2192 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 2193 2194 /** 2195 * @stable ICU 4.0 2196 */ 2197 public static final UnicodeBlock OL_CHIKI = 2198 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 2199 2200 /** 2201 * @stable ICU 4.0 2202 */ 2203 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2204 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 2205 2206 /** 2207 * @stable ICU 4.0 2208 */ 2209 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 2210 2211 /** 2212 * @stable ICU 4.0 2213 */ 2214 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2215 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 2216 2217 /** 2218 * @stable ICU 4.0 2219 */ 2220 public static final UnicodeBlock SAURASHTRA = 2221 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 2222 2223 /** 2224 * @stable ICU 4.0 2225 */ 2226 public static final UnicodeBlock KAYAH_LI = 2227 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 2228 2229 /** 2230 * @stable ICU 4.0 2231 */ 2232 public static final UnicodeBlock REJANG = 2233 new UnicodeBlock("REJANG", 
REJANG_ID); /* [A930] */ 2234 2235 /** 2236 * @stable ICU 4.0 2237 */ 2238 public static final UnicodeBlock CHAM = 2239 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 2240 2241 /** 2242 * @stable ICU 4.0 2243 */ 2244 public static final UnicodeBlock ANCIENT_SYMBOLS = 2245 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 2246 2247 /** 2248 * @stable ICU 4.0 2249 */ 2250 public static final UnicodeBlock PHAISTOS_DISC = 2251 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 2252 2253 /** 2254 * @stable ICU 4.0 2255 */ 2256 public static final UnicodeBlock LYCIAN = 2257 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 2258 2259 /** 2260 * @stable ICU 4.0 2261 */ 2262 public static final UnicodeBlock CARIAN = 2263 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 2264 2265 /** 2266 * @stable ICU 4.0 2267 */ 2268 public static final UnicodeBlock LYDIAN = 2269 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 2270 2271 /** 2272 * @stable ICU 4.0 2273 */ 2274 public static final UnicodeBlock MAHJONG_TILES = 2275 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 2276 2277 /** 2278 * @stable ICU 4.0 2279 */ 2280 public static final UnicodeBlock DOMINO_TILES = 2281 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 2282 2283 /* New blocks in Unicode 5.2 */ 2284 2285 /** @stable ICU 4.4 */ 2286 public static final UnicodeBlock SAMARITAN = 2287 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 2288 /** @stable ICU 4.4 */ 2289 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 2290 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 2291 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 2292 /** @stable ICU 4.4 */ 2293 public static final UnicodeBlock TAI_THAM = 2294 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 2295 /** @stable ICU 4.4 */ 2296 public static final UnicodeBlock VEDIC_EXTENSIONS = 2297 new 
UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 2298 /** @stable ICU 4.4 */ 2299 public static final UnicodeBlock LISU = 2300 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 2301 /** @stable ICU 4.4 */ 2302 public static final UnicodeBlock BAMUM = 2303 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 2304 /** @stable ICU 4.4 */ 2305 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2306 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 2307 /** @stable ICU 4.4 */ 2308 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2309 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 2310 /** @stable ICU 4.4 */ 2311 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2312 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 2313 /** @stable ICU 4.4 */ 2314 public static final UnicodeBlock JAVANESE = 2315 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 2316 /** @stable ICU 4.4 */ 2317 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2318 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 2319 /** @stable ICU 4.4 */ 2320 public static final UnicodeBlock TAI_VIET = 2321 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 2322 /** @stable ICU 4.4 */ 2323 public static final UnicodeBlock MEETEI_MAYEK = 2324 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 2325 /** @stable ICU 4.4 */ 2326 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2327 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 2328 /** @stable ICU 4.4 */ 2329 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2330 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 2331 /** @stable ICU 4.4 */ 2332 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2333 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 2334 /** @stable ICU 4.4 */ 2335 public static final 
UnicodeBlock AVESTAN = 2336 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 2337 /** @stable ICU 4.4 */ 2338 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2339 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 2340 /** @stable ICU 4.4 */ 2341 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2342 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 2343 /** @stable ICU 4.4 */ 2344 public static final UnicodeBlock OLD_TURKIC = 2345 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 2346 /** @stable ICU 4.4 */ 2347 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2348 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 2349 /** @stable ICU 4.4 */ 2350 public static final UnicodeBlock KAITHI = 2351 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 2352 /** @stable ICU 4.4 */ 2353 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2354 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 2355 /** @stable ICU 4.4 */ 2356 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2357 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2358 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 2359 /** @stable ICU 4.4 */ 2360 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2361 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2362 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 2363 /** @stable ICU 4.4 */ 2364 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2365 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2366 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 2367 2368 /* New blocks in Unicode 6.0 */ 2369 2370 /** @stable ICU 4.6 */ 2371 public static final UnicodeBlock MANDAIC = 2372 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 2373 /** @stable ICU 4.6 */ 2374 public static final UnicodeBlock BATAK = 2375 new UnicodeBlock("BATAK", BATAK_ID); 
/*[1BC0]*/ 2376 /** @stable ICU 4.6 */ 2377 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2378 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 2379 /** @stable ICU 4.6 */ 2380 public static final UnicodeBlock BRAHMI = 2381 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 2382 /** @stable ICU 4.6 */ 2383 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2384 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 2385 /** @stable ICU 4.6 */ 2386 public static final UnicodeBlock KANA_SUPPLEMENT = 2387 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 2388 /** @stable ICU 4.6 */ 2389 public static final UnicodeBlock PLAYING_CARDS = 2390 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 2391 /** @stable ICU 4.6 */ 2392 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2393 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2394 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 2395 /** @stable ICU 4.6 */ 2396 public static final UnicodeBlock EMOTICONS = 2397 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 2398 /** @stable ICU 4.6 */ 2399 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2400 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 2401 /** @stable ICU 4.6 */ 2402 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2403 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 2404 /** @stable ICU 4.6 */ 2405 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2406 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2407 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 2408 2409 /* New blocks in Unicode 6.1 */ 2410 2411 /** @stable ICU 49 */ 2412 public static final UnicodeBlock ARABIC_EXTENDED_A = 2413 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 2414 /** @stable ICU 49 */ 2415 public static final UnicodeBlock 
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2416 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 2417 /** @stable ICU 49 */ 2418 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 2419 /** @stable ICU 49 */ 2420 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2421 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 2422 /** @stable ICU 49 */ 2423 public static final UnicodeBlock MEROITIC_CURSIVE = 2424 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 2425 /** @stable ICU 49 */ 2426 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2427 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 2428 /** @stable ICU 49 */ 2429 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 2430 /** @stable ICU 49 */ 2431 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 2432 /** @stable ICU 49 */ 2433 public static final UnicodeBlock SORA_SOMPENG = 2434 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 2435 /** @stable ICU 49 */ 2436 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2437 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 2438 /** @stable ICU 49 */ 2439 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 2440 2441 /* New blocks in Unicode 7.0 */ 2442 2443 /** @stable ICU 54 */ 2444 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 2445 /** @stable ICU 54 */ 2446 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2447 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 2448 /** @stable ICU 54 */ 2449 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2450 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); 
/*[102E0]*/ 2451 /** @stable ICU 54 */ 2452 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2453 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 2454 /** @stable ICU 54 */ 2455 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 2456 /** @stable ICU 54 */ 2457 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 2458 /** @stable ICU 54 */ 2459 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2460 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 2461 /** @stable ICU 54 */ 2462 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 2463 /** @stable ICU 54 */ 2464 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 2465 /** @stable ICU 54 */ 2466 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 2467 /** @stable ICU 54 */ 2468 public static final UnicodeBlock LATIN_EXTENDED_E = 2469 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 2470 /** @stable ICU 54 */ 2471 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 2472 /** @stable ICU 54 */ 2473 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 2474 /** @stable ICU 54 */ 2475 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 2476 /** @stable ICU 54 */ 2477 public static final UnicodeBlock MENDE_KIKAKUI = 2478 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 2479 /** @stable ICU 54 */ 2480 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 2481 /** @stable ICU 54 */ 2482 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); 
/*[16A40]*/ 2483 /** @stable ICU 54 */ 2484 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2485 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 2486 /** @stable ICU 54 */ 2487 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 2488 /** @stable ICU 54 */ 2489 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2490 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 2491 /** @stable ICU 54 */ 2492 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 2493 /** @stable ICU 54 */ 2494 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2495 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 2496 /** @stable ICU 54 */ 2497 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 2498 /** @stable ICU 54 */ 2499 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 2500 /** @stable ICU 54 */ 2501 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 2502 /** @stable ICU 54 */ 2503 public static final UnicodeBlock PSALTER_PAHLAVI = 2504 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 2505 /** @stable ICU 54 */ 2506 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2507 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 2508 /** @stable ICU 54 */ 2509 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 2510 /** @stable ICU 54 */ 2511 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2512 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 2513 /** @stable ICU 54 */ 2514 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2515 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 
SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 2516 /** @stable ICU 54 */ 2517 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 2518 /** @stable ICU 54 */ 2519 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 2520 2521 /* New blocks in Unicode 8.0 */ 2522 2523 /** @stable ICU 56 */ 2524 public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/ 2525 /** @stable ICU 56 */ 2526 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2527 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/ 2528 /** @stable ICU 56 */ 2529 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2530 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/ 2531 /** @stable ICU 56 */ 2532 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2533 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2534 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/ 2535 /** @stable ICU 56 */ 2536 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2537 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/ 2538 /** @stable ICU 56 */ 2539 public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/ 2540 /** @stable ICU 56 */ 2541 public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/ 2542 /** @stable ICU 56 */ 2543 public static final UnicodeBlock OLD_HUNGARIAN = 2544 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/ 2545 /** @stable ICU 56 */ 2546 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2547 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2548 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2549 /** @stable ICU 56 */ 2550 public static final UnicodeBlock SUTTON_SIGNWRITING = 2551 new UnicodeBlock("SUTTON_SIGNWRITING", 
SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2552 2553 /* New blocks in Unicode 9.0 */ 2554 2555 /** @stable ICU 58 */ 2556 public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/ 2557 /** @stable ICU 58 */ 2558 public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/ 2559 /** @stable ICU 58 */ 2560 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2561 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/ 2562 /** @stable ICU 58 */ 2563 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 2564 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/ 2565 /** @stable ICU 58 */ 2566 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 2567 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/ 2568 /** @stable ICU 58 */ 2569 public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/ 2570 /** @stable ICU 58 */ 2571 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 2572 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/ 2573 /** @stable ICU 58 */ 2574 public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/ 2575 /** @stable ICU 58 */ 2576 public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/ 2577 /** @stable ICU 58 */ 2578 public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/ 2579 /** @stable ICU 58 */ 2580 public static final UnicodeBlock TANGUT_COMPONENTS = 2581 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/ 2582 2583 // New blocks in Unicode 10.0 2584 2585 /** @stable ICU 60 */ 2586 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 2587 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/ 2588 /** 
@stable ICU 60 */ 2589 public static final UnicodeBlock KANA_EXTENDED_A = 2590 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/ 2591 /** @stable ICU 60 */ 2592 public static final UnicodeBlock MASARAM_GONDI = 2593 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/ 2594 /** @stable ICU 60 */ 2595 public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/ 2596 /** @stable ICU 60 */ 2597 public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/ 2598 /** @stable ICU 60 */ 2599 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 2600 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/ 2601 /** @stable ICU 60 */ 2602 public static final UnicodeBlock ZANABAZAR_SQUARE = 2603 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/ 2604 2605 // New blocks in Unicode 11.0 2606 2607 /** @stable ICU 62 */ 2608 public static final UnicodeBlock CHESS_SYMBOLS = 2609 new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/ 2610 /** @stable ICU 62 */ 2611 public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/ 2612 /** @stable ICU 62 */ 2613 public static final UnicodeBlock GEORGIAN_EXTENDED = 2614 new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/ 2615 /** @stable ICU 62 */ 2616 public static final UnicodeBlock GUNJALA_GONDI = 2617 new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/ 2618 /** @stable ICU 62 */ 2619 public static final UnicodeBlock HANIFI_ROHINGYA = 2620 new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/ 2621 /** @stable ICU 62 */ 2622 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 2623 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/ 2624 /** @stable ICU 62 */ 2625 public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/ 2626 /** @stable ICU 62 */ 2627 public 
static final UnicodeBlock MAYAN_NUMERALS = 2628 new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/ 2629 /** @stable ICU 62 */ 2630 public static final UnicodeBlock MEDEFAIDRIN = 2631 new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/ 2632 /** @stable ICU 62 */ 2633 public static final UnicodeBlock OLD_SOGDIAN = 2634 new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/ 2635 /** @stable ICU 62 */ 2636 public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/ 2637 2638 // New blocks in Unicode 12.0 2639 2640 /** @stable ICU 64 */ 2641 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 2642 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/ 2643 /** @stable ICU 64 */ 2644 public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/ 2645 /** @stable ICU 64 */ 2646 public static final UnicodeBlock NANDINAGARI = 2647 new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/ 2648 /** @stable ICU 64 */ 2649 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 2650 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/ 2651 /** @stable ICU 64 */ 2652 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 2653 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/ 2654 /** @stable ICU 64 */ 2655 public static final UnicodeBlock SMALL_KANA_EXTENSION = 2656 new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/ 2657 /** @stable ICU 64 */ 2658 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 2659 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/ 2660 /** @stable ICU 64 */ 2661 public static final UnicodeBlock TAMIL_SUPPLEMENT = 2662 new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/ 2663 /** @stable ICU 64 */ 2664 
public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/ 2665 2666 // New blocks in Unicode 13.0 2667 2668 /** @stable ICU 66 */ 2669 public static final UnicodeBlock CHORASMIAN = 2670 new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/ 2671 /** @stable ICU 66 */ 2672 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 2673 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G", 2674 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/ 2675 /** @stable ICU 66 */ 2676 public static final UnicodeBlock DIVES_AKURU = 2677 new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/ 2678 /** @stable ICU 66 */ 2679 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 2680 new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/ 2681 /** @stable ICU 66 */ 2682 public static final UnicodeBlock LISU_SUPPLEMENT = 2683 new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/ 2684 /** @stable ICU 66 */ 2685 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING = 2686 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/ 2687 /** @stable ICU 66 */ 2688 public static final UnicodeBlock TANGUT_SUPPLEMENT = 2689 new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/ 2690 /** @stable ICU 66 */ 2691 public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/ 2692 2693 // New blocks in Unicode 14.0 2694 2695 /** @stable ICU 70 */ 2696 public static final UnicodeBlock ARABIC_EXTENDED_B = 2697 new UnicodeBlock("ARABIC_EXTENDED_B", ARABIC_EXTENDED_B_ID); /*[0870]*/ 2698 /** @stable ICU 70 */ 2699 public static final UnicodeBlock CYPRO_MINOAN = 2700 new UnicodeBlock("CYPRO_MINOAN", CYPRO_MINOAN_ID); /*[12F90]*/ 2701 /** @stable ICU 70 */ 2702 public static final UnicodeBlock ETHIOPIC_EXTENDED_B = 2703 new UnicodeBlock("ETHIOPIC_EXTENDED_B", ETHIOPIC_EXTENDED_B_ID); /*[1E7E0]*/ 2704 /** @stable ICU 
70 */ 2705 public static final UnicodeBlock KANA_EXTENDED_B = 2706 new UnicodeBlock("KANA_EXTENDED_B", KANA_EXTENDED_B_ID); /*[1AFF0]*/ 2707 /** @stable ICU 70 */ 2708 public static final UnicodeBlock LATIN_EXTENDED_F = 2709 new UnicodeBlock("LATIN_EXTENDED_F", LATIN_EXTENDED_F_ID); /*[10780]*/ 2710 /** @stable ICU 70 */ 2711 public static final UnicodeBlock LATIN_EXTENDED_G = 2712 new UnicodeBlock("LATIN_EXTENDED_G", LATIN_EXTENDED_G_ID); /*[1DF00]*/ 2713 /** @stable ICU 70 */ 2714 public static final UnicodeBlock OLD_UYGHUR = 2715 new UnicodeBlock("OLD_UYGHUR", OLD_UYGHUR_ID); /*[10F70]*/ 2716 /** @stable ICU 70 */ 2717 public static final UnicodeBlock TANGSA = new UnicodeBlock("TANGSA", TANGSA_ID); /*[16A70]*/ 2718 /** @stable ICU 70 */ 2719 public static final UnicodeBlock TOTO = new UnicodeBlock("TOTO", TOTO_ID); /*[1E290]*/ 2720 /** @stable ICU 70 */ 2721 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 2722 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A", 2723 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID); /*[11AB0]*/ 2724 /** @stable ICU 70 */ 2725 public static final UnicodeBlock VITHKUQI = 2726 new UnicodeBlock("VITHKUQI", VITHKUQI_ID); /*[10570]*/ 2727 /** @stable ICU 70 */ 2728 public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION = 2729 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION", 2730 ZNAMENNY_MUSICAL_NOTATION_ID); /*[1CF00]*/ 2731 2732 // New blocks in Unicode 15.0 2733 2734 /** @stable ICU 72 */ 2735 public static final UnicodeBlock ARABIC_EXTENDED_C = 2736 new UnicodeBlock("ARABIC_EXTENDED_C", ARABIC_EXTENDED_C_ID); /*[10EC0]*/ 2737 /** @stable ICU 72 */ 2738 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 2739 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H", 2740 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID); /*[31350]*/ 2741 /** @stable ICU 72 */ 2742 public static final UnicodeBlock CYRILLIC_EXTENDED_D = 2743 new UnicodeBlock("CYRILLIC_EXTENDED_D", 
CYRILLIC_EXTENDED_D_ID); /*[1E030]*/ 2744 /** @stable ICU 72 */ 2745 public static final UnicodeBlock DEVANAGARI_EXTENDED_A = 2746 new UnicodeBlock("DEVANAGARI_EXTENDED_A", DEVANAGARI_EXTENDED_A_ID); /*[11B00]*/ 2747 /** @stable ICU 72 */ 2748 public static final UnicodeBlock KAKTOVIK_NUMERALS = 2749 new UnicodeBlock("KAKTOVIK_NUMERALS", KAKTOVIK_NUMERALS_ID); /*[1D2C0]*/ 2750 /** @stable ICU 72 */ 2751 public static final UnicodeBlock KAWI = new UnicodeBlock("KAWI", KAWI_ID); /*[11F00]*/ 2752 /** @stable ICU 72 */ 2753 public static final UnicodeBlock NAG_MUNDARI = 2754 new UnicodeBlock("NAG_MUNDARI", NAG_MUNDARI_ID); /*[1E4D0]*/ 2755 2756 // New block in Unicode 15.1 2757 2758 /** @stable ICU 74 */ 2759 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I = 2760 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I", 2761 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_ID); /*[2EBF0]*/ 2762 2763 /** 2764 * @stable ICU 2.4 2765 */ 2766 public static final UnicodeBlock INVALID_CODE 2767 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2768 2769 static { 2770 for (int blockId = 0; blockId < COUNT; ++blockId) { 2771 if (BLOCKS_[blockId] == null) { 2772 throw new java.lang.IllegalStateException( 2773 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2774 } 2775 } 2776 } 2777 2778 // public methods -------------------------------------------------- 2779 2780 /** 2781 * {@icu} Returns the only instance of the UnicodeBlock with the argument ID. 2782 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2783 * @param id UnicodeBlock ID 2784 * @return the only instance of the UnicodeBlock with the argument ID 2785 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2786 * returned. 
2787 * @stable ICU 2.4 2788 */ getInstance(int id)2789 public static UnicodeBlock getInstance(int id) 2790 { 2791 if (id >= 0 && id < BLOCKS_.length) { 2792 return BLOCKS_[id]; 2793 } 2794 return INVALID_CODE; 2795 } 2796 2797 /** 2798 * Returns the Unicode allocation block that contains the code point, 2799 * or null if the code point is not a member of a defined block. 2800 * @param ch code point to be tested 2801 * @return the Unicode allocation block that contains the code point 2802 * @stable ICU 2.4 2803 */ of(int ch)2804 public static UnicodeBlock of(int ch) 2805 { 2806 if (ch > MAX_VALUE) { 2807 return INVALID_CODE; 2808 } 2809 2810 return UnicodeBlock.getInstance( 2811 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2812 } 2813 2814 /** 2815 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2816 * Returns the Unicode block with the given name. {@icunote} Unlike 2817 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2818 * against the official UCD name and the Java block name 2819 * (ignoring case). 
2820 * @param blockName the name of the block to match 2821 * @return the UnicodeBlock with that name 2822 * @throws IllegalArgumentException if the blockName could not be matched 2823 * @stable ICU 3.0 2824 */ forName(String blockName)2825 public static final UnicodeBlock forName(String blockName) { 2826 Map<String, UnicodeBlock> m = null; 2827 if (mref != null) { 2828 m = mref.get(); 2829 } 2830 if (m == null) { 2831 m = new HashMap<>(BLOCKS_.length); 2832 for (int i = 0; i < BLOCKS_.length; ++i) { 2833 UnicodeBlock b = BLOCKS_[i]; 2834 String name = trimBlockName( 2835 getPropertyValueName(UProperty.BLOCK, b.getID(), 2836 UProperty.NameChoice.LONG)); 2837 m.put(name, b); 2838 } 2839 mref = new SoftReference<>(m); 2840 } 2841 UnicodeBlock b = m.get(trimBlockName(blockName)); 2842 if (b == null) { 2843 throw new IllegalArgumentException(); 2844 } 2845 return b; 2846 } 2847 private static SoftReference<Map<String, UnicodeBlock>> mref; 2848 trimBlockName(String name)2849 private static String trimBlockName(String name) { 2850 String upper = name.toUpperCase(Locale.ENGLISH); 2851 StringBuilder result = new StringBuilder(upper.length()); 2852 for (int i = 0; i < upper.length(); i++) { 2853 char c = upper.charAt(i); 2854 if (c != ' ' && c != '_' && c != '-') { 2855 result.append(c); 2856 } 2857 } 2858 return result.toString(); 2859 } 2860 2861 /** 2862 * {icu} Returns the type ID of this Unicode block 2863 * @return integer type ID of this Unicode block 2864 * @stable ICU 2.4 2865 */ getID()2866 public int getID() 2867 { 2868 return m_id_; 2869 } 2870 2871 // private data members --------------------------------------------- 2872 2873 /** 2874 * Identification code for this UnicodeBlock 2875 */ 2876 private int m_id_; 2877 2878 // private constructor ---------------------------------------------- 2879 2880 /** 2881 * UnicodeBlock constructor 2882 * @param name name of this UnicodeBlock 2883 * @param id unique id of this UnicodeBlock 2884 * @exception 
NullPointerException if name is <code>null</code> 2885 */ UnicodeBlock(String name, int id)2886 private UnicodeBlock(String name, int id) 2887 { 2888 super(name); 2889 m_id_ = id; 2890 if (id >= 0) { 2891 BLOCKS_[id] = this; 2892 } 2893 } 2894 } 2895 2896 /** 2897 * East Asian Width constants. 2898 * @see UProperty#EAST_ASIAN_WIDTH 2899 * @see UCharacter#getIntPropertyValue 2900 * @stable ICU 2.4 2901 */ 2902 public static interface EastAsianWidth 2903 { 2904 /** 2905 * @stable ICU 2.4 2906 */ 2907 public static final int NEUTRAL = 0; 2908 /** 2909 * @stable ICU 2.4 2910 */ 2911 public static final int AMBIGUOUS = 1; 2912 /** 2913 * @stable ICU 2.4 2914 */ 2915 public static final int HALFWIDTH = 2; 2916 /** 2917 * @stable ICU 2.4 2918 */ 2919 public static final int FULLWIDTH = 3; 2920 /** 2921 * @stable ICU 2.4 2922 */ 2923 public static final int NARROW = 4; 2924 /** 2925 * @stable ICU 2.4 2926 */ 2927 public static final int WIDE = 5; 2928 /** 2929 * One more than the highest normal EastAsianWidth value. 2930 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH). 2931 * 2932 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2933 */ 2934 @Deprecated 2935 public static final int COUNT = 6; 2936 } 2937 2938 /** 2939 * Decomposition Type constants. 
2940 * @see UProperty#DECOMPOSITION_TYPE 2941 * @stable ICU 2.4 2942 */ 2943 public static interface DecompositionType 2944 { 2945 /** 2946 * @stable ICU 2.4 2947 */ 2948 public static final int NONE = 0; 2949 /** 2950 * @stable ICU 2.4 2951 */ 2952 public static final int CANONICAL = 1; 2953 /** 2954 * @stable ICU 2.4 2955 */ 2956 public static final int COMPAT = 2; 2957 /** 2958 * @stable ICU 2.4 2959 */ 2960 public static final int CIRCLE = 3; 2961 /** 2962 * @stable ICU 2.4 2963 */ 2964 public static final int FINAL = 4; 2965 /** 2966 * @stable ICU 2.4 2967 */ 2968 public static final int FONT = 5; 2969 /** 2970 * @stable ICU 2.4 2971 */ 2972 public static final int FRACTION = 6; 2973 /** 2974 * @stable ICU 2.4 2975 */ 2976 public static final int INITIAL = 7; 2977 /** 2978 * @stable ICU 2.4 2979 */ 2980 public static final int ISOLATED = 8; 2981 /** 2982 * @stable ICU 2.4 2983 */ 2984 public static final int MEDIAL = 9; 2985 /** 2986 * @stable ICU 2.4 2987 */ 2988 public static final int NARROW = 10; 2989 /** 2990 * @stable ICU 2.4 2991 */ 2992 public static final int NOBREAK = 11; 2993 /** 2994 * @stable ICU 2.4 2995 */ 2996 public static final int SMALL = 12; 2997 /** 2998 * @stable ICU 2.4 2999 */ 3000 public static final int SQUARE = 13; 3001 /** 3002 * @stable ICU 2.4 3003 */ 3004 public static final int SUB = 14; 3005 /** 3006 * @stable ICU 2.4 3007 */ 3008 public static final int SUPER = 15; 3009 /** 3010 * @stable ICU 2.4 3011 */ 3012 public static final int VERTICAL = 16; 3013 /** 3014 * @stable ICU 2.4 3015 */ 3016 public static final int WIDE = 17; 3017 /** 3018 * One more than the highest normal DecompositionType value. 3019 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE). 3020 * 3021 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3022 */ 3023 @Deprecated 3024 public static final int COUNT = 18; 3025 } 3026 3027 /** 3028 * Joining Type constants. 
3029 * @see UProperty#JOINING_TYPE 3030 * @stable ICU 2.4 3031 */ 3032 public static interface JoiningType 3033 { 3034 /** 3035 * @stable ICU 2.4 3036 */ 3037 public static final int NON_JOINING = 0; 3038 /** 3039 * @stable ICU 2.4 3040 */ 3041 public static final int JOIN_CAUSING = 1; 3042 /** 3043 * @stable ICU 2.4 3044 */ 3045 public static final int DUAL_JOINING = 2; 3046 /** 3047 * @stable ICU 2.4 3048 */ 3049 public static final int LEFT_JOINING = 3; 3050 /** 3051 * @stable ICU 2.4 3052 */ 3053 public static final int RIGHT_JOINING = 4; 3054 /** 3055 * @stable ICU 2.4 3056 */ 3057 public static final int TRANSPARENT = 5; 3058 /** 3059 * One more than the highest normal JoiningType value. 3060 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE). 3061 * 3062 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3063 */ 3064 @Deprecated 3065 public static final int COUNT = 6; 3066 } 3067 3068 /** 3069 * Joining Group constants. 
3070 * @see UProperty#JOINING_GROUP 3071 * @stable ICU 2.4 3072 */ 3073 public static interface JoiningGroup 3074 { 3075 /** 3076 * @stable ICU 2.4 3077 */ 3078 public static final int NO_JOINING_GROUP = 0; 3079 /** 3080 * @stable ICU 2.4 3081 */ 3082 public static final int AIN = 1; 3083 /** 3084 * @stable ICU 2.4 3085 */ 3086 public static final int ALAPH = 2; 3087 /** 3088 * @stable ICU 2.4 3089 */ 3090 public static final int ALEF = 3; 3091 /** 3092 * @stable ICU 2.4 3093 */ 3094 public static final int BEH = 4; 3095 /** 3096 * @stable ICU 2.4 3097 */ 3098 public static final int BETH = 5; 3099 /** 3100 * @stable ICU 2.4 3101 */ 3102 public static final int DAL = 6; 3103 /** 3104 * @stable ICU 2.4 3105 */ 3106 public static final int DALATH_RISH = 7; 3107 /** 3108 * @stable ICU 2.4 3109 */ 3110 public static final int E = 8; 3111 /** 3112 * @stable ICU 2.4 3113 */ 3114 public static final int FEH = 9; 3115 /** 3116 * @stable ICU 2.4 3117 */ 3118 public static final int FINAL_SEMKATH = 10; 3119 /** 3120 * @stable ICU 2.4 3121 */ 3122 public static final int GAF = 11; 3123 /** 3124 * @stable ICU 2.4 3125 */ 3126 public static final int GAMAL = 12; 3127 /** 3128 * @stable ICU 2.4 3129 */ 3130 public static final int HAH = 13; 3131 /** @stable ICU 4.6 */ 3132 public static final int TEH_MARBUTA_GOAL = 14; 3133 /** 3134 * @stable ICU 2.4 3135 */ 3136 public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL; 3137 /** 3138 * @stable ICU 2.4 3139 */ 3140 public static final int HE = 15; 3141 /** 3142 * @stable ICU 2.4 3143 */ 3144 public static final int HEH = 16; 3145 /** 3146 * @stable ICU 2.4 3147 */ 3148 public static final int HEH_GOAL = 17; 3149 /** 3150 * @stable ICU 2.4 3151 */ 3152 public static final int HETH = 18; 3153 /** 3154 * @stable ICU 2.4 3155 */ 3156 public static final int KAF = 19; 3157 /** 3158 * @stable ICU 2.4 3159 */ 3160 public static final int KAPH = 20; 3161 /** 3162 * @stable ICU 2.4 3163 */ 3164 public static final int KNOTTED_HEH = 
21; 3165 /** 3166 * @stable ICU 2.4 3167 */ 3168 public static final int LAM = 22; 3169 /** 3170 * @stable ICU 2.4 3171 */ 3172 public static final int LAMADH = 23; 3173 /** 3174 * @stable ICU 2.4 3175 */ 3176 public static final int MEEM = 24; 3177 /** 3178 * @stable ICU 2.4 3179 */ 3180 public static final int MIM = 25; 3181 /** 3182 * @stable ICU 2.4 3183 */ 3184 public static final int NOON = 26; 3185 /** 3186 * @stable ICU 2.4 3187 */ 3188 public static final int NUN = 27; 3189 /** 3190 * @stable ICU 2.4 3191 */ 3192 public static final int PE = 28; 3193 /** 3194 * @stable ICU 2.4 3195 */ 3196 public static final int QAF = 29; 3197 /** 3198 * @stable ICU 2.4 3199 */ 3200 public static final int QAPH = 30; 3201 /** 3202 * @stable ICU 2.4 3203 */ 3204 public static final int REH = 31; 3205 /** 3206 * @stable ICU 2.4 3207 */ 3208 public static final int REVERSED_PE = 32; 3209 /** 3210 * @stable ICU 2.4 3211 */ 3212 public static final int SAD = 33; 3213 /** 3214 * @stable ICU 2.4 3215 */ 3216 public static final int SADHE = 34; 3217 /** 3218 * @stable ICU 2.4 3219 */ 3220 public static final int SEEN = 35; 3221 /** 3222 * @stable ICU 2.4 3223 */ 3224 public static final int SEMKATH = 36; 3225 /** 3226 * @stable ICU 2.4 3227 */ 3228 public static final int SHIN = 37; 3229 /** 3230 * @stable ICU 2.4 3231 */ 3232 public static final int SWASH_KAF = 38; 3233 /** 3234 * @stable ICU 2.4 3235 */ 3236 public static final int SYRIAC_WAW = 39; 3237 /** 3238 * @stable ICU 2.4 3239 */ 3240 public static final int TAH = 40; 3241 /** 3242 * @stable ICU 2.4 3243 */ 3244 public static final int TAW = 41; 3245 /** 3246 * @stable ICU 2.4 3247 */ 3248 public static final int TEH_MARBUTA = 42; 3249 /** 3250 * @stable ICU 2.4 3251 */ 3252 public static final int TETH = 43; 3253 /** 3254 * @stable ICU 2.4 3255 */ 3256 public static final int WAW = 44; 3257 /** 3258 * @stable ICU 2.4 3259 */ 3260 public static final int YEH = 45; 3261 /** 3262 * @stable ICU 2.4 3263 */ 3264 public 
static final int YEH_BARREE = 46; 3265 /** 3266 * @stable ICU 2.4 3267 */ 3268 public static final int YEH_WITH_TAIL = 47; 3269 /** 3270 * @stable ICU 2.4 3271 */ 3272 public static final int YUDH = 48; 3273 /** 3274 * @stable ICU 2.4 3275 */ 3276 public static final int YUDH_HE = 49; 3277 /** 3278 * @stable ICU 2.4 3279 */ 3280 public static final int ZAIN = 50; 3281 /** 3282 * @stable ICU 2.6 3283 */ 3284 public static final int FE = 51; 3285 /** 3286 * @stable ICU 2.6 3287 */ 3288 public static final int KHAPH = 52; 3289 /** 3290 * @stable ICU 2.6 3291 */ 3292 public static final int ZHAIN = 53; 3293 /** 3294 * @stable ICU 4.0 3295 */ 3296 public static final int BURUSHASKI_YEH_BARREE = 54; 3297 /** @stable ICU 4.4 */ 3298 public static final int FARSI_YEH = 55; 3299 /** @stable ICU 4.4 */ 3300 public static final int NYA = 56; 3301 /** @stable ICU 49 */ 3302 public static final int ROHINGYA_YEH = 57; 3303 3304 /** @stable ICU 54 */ 3305 public static final int MANICHAEAN_ALEPH = 58; 3306 /** @stable ICU 54 */ 3307 public static final int MANICHAEAN_AYIN = 59; 3308 /** @stable ICU 54 */ 3309 public static final int MANICHAEAN_BETH = 60; 3310 /** @stable ICU 54 */ 3311 public static final int MANICHAEAN_DALETH = 61; 3312 /** @stable ICU 54 */ 3313 public static final int MANICHAEAN_DHAMEDH = 62; 3314 /** @stable ICU 54 */ 3315 public static final int MANICHAEAN_FIVE = 63; 3316 /** @stable ICU 54 */ 3317 public static final int MANICHAEAN_GIMEL = 64; 3318 /** @stable ICU 54 */ 3319 public static final int MANICHAEAN_HETH = 65; 3320 /** @stable ICU 54 */ 3321 public static final int MANICHAEAN_HUNDRED = 66; 3322 /** @stable ICU 54 */ 3323 public static final int MANICHAEAN_KAPH = 67; 3324 /** @stable ICU 54 */ 3325 public static final int MANICHAEAN_LAMEDH = 68; 3326 /** @stable ICU 54 */ 3327 public static final int MANICHAEAN_MEM = 69; 3328 /** @stable ICU 54 */ 3329 public static final int MANICHAEAN_NUN = 70; 3330 /** @stable ICU 54 */ 3331 public static final 
int MANICHAEAN_ONE = 71; 3332 /** @stable ICU 54 */ 3333 public static final int MANICHAEAN_PE = 72; 3334 /** @stable ICU 54 */ 3335 public static final int MANICHAEAN_QOPH = 73; 3336 /** @stable ICU 54 */ 3337 public static final int MANICHAEAN_RESH = 74; 3338 /** @stable ICU 54 */ 3339 public static final int MANICHAEAN_SADHE = 75; 3340 /** @stable ICU 54 */ 3341 public static final int MANICHAEAN_SAMEKH = 76; 3342 /** @stable ICU 54 */ 3343 public static final int MANICHAEAN_TAW = 77; 3344 /** @stable ICU 54 */ 3345 public static final int MANICHAEAN_TEN = 78; 3346 /** @stable ICU 54 */ 3347 public static final int MANICHAEAN_TETH = 79; 3348 /** @stable ICU 54 */ 3349 public static final int MANICHAEAN_THAMEDH = 80; 3350 /** @stable ICU 54 */ 3351 public static final int MANICHAEAN_TWENTY = 81; 3352 /** @stable ICU 54 */ 3353 public static final int MANICHAEAN_WAW = 82; 3354 /** @stable ICU 54 */ 3355 public static final int MANICHAEAN_YODH = 83; 3356 /** @stable ICU 54 */ 3357 public static final int MANICHAEAN_ZAYIN = 84; 3358 /** @stable ICU 54 */ 3359 public static final int STRAIGHT_WAW = 85; 3360 3361 /** @stable ICU 58 */ 3362 public static final int AFRICAN_FEH = 86; 3363 /** @stable ICU 58 */ 3364 public static final int AFRICAN_NOON = 87; 3365 /** @stable ICU 58 */ 3366 public static final int AFRICAN_QAF = 88; 3367 3368 /** @stable ICU 60 */ 3369 public static final int MALAYALAM_BHA = 89; 3370 /** @stable ICU 60 */ 3371 public static final int MALAYALAM_JA = 90; 3372 /** @stable ICU 60 */ 3373 public static final int MALAYALAM_LLA = 91; 3374 /** @stable ICU 60 */ 3375 public static final int MALAYALAM_LLLA = 92; 3376 /** @stable ICU 60 */ 3377 public static final int MALAYALAM_NGA = 93; 3378 /** @stable ICU 60 */ 3379 public static final int MALAYALAM_NNA = 94; 3380 /** @stable ICU 60 */ 3381 public static final int MALAYALAM_NNNA = 95; 3382 /** @stable ICU 60 */ 3383 public static final int MALAYALAM_NYA = 96; 3384 /** @stable ICU 60 */ 3385 public 
static final int MALAYALAM_RA = 97; 3386 /** @stable ICU 60 */ 3387 public static final int MALAYALAM_SSA = 98; 3388 /** @stable ICU 60 */ 3389 public static final int MALAYALAM_TTA = 99; 3390 3391 /** @stable ICU 62 */ 3392 public static final int HANIFI_ROHINGYA_KINNA_YA = 100; 3393 /** @stable ICU 62 */ 3394 public static final int HANIFI_ROHINGYA_PA = 101; 3395 3396 /** @stable ICU 70 */ 3397 public static final int THIN_YEH = 102; 3398 /** @stable ICU 70 */ 3399 public static final int VERTICAL_TAIL = 103; 3400 3401 /** 3402 * One more than the highest normal JoiningGroup value. 3403 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup). 3404 * 3405 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3406 */ 3407 @Deprecated 3408 public static final int COUNT = 104; 3409 } 3410 3411 /** 3412 * Grapheme Cluster Break constants. 3413 * @see UProperty#GRAPHEME_CLUSTER_BREAK 3414 * @stable ICU 3.4 3415 */ 3416 public static interface GraphemeClusterBreak { 3417 /** 3418 * @stable ICU 3.4 3419 */ 3420 public static final int OTHER = 0; 3421 /** 3422 * @stable ICU 3.4 3423 */ 3424 public static final int CONTROL = 1; 3425 /** 3426 * @stable ICU 3.4 3427 */ 3428 public static final int CR = 2; 3429 /** 3430 * @stable ICU 3.4 3431 */ 3432 public static final int EXTEND = 3; 3433 /** 3434 * @stable ICU 3.4 3435 */ 3436 public static final int L = 4; 3437 /** 3438 * @stable ICU 3.4 3439 */ 3440 public static final int LF = 5; 3441 /** 3442 * @stable ICU 3.4 3443 */ 3444 public static final int LV = 6; 3445 /** 3446 * @stable ICU 3.4 3447 */ 3448 public static final int LVT = 7; 3449 /** 3450 * @stable ICU 3.4 3451 */ 3452 public static final int T = 8; 3453 /** 3454 * @stable ICU 3.4 3455 */ 3456 public static final int V = 9; 3457 /** 3458 * @stable ICU 4.0 3459 */ 3460 public static final int SPACING_MARK = 10; 3461 /** 3462 * @stable ICU 4.0 3463 */ 3464 public static final int PREPEND = 11; 
3465 /** @stable ICU 50 */ 3466 public static final int REGIONAL_INDICATOR = 12; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3467 /** @stable ICU 58 */ 3468 public static final int E_BASE = 13; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3469 /** @stable ICU 58 */ 3470 public static final int E_BASE_GAZ = 14; /*[EBG]*/ 3471 /** @stable ICU 58 */ 3472 public static final int E_MODIFIER = 15; /*[EM]*/ 3473 /** @stable ICU 58 */ 3474 public static final int GLUE_AFTER_ZWJ = 16; /*[GAZ]*/ 3475 /** @stable ICU 58 */ 3476 public static final int ZWJ = 17; /*[ZWJ]*/ 3477 3478 /** 3479 * One more than the highest normal GraphemeClusterBreak value. 3480 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK). 3481 * 3482 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3483 */ 3484 @Deprecated 3485 public static final int COUNT = 18; 3486 } 3487 3488 /** 3489 * Word Break constants. 3490 * @see UProperty#WORD_BREAK 3491 * @stable ICU 3.4 3492 */ 3493 public static interface WordBreak { 3494 /** 3495 * @stable ICU 3.8 3496 */ 3497 public static final int OTHER = 0; 3498 /** 3499 * @stable ICU 3.8 3500 */ 3501 public static final int ALETTER = 1; 3502 /** 3503 * @stable ICU 3.8 3504 */ 3505 public static final int FORMAT = 2; 3506 /** 3507 * @stable ICU 3.8 3508 */ 3509 public static final int KATAKANA = 3; 3510 /** 3511 * @stable ICU 3.8 3512 */ 3513 public static final int MIDLETTER = 4; 3514 /** 3515 * @stable ICU 3.8 3516 */ 3517 public static final int MIDNUM = 5; 3518 /** 3519 * @stable ICU 3.8 3520 */ 3521 public static final int NUMERIC = 6; 3522 /** 3523 * @stable ICU 3.8 3524 */ 3525 public static final int EXTENDNUMLET = 7; 3526 /** 3527 * @stable ICU 4.0 3528 */ 3529 public static final int CR = 8; 3530 /** 3531 * @stable ICU 4.0 3532 */ 3533 public static final int EXTEND = 9; 3534 /** 3535 * @stable ICU 4.0 3536 */ 3537 public static final int LF = 10; 3538 /** 3539 * 
@stable ICU 4.0 3540 */ 3541 public static final int MIDNUMLET = 11; 3542 /** 3543 * @stable ICU 4.0 3544 */ 3545 public static final int NEWLINE = 12; 3546 /** @stable ICU 50 */ 3547 public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3548 /** @stable ICU 52 */ 3549 public static final int HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 3550 /** @stable ICU 52 */ 3551 public static final int SINGLE_QUOTE = 15; /*[SQ]*/ 3552 /** @stable ICU 52 */ 3553 public static final int DOUBLE_QUOTE = 16; /*[DQ]*/ 3554 /** @stable ICU 58 */ 3555 public static final int E_BASE = 17; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3556 /** @stable ICU 58 */ 3557 public static final int E_BASE_GAZ = 18; /*[EBG]*/ 3558 /** @stable ICU 58 */ 3559 public static final int E_MODIFIER = 19; /*[EM]*/ 3560 /** @stable ICU 58 */ 3561 public static final int GLUE_AFTER_ZWJ = 20; /*[GAZ]*/ 3562 /** @stable ICU 58 */ 3563 public static final int ZWJ = 21; /*[ZWJ]*/ 3564 /** @stable ICU 62 */ 3565 public static final int WSEGSPACE = 22; /*[WSEGSPACE]*/ 3566 /** 3567 * One more than the highest normal WordBreak value. 3568 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK). 3569 * 3570 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3571 */ 3572 @Deprecated 3573 public static final int COUNT = 23; 3574 } 3575 3576 /** 3577 * Sentence Break constants. 
3578 * @see UProperty#SENTENCE_BREAK 3579 * @stable ICU 3.4 3580 */ 3581 public static interface SentenceBreak { 3582 /** 3583 * @stable ICU 3.8 3584 */ 3585 public static final int OTHER = 0; 3586 /** 3587 * @stable ICU 3.8 3588 */ 3589 public static final int ATERM = 1; 3590 /** 3591 * @stable ICU 3.8 3592 */ 3593 public static final int CLOSE = 2; 3594 /** 3595 * @stable ICU 3.8 3596 */ 3597 public static final int FORMAT = 3; 3598 /** 3599 * @stable ICU 3.8 3600 */ 3601 public static final int LOWER = 4; 3602 /** 3603 * @stable ICU 3.8 3604 */ 3605 public static final int NUMERIC = 5; 3606 /** 3607 * @stable ICU 3.8 3608 */ 3609 public static final int OLETTER = 6; 3610 /** 3611 * @stable ICU 3.8 3612 */ 3613 public static final int SEP = 7; 3614 /** 3615 * @stable ICU 3.8 3616 */ 3617 public static final int SP = 8; 3618 /** 3619 * @stable ICU 3.8 3620 */ 3621 public static final int STERM = 9; 3622 /** 3623 * @stable ICU 3.8 3624 */ 3625 public static final int UPPER = 10; 3626 /** 3627 * @stable ICU 4.0 3628 */ 3629 public static final int CR = 11; 3630 /** 3631 * @stable ICU 4.0 3632 */ 3633 public static final int EXTEND = 12; 3634 /** 3635 * @stable ICU 4.0 3636 */ 3637 public static final int LF = 13; 3638 /** 3639 * @stable ICU 4.0 3640 */ 3641 public static final int SCONTINUE = 14; 3642 /** 3643 * One more than the highest normal SentenceBreak value. 3644 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK). 3645 * 3646 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3647 */ 3648 @Deprecated 3649 public static final int COUNT = 15; 3650 } 3651 3652 /** 3653 * Line Break constants. 
3654 * @see UProperty#LINE_BREAK 3655 * @stable ICU 2.4 3656 */ 3657 public static interface LineBreak 3658 { 3659 /** 3660 * @stable ICU 2.4 3661 */ 3662 public static final int UNKNOWN = 0; 3663 /** 3664 * @stable ICU 2.4 3665 */ 3666 public static final int AMBIGUOUS = 1; 3667 /** 3668 * @stable ICU 2.4 3669 */ 3670 public static final int ALPHABETIC = 2; 3671 /** 3672 * @stable ICU 2.4 3673 */ 3674 public static final int BREAK_BOTH = 3; 3675 /** 3676 * @stable ICU 2.4 3677 */ 3678 public static final int BREAK_AFTER = 4; 3679 /** 3680 * @stable ICU 2.4 3681 */ 3682 public static final int BREAK_BEFORE = 5; 3683 /** 3684 * @stable ICU 2.4 3685 */ 3686 public static final int MANDATORY_BREAK = 6; 3687 /** 3688 * @stable ICU 2.4 3689 */ 3690 public static final int CONTINGENT_BREAK = 7; 3691 /** 3692 * @stable ICU 2.4 3693 */ 3694 public static final int CLOSE_PUNCTUATION = 8; 3695 /** 3696 * @stable ICU 2.4 3697 */ 3698 public static final int COMBINING_MARK = 9; 3699 /** 3700 * @stable ICU 2.4 3701 */ 3702 public static final int CARRIAGE_RETURN = 10; 3703 /** 3704 * @stable ICU 2.4 3705 */ 3706 public static final int EXCLAMATION = 11; 3707 /** 3708 * @stable ICU 2.4 3709 */ 3710 public static final int GLUE = 12; 3711 /** 3712 * @stable ICU 2.4 3713 */ 3714 public static final int HYPHEN = 13; 3715 /** 3716 * @stable ICU 2.4 3717 */ 3718 public static final int IDEOGRAPHIC = 14; 3719 /** 3720 * @see #INSEPARABLE 3721 * @stable ICU 2.4 3722 */ 3723 public static final int INSEPERABLE = 15; 3724 /** 3725 * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
3726 * @stable ICU 3.0 3727 */ 3728 public static final int INSEPARABLE = 15; 3729 /** 3730 * @stable ICU 2.4 3731 */ 3732 public static final int INFIX_NUMERIC = 16; 3733 /** 3734 * @stable ICU 2.4 3735 */ 3736 public static final int LINE_FEED = 17; 3737 /** 3738 * @stable ICU 2.4 3739 */ 3740 public static final int NONSTARTER = 18; 3741 /** 3742 * @stable ICU 2.4 3743 */ 3744 public static final int NUMERIC = 19; 3745 /** 3746 * @stable ICU 2.4 3747 */ 3748 public static final int OPEN_PUNCTUATION = 20; 3749 /** 3750 * @stable ICU 2.4 3751 */ 3752 public static final int POSTFIX_NUMERIC = 21; 3753 /** 3754 * @stable ICU 2.4 3755 */ 3756 public static final int PREFIX_NUMERIC = 22; 3757 /** 3758 * @stable ICU 2.4 3759 */ 3760 public static final int QUOTATION = 23; 3761 /** 3762 * @stable ICU 2.4 3763 */ 3764 public static final int COMPLEX_CONTEXT = 24; 3765 /** 3766 * @stable ICU 2.4 3767 */ 3768 public static final int SURROGATE = 25; 3769 /** 3770 * @stable ICU 2.4 3771 */ 3772 public static final int SPACE = 26; 3773 /** 3774 * @stable ICU 2.4 3775 */ 3776 public static final int BREAK_SYMBOLS = 27; 3777 /** 3778 * @stable ICU 2.4 3779 */ 3780 public static final int ZWSPACE = 28; 3781 /** 3782 * @stable ICU 2.6 3783 */ 3784 public static final int NEXT_LINE = 29; /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 3785 /** 3786 * @stable ICU 2.6 3787 */ 3788 public static final int WORD_JOINER = 30; /*[WJ]*/ 3789 /** 3790 * @stable ICU 3.4 3791 */ 3792 public static final int H2 = 31; /* from here on: new in Unicode 4.1/ICU 3.4 */ 3793 /** 3794 * @stable ICU 3.4 3795 */ 3796 public static final int H3 = 32; 3797 /** 3798 * @stable ICU 3.4 3799 */ 3800 public static final int JL = 33; 3801 /** 3802 * @stable ICU 3.4 3803 */ 3804 public static final int JT = 34; 3805 /** 3806 * @stable ICU 3.4 3807 */ 3808 public static final int JV = 35; 3809 /** @stable ICU 4.4 */ 3810 public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 
*/ 3811 /** @stable ICU 49 */ 3812 public static final int CONDITIONAL_JAPANESE_STARTER = 37; /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 3813 /** @stable ICU 49 */ 3814 public static final int HEBREW_LETTER = 38; /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 3815 /** @stable ICU 50 */ 3816 public static final int REGIONAL_INDICATOR = 39; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3817 /** @stable ICU 58 */ 3818 public static final int E_BASE = 40; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3819 /** @stable ICU 58 */ 3820 public static final int E_MODIFIER = 41; /*[EM]*/ 3821 /** @stable ICU 58 */ 3822 public static final int ZWJ = 42; /*[ZWJ]*/ 3823 /** @stable ICU 74 */ 3824 public static final int AKSARA = 43; /*[AK]*/ /* from here on: new in Unicode 15.1/ICU 74 */ 3825 /** @stable ICU 74 */ 3826 public static final int AKSARA_PREBASE = 44; /*[AP]*/ 3827 /** @stable ICU 74 */ 3828 public static final int AKSARA_START = 45; /*[AS]*/ 3829 /** @stable ICU 74 */ 3830 public static final int VIRAMA_FINAL = 46; /*[VF]*/ 3831 /** @stable ICU 74 */ 3832 public static final int VIRAMA = 47; /*[VI]*/ 3833 /** 3834 * One more than the highest normal LineBreak value. 3835 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK). 3836 * 3837 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3838 */ 3839 @Deprecated 3840 public static final int COUNT = 48; 3841 } 3842 3843 /** 3844 * Numeric Type constants. 3845 * @see UProperty#NUMERIC_TYPE 3846 * @stable ICU 2.4 3847 */ 3848 public static interface NumericType 3849 { 3850 /** 3851 * @stable ICU 2.4 3852 */ 3853 public static final int NONE = 0; 3854 /** 3855 * @stable ICU 2.4 3856 */ 3857 public static final int DECIMAL = 1; 3858 /** 3859 * @stable ICU 2.4 3860 */ 3861 public static final int DIGIT = 2; 3862 /** 3863 * @stable ICU 2.4 3864 */ 3865 public static final int NUMERIC = 3; 3866 /** 3867 * One more than the highest normal NumericType value. 
3868 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE). 3869 * 3870 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3871 */ 3872 @Deprecated 3873 public static final int COUNT = 4; 3874 } 3875 3876 /** 3877 * Hangul Syllable Type constants. 3878 * 3879 * @see UProperty#HANGUL_SYLLABLE_TYPE 3880 * @stable ICU 2.6 3881 */ 3882 public static interface HangulSyllableType 3883 { 3884 /** 3885 * @stable ICU 2.6 3886 */ 3887 public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ 3888 /** 3889 * @stable ICU 2.6 3890 */ 3891 public static final int LEADING_JAMO = 1; /*[L]*/ 3892 /** 3893 * @stable ICU 2.6 3894 */ 3895 public static final int VOWEL_JAMO = 2; /*[V]*/ 3896 /** 3897 * @stable ICU 2.6 3898 */ 3899 public static final int TRAILING_JAMO = 3; /*[T]*/ 3900 /** 3901 * @stable ICU 2.6 3902 */ 3903 public static final int LV_SYLLABLE = 4; /*[LV]*/ 3904 /** 3905 * @stable ICU 2.6 3906 */ 3907 public static final int LVT_SYLLABLE = 5; /*[LVT]*/ 3908 /** 3909 * One more than the highest normal HangulSyllableType value. 3910 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE). 3911 * 3912 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3913 */ 3914 @Deprecated 3915 public static final int COUNT = 6; 3916 } 3917 3918 /** 3919 * Bidi Paired Bracket Type constants. 3920 * 3921 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3922 * @stable ICU 52 3923 */ 3924 public static interface BidiPairedBracketType { 3925 /** 3926 * Not a paired bracket. 3927 * @stable ICU 52 3928 */ 3929 public static final int NONE = 0; 3930 /** 3931 * Open paired bracket. 3932 * @stable ICU 52 3933 */ 3934 public static final int OPEN = 1; 3935 /** 3936 * Close paired bracket. 3937 * @stable ICU 52 3938 */ 3939 public static final int CLOSE = 2; 3940 /** 3941 * One more than the highest normal BidiPairedBracketType value. 
3942 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE). 3943 * 3944 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3945 */ 3946 @Deprecated 3947 public static final int COUNT = 3; 3948 } 3949 3950 /** 3951 * Indic Positional Category constants. 3952 * 3953 * @see UProperty#INDIC_POSITIONAL_CATEGORY 3954 * @stable ICU 63 3955 */ 3956 public static interface IndicPositionalCategory { 3957 /** @stable ICU 63 */ 3958 public static final int NA = 0; 3959 /** @stable ICU 63 */ 3960 public static final int BOTTOM = 1; 3961 /** @stable ICU 63 */ 3962 public static final int BOTTOM_AND_LEFT = 2; 3963 /** @stable ICU 63 */ 3964 public static final int BOTTOM_AND_RIGHT = 3; 3965 /** @stable ICU 63 */ 3966 public static final int LEFT = 4; 3967 /** @stable ICU 63 */ 3968 public static final int LEFT_AND_RIGHT = 5; 3969 /** @stable ICU 63 */ 3970 public static final int OVERSTRUCK = 6; 3971 /** @stable ICU 63 */ 3972 public static final int RIGHT = 7; 3973 /** @stable ICU 63 */ 3974 public static final int TOP = 8; 3975 /** @stable ICU 63 */ 3976 public static final int TOP_AND_BOTTOM = 9; 3977 /** @stable ICU 63 */ 3978 public static final int TOP_AND_BOTTOM_AND_RIGHT = 10; 3979 /** @stable ICU 63 */ 3980 public static final int TOP_AND_LEFT = 11; 3981 /** @stable ICU 63 */ 3982 public static final int TOP_AND_LEFT_AND_RIGHT = 12; 3983 /** @stable ICU 63 */ 3984 public static final int TOP_AND_RIGHT = 13; 3985 /** @stable ICU 63 */ 3986 public static final int VISUAL_ORDER_LEFT = 14; 3987 /** @stable ICU 66 */ 3988 public static final int TOP_AND_BOTTOM_AND_LEFT = 15; 3989 } 3990 3991 /** 3992 * Indic Syllabic Category constants. 
3993 * 3994 * @see UProperty#INDIC_SYLLABIC_CATEGORY 3995 * @stable ICU 63 3996 */ 3997 public static interface IndicSyllabicCategory { 3998 /** @stable ICU 63 */ 3999 public static final int OTHER = 0; 4000 /** @stable ICU 63 */ 4001 public static final int AVAGRAHA = 1; 4002 /** @stable ICU 63 */ 4003 public static final int BINDU = 2; 4004 /** @stable ICU 63 */ 4005 public static final int BRAHMI_JOINING_NUMBER = 3; 4006 /** @stable ICU 63 */ 4007 public static final int CANTILLATION_MARK = 4; 4008 /** @stable ICU 63 */ 4009 public static final int CONSONANT = 5; 4010 /** @stable ICU 63 */ 4011 public static final int CONSONANT_DEAD = 6; 4012 /** @stable ICU 63 */ 4013 public static final int CONSONANT_FINAL = 7; 4014 /** @stable ICU 63 */ 4015 public static final int CONSONANT_HEAD_LETTER = 8; 4016 /** @stable ICU 63 */ 4017 public static final int CONSONANT_INITIAL_POSTFIXED = 9; 4018 /** @stable ICU 63 */ 4019 public static final int CONSONANT_KILLER = 10; 4020 /** @stable ICU 63 */ 4021 public static final int CONSONANT_MEDIAL = 11; 4022 /** @stable ICU 63 */ 4023 public static final int CONSONANT_PLACEHOLDER = 12; 4024 /** @stable ICU 63 */ 4025 public static final int CONSONANT_PRECEDING_REPHA = 13; 4026 /** @stable ICU 63 */ 4027 public static final int CONSONANT_PREFIXED = 14; 4028 /** @stable ICU 63 */ 4029 public static final int CONSONANT_SUBJOINED = 15; 4030 /** @stable ICU 63 */ 4031 public static final int CONSONANT_SUCCEEDING_REPHA = 16; 4032 /** @stable ICU 63 */ 4033 public static final int CONSONANT_WITH_STACKER = 17; 4034 /** @stable ICU 63 */ 4035 public static final int GEMINATION_MARK = 18; 4036 /** @stable ICU 63 */ 4037 public static final int INVISIBLE_STACKER = 19; 4038 /** @stable ICU 63 */ 4039 public static final int JOINER = 20; 4040 /** @stable ICU 63 */ 4041 public static final int MODIFYING_LETTER = 21; 4042 /** @stable ICU 63 */ 4043 public static final int NON_JOINER = 22; 4044 /** @stable ICU 63 */ 4045 public static final int 
NUKTA = 23; 4046 /** @stable ICU 63 */ 4047 public static final int NUMBER = 24; 4048 /** @stable ICU 63 */ 4049 public static final int NUMBER_JOINER = 25; 4050 /** @stable ICU 63 */ 4051 public static final int PURE_KILLER = 26; 4052 /** @stable ICU 63 */ 4053 public static final int REGISTER_SHIFTER = 27; 4054 /** @stable ICU 63 */ 4055 public static final int SYLLABLE_MODIFIER = 28; 4056 /** @stable ICU 63 */ 4057 public static final int TONE_LETTER = 29; 4058 /** @stable ICU 63 */ 4059 public static final int TONE_MARK = 30; 4060 /** @stable ICU 63 */ 4061 public static final int VIRAMA = 31; 4062 /** @stable ICU 63 */ 4063 public static final int VISARGA = 32; 4064 /** @stable ICU 63 */ 4065 public static final int VOWEL = 33; 4066 /** @stable ICU 63 */ 4067 public static final int VOWEL_DEPENDENT = 34; 4068 /** @stable ICU 63 */ 4069 public static final int VOWEL_INDEPENDENT = 35; 4070 } 4071 4072 /** 4073 * Vertical Orientation constants. 4074 * 4075 * @see UProperty#VERTICAL_ORIENTATION 4076 * @stable ICU 63 4077 */ 4078 public static interface VerticalOrientation { 4079 /** @stable ICU 63 */ 4080 public static final int ROTATED = 0; 4081 /** @stable ICU 63 */ 4082 public static final int TRANSFORMED_ROTATED = 1; 4083 /** @stable ICU 63 */ 4084 public static final int TRANSFORMED_UPRIGHT = 2; 4085 /** @stable ICU 63 */ 4086 public static final int UPRIGHT = 3; 4087 } 4088 4089 /** 4090 * Identifier Status constants. 4091 * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type. 4092 * 4093 * @see UProperty#IDENTIFIER_STATUS 4094 * @draft ICU 75 4095 */ 4096 public enum IdentifierStatus { 4097 /** @draft ICU 75 */ 4098 RESTRICTED, 4099 /** @draft ICU 75 */ 4100 ALLOWED, 4101 } 4102 4103 /** 4104 * Identifier Type constants. 4105 * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type. 
4106 * 4107 * @see UProperty#IDENTIFIER_TYPE 4108 * @draft ICU 75 4109 */ 4110 public enum IdentifierType { 4111 /** @draft ICU 75 */ 4112 NOT_CHARACTER, 4113 /** @draft ICU 75 */ 4114 DEPRECATED, 4115 /** @draft ICU 75 */ 4116 DEFAULT_IGNORABLE, 4117 /** @draft ICU 75 */ 4118 NOT_NFKC, 4119 /** @draft ICU 75 */ 4120 NOT_XID, 4121 /** @draft ICU 75 */ 4122 EXCLUSION, 4123 /** @draft ICU 75 */ 4124 OBSOLETE, 4125 /** @draft ICU 75 */ 4126 TECHNICAL, 4127 /** @draft ICU 75 */ 4128 UNCOMMON_USE, 4129 /** @draft ICU 75 */ 4130 LIMITED_USE, 4131 /** @draft ICU 75 */ 4132 INCLUSION, 4133 /** @draft ICU 75 */ 4134 RECOMMENDED, 4135 } 4136 4137 // public data members ----------------------------------------------- 4138 4139 /** 4140 * The lowest Unicode code point value, constant 0. 4141 * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}. 4142 * 4143 * @stable ICU 2.1 4144 */ 4145 public static final int MIN_VALUE = Character.MIN_CODE_POINT; 4146 4147 /** 4148 * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits). 4149 * Same as {@link Character#MAX_CODE_POINT}. 4150 * 4151 * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE} 4152 * which is still a char with the value U+FFFF. 4153 * 4154 * @stable ICU 2.1 4155 */ 4156 public static final int MAX_VALUE = Character.MAX_CODE_POINT; 4157 4158 /** 4159 * The minimum value for Supplementary code points, constant U+10000. 4160 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 4161 * 4162 * @stable ICU 2.1 4163 */ 4164 public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT; 4165 4166 /** 4167 * Unicode value used when translating into Unicode encoding form and there 4168 * is no existing character. 
4169 * @stable ICU 2.1 4170 */ 4171 public static final int REPLACEMENT_CHAR = '\uFFFD'; 4172 4173 /** 4174 * Special value that is returned by getUnicodeNumericValue(int) when no 4175 * numeric value is defined for a code point. 4176 * @stable ICU 2.4 4177 * @see #getUnicodeNumericValue 4178 */ 4179 public static final double NO_NUMERIC_VALUE = -123456789; 4180 4181 /** 4182 * Compatibility constant for Java Character's MIN_RADIX. 4183 * @stable ICU 3.4 4184 */ 4185 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 4186 4187 /** 4188 * Compatibility constant for Java Character's MAX_RADIX. 4189 * @stable ICU 3.4 4190 */ 4191 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 4192 4193 /** 4194 * Do not lowercase non-initial parts of words when titlecasing. 4195 * Option bit for titlecasing APIs that take an options bit set. 4196 * 4197 * By default, titlecasing will titlecase the first cased character 4198 * of a word and lowercase all other characters. 4199 * With this option, the other characters will not be modified. 4200 * 4201 * @see #toTitleCase 4202 * @stable ICU 3.8 4203 */ 4204 public static final int TITLECASE_NO_LOWERCASE = 0x100; 4205 4206 /** 4207 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; 4208 * titlecase exactly the characters at breaks from the iterator. 4209 * Option bit for titlecasing APIs that take an options bit set. 4210 * 4211 * By default, titlecasing will take each break iterator index, 4212 * adjust it by looking for the next cased character, and titlecase that one. 4213 * Other characters are lowercased. 4214 * 4215 * This follows Unicode 4 & 5 section 3.13 Default Case Operations: 4216 * 4217 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 4218 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 4219 * cased character F. 
If F exists, map F to default_title(F); then map each 4220 * subsequent character C to default_lower(C). 4221 * 4222 * @see #toTitleCase 4223 * @see #TITLECASE_NO_LOWERCASE 4224 * @stable ICU 3.8 4225 */ 4226 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 4227 4228 // public methods ---------------------------------------------------- 4229 4230 /** 4231 * Returnss the numeric value of a decimal digit code point. 4232 * <br>This method observes the semantics of 4233 * <code>java.lang.Character.digit()</code>. Note that this 4234 * will return positive values for code points for which isDigit 4235 * returns false, just like java.lang.Character. 4236 * <br><em>Semantic Change:</em> In release 1.3.1 and 4237 * prior, this did not treat the European letters as having a 4238 * digit value, and also treated numeric letters and other numbers as 4239 * digits. 4240 * This has been changed to conform to the java semantics. 4241 * <br>A code point is a valid digit if and only if: 4242 * <ul> 4243 * <li>ch is a decimal digit or one of the european letters, and 4244 * <li>the value of ch is less than the specified radix. 4245 * </ul> 4246 * @param ch the code point to query 4247 * @param radix the radix 4248 * @return the numeric value represented by the code point in the 4249 * specified radix, or -1 if the code point is not a decimal digit 4250 * or if its value is too large for the radix 4251 * @stable ICU 2.1 4252 */ digit(int ch, int radix)4253 public static int digit(int ch, int radix) 4254 { 4255 if (2 <= radix && radix <= 36) { 4256 int value = digit(ch); 4257 if (value < 0) { 4258 // ch is not a decimal digit, try latin letters 4259 value = UCharacterProperty.getEuropeanDigit(ch); 4260 } 4261 return (value < radix) ? value : -1; 4262 } else { 4263 return -1; // invalid radix 4264 } 4265 } 4266 4267 /** 4268 * Returnss the numeric value of a decimal digit code point. 
4269 * <br>This is a convenience overload of <code>digit(int, int)</code> 4270 * that provides a decimal radix. 4271 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 4272 * treated numeric letters and other numbers as digits. This has 4273 * been changed to conform to the java semantics. 4274 * @param ch the code point to query 4275 * @return the numeric value represented by the code point, 4276 * or -1 if the code point is not a decimal digit or if its 4277 * value is too large for a decimal radix 4278 * @stable ICU 2.1 4279 */ digit(int ch)4280 public static int digit(int ch) 4281 { 4282 return UCharacterProperty.INSTANCE.digit(ch); 4283 } 4284 4285 /** 4286 * Returns the numeric value of the code point as a nonnegative 4287 * integer. 4288 * <br>If the code point does not have a numeric value, then -1 is returned. 4289 * <br> 4290 * If the code point has a numeric value that cannot be represented as a 4291 * nonnegative integer (for example, a fractional value), then -2 is 4292 * returned. 4293 * @param ch the code point to query 4294 * @return the numeric value of the code point, or -1 if it has no numeric 4295 * value, or -2 if it has a numeric value that cannot be represented as a 4296 * nonnegative integer 4297 * @stable ICU 2.1 4298 */ getNumericValue(int ch)4299 public static int getNumericValue(int ch) 4300 { 4301 return UCharacterProperty.INSTANCE.getNumericValue(ch); 4302 } 4303 4304 /** 4305 * {@icu} Returns the numeric value for a Unicode code point as defined in the 4306 * Unicode Character Database. 4307 * <p>A "double" return type is necessary because some numeric values are 4308 * fractions, negative, or too large for int. 4309 * <p>For characters without any numeric values in the Unicode Character 4310 * Database, this function will return NO_NUMERIC_VALUE. 4311 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 
4312 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 4313 * return type int and returns -1 when the argument ch does not have a 4314 * corresponding numeric value. This has been changed to synch with ICU4C 4315 * 4316 * This corresponds to the ICU4C function u_getNumericValue. 4317 * @param ch Code point to get the numeric value for. 4318 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 4319 * @stable ICU 2.4 4320 */ getUnicodeNumericValue(int ch)4321 public static double getUnicodeNumericValue(int ch) 4322 { 4323 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 4324 } 4325 4326 /** 4327 * Compatibility override of Java deprecated method. This 4328 * method will always remain deprecated. 4329 * Same as java.lang.Character.isSpace(). 4330 * @param ch the code point 4331 * @return true if the code point is a space character as 4332 * defined by java.lang.Character.isSpace. 4333 * @deprecated ICU 3.4 (Java) 4334 */ 4335 @Deprecated isSpace(int ch)4336 public static boolean isSpace(int ch) { 4337 return ch <= 0x20 && 4338 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 4339 } 4340 4341 /** 4342 * Returns a value indicating a code point's Unicode category. 4343 * Up-to-date Unicode implementation of java.lang.Character.getType() 4344 * except for the above mentioned code points that had their category 4345 * changed.<br> 4346 * Return results are constants from the interface 4347 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 4348 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 4349 * those returned by java.lang.Character.getType. UCharacterCategory values 4350 * match the ones used in ICU4C, while java.lang.Character type 4351 * values, though similar, skip the value 17. 
4352 * @param ch code point whose type is to be determined 4353 * @return category which is a value of UCharacterCategory 4354 * @stable ICU 2.1 4355 */ getType(int ch)4356 public static int getType(int ch) 4357 { 4358 return UCharacterProperty.INSTANCE.getType(ch); 4359 } 4360 4361 /** 4362 * Determines if a code point has a defined meaning in the up-to-date 4363 * Unicode standard. 4364 * E.g. supplementary code points though allocated space are not defined in 4365 * Unicode yet.<br> 4366 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 4367 * @param ch code point to be determined if it is defined in the most 4368 * current version of Unicode 4369 * @return true if this code point is defined in unicode 4370 * @stable ICU 2.1 4371 */ isDefined(int ch)4372 public static boolean isDefined(int ch) 4373 { 4374 return getType(ch) != 0; 4375 } 4376 4377 /** 4378 * Determines if a code point is a Java digit. 4379 * <br>This method observes the semantics of 4380 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 4381 * digits only. 4382 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 4383 * numeric letters and other numbers as digits. 4384 * This has been changed to conform to the java semantics. 4385 * @param ch code point to query 4386 * @return true if this code point is a digit 4387 * @stable ICU 2.1 4388 */ isDigit(int ch)4389 public static boolean isDigit(int ch) 4390 { 4391 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 4392 } 4393 4394 /** 4395 * Determines if the specified code point is an ISO control character. 
4396 * A code point is considered to be an ISO control character if it is in 4397 * the range \u0000 through \u001F or in the range \u007F through 4398 * \u009F.<br> 4399 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 4400 * @param ch code point to determine if it is an ISO control character 4401 * @return true if code point is a ISO control character 4402 * @stable ICU 2.1 4403 */ isISOControl(int ch)4404 public static boolean isISOControl(int ch) 4405 { 4406 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 4407 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 4408 } 4409 4410 /** 4411 * Determines if the specified code point is a letter. 4412 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 4413 * @param ch code point to determine if it is a letter 4414 * @return true if code point is a letter 4415 * @stable ICU 2.1 4416 */ isLetter(int ch)4417 public static boolean isLetter(int ch) 4418 { 4419 // if props == 0, it will just fall through and return false 4420 return ((1 << getType(ch)) 4421 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4422 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4423 | (1 << UCharacterCategory.TITLECASE_LETTER) 4424 | (1 << UCharacterCategory.MODIFIER_LETTER) 4425 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 4426 } 4427 4428 /** 4429 * Determines if the specified code point is a letter or digit. 4430 * {@icunote} This method, unlike java.lang.Character does not regard the ascii 4431 * characters 'A' - 'Z' and 'a' - 'z' as digits. 
4432 * @param ch code point to determine if it is a letter or a digit 4433 * @return true if code point is a letter or a digit 4434 * @stable ICU 2.1 4435 */ isLetterOrDigit(int ch)4436 public static boolean isLetterOrDigit(int ch) 4437 { 4438 return ((1 << getType(ch)) 4439 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4440 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4441 | (1 << UCharacterCategory.TITLECASE_LETTER) 4442 | (1 << UCharacterCategory.MODIFIER_LETTER) 4443 | (1 << UCharacterCategory.OTHER_LETTER) 4444 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 4445 } 4446 4447 /** 4448 * Compatibility override of Java deprecated method. This 4449 * method will always remain deprecated. Delegates to 4450 * java.lang.Character.isJavaIdentifierStart. 4451 * @param cp the code point 4452 * @return true if the code point can start a java identifier. 4453 * @deprecated ICU 3.4 (Java) 4454 */ 4455 @Deprecated isJavaLetter(int cp)4456 public static boolean isJavaLetter(int cp) { 4457 return isJavaIdentifierStart(cp); 4458 } 4459 4460 /** 4461 * Compatibility override of Java deprecated method. This 4462 * method will always remain deprecated. Delegates to 4463 * java.lang.Character.isJavaIdentifierPart. 4464 * @param cp the code point 4465 * @return true if the code point can continue a java identifier. 4466 * @deprecated ICU 3.4 (Java) 4467 */ 4468 @Deprecated isJavaLetterOrDigit(int cp)4469 public static boolean isJavaLetterOrDigit(int cp) { 4470 return isJavaIdentifierPart(cp); 4471 } 4472 4473 /** 4474 * Compatibility override of Java method, delegates to 4475 * java.lang.Character.isJavaIdentifierStart. 4476 * @param cp the code point 4477 * @return true if the code point can start a java identifier. 
4478 * @stable ICU 3.4 4479 */ isJavaIdentifierStart(int cp)4480 public static boolean isJavaIdentifierStart(int cp) { 4481 // note, downcast to char for jdk 1.4 compatibility 4482 return java.lang.Character.isJavaIdentifierStart((char)cp); 4483 } 4484 4485 /** 4486 * Compatibility override of Java method, delegates to 4487 * java.lang.Character.isJavaIdentifierPart. 4488 * @param cp the code point 4489 * @return true if the code point can continue a java identifier. 4490 * @stable ICU 3.4 4491 */ isJavaIdentifierPart(int cp)4492 public static boolean isJavaIdentifierPart(int cp) { 4493 // note, downcast to char for jdk 1.4 compatibility 4494 return java.lang.Character.isJavaIdentifierPart((char)cp); 4495 } 4496 4497 /** 4498 * Determines if the specified code point is a lowercase character. 4499 * UnicodeData only contains case mappings for code points where they are 4500 * one-to-one mappings; it also omits information about context-sensitive 4501 * case mappings.<br> For more information about Unicode case mapping 4502 * please refer to the 4503 * <a href=https://www.unicode.org/reports/tr21/>Technical report 4504 * #21</a>.<br> 4505 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 4506 * @param ch code point to determine if it is in lowercase 4507 * @return true if code point is a lowercase character 4508 * @stable ICU 2.1 4509 */ isLowerCase(int ch)4510 public static boolean isLowerCase(int ch) 4511 { 4512 // if props == 0, it will just fall through and return false 4513 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 4514 } 4515 4516 /** 4517 * Determines if the specified code point is a white space character. 4518 * A code point is considered to be an whitespace character if and only 4519 * if it satisfies one of the following criteria: 4520 * <ul> 4521 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 4522 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 
4523 * <li> It is \u0009, HORIZONTAL TABULATION. 4524 * <li> It is \u000A, LINE FEED. 4525 * <li> It is \u000B, VERTICAL TABULATION. 4526 * <li> It is \u000C, FORM FEED. 4527 * <li> It is \u000D, CARRIAGE RETURN. 4528 * <li> It is \u001C, FILE SEPARATOR. 4529 * <li> It is \u001D, GROUP SEPARATOR. 4530 * <li> It is \u001E, RECORD SEPARATOR. 4531 * <li> It is \u001F, UNIT SEPARATOR. 4532 * </ul> 4533 * 4534 * This API tries to sync with the semantics of Java's 4535 * java.lang.Character.isWhitespace(), but it may not return 4536 * the exact same results because of the Unicode version 4537 * difference. 4538 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 4539 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 4540 * See http://www.unicode.org/versions/Unicode4.0.1/ 4541 * @param ch code point to determine if it is a white space 4542 * @return true if the specified code point is a white space character 4543 * @stable ICU 2.1 4544 */ isWhitespace(int ch)4545 public static boolean isWhitespace(int ch) 4546 { 4547 // exclude no-break spaces 4548 // if props == 0, it will just fall through and return false 4549 return ((1 << getType(ch)) & 4550 ((1 << UCharacterCategory.SPACE_SEPARATOR) 4551 | (1 << UCharacterCategory.LINE_SEPARATOR) 4552 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 4553 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 4554 // TAB VT LF FF CR FS GS RS US NL are all control characters 4555 // that are white spaces. 4556 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 4557 } 4558 4559 /** 4560 * Determines if the specified code point is a Unicode specified space 4561 * character, i.e. if code point is in the category Zs, Zl and Zp. 4562 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
4563 * @param ch code point to determine if it is a space 4564 * @return true if the specified code point is a space character 4565 * @stable ICU 2.1 4566 */ isSpaceChar(int ch)4567 public static boolean isSpaceChar(int ch) 4568 { 4569 // if props == 0, it will just fall through and return false 4570 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 4571 | (1 << UCharacterCategory.LINE_SEPARATOR) 4572 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 4573 != 0; 4574 } 4575 4576 /** 4577 * Determines if the specified code point is a titlecase character. 4578 * UnicodeData only contains case mappings for code points where they are 4579 * one-to-one mappings; it also omits information about context-sensitive 4580 * case mappings.<br> 4581 * For more information about Unicode case mapping please refer to the 4582 * <a href=https://www.unicode.org/reports/tr21/> 4583 * Technical report #21</a>.<br> 4584 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 4585 * @param ch code point to determine if it is in title case 4586 * @return true if the specified code point is a titlecase character 4587 * @stable ICU 2.1 4588 */ isTitleCase(int ch)4589 public static boolean isTitleCase(int ch) 4590 { 4591 // if props == 0, it will just fall through and return false 4592 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 4593 } 4594 4595 /** 4596 * Determines if the specified character is permissible as a 4597 * non-initial character of an identifier 4598 * according to UAX #31 Unicode Identifier and Pattern Syntax. 4599 * 4600 * <p>Same as Unicode ID_Continue ({@link UProperty#ID_CONTINUE}). 4601 * 4602 * <p>Note that this differs from {@link java.lang.Character#isUnicodeIdentifierPart(char)} 4603 * which implements a different identifier profile. 
4604 * 4605 * @param ch the code point to be tested 4606 * @return true if the code point may occur as a non-initial character of an identifier 4607 * @stable ICU 2.1 4608 */ isUnicodeIdentifierPart(int ch)4609 public static boolean isUnicodeIdentifierPart(int ch) 4610 { 4611 return hasBinaryProperty(ch, UProperty.ID_CONTINUE); // single code point 4612 } 4613 4614 /** 4615 * Determines if the specified character is permissible as the first character in an identifier 4616 * according to UAX #31 Unicode Identifier and Pattern Syntax. 4617 * 4618 * <p>Same as Unicode ID_Start ({@link UProperty#ID_START}). 4619 * 4620 * <p>Note that this differs from {@link java.lang.Character#isUnicodeIdentifierStart(char)} 4621 * which implements a different identifier profile. 4622 * 4623 * @param ch the code point to be tested 4624 * @return true if the code point may start an identifier 4625 * @stable ICU 2.1 4626 */ isUnicodeIdentifierStart(int ch)4627 public static boolean isUnicodeIdentifierStart(int ch) 4628 { 4629 return hasBinaryProperty(ch, UProperty.ID_START); // single code point 4630 } 4631 4632 /** 4633 * Does the set of Identifier_Type values code point c contain the given type? 4634 * 4635 * <p>Used for UTS #39 General Security Profile for Identifiers 4636 * (https://www.unicode.org/reports/tr39/#General_Security_Profile). 4637 * 4638 * <p>Each code point maps to a <i>set</i> of UIdentifierType values. 4639 * 4640 * @param c code point 4641 * @param type Identifier_Type to check 4642 * @return true if type is in Identifier_Type(c) 4643 * @draft ICU 75 4644 */ hasIdentifierType(int c, IdentifierType type)4645 public static final boolean hasIdentifierType(int c, IdentifierType type) { 4646 return UCharacterProperty.INSTANCE.hasIDType(c, type); 4647 } 4648 4649 /** 4650 * Writes code point c's Identifier_Type as a set of IdentifierType values and 4651 * returns the number of types. 4652 * The set is cleared before c's types are added. 
4653 * 4654 * <p>Used for UTS #39 General Security Profile for Identifiers 4655 * (https://www.unicode.org/reports/tr39/#General_Security_Profile). 4656 * 4657 * <p>Each code point maps to a <i>set</i> of IdentifierType values. 4658 * There is always at least one type. 4659 * Only some of the types can be combined with others, 4660 * and usually only a small number of types occur together. 4661 * Future versions might add additional types. 4662 * See UTS #39 and its data files for details. 4663 * 4664 * @param c code point 4665 * @param types output set 4666 * @return number of values in c's Identifier_Type 4667 * @draft ICU 75 4668 */ getIdentifierTypes(int c, EnumSet<IdentifierType> types)4669 public static final int getIdentifierTypes(int c, EnumSet<IdentifierType> types) { 4670 return UCharacterProperty.INSTANCE.getIDTypes(c, types); 4671 } 4672 4673 /** 4674 * Determines if the specified code point should be regarded as an 4675 * ignorable character in a Java identifier. 4676 * A character is Java-identifier-ignorable if it has the general category 4677 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 4678 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 4679 * Up-to-date Unicode implementation of 4680 * java.lang.Character.isIdentifierIgnorable().<br> 4681 * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>. 4682 * <p>Note that Unicode just recommends to ignore Cf (format controls). 4683 * @param ch code point to be determined if it can be ignored in a Unicode 4684 * identifier. 4685 * @return true if the code point is ignorable 4686 * @stable ICU 2.1 4687 */ isIdentifierIgnorable(int ch)4688 public static boolean isIdentifierIgnorable(int ch) 4689 { 4690 // see java.lang.Character.isIdentifierIgnorable() on range of 4691 // ignorable characters. 
4692 if (ch <= 0x9f) { 4693 return isISOControl(ch) 4694 && !((ch >= 0x9 && ch <= 0xd) 4695 || (ch >= 0x1c && ch <= 0x1f)); 4696 } 4697 return getType(ch) == UCharacterCategory.FORMAT; 4698 } 4699 4700 /** 4701 * Determines if the specified code point is an uppercase character. 4702 * UnicodeData only contains case mappings for code point where they are 4703 * one-to-one mappings; it also omits information about context-sensitive 4704 * case mappings.<br> 4705 * For language specific case conversion behavior, use 4706 * toUpperCase(locale, str). <br> 4707 * For example, the case conversion for dot-less i and dotted I in Turkish, 4708 * or for final sigma in Greek. 4709 * For more information about Unicode case mapping please refer to the 4710 * <a href=https://www.unicode.org/reports/tr21/> 4711 * Technical report #21</a>.<br> 4712 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 4713 * @param ch code point to determine if it is in uppercase 4714 * @return true if the code point is an uppercase character 4715 * @stable ICU 2.1 4716 */ isUpperCase(int ch)4717 public static boolean isUpperCase(int ch) 4718 { 4719 // if props == 0, it will just fall through and return false 4720 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 4721 } 4722 4723 /** 4724 * The given code point is mapped to its lowercase equivalent; if the code 4725 * point has no lowercase equivalent, the code point itself is returned. 4726 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 4727 * 4728 * <p>This function only returns the simple, single-code point case mapping. 4729 * Full case mappings should be used whenever possible because they produce 4730 * better results by working on whole strings. 4731 * They take into account the string context and the language and can map 4732 * to a result string with a different length as appropriate. 
4733 * Full case mappings are applied by the case mapping functions 4734 * that take String parameters rather than code points (int). 4735 * See also the User Guide chapter on C/POSIX migration: 4736 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4737 * 4738 * @param ch code point whose lowercase equivalent is to be retrieved 4739 * @return the lowercase equivalent code point 4740 * @stable ICU 2.1 4741 */ toLowerCase(int ch)4742 public static int toLowerCase(int ch) { 4743 return UCaseProps.INSTANCE.tolower(ch); 4744 } 4745 4746 /** 4747 * Converts argument code point and returns a String object representing 4748 * the code point's value in UTF-16 format. 4749 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 4750 * 4751 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 4752 * 4753 * @param ch code point 4754 * @return string representation of the code point, null if code point is not 4755 * defined in unicode 4756 * @stable ICU 2.1 4757 */ toString(int ch)4758 public static String toString(int ch) 4759 { 4760 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4761 return null; 4762 } 4763 4764 if (ch < SUPPLEMENTARY_MIN_VALUE) { 4765 return String.valueOf((char)ch); 4766 } 4767 4768 return new String(Character.toChars(ch)); 4769 } 4770 4771 /** 4772 * Converts the code point argument to titlecase. 4773 * If no titlecase is available, the uppercase is returned. If no uppercase 4774 * is available, the code point itself is returned. 4775 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4776 * 4777 * <p>This function only returns the simple, single-code point case mapping. 4778 * Full case mappings should be used whenever possible because they produce 4779 * better results by working on whole strings. 4780 * They take into account the string context and the language and can map 4781 * to a result string with a different length as appropriate. 
4782 * Full case mappings are applied by the case mapping functions 4783 * that take String parameters rather than code points (int). 4784 * See also the User Guide chapter on C/POSIX migration: 4785 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4786 * 4787 * @param ch code point whose title case is to be retrieved 4788 * @return titlecase code point 4789 * @stable ICU 2.1 4790 */ toTitleCase(int ch)4791 public static int toTitleCase(int ch) { 4792 return UCaseProps.INSTANCE.totitle(ch); 4793 } 4794 4795 /** 4796 * Converts the character argument to uppercase. 4797 * If no uppercase is available, the character itself is returned. 4798 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4799 * 4800 * <p>This function only returns the simple, single-code point case mapping. 4801 * Full case mappings should be used whenever possible because they produce 4802 * better results by working on whole strings. 4803 * They take into account the string context and the language and can map 4804 * to a result string with a different length as appropriate. 4805 * Full case mappings are applied by the case mapping functions 4806 * that take String parameters rather than code points (int). 4807 * See also the User Guide chapter on C/POSIX migration: 4808 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4809 * 4810 * @param ch code point whose uppercase is to be retrieved 4811 * @return uppercase code point 4812 * @stable ICU 2.1 4813 */ toUpperCase(int ch)4814 public static int toUpperCase(int ch) { 4815 return UCaseProps.INSTANCE.toupper(ch); 4816 } 4817 4818 // extra methods not in java.lang.Character -------------------------- 4819 4820 /** 4821 * {@icu} Determines if the code point is a supplementary character. 
4822 * A code point is a supplementary character if and only if it is greater 4823 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4824 * @param ch code point to be determined if it is in the supplementary 4825 * plane 4826 * @return true if code point is a supplementary character 4827 * @stable ICU 2.1 4828 */ isSupplementary(int ch)4829 public static boolean isSupplementary(int ch) 4830 { 4831 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4832 ch <= UCharacter.MAX_VALUE; 4833 } 4834 4835 /** 4836 * {@icu} Determines if the code point is in the BMP plane. 4837 * @param ch code point to be determined if it is not a supplementary 4838 * character 4839 * @return true if code point is not a supplementary character 4840 * @stable ICU 2.1 4841 */ isBMP(int ch)4842 public static boolean isBMP(int ch) 4843 { 4844 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4845 } 4846 4847 /** 4848 * {@icu} Determines whether the specified code point is a printable character 4849 * according to the Unicode standard. 4850 * @param ch code point to be determined if it is printable 4851 * @return true if the code point is a printable character 4852 * @stable ICU 2.1 4853 */ isPrintable(int ch)4854 public static boolean isPrintable(int ch) 4855 { 4856 int cat = getType(ch); 4857 // if props == 0, it will just fall through and return false 4858 return (cat != UCharacterCategory.UNASSIGNED && 4859 cat != UCharacterCategory.CONTROL && 4860 cat != UCharacterCategory.FORMAT && 4861 cat != UCharacterCategory.PRIVATE_USE && 4862 cat != UCharacterCategory.SURROGATE && 4863 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4864 } 4865 4866 /** 4867 * {@icu} Determines whether the specified code point is of base form. 4868 * A code point of base form does not graphically combine with preceding 4869 * characters, and is neither a control nor a format character. 
4870 * @param ch code point to be determined if it is of base form 4871 * @return true if the code point is of base form 4872 * @stable ICU 2.1 4873 */ isBaseForm(int ch)4874 public static boolean isBaseForm(int ch) 4875 { 4876 int cat = getType(ch); 4877 // if props == 0, it will just fall through and return false 4878 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4879 cat == UCharacterCategory.OTHER_NUMBER || 4880 cat == UCharacterCategory.LETTER_NUMBER || 4881 cat == UCharacterCategory.UPPERCASE_LETTER || 4882 cat == UCharacterCategory.LOWERCASE_LETTER || 4883 cat == UCharacterCategory.TITLECASE_LETTER || 4884 cat == UCharacterCategory.MODIFIER_LETTER || 4885 cat == UCharacterCategory.OTHER_LETTER || 4886 cat == UCharacterCategory.NON_SPACING_MARK || 4887 cat == UCharacterCategory.ENCLOSING_MARK || 4888 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4889 } 4890 4891 /** 4892 * {@icu} Returns the Bidirection property of a code point. 4893 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4894 * property.<br> 4895 * Result returned belongs to the interface 4896 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4897 * @param ch the code point to be determined its direction 4898 * @return direction constant from UCharacterDirection. 4899 * @stable ICU 2.1 4900 */ getDirection(int ch)4901 public static int getDirection(int ch) 4902 { 4903 return UBiDiProps.INSTANCE.getClass(ch); 4904 } 4905 4906 /** 4907 * Determines whether the code point has the "mirrored" property. 4908 * This property is set for characters that are commonly used in 4909 * Right-To-Left contexts and need to be displayed with a "mirrored" 4910 * glyph. 
4911 * @param ch code point whose mirror is to be determined 4912 * @return true if the code point has the "mirrored" property 4913 * @stable ICU 2.1 4914 */ isMirrored(int ch)4915 public static boolean isMirrored(int ch) 4916 { 4917 return UBiDiProps.INSTANCE.isMirrored(ch); 4918 } 4919 4920 /** 4921 * {@icu} Maps the specified code point to a "mirror-image" code point. 4922 * For code points with the "mirrored" property, implementations sometimes 4923 * need a "poor man's" mapping to another code point such that the default 4924 * glyph may serve as the mirror-image of the default glyph of the 4925 * specified code point.<br> 4926 * This is useful for text conversion to and from codepages with visual 4927 * order, and for displays without glyph selection capabilities. 4928 * @param ch code point whose mirror is to be retrieved 4929 * @return another code point that may serve as a mirror-image substitute, 4930 * or ch itself if there is no such mapping or ch does not have the 4931 * "mirrored" property 4932 * @stable ICU 2.1 4933 */ getMirror(int ch)4934 public static int getMirror(int ch) 4935 { 4936 return UBiDiProps.INSTANCE.getMirror(ch); 4937 } 4938 4939 /** 4940 * {@icu} Maps the specified character to its paired bracket character. 4941 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4942 * Otherwise c itself is returned. 
4943 * See http://www.unicode.org/reports/tr9/ 4944 * 4945 * @param c the code point to be mapped 4946 * @return the paired bracket code point, 4947 * or c itself if there is no such mapping 4948 * (Bidi_Paired_Bracket_Type=None) 4949 * 4950 * @see UProperty#BIDI_PAIRED_BRACKET 4951 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4952 * @see #getMirror(int) 4953 * @stable ICU 52 4954 */ getBidiPairedBracket(int c)4955 public static int getBidiPairedBracket(int c) { 4956 return UBiDiProps.INSTANCE.getPairedBracket(c); 4957 } 4958 4959 /** 4960 * {@icu} Returns the combining class of the argument codepoint 4961 * @param ch code point whose combining is to be retrieved 4962 * @return the combining class of the codepoint 4963 * @stable ICU 2.1 4964 */ getCombiningClass(int ch)4965 public static int getCombiningClass(int ch) 4966 { 4967 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4968 } 4969 4970 /** 4971 * {@icu} A code point is illegal if and only if 4972 * <ul> 4973 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4974 * <li> A surrogate value, 0xD800 to 0xDFFF 4975 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4976 * </ul> 4977 * Note: legal does not mean that it is assigned in this version of Unicode. 4978 * @param ch code point to determine if it is a legal code point by itself 4979 * @return true if and only if legal. 4980 * @stable ICU 2.1 4981 */ isLegal(int ch)4982 public static boolean isLegal(int ch) 4983 { 4984 if (ch < MIN_VALUE) { 4985 return false; 4986 } 4987 if (ch < Character.MIN_SURROGATE) { 4988 return true; 4989 } 4990 if (ch <= Character.MAX_SURROGATE) { 4991 return false; 4992 } 4993 if (UCharacterUtility.isNonCharacter(ch)) { 4994 return false; 4995 } 4996 return (ch <= MAX_VALUE); 4997 } 4998 4999 /** 5000 * {@icu} A string is legal iff all its code points are legal. 
5001 * A code point is illegal if and only if 5002 * <ul> 5003 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 5004 * <li> A surrogate value, 0xD800 to 0xDFFF 5005 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 5006 * </ul> 5007 * Note: legal does not mean that it is assigned in this version of Unicode. 5008 * @param str containing code points to examin 5009 * @return true if and only if legal. 5010 * @stable ICU 2.1 5011 */ isLegal(String str)5012 public static boolean isLegal(String str) 5013 { 5014 int size = str.length(); 5015 int codepoint; 5016 for (int i = 0; i < size; i += Character.charCount(codepoint)) 5017 { 5018 codepoint = str.codePointAt(i); 5019 if (!isLegal(codepoint)) { 5020 return false; 5021 } 5022 } 5023 return true; 5024 } 5025 5026 /** 5027 * {@icu} Returns the version of Unicode data used. 5028 * @return the unicode version number used 5029 * @stable ICU 2.1 5030 */ getUnicodeVersion()5031 public static VersionInfo getUnicodeVersion() 5032 { 5033 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 5034 } 5035 5036 /** 5037 * {@icu} Returns the most current Unicode name of the argument code point, or 5038 * null if the character is unassigned or outside the range 5039 * {@code UCharacter.MIN_VALUE} and {@code UCharacter.MAX_VALUE} or does not 5040 * have a name. 5041 * <br> 5042 * Note calling any methods related to code point names, e.g. {@code getName()} 5043 * incurs a one-time initialization cost to construct the name tables. 
5044 * @param ch the code point for which to get the name 5045 * @return most current Unicode name 5046 * @stable ICU 2.1 5047 */ getName(int ch)5048 public static String getName(int ch) 5049 { 5050 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 5051 } 5052 5053 /** 5054 * {@icu} Returns the names for each of the characters in a string 5055 * @param s string to format 5056 * @param separator string to go between names 5057 * @return string of names 5058 * @stable ICU 3.8 5059 */ getName(String s, String separator)5060 public static String getName(String s, String separator) { 5061 if (s.length() == 1) { // handle common case 5062 return getName(s.charAt(0)); 5063 } 5064 int cp; 5065 StringBuilder sb = new StringBuilder(); 5066 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 5067 cp = s.codePointAt(i); 5068 if (i != 0) sb.append(separator); 5069 sb.append(UCharacter.getName(cp)); 5070 } 5071 return sb.toString(); 5072 } 5073 5074 /** 5075 * {@icu} Returns null. 5076 * Used to return the Unicode_1_Name property value which was of little practical value. 5077 * @param ch the code point for which to get the name 5078 * @return null 5079 * @deprecated ICU 49 5080 */ 5081 @Deprecated getName1_0(int ch)5082 public static String getName1_0(int ch) 5083 { 5084 return null; 5085 } 5086 5087 /** 5088 * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and 5089 * getName1_0(int), this method will return a name even for codepoints that 5090 * are not assigned a name in UnicodeData.txt. 5091 * 5092 * <p>The names are returned in the following order. 5093 * <ul> 5094 * <li> Most current Unicode name if there is any 5095 * <li> Unicode 1.0 name if there is any 5096 * <li> Extended name in the form of 5097 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 5098 * </ul> 5099 * Note calling any methods related to code point names, e.g. 
{@code getName()} 5100 * incurs a one-time initialization cost to construct the name tables. 5101 * @param ch the code point for which to get the name 5102 * @return a name for the argument codepoint 5103 * @stable ICU 2.6 5104 */ getExtendedName(int ch)5105 public static String getExtendedName(int ch) { 5106 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 5107 } 5108 5109 /** 5110 * {@icu} Returns the corrected name from NameAliases.txt if there is one. 5111 * Returns null if the character is unassigned or outside the range 5112 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 5113 * <br> 5114 * Note calling any methods related to code point names, e.g. {@code getName()} 5115 * incurs a one-time initialization cost to construct the name tables. 5116 * @param ch the code point for which to get the name alias 5117 * @return Unicode name alias, or null 5118 * @stable ICU 4.4 5119 */ getNameAlias(int ch)5120 public static String getNameAlias(int ch) 5121 { 5122 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 5123 } 5124 5125 /** 5126 * {@icu} Returns null. 5127 * Used to return the ISO 10646 comment for a character. 5128 * The Unicode ISO_Comment property is deprecated and has no values. 5129 * 5130 * @param ch The code point for which to get the ISO comment. 5131 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 5132 * @return null 5133 * @deprecated ICU 49 5134 */ 5135 @Deprecated getISOComment(int ch)5136 public static String getISOComment(int ch) 5137 { 5138 return null; 5139 } 5140 5141 /** 5142 * {@icu} <p>Finds a Unicode code point by its most current Unicode name and 5143 * return its code point value. All Unicode names are in uppercase. 5144 * Note calling any methods related to code point names, e.g. {@code getName()} 5145 * incurs a one-time initialization cost to construct the name tables. 
5146 * @param name most current Unicode character name whose code point is to 5147 * be returned 5148 * @return code point or -1 if name is not found 5149 * @stable ICU 2.1 5150 */ getCharFromName(String name)5151 public static int getCharFromName(String name){ 5152 return UCharacterName.INSTANCE.getCharFromName( 5153 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 5154 } 5155 5156 /** 5157 * {@icu} Returns -1. 5158 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 5159 * its code point value. 5160 * @param name Unicode 1.0 code point name whose code point is to be 5161 * returned 5162 * @return -1 5163 * @deprecated ICU 49 5164 * @see #getName1_0(int) 5165 */ 5166 @Deprecated getCharFromName1_0(String name)5167 public static int getCharFromName1_0(String name){ 5168 return -1; 5169 } 5170 5171 /** 5172 * {@icu} <p>Find a Unicode character by either its name and return its code 5173 * point value. All Unicode names are in uppercase. 5174 * Extended names are all lowercase except for numbers and are contained 5175 * within angle brackets. 5176 * The names are searched in the following order 5177 * <ul> 5178 * <li> Most current Unicode name if there is any 5179 * <li> Unicode 1.0 name if there is any 5180 * <li> Extended name in the form of 5181 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 5182 * </ul> 5183 * Note calling any methods related to code point names, e.g. {@code getName()} 5184 * incurs a one-time initialization cost to construct the name tables. 5185 * @param name codepoint name 5186 * @return code point associated with the name or -1 if the name is not 5187 * found. 
5188 * @stable ICU 2.6 5189 */ getCharFromExtendedName(String name)5190 public static int getCharFromExtendedName(String name){ 5191 return UCharacterName.INSTANCE.getCharFromName( 5192 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 5193 } 5194 5195 /** 5196 * {@icu} <p>Find a Unicode character by its corrected name alias and return 5197 * its code point value. All Unicode names are in uppercase. 5198 * Note calling any methods related to code point names, e.g. {@code getName()} 5199 * incurs a one-time initialization cost to construct the name tables. 5200 * @param name Unicode name alias whose code point is to be returned 5201 * @return code point or -1 if name is not found 5202 * @stable ICU 4.4 5203 */ getCharFromNameAlias(String name)5204 public static int getCharFromNameAlias(String name){ 5205 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 5206 } 5207 5208 /** 5209 * {@icu} Return the Unicode name for a given property, as given in the 5210 * Unicode database file PropertyAliases.txt. Most properties 5211 * have more than one name. The nameChoice determines which one 5212 * is returned. 5213 * 5214 * In addition, this function maps the property 5215 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 5216 * "General_Category_Mask". These names are not in 5217 * PropertyAliases.txt. 5218 * 5219 * @param property UProperty selector. 5220 * 5221 * @param nameChoice UProperty.NameChoice selector for which name 5222 * to get. All properties have a long name. Most have a short 5223 * name, but some do not. Unicode allows for additional names; if 5224 * present these will be returned by UProperty.NameChoice.LONG + i, 5225 * where i=1, 2,... 5226 * 5227 * @return a name, or null if Unicode explicitly defines no name 5228 * ("n/a") for a given property/nameChoice. If a given nameChoice 5229 * throws an exception, then all larger values of nameChoice will 5230 * throw an exception. 
If null is returned for a given 5231 * nameChoice, then other nameChoice values may return non-null 5232 * results. 5233 * 5234 * @exception IllegalArgumentException thrown if property or 5235 * nameChoice are invalid. 5236 * 5237 * @see UProperty 5238 * @see UProperty.NameChoice 5239 * @stable ICU 2.4 5240 */ getPropertyName(int property, int nameChoice)5241 public static String getPropertyName(int property, 5242 int nameChoice) { 5243 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 5244 } 5245 5246 /** 5247 * {@icu} Return the UProperty selector for a given property name, as 5248 * specified in the Unicode database file PropertyAliases.txt. 5249 * Short, long, and any other variants are recognized. 5250 * 5251 * In addition, this function maps the synthetic names "gcm" / 5252 * "General_Category_Mask" to the property 5253 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 5254 * PropertyAliases.txt. 5255 * 5256 * @param propertyAlias the property name to be matched. The name 5257 * is compared using "loose matching" as described in 5258 * PropertyAliases.txt. 5259 * 5260 * @return a UProperty enum. 5261 * 5262 * @exception IllegalArgumentException thrown if propertyAlias 5263 * is not recognized. 5264 * 5265 * @see UProperty 5266 * @stable ICU 2.4 5267 */ getPropertyEnum(CharSequence propertyAlias)5268 public static int getPropertyEnum(CharSequence propertyAlias) { 5269 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 5270 if (propEnum == UProperty.UNDEFINED) { 5271 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 5272 } 5273 return propEnum; 5274 } 5275 5276 /** 5277 * {@icu} Return the Unicode name for a given property value, as given in 5278 * the Unicode database file PropertyValueAliases.txt. Most 5279 * values have more than one name. The nameChoice determines 5280 * which one is returned. 
5281 * 5282 * Note: Some of the names in PropertyValueAliases.txt can only be 5283 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 5284 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 5285 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 5286 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 5287 * 5288 * @param property UProperty selector constant. 5289 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 5290 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 5291 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 5292 * If out of range, null is returned. 5293 * 5294 * @param value selector for a value for the given property. In 5295 * general, valid values range from 0 up to some maximum. There 5296 * are a few exceptions: (1.) UProperty.BLOCK values begin at the 5297 * non-zero value BASIC_LATIN.getID(). (2.) 5298 * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous 5299 * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values 5300 * are mask values produced by left-shifting 1 by 5301 * UCharacter.getType(). This allows grouped categories such as 5302 * [:L:] to be represented. Mask values are non-contiguous. 5303 * 5304 * @param nameChoice UProperty.NameChoice selector for which name 5305 * to get. All values have a long name. Most have a short name, 5306 * but some do not. Unicode allows for additional names; if 5307 * present these will be returned by UProperty.NameChoice.LONG + i, 5308 * where i=1, 2,... 5309 * 5310 * @return a name, or null if Unicode explicitly defines no name 5311 * ("n/a") for a given property/value/nameChoice. If a given 5312 * nameChoice throws an exception, then all larger values of 5313 * nameChoice will throw an exception. If null is returned for a 5314 * given nameChoice, then other nameChoice values may return 5315 * non-null results. 
5316 * 5317 * @exception IllegalArgumentException thrown if property, value, 5318 * or nameChoice are invalid. 5319 * 5320 * @see UProperty 5321 * @see UProperty.NameChoice 5322 * @stable ICU 2.4 5323 */ getPropertyValueName(int property, int value, int nameChoice)5324 public static String getPropertyValueName(int property, 5325 int value, 5326 int nameChoice) 5327 { 5328 if ((property == UProperty.CANONICAL_COMBINING_CLASS 5329 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 5330 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 5331 && value >= UCharacter.getIntPropertyMinValue( 5332 UProperty.CANONICAL_COMBINING_CLASS) 5333 && value <= UCharacter.getIntPropertyMaxValue( 5334 UProperty.CANONICAL_COMBINING_CLASS) 5335 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 5336 // this is hard coded for the valid cc 5337 // because PropertyValueAliases.txt does not contain all of them 5338 try { 5339 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 5340 nameChoice); 5341 } 5342 catch (IllegalArgumentException e) { 5343 return null; 5344 } 5345 } 5346 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 5347 } 5348 5349 /** 5350 * {@icu} Return the property value integer for a given value name, as 5351 * specified in the Unicode database file PropertyValueAliases.txt. 5352 * Short, long, and any other variants are recognized. 5353 * 5354 * Note: Some of the names in PropertyValueAliases.txt will only be 5355 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 5356 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 5357 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 5358 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 5359 * 5360 * @param property UProperty selector constant. 
5361 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 5362 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 5363 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 5364 * Only these properties can be enumerated. 5365 * 5366 * @param valueAlias the value name to be matched. The name is 5367 * compared using "loose matching" as described in 5368 * PropertyValueAliases.txt. 5369 * 5370 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 5371 * values are mask values produced by left-shifting 1 by 5372 * UCharacter.getType(). This allows grouped categories such as 5373 * [:L:] to be represented. 5374 * 5375 * @see UProperty 5376 * @throws IllegalArgumentException if property is not a valid UProperty 5377 * selector or valueAlias is not a value of this property 5378 * @stable ICU 2.4 5379 */ getPropertyValueEnum(int property, CharSequence valueAlias)5380 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 5381 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 5382 if (propEnum == UProperty.UNDEFINED) { 5383 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 5384 } 5385 return propEnum; 5386 } 5387 5388 /** 5389 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 5390 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 5391 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 5392 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 5393 * @internal 5394 * @deprecated This API is ICU internal only. 
5395 */ 5396 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)5397 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 5398 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 5399 } 5400 5401 5402 /** 5403 * {@icu} Returns a code point corresponding to the two surrogate code units. 5404 * 5405 * @param lead the lead unit 5406 * (In ICU 2.1-69 the type of both parameters was <code>char</code>.) 5407 * @param trail the trail unit 5408 * @return code point if lead and trail form a valid surrogate pair. 5409 * @exception IllegalArgumentException thrown when the code units do 5410 * not form a valid surrogate pair 5411 * @stable ICU 70 5412 * @see #toCodePoint(int, int) 5413 */ getCodePoint(int lead, int trail)5414 public static int getCodePoint(int lead, int trail) 5415 { 5416 if (isHighSurrogate(lead) && isLowSurrogate(trail)) { 5417 return toCodePoint(lead, trail); 5418 } 5419 throw new IllegalArgumentException("Not a valid surrogate pair"); 5420 } 5421 5422 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 5423 /** 5424 * {@icu} Returns a code point corresponding to the two surrogate code units. 5425 * 5426 * @param lead the lead char 5427 * @param trail the trail char 5428 * @return code point if surrogate characters are valid. 5429 * @exception IllegalArgumentException thrown when the code units do 5430 * not form a valid code point 5431 * @stable ICU 2.1 5432 */ getCodePoint(char lead, char trail)5433 public static int getCodePoint(char lead, char trail) 5434 { 5435 return getCodePoint((int) lead, (int) trail); 5436 } 5437 // END Android patch: Keep the `char` version on Android. See ICU-21655 5438 5439 /** 5440 * {@icu} Returns the code point corresponding to the BMP code point. 5441 * 5442 * @param char16 the BMP code point 5443 * @return code point if argument is a valid character. 
5444 * @exception IllegalArgumentException thrown when char16 is not a valid 5445 * code point 5446 * @stable ICU 2.1 5447 */ getCodePoint(char char16)5448 public static int getCodePoint(char char16) 5449 { 5450 if (UCharacter.isLegal(char16)) { 5451 return char16; 5452 } 5453 throw new IllegalArgumentException("Illegal codepoint"); 5454 } 5455 5456 /** 5457 * Returns the uppercase version of the argument string. 5458 * Casing is dependent on the default locale and context-sensitive. 5459 * @param str source string to be performed on 5460 * @return uppercase version of the argument string 5461 * @stable ICU 2.1 5462 */ toUpperCase(String str)5463 public static String toUpperCase(String str) 5464 { 5465 return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str); 5466 } 5467 5468 /** 5469 * Returns the lowercase version of the argument string. 5470 * Casing is dependent on the default locale and context-sensitive 5471 * @param str source string to be performed on 5472 * @return lowercase version of the argument string 5473 * @stable ICU 2.1 5474 */ toLowerCase(String str)5475 public static String toLowerCase(String str) 5476 { 5477 return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str); 5478 } 5479 5480 /** 5481 * <p>Returns the titlecase version of the argument string. 5482 * <p>Position for titlecasing is determined by the argument break 5483 * iterator, hence the user can customize his break iterator for 5484 * a specialized titlecasing. In this case only the forward iteration 5485 * needs to be implemented. 5486 * If the break iterator passed in is null, the default Unicode algorithm 5487 * will be used to determine the titlecase positions. 5488 * 5489 * <p>Only positions returned by the break iterator will be title cased, 5490 * character in between the positions will all be in lower case. 
5491 * <p>Casing is dependent on the default locale and context-sensitive 5492 * @param str source string to be performed on 5493 * @param breakiter break iterator to determine the positions in which 5494 * the character should be title cased. 5495 * @return titlecase version of the argument string 5496 * @stable ICU 2.6 5497 */ toTitleCase(String str, BreakIterator breakiter)5498 public static String toTitleCase(String str, BreakIterator breakiter) 5499 { 5500 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 5501 } 5502 getDefaultCaseLocale()5503 private static int getDefaultCaseLocale() { 5504 return UCaseProps.getCaseLocale(Locale.getDefault()); 5505 } 5506 getCaseLocale(Locale locale)5507 private static int getCaseLocale(Locale locale) { 5508 if (locale == null) { 5509 locale = Locale.getDefault(); 5510 } 5511 return UCaseProps.getCaseLocale(locale); 5512 } 5513 getCaseLocale(ULocale locale)5514 private static int getCaseLocale(ULocale locale) { 5515 if (locale == null) { 5516 locale = ULocale.getDefault(); 5517 } 5518 return UCaseProps.getCaseLocale(locale); 5519 } 5520 5521 /** 5522 * Returns the uppercase version of the argument string. 5523 * Casing is dependent on the argument locale and context-sensitive. 5524 * @param locale which string is to be converted in 5525 * @param str source string to be performed on 5526 * @return uppercase version of the argument string 5527 * @stable ICU 2.1 5528 */ toUpperCase(Locale locale, String str)5529 public static String toUpperCase(Locale locale, String str) 5530 { 5531 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5532 } 5533 5534 /** 5535 * Returns the uppercase version of the argument string. 5536 * Casing is dependent on the argument locale and context-sensitive. 
5537 * @param locale which string is to be converted in 5538 * @param str source string to be performed on 5539 * @return uppercase version of the argument string 5540 * @stable ICU 3.2 5541 */ toUpperCase(ULocale locale, String str)5542 public static String toUpperCase(ULocale locale, String str) { 5543 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5544 } 5545 5546 /** 5547 * Returns the lowercase version of the argument string. 5548 * Casing is dependent on the argument locale and context-sensitive 5549 * @param locale which string is to be converted in 5550 * @param str source string to be performed on 5551 * @return lowercase version of the argument string 5552 * @stable ICU 2.1 5553 */ toLowerCase(Locale locale, String str)5554 public static String toLowerCase(Locale locale, String str) 5555 { 5556 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5557 } 5558 5559 /** 5560 * Returns the lowercase version of the argument string. 5561 * Casing is dependent on the argument locale and context-sensitive 5562 * @param locale which string is to be converted in 5563 * @param str source string to be performed on 5564 * @return lowercase version of the argument string 5565 * @stable ICU 3.2 5566 */ toLowerCase(ULocale locale, String str)5567 public static String toLowerCase(ULocale locale, String str) { 5568 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5569 } 5570 5571 /** 5572 * <p>Returns the titlecase version of the argument string. 5573 * <p>Position for titlecasing is determined by the argument break 5574 * iterator, hence the user can customize his break iterator for 5575 * a specialized titlecasing. In this case only the forward iteration 5576 * needs to be implemented. 5577 * If the break iterator passed in is null, the default Unicode algorithm 5578 * will be used to determine the titlecase positions. 
5579 * 5580 * <p>Only positions returned by the break iterator will be title cased, 5581 * character in between the positions will all be in lower case. 5582 * <p>Casing is dependent on the argument locale and context-sensitive 5583 * @param locale which string is to be converted in 5584 * @param str source string to be performed on 5585 * @param breakiter break iterator to determine the positions in which 5586 * the character should be title cased. 5587 * @return titlecase version of the argument string 5588 * @stable ICU 2.6 5589 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)5590 public static String toTitleCase(Locale locale, String str, 5591 BreakIterator breakiter) 5592 { 5593 return toTitleCase(locale, str, breakiter, 0); 5594 } 5595 5596 /** 5597 * <p>Returns the titlecase version of the argument string. 5598 * <p>Position for titlecasing is determined by the argument break 5599 * iterator, hence the user can customize his break iterator for 5600 * a specialized titlecasing. In this case only the forward iteration 5601 * needs to be implemented. 5602 * If the break iterator passed in is null, the default Unicode algorithm 5603 * will be used to determine the titlecase positions. 5604 * 5605 * <p>Only positions returned by the break iterator will be title cased, 5606 * character in between the positions will all be in lower case. 5607 * <p>Casing is dependent on the argument locale and context-sensitive 5608 * @param locale which string is to be converted in 5609 * @param str source string to be performed on 5610 * @param titleIter break iterator to determine the positions in which 5611 * the character should be title cased. 
5612 * @return titlecase version of the argument string 5613 * @stable ICU 3.2 5614 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)5615 public static String toTitleCase(ULocale locale, String str, 5616 BreakIterator titleIter) { 5617 return toTitleCase(locale, str, titleIter, 0); 5618 } 5619 5620 /** 5621 * <p>Returns the titlecase version of the argument string. 5622 * <p>Position for titlecasing is determined by the argument break 5623 * iterator, hence the user can customize his break iterator for 5624 * a specialized titlecasing. In this case only the forward iteration 5625 * needs to be implemented. 5626 * If the break iterator passed in is null, the default Unicode algorithm 5627 * will be used to determine the titlecase positions. 5628 * 5629 * <p>Only positions returned by the break iterator will be title cased, 5630 * character in between the positions will all be in lower case. 5631 * <p>Casing is dependent on the argument locale and context-sensitive 5632 * @param locale which string is to be converted in 5633 * @param str source string to be performed on 5634 * @param titleIter break iterator to determine the positions in which 5635 * the character should be title cased. 
5636 * @param options bit set to modify the titlecasing operation 5637 * @return titlecase version of the argument string 5638 * @stable ICU 3.8 5639 * @see #TITLECASE_NO_LOWERCASE 5640 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5641 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5642 public static String toTitleCase(ULocale locale, String str, 5643 BreakIterator titleIter, int options) { 5644 if (titleIter == null && locale == null) { 5645 locale = ULocale.getDefault(); 5646 } 5647 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5648 titleIter.setText(str); 5649 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5650 } 5651 5652 /** 5653 * {@icu} <p>Returns the titlecase version of the argument string. 5654 * <p>Position for titlecasing is determined by the argument break 5655 * iterator, hence the user can customize his break iterator for 5656 * a specialized titlecasing. In this case only the forward iteration 5657 * needs to be implemented. 5658 * If the break iterator passed in is null, the default Unicode algorithm 5659 * will be used to determine the titlecase positions. 5660 * 5661 * <p>Only positions returned by the break iterator will be title cased, 5662 * character in between the positions will all be in lower case. 5663 * <p>Casing is dependent on the argument locale and context-sensitive 5664 * @param locale which string is to be converted in 5665 * @param str source string to be performed on 5666 * @param titleIter break iterator to determine the positions in which 5667 * the character should be title cased. 
5668 * @param options bit set to modify the titlecasing operation 5669 * @return titlecase version of the argument string 5670 * @see #TITLECASE_NO_LOWERCASE 5671 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5672 * @stable ICU 54 5673 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5674 public static String toTitleCase(Locale locale, String str, 5675 BreakIterator titleIter, 5676 int options) { 5677 if (titleIter == null && locale == null) { 5678 locale = Locale.getDefault(); 5679 } 5680 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5681 titleIter.setText(str); 5682 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5683 } 5684 5685 /** 5686 * {@icu} The given character is mapped to its case folding equivalent according 5687 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5688 * folding equivalent, the character itself is returned. 5689 * 5690 * <p>This function only returns the simple, single-code point case mapping. 5691 * Full case mappings should be used whenever possible because they produce 5692 * better results by working on whole strings. 5693 * They can map to a result string with a different length as appropriate. 5694 * Full case mappings are applied by the case mapping functions 5695 * that take String parameters rather than code points (int). 5696 * See also the User Guide chapter on C/POSIX migration: 5697 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 5698 * 5699 * @param ch the character to be converted 5700 * @param defaultmapping Indicates whether the default mappings defined in 5701 * CaseFolding.txt are to be used, otherwise the 5702 * mappings for dotted I and dotless i marked with 5703 * 'T' in CaseFolding.txt are included. 5704 * @return the case folding equivalent of the character, if 5705 * any; otherwise the character itself. 
5706 * @see #foldCase(String, boolean) 5707 * @stable ICU 2.1 5708 */ foldCase(int ch, boolean defaultmapping)5709 public static int foldCase(int ch, boolean defaultmapping) { 5710 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5711 } 5712 5713 /** 5714 * {@icu} The given string is mapped to its case folding equivalent according to 5715 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5716 * folding equivalent, the character itself is returned. 5717 * "Full", multiple-code point case folding mappings are returned here. 5718 * For "simple" single-code point mappings use the API 5719 * foldCase(int ch, boolean defaultmapping). 5720 * @param str the String to be converted 5721 * @param defaultmapping Indicates whether the default mappings defined in 5722 * CaseFolding.txt are to be used, otherwise the 5723 * mappings for dotted I and dotless i marked with 5724 * 'T' in CaseFolding.txt are included. 5725 * @return the case folding equivalent of the character, if 5726 * any; otherwise the character itself. 5727 * @see #foldCase(int, boolean) 5728 * @stable ICU 2.1 5729 */ foldCase(String str, boolean defaultmapping)5730 public static String foldCase(String str, boolean defaultmapping) { 5731 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5732 } 5733 5734 /** 5735 * {@icu} Option value for case folding: use default mappings defined in 5736 * CaseFolding.txt. 5737 * @stable ICU 2.6 5738 */ 5739 public static final int FOLD_CASE_DEFAULT = 0x0000; 5740 /** 5741 * {@icu} Option value for case folding: 5742 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 5743 * and dotless i appropriately for Turkic languages (tr, az). 5744 * 5745 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 5746 * are to be included for default mappings and 5747 * excluded for the Turkic-specific mappings. 
5748 * 5749 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 5750 * are to be excluded for default mappings and 5751 * included for the Turkic-specific mappings. 5752 * 5753 * @stable ICU 2.6 5754 */ 5755 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 5756 5757 /** 5758 * {@icu} The given character is mapped to its case folding equivalent according 5759 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5760 * folding equivalent, the character itself is returned. 5761 * 5762 * <p>This function only returns the simple, single-code point case mapping. 5763 * Full case mappings should be used whenever possible because they produce 5764 * better results by working on whole strings. 5765 * They can map to a result string with a different length as appropriate. 5766 * Full case mappings are applied by the case mapping functions 5767 * that take String parameters rather than code points (int). 5768 * See also the User Guide chapter on C/POSIX migration: 5769 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 5770 * 5771 * @param ch the character to be converted 5772 * @param options A bit set for special processing. Currently the recognised options 5773 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5774 * @return the case folding equivalent of the character, if any; otherwise the 5775 * character itself. 5776 * @see #foldCase(String, boolean) 5777 * @stable ICU 2.6 5778 */ foldCase(int ch, int options)5779 public static int foldCase(int ch, int options) { 5780 return UCaseProps.INSTANCE.fold(ch, options); 5781 } 5782 5783 /** 5784 * {@icu} The given string is mapped to its case folding equivalent according to 5785 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5786 * folding equivalent, the character itself is returned. 5787 * "Full", multiple-code point case folding mappings are returned here. 
5788 * For "simple" single-code point mappings use the API 5789 * foldCase(int ch, boolean defaultmapping). 5790 * @param str the String to be converted 5791 * @param options A bit set for special processing. Currently the recognised options 5792 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5793 * @return the case folding equivalent of the character, if any; otherwise the 5794 * character itself. 5795 * @see #foldCase(int, boolean) 5796 * @stable ICU 2.6 5797 */ foldCase(String str, int options)5798 public static final String foldCase(String str, int options) { 5799 return CaseMapImpl.fold(options, str); 5800 } 5801 5802 /** 5803 * {@icu} Returns the numeric value of a Han character. 5804 * 5805 * <p>This returns the value of Han 'numeric' code points, 5806 * including those for zero, ten, hundred, thousand, ten thousand, 5807 * and hundred million. 5808 * This includes both the standard and 'checkwriting' 5809 * characters, the 'big circle' zero character, and the standard 5810 * zero character. 5811 * 5812 * <p>Note: The Unicode Standard has numeric values for more 5813 * Han characters recognized by this method 5814 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 5815 * and a {@link com.ibm.icu.text.NumberFormat} can be used with 5816 * a Chinese {@link com.ibm.icu.text.NumberingSystem}. 5817 * 5818 * @param ch code point to query 5819 * @return value if it is a Han 'numeric character,' otherwise return -1. 
5820 * @stable ICU 2.4 5821 */ getHanNumericValue(int ch)5822 public static int getHanNumericValue(int ch) 5823 { 5824 switch(ch) 5825 { 5826 case IDEOGRAPHIC_NUMBER_ZERO_ : 5827 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 5828 return 0; // Han Zero 5829 case CJK_IDEOGRAPH_FIRST_ : 5830 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5831 return 1; // Han One 5832 case CJK_IDEOGRAPH_SECOND_ : 5833 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5834 return 2; // Han Two 5835 case CJK_IDEOGRAPH_THIRD_ : 5836 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5837 return 3; // Han Three 5838 case CJK_IDEOGRAPH_FOURTH_ : 5839 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5840 return 4; // Han Four 5841 case CJK_IDEOGRAPH_FIFTH_ : 5842 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5843 return 5; // Han Five 5844 case CJK_IDEOGRAPH_SIXTH_ : 5845 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5846 return 6; // Han Six 5847 case CJK_IDEOGRAPH_SEVENTH_ : 5848 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5849 return 7; // Han Seven 5850 case CJK_IDEOGRAPH_EIGHTH_ : 5851 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5852 return 8; // Han Eight 5853 case CJK_IDEOGRAPH_NINETH_ : 5854 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5855 return 9; // Han Nine 5856 case CJK_IDEOGRAPH_TEN_ : 5857 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5858 return 10; 5859 case CJK_IDEOGRAPH_HUNDRED_ : 5860 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5861 return 100; 5862 case CJK_IDEOGRAPH_THOUSAND_ : 5863 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5864 return 1000; 5865 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5866 return 10000; 5867 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5868 return 100000000; 5869 } 5870 return -1; // no value 5871 } 5872 5873 /** 5874 * {@icu} <p>Returns an iterator for character types, iterating over codepoints. 
5875 * <p>Example of use:<br> 5876 * <pre> 5877 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5878 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5879 * while (iterator.next(element)) { 5880 * System.out.println("Codepoint \\u" + 5881 * Integer.toHexString(element.start) + 5882 * " to codepoint \\u" + 5883 * Integer.toHexString(element.limit - 1) + 5884 * " has the character type " + 5885 * element.value); 5886 * } 5887 * </pre> 5888 * @return an iterator 5889 * @stable ICU 2.6 5890 */ getTypeIterator()5891 public static RangeValueIterator getTypeIterator() 5892 { 5893 return new UCharacterTypeIterator(); 5894 } 5895 5896 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()5897 UCharacterTypeIterator() { 5898 reset(); 5899 } 5900 5901 // implements RangeValueIterator 5902 @Override next(Element element)5903 public boolean next(Element element) { 5904 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5905 element.start=range.startCodePoint; 5906 element.limit=range.endCodePoint+1; 5907 element.value=range.value; 5908 return true; 5909 } else { 5910 return false; 5911 } 5912 } 5913 5914 // implements RangeValueIterator 5915 @Override reset()5916 public void reset() { 5917 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5918 } 5919 5920 private Iterator<Trie2.Range> trieIterator; 5921 private Trie2.Range range; 5922 5923 private static final class MaskType implements Trie2.ValueMapper { 5924 // Extracts the general category ("character type") from the trie value. 5925 @Override map(int value)5926 public int map(int value) { 5927 return value & UCharacterProperty.TYPE_MASK; 5928 } 5929 } 5930 private static final MaskType MASK_TYPE=new MaskType(); 5931 } 5932 5933 /** 5934 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 
5935 * <p>This API only gets the iterator for the modern, most up-to-date 5936 * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or 5937 * for extended names use getExtendedNameIterator(). 5938 * <p>Example of use:<br> 5939 * <pre> 5940 * ValueIterator iterator = UCharacter.getNameIterator(); 5941 * ValueIterator.Element element = new ValueIterator.Element(); 5942 * while (iterator.next(element)) { 5943 * System.out.println("Codepoint \\u" + 5944 * Integer.toHexString(element.codepoint) + 5945 * " has the name " + (String)element.value); 5946 * } 5947 * </pre> 5948 * <p>The maximal range which the name iterator iterates is from 5949 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 5950 * @return an iterator 5951 * @stable ICU 2.6 5952 */ getNameIterator()5953 public static ValueIterator getNameIterator(){ 5954 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5955 UCharacterNameChoice.UNICODE_CHAR_NAME); 5956 } 5957 5958 /** 5959 * {@icu} Returns an empty iterator. 5960 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 5961 * @return an empty iterator 5962 * @deprecated ICU 49 5963 * @see #getName1_0(int) 5964 */ 5965 @Deprecated getName1_0Iterator()5966 public static ValueIterator getName1_0Iterator(){ 5967 return new DummyValueIterator(); 5968 } 5969 5970 private static final class DummyValueIterator implements ValueIterator { 5971 @Override next(Element element)5972 public boolean next(Element element) { return false; } 5973 @Override reset()5974 public void reset() {} 5975 @Override setRange(int start, int limit)5976 public void setRange(int start, int limit) {} 5977 } 5978 5979 /** 5980 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5981 * <p>This API only gets the iterator for the extended names. 5982 * For modern, most up-to-date Unicode names use getNameIterator() or 5983 * for older 1.0 Unicode names use get1_0NameIterator(). 
5984 * <p>Example of use:<br> 5985 * <pre> 5986 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5987 * ValueIterator.Element element = new ValueIterator.Element(); 5988 * while (iterator.next(element)) { 5989 * System.out.println("Codepoint \\u" + 5990 * Integer.toHexString(element.codepoint) + 5991 * " has the name " + (String)element.value); 5992 * } 5993 * </pre> 5994 * <p>The maximal range which the name iterator iterates is from 5995 * @return an iterator 5996 * @stable ICU 2.6 5997 */ getExtendedNameIterator()5998 public static ValueIterator getExtendedNameIterator(){ 5999 return new UCharacterNameIterator(UCharacterName.INSTANCE, 6000 UCharacterNameChoice.EXTENDED_CHAR_NAME); 6001 } 6002 6003 /** 6004 * {@icu} Returns the "age" of the code point. 6005 * <p>The "age" is the Unicode version when the code point was first 6006 * designated (as a non-character or for Private Use) or assigned a 6007 * character. 6008 * <p>This can be useful to avoid emitting code points to receiving 6009 * processes that do not accept newer characters. 6010 * <p>The data is from the UCD file DerivedAge.txt. 6011 * @param ch The code point. 6012 * @return the Unicode version number 6013 * @stable ICU 2.6 6014 */ getAge(int ch)6015 public static VersionInfo getAge(int ch) 6016 { 6017 if (ch < MIN_VALUE || ch > MAX_VALUE) { 6018 throw new IllegalArgumentException("Codepoint out of bounds"); 6019 } 6020 return UCharacterProperty.INSTANCE.getAge(ch); 6021 } 6022 6023 /** 6024 * {@icu} Check a binary Unicode property for a code point. 6025 * <p>Unicode, especially in version 3.2, defines many more properties 6026 * than the original set in UnicodeData.txt. 6027 * <p>This API is intended to reflect Unicode properties as defined in 6028 * the Unicode Character Database (UCD) and Unicode Technical Reports 6029 * (UTR). 6030 * <p>For details about the properties see 6031 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 
6032 * <p>For names of Unicode properties see the UCD file 6033 * PropertyAliases.txt. 6034 * <p>This API does not check the validity of the codepoint. 6035 * <p>Important: If ICU is built with UCD files from Unicode versions 6036 * below 3.2, then properties marked with "new" are not or 6037 * not fully available. 6038 * @param ch code point to test. 6039 * @param property selector constant from com.ibm.icu.lang.UProperty, 6040 * identifies which binary property to check. 6041 * @return true or false according to the binary Unicode property value 6042 * for ch. Also false if property is out of bounds or if the 6043 * Unicode version does not have data for the property at all, or 6044 * not for this code point. 6045 * @see com.ibm.icu.lang.UProperty 6046 * @see CharacterProperties#getBinaryPropertySet(int) 6047 * @stable ICU 2.6 6048 */ hasBinaryProperty(int ch, int property)6049 public static boolean hasBinaryProperty(int ch, int property) 6050 { 6051 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 6052 } 6053 6054 /** 6055 * {@icu} Returns true if the property is true for the string. 6056 * Same as {@link #hasBinaryProperty(int, int)} 6057 * if the string contains exactly one code point. 6058 * 6059 * <p>Most properties apply only to single code points. 6060 * <a href="https://www.unicode.org/reports/tr51/#Emoji_Sets">UTS #51 Unicode Emoji</a> 6061 * defines several properties of strings. 6062 * 6063 * @param s String to test. 6064 * @param property UProperty selector constant, identifies which binary property to check. 6065 * Must be BINARY_START<=which<BINARY_LIMIT. 6066 * @return true or false according to the binary Unicode property value for the string. 6067 * Also false if <code>property</code> is out of bounds or if the Unicode version 6068 * does not have data for the property at all. 
6069 * 6070 * @see com.ibm.icu.lang.UProperty 6071 * @see CharacterProperties#getBinaryPropertySet(int) 6072 * @stable ICU 70 6073 */ hasBinaryProperty(CharSequence s, int property)6074 public static boolean hasBinaryProperty(CharSequence s, int property) { 6075 int length = s.length(); 6076 if (length == 1) { 6077 return hasBinaryProperty(s.charAt(0), property); // single code point 6078 } else if (length == 2) { 6079 // first code point 6080 int c = Character.codePointAt(s, 0); 6081 if (Character.charCount(c) == length) { 6082 return hasBinaryProperty(c, property); // single code point 6083 } 6084 } 6085 // Only call into EmojiProps for a relevant property, 6086 // so that we not unnecessarily try to load its data file. 6087 return UProperty.BASIC_EMOJI <= property && property <= UProperty.RGI_EMOJI && 6088 EmojiProps.INSTANCE.hasBinaryProperty(s, property); 6089 } 6090 6091 /** 6092 * {@icu} <p>Check if a code point has the Alphabetic Unicode property. 6093 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 6094 * <p>Different from UCharacter.isLetter(ch)! 6095 * @stable ICU 2.6 6096 * @param ch codepoint to be tested 6097 */ isUAlphabetic(int ch)6098 public static boolean isUAlphabetic(int ch) 6099 { 6100 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 6101 } 6102 6103 /** 6104 * {@icu} <p>Check if a code point has the Lowercase Unicode property. 6105 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 6106 * <p>This is different from UCharacter.isLowerCase(ch)! 6107 * @param ch codepoint to be tested 6108 * @stable ICU 2.6 6109 */ isULowercase(int ch)6110 public static boolean isULowercase(int ch) 6111 { 6112 return hasBinaryProperty(ch, UProperty.LOWERCASE); 6113 } 6114 6115 /** 6116 * {@icu} <p>Check if a code point has the Uppercase Unicode property. 6117 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 6118 * <p>This is different from UCharacter.isUpperCase(ch)! 
6119 * @param ch codepoint to be tested 6120 * @stable ICU 2.6 6121 */ isUUppercase(int ch)6122 public static boolean isUUppercase(int ch) 6123 { 6124 return hasBinaryProperty(ch, UProperty.UPPERCASE); 6125 } 6126 6127 /** 6128 * {@icu} <p>Check if a code point has the White_Space Unicode property. 6129 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 6130 * <p>This is different from both UCharacter.isSpace(ch) and 6131 * UCharacter.isWhitespace(ch)! 6132 * @param ch codepoint to be tested 6133 * @stable ICU 2.6 6134 */ isUWhiteSpace(int ch)6135 public static boolean isUWhiteSpace(int ch) 6136 { 6137 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 6138 } 6139 6140 /** 6141 * {@icu} Returns the property value for a Unicode property type of a code point. 6142 * Also returns binary and mask property values. 6143 * <p>Unicode, especially in version 3.2, defines many more properties than 6144 * the original set in UnicodeData.txt. 6145 * <p>The properties APIs are intended to reflect Unicode properties as 6146 * defined in the Unicode Character Database (UCD) and Unicode Technical 6147 * Reports (UTR). For details about the properties see 6148 * http://www.unicode.org/. 6149 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 6150 * 6151 * <pre> 6152 * Sample usage: 6153 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 6154 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 6155 * boolean b = (ideo == 1) ? true : false; 6156 * </pre> 6157 * @param ch code point to test. 6158 * @param type UProperty selector constant, identifies which binary 6159 * property to check. Must be 6160 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 6161 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 6162 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 
6163 * @return numeric value that is directly the property value or, 6164 * for enumerated properties, corresponds to the numeric value of 6165 * the enumerated constant of the respective property value type 6166 * ({@link ECharacterCategory}, {@link ECharacterDirection}, 6167 * {@link DecompositionType}, etc.). 6168 * Returns 0 or 1 (for false / true) for binary Unicode properties. 6169 * Returns a bit-mask for mask properties. 6170 * Returns 0 if 'type' is out of bounds or if the Unicode version 6171 * does not have data for the property at all, or not for this code 6172 * point. 6173 * @see UProperty 6174 * @see #hasBinaryProperty 6175 * @see #getIntPropertyMinValue 6176 * @see #getIntPropertyMaxValue 6177 * @see CharacterProperties#getIntPropertyMap(int) 6178 * @see #getUnicodeVersion 6179 * @stable ICU 2.4 6180 */ getIntPropertyValue(int ch, int type)6181 public static int getIntPropertyValue(int ch, int type) 6182 { 6183 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 6184 } 6185 /** 6186 * {@icu} Returns a string version of the property value. 6187 * @param propertyEnum The property enum value. 6188 * @param codepoint The codepoint value. 6189 * @param nameChoice The choice of the name. 6190 * @return value as string 6191 * @internal 6192 * @deprecated This API is ICU internal only. 
6193 */ 6194 @Deprecated 6195 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)6196 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 6197 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 6198 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 6199 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 6200 nameChoice); 6201 } 6202 if (propertyEnum == UProperty.NUMERIC_VALUE) { 6203 return String.valueOf(getUnicodeNumericValue(codepoint)); 6204 } 6205 // otherwise must be string property 6206 switch (propertyEnum) { 6207 case UProperty.AGE: return getAge(codepoint).toString(); 6208 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 6209 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 6210 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 6211 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 6212 case UProperty.NAME: return getName(codepoint); 6213 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 6214 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 6215 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 6216 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 6217 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 6218 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 6219 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 6220 } 6221 throw new IllegalArgumentException("Illegal Property Enum"); 6222 } 6223 ///CLOVER:ON 6224 6225 /** 6226 * {@icu} Returns the minimum value for an integer/binary Unicode property type. 
6227 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 6228 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 6229 * @param type UProperty selector constant, identifies which binary 6230 * property to check. Must be 6231 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 6232 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 6233 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 6234 * for a Unicode property. 0 if the property 6235 * selector 'type' is out of range. 6236 * @see UProperty 6237 * @see #hasBinaryProperty 6238 * @see #getUnicodeVersion 6239 * @see #getIntPropertyMaxValue 6240 * @see #getIntPropertyValue 6241 * @stable ICU 2.4 6242 */ getIntPropertyMinValue(int type)6243 public static int getIntPropertyMinValue(int type){ 6244 6245 return 0; // undefined; and: all other properties have a minimum value of 0 6246 } 6247 6248 6249 /** 6250 * {@icu} Returns the maximum value for an integer/binary Unicode property. 6251 * Can be used together with UCharacter.getIntPropertyMinValue(int) 6252 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 6253 * Examples for min/max values (for Unicode 3.2): 6254 * <ul> 6255 * <li> UProperty.BIDI_CLASS: 0/18 6256 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 6257 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 6258 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 6259 * </ul> 6260 * For undefined UProperty constant values, min/max values will be 0/-1. 6261 * @param type UProperty selector constant, identifies which binary 6262 * property to check. Must be 6263 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 6264 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 6265 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 6266 * property. <= 0 if the property selector 'type' is out of range. 
6267 * @see UProperty 6268 * @see #hasBinaryProperty 6269 * @see #getUnicodeVersion 6270 * @see #getIntPropertyMaxValue 6271 * @see #getIntPropertyValue 6272 * @stable ICU 2.4 6273 */ getIntPropertyMaxValue(int type)6274 public static int getIntPropertyMaxValue(int type) 6275 { 6276 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 6277 } 6278 6279 /** 6280 * Provide the java.lang.Character forDigit API, for convenience. 6281 * @stable ICU 3.0 6282 */ forDigit(int digit, int radix)6283 public static char forDigit(int digit, int radix) { 6284 return java.lang.Character.forDigit(digit, radix); 6285 } 6286 6287 // JDK 1.5 API coverage 6288 6289 /** 6290 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 6291 * 6292 * @stable ICU 3.0 6293 */ 6294 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 6295 6296 /** 6297 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 6298 * 6299 * @stable ICU 3.0 6300 */ 6301 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 6302 6303 /** 6304 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 6305 * 6306 * @stable ICU 3.0 6307 */ 6308 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 6309 6310 /** 6311 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 6312 * 6313 * @stable ICU 3.0 6314 */ 6315 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 6316 6317 /** 6318 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 6319 * 6320 * @stable ICU 3.0 6321 */ 6322 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 6323 6324 /** 6325 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 6326 * 6327 * @stable ICU 3.0 6328 */ 6329 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 6330 6331 /** 6332 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 
6333 * 6334 * @stable ICU 3.0 6335 */ 6336 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 6337 6338 /** 6339 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 6340 * 6341 * @stable ICU 3.0 6342 */ 6343 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 6344 6345 /** 6346 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 6347 * 6348 * @stable ICU 3.0 6349 */ 6350 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 6351 6352 /** 6353 * Equivalent to {@link Character#isValidCodePoint}. 6354 * 6355 * @param cp the code point to check 6356 * @return true if cp is a valid code point 6357 * @stable ICU 3.0 6358 */ isValidCodePoint(int cp)6359 public static final boolean isValidCodePoint(int cp) { 6360 return cp >= 0 && cp <= MAX_CODE_POINT; 6361 } 6362 6363 /** 6364 * Same as {@link Character#isSupplementaryCodePoint}. 6365 * 6366 * @param cp the code point to check 6367 * @return true if cp is a supplementary code point 6368 * @stable ICU 3.0 6369 */ isSupplementaryCodePoint(int cp)6370 public static final boolean isSupplementaryCodePoint(int cp) { 6371 return Character.isSupplementaryCodePoint(cp); 6372 } 6373 6374 /** 6375 * Same as {@link Character#isHighSurrogate}, 6376 * except that the ICU version accepts <code>int</code> for code points. 6377 * 6378 * @param codePoint the code point to check 6379 * (In ICU 3.0-69 the type of this parameter was <code>char</code>.) 6380 * @return true if codePoint is a high (lead) surrogate 6381 * @stable ICU 70 6382 */ isHighSurrogate(int codePoint)6383 public static boolean isHighSurrogate(int codePoint) { 6384 return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS; 6385 } 6386 6387 // BEGIN Android patch: Keep the `char` version on Android. 
See ICU-21655 6388 /** 6389 * Same as {@link Character#isHighSurrogate}, 6390 * 6391 * @param ch the char to check 6392 * @return true if ch is a high (lead) surrogate 6393 * @stable ICU 3.0 6394 */ isHighSurrogate(char ch)6395 public static boolean isHighSurrogate(char ch) { 6396 return isHighSurrogate((int) ch); 6397 } 6398 // END Android patch: Keep the `char` version on Android. See ICU-21655 6399 6400 /** 6401 * Same as {@link Character#isLowSurrogate}, 6402 * except that the ICU version accepts <code>int</code> for code points. 6403 * 6404 * @param codePoint the code point to check 6405 * (In ICU 3.0-69 the type of this parameter was <code>char</code>.) 6406 * @return true if codePoint is a low (trail) surrogate 6407 * @stable ICU 70 6408 */ isLowSurrogate(int codePoint)6409 public static boolean isLowSurrogate(int codePoint) { 6410 return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS; 6411 } 6412 6413 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 6414 /** 6415 * Same as {@link Character#isLowSurrogate}, 6416 * 6417 * @param ch the char to check 6418 * @return true if ch is a low (trail) surrogate 6419 * @stable ICU 3.0 6420 */ isLowSurrogate(char ch)6421 public static boolean isLowSurrogate(char ch) { 6422 return isLowSurrogate((int) ch); 6423 } 6424 // END Android patch: Keep the `char` version on Android. See ICU-21655 6425 6426 /** 6427 * Same as {@link Character#isSurrogatePair}, 6428 * except that the ICU version accepts <code>int</code> for code points. 6429 * 6430 * @param high the high (lead) unit 6431 * (In ICU 3.0-69 the type of both parameters was <code>char</code>.) 
6432 * @param low the low (trail) unit 6433 * @return true if high, low form a surrogate pair 6434 * @stable ICU 70 6435 */ isSurrogatePair(int high, int low)6436 public static final boolean isSurrogatePair(int high, int low) { 6437 return isHighSurrogate(high) && isLowSurrogate(low); 6438 } 6439 6440 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 6441 /** 6442 * Same as {@link Character#isSurrogatePair}. 6443 * 6444 * @param high the high (lead) char 6445 * @param low the low (trail) char 6446 * @return true if high, low form a surrogate pair 6447 * @stable ICU 3.0 6448 */ isSurrogatePair(char high, char low)6449 public static final boolean isSurrogatePair(char high, char low) { 6450 return isSurrogatePair((int) high, (int) low); 6451 } 6452 // END Android patch: Keep the `char` version on Android. See ICU-21655 6453 6454 /** 6455 * Same as {@link Character#charCount}. 6456 * Returns the number of chars needed to represent the code point (1 or 2). 6457 * This does not check the code point for validity. 6458 * 6459 * @param cp the code point to check 6460 * @return the number of chars needed to represent the code point 6461 * @stable ICU 3.0 6462 */ charCount(int cp)6463 public static int charCount(int cp) { 6464 return Character.charCount(cp); 6465 } 6466 6467 /** 6468 * Same as {@link Character#toCodePoint}, 6469 * except that the ICU version accepts <code>int</code> for code points. 6470 * Returns the code point represented by the two surrogate code units. 6471 * This does not check the surrogate pair for validity. 6472 * 6473 * @param high the high (lead) surrogate 6474 * (In ICU 3.0-69 the type of both parameters was <code>char</code>.) 
6475 * @param low the low (trail) surrogate 6476 * @return the code point formed by the surrogate pair 6477 * @stable ICU 70 6478 * @see #getCodePoint(int, int) 6479 */ toCodePoint(int high, int low)6480 public static final int toCodePoint(int high, int low) { 6481 // see ICU4C U16_GET_SUPPLEMENTARY() 6482 return (high << 10) + low - U16_SURROGATE_OFFSET; 6483 } 6484 6485 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 6486 /** 6487 * Same as {@link Character#toCodePoint}. 6488 * Returns the code point represented by the two surrogate code units. 6489 * This does not check the surrogate pair for validity. 6490 * 6491 * @param high the high (lead) surrogate 6492 * @param low the low (trail) surrogate 6493 * @return the code point formed by the surrogate pair 6494 * @stable ICU 3.0 6495 */ toCodePoint(char high, char low)6496 public static final int toCodePoint(char high, char low) { 6497 return toCodePoint((int) high, (int) low); 6498 } 6499 // END Android patch: Keep the `char` version on Android. See ICU-21655 6500 6501 /** 6502 * Same as {@link Character#codePointAt(CharSequence, int)}. 6503 * Returns the code point at index. 6504 * This examines only the characters at index and index+1. 6505 * 6506 * @param seq the characters to check 6507 * @param index the index of the first or only char forming the code point 6508 * @return the code point at the index 6509 * @stable ICU 3.0 6510 */ codePointAt(CharSequence seq, int index)6511 public static final int codePointAt(CharSequence seq, int index) { 6512 char c1 = seq.charAt(index++); 6513 if (isHighSurrogate(c1)) { 6514 if (index < seq.length()) { 6515 char c2 = seq.charAt(index); 6516 if (isLowSurrogate(c2)) { 6517 return toCodePoint(c1, c2); 6518 } 6519 } 6520 } 6521 return c1; 6522 } 6523 6524 /** 6525 * Same as {@link Character#codePointAt(char[], int)}. 6526 * Returns the code point at index. 6527 * This examines only the characters at index and index+1. 
6528 * 6529 * @param text the characters to check 6530 * @param index the index of the first or only char forming the code point 6531 * @return the code point at the index 6532 * @stable ICU 3.0 6533 */ codePointAt(char[] text, int index)6534 public static final int codePointAt(char[] text, int index) { 6535 char c1 = text[index++]; 6536 if (isHighSurrogate(c1)) { 6537 if (index < text.length) { 6538 char c2 = text[index]; 6539 if (isLowSurrogate(c2)) { 6540 return toCodePoint(c1, c2); 6541 } 6542 } 6543 } 6544 return c1; 6545 } 6546 6547 /** 6548 * Same as {@link Character#codePointAt(char[], int, int)}. 6549 * Returns the code point at index. 6550 * This examines only the characters at index and index+1. 6551 * 6552 * @param text the characters to check 6553 * @param index the index of the first or only char forming the code point 6554 * @param limit the limit of the valid text 6555 * @return the code point at the index 6556 * @stable ICU 3.0 6557 */ codePointAt(char[] text, int index, int limit)6558 public static final int codePointAt(char[] text, int index, int limit) { 6559 if (index >= limit || limit > text.length) { 6560 throw new IndexOutOfBoundsException(); 6561 } 6562 char c1 = text[index++]; 6563 if (isHighSurrogate(c1)) { 6564 if (index < limit) { 6565 char c2 = text[index]; 6566 if (isLowSurrogate(c2)) { 6567 return toCodePoint(c1, c2); 6568 } 6569 } 6570 } 6571 return c1; 6572 } 6573 6574 /** 6575 * Same as {@link Character#codePointBefore(CharSequence, int)}. 6576 * Return the code point before index. 6577 * This examines only the characters at index-1 and index-2. 
6578 * 6579 * @param seq the characters to check 6580 * @param index the index after the last or only char forming the code point 6581 * @return the code point before the index 6582 * @stable ICU 3.0 6583 */ codePointBefore(CharSequence seq, int index)6584 public static final int codePointBefore(CharSequence seq, int index) { 6585 char c2 = seq.charAt(--index); 6586 if (isLowSurrogate(c2)) { 6587 if (index > 0) { 6588 char c1 = seq.charAt(--index); 6589 if (isHighSurrogate(c1)) { 6590 return toCodePoint(c1, c2); 6591 } 6592 } 6593 } 6594 return c2; 6595 } 6596 6597 /** 6598 * Same as {@link Character#codePointBefore(char[], int)}. 6599 * Returns the code point before index. 6600 * This examines only the characters at index-1 and index-2. 6601 * 6602 * @param text the characters to check 6603 * @param index the index after the last or only char forming the code point 6604 * @return the code point before the index 6605 * @stable ICU 3.0 6606 */ codePointBefore(char[] text, int index)6607 public static final int codePointBefore(char[] text, int index) { 6608 char c2 = text[--index]; 6609 if (isLowSurrogate(c2)) { 6610 if (index > 0) { 6611 char c1 = text[--index]; 6612 if (isHighSurrogate(c1)) { 6613 return toCodePoint(c1, c2); 6614 } 6615 } 6616 } 6617 return c2; 6618 } 6619 6620 /** 6621 * Same as {@link Character#codePointBefore(char[], int, int)}. 6622 * Return the code point before index. 6623 * This examines only the characters at index-1 and index-2. 
6624 * 6625 * @param text the characters to check 6626 * @param index the index after the last or only char forming the code point 6627 * @param limit the start of the valid text 6628 * @return the code point before the index 6629 * @stable ICU 3.0 6630 */ codePointBefore(char[] text, int index, int limit)6631 public static final int codePointBefore(char[] text, int index, int limit) { 6632 if (index <= limit || limit < 0) { 6633 throw new IndexOutOfBoundsException(); 6634 } 6635 char c2 = text[--index]; 6636 if (isLowSurrogate(c2)) { 6637 if (index > limit) { 6638 char c1 = text[--index]; 6639 if (isHighSurrogate(c1)) { 6640 return toCodePoint(c1, c2); 6641 } 6642 } 6643 } 6644 return c2; 6645 } 6646 6647 /** 6648 * Same as {@link Character#toChars(int, char[], int)}. 6649 * Writes the chars representing the 6650 * code point into the destination at the given index. 6651 * 6652 * @param cp the code point to convert 6653 * @param dst the destination array into which to put the char(s) representing the code point 6654 * @param dstIndex the index at which to put the first (or only) char 6655 * @return the count of the number of chars written (1 or 2) 6656 * @throws IllegalArgumentException if cp is not a valid code point 6657 * @stable ICU 3.0 6658 */ toChars(int cp, char[] dst, int dstIndex)6659 public static final int toChars(int cp, char[] dst, int dstIndex) { 6660 return Character.toChars(cp, dst, dstIndex); 6661 } 6662 6663 /** 6664 * Same as {@link Character#toChars(int)}. 6665 * Returns a char array representing the code point. 
6666 * 6667 * @param cp the code point to convert 6668 * @return an array containing the char(s) representing the code point 6669 * @throws IllegalArgumentException if cp is not a valid code point 6670 * @stable ICU 3.0 6671 */ toChars(int cp)6672 public static final char[] toChars(int cp) { 6673 return Character.toChars(cp); 6674 } 6675 6676 /** 6677 * Equivalent to the {@link Character#getDirectionality(char)} method, for 6678 * convenience. Returns a byte representing the directionality of the 6679 * character. 6680 * 6681 * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns 6682 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 6683 * 6684 * {@icunote} The return value must be tested using the constants defined in {@link 6685 * UCharacterDirection} and its interface {@link 6686 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 6687 * defined by <code>java.lang.Character</code>. 6688 * @param cp the code point to check 6689 * @return the directionality of the code point 6690 * @see #getDirection 6691 * @stable ICU 3.0 6692 */ getDirectionality(int cp)6693 public static byte getDirectionality(int cp) 6694 { 6695 return (byte)getDirection(cp); 6696 } 6697 6698 /** 6699 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 6700 * method, for convenience. Counts the number of code points in the range 6701 * of text. 
6702 * @param text the characters to check 6703 * @param start the start of the range 6704 * @param limit the limit of the range 6705 * @return the number of code points in the range 6706 * @stable ICU 3.0 6707 */ codePointCount(CharSequence text, int start, int limit)6708 public static int codePointCount(CharSequence text, int start, int limit) { 6709 if (start < 0 || limit < start || limit > text.length()) { 6710 throw new IndexOutOfBoundsException("start (" + start + 6711 ") or limit (" + limit + 6712 ") invalid or out of range 0, " + text.length()); 6713 } 6714 6715 int len = limit - start; 6716 while (limit > start) { 6717 char ch = text.charAt(--limit); 6718 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6719 ch = text.charAt(--limit); 6720 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6721 --len; 6722 break; 6723 } 6724 } 6725 } 6726 return len; 6727 } 6728 6729 /** 6730 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 6731 * convenience. Counts the number of code points in the range of text. 
6732 * @param text the characters to check 6733 * @param start the start of the range 6734 * @param limit the limit of the range 6735 * @return the number of code points in the range 6736 * @stable ICU 3.0 6737 */ codePointCount(char[] text, int start, int limit)6738 public static int codePointCount(char[] text, int start, int limit) { 6739 if (start < 0 || limit < start || limit > text.length) { 6740 throw new IndexOutOfBoundsException("start (" + start + 6741 ") or limit (" + limit + 6742 ") invalid or out of range 0, " + text.length); 6743 } 6744 6745 int len = limit - start; 6746 while (limit > start) { 6747 char ch = text[--limit]; 6748 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6749 ch = text[--limit]; 6750 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6751 --len; 6752 break; 6753 } 6754 } 6755 } 6756 return len; 6757 } 6758 6759 /** 6760 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 6761 * method, for convenience. Adjusts the char index by a code point offset. 
6762 * @param text the characters to check 6763 * @param index the index to adjust 6764 * @param codePointOffset the number of code points by which to offset the index 6765 * @return the adjusted index 6766 * @stable ICU 3.0 6767 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)6768 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 6769 if (index < 0 || index > text.length()) { 6770 throw new IndexOutOfBoundsException("index ( " + index + 6771 ") out of range 0, " + text.length()); 6772 } 6773 6774 if (codePointOffset < 0) { 6775 while (++codePointOffset <= 0) { 6776 char ch = text.charAt(--index); 6777 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 6778 ch = text.charAt(--index); 6779 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6780 if (++codePointOffset > 0) { 6781 return index+1; 6782 } 6783 } 6784 } 6785 } 6786 } else { 6787 int limit = text.length(); 6788 while (--codePointOffset >= 0) { 6789 char ch = text.charAt(index++); 6790 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6791 ch = text.charAt(index++); 6792 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6793 if (--codePointOffset < 0) { 6794 return index-1; 6795 } 6796 } 6797 } 6798 } 6799 } 6800 6801 return index; 6802 } 6803 6804 /** 6805 * Equivalent to the 6806 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 6807 * method, for convenience. Adjusts the char index by a code point offset. 
6808 * @param text the characters to check 6809 * @param start the start of the range to check 6810 * @param count the length of the range to check 6811 * @param index the index to adjust 6812 * @param codePointOffset the number of code points by which to offset the index 6813 * @return the adjusted index 6814 * @stable ICU 3.0 6815 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6816 public static int offsetByCodePoints(char[] text, int start, int count, int index, 6817 int codePointOffset) { 6818 int limit = start + count; 6819 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 6820 throw new IndexOutOfBoundsException("index ( " + index + 6821 ") out of range " + start + 6822 ", " + limit + 6823 " in array 0, " + text.length); 6824 } 6825 6826 if (codePointOffset < 0) { 6827 while (++codePointOffset <= 0) { 6828 char ch = text[--index]; 6829 if (index < start) { 6830 throw new IndexOutOfBoundsException("index ( " + index + 6831 ") < start (" + start + 6832 ")"); 6833 } 6834 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 6835 ch = text[--index]; 6836 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6837 if (++codePointOffset > 0) { 6838 return index+1; 6839 } 6840 } 6841 } 6842 } 6843 } else { 6844 while (--codePointOffset >= 0) { 6845 char ch = text[index++]; 6846 if (index > limit) { 6847 throw new IndexOutOfBoundsException("index ( " + index + 6848 ") > limit (" + limit + 6849 ")"); 6850 } 6851 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6852 ch = text[index++]; 6853 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6854 if (--codePointOffset < 0) { 6855 return index-1; 6856 } 6857 } 6858 } 6859 } 6860 } 6861 6862 return index; 6863 } 6864 6865 // private variables ------------------------------------------------- 6866 6867 /** 6868 * To get the last character out from a data type 6869 */ 6870 private 
static final int LAST_CHAR_MASK_ = 0xFFFF; 6871 6872 // /** 6873 // * To get the last byte out from a data type 6874 // */ 6875 // private static final int LAST_BYTE_MASK_ = 0xFF; 6876 // 6877 // /** 6878 // * Shift 16 bits 6879 // */ 6880 // private static final int SHIFT_16_ = 16; 6881 // 6882 // /** 6883 // * Shift 24 bits 6884 // */ 6885 // private static final int SHIFT_24_ = 24; 6886 // 6887 // /** 6888 // * Decimal radix 6889 // */ 6890 // private static final int DECIMAL_RADIX_ = 10; 6891 6892 /** 6893 * No break space code point 6894 */ 6895 private static final int NO_BREAK_SPACE_ = 0xA0; 6896 6897 /** 6898 * Figure space code point 6899 */ 6900 private static final int FIGURE_SPACE_ = 0x2007; 6901 6902 /** 6903 * Narrow no break space code point 6904 */ 6905 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 6906 6907 /** 6908 * Ideographic number zero code point 6909 */ 6910 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 6911 6912 /** 6913 * CJK Ideograph, First code point 6914 */ 6915 private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; 6916 6917 /** 6918 * CJK Ideograph, Second code point 6919 */ 6920 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 6921 6922 /** 6923 * CJK Ideograph, Third code point 6924 */ 6925 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 6926 6927 /** 6928 * CJK Ideograph, Fourth code point 6929 */ 6930 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db; 6931 6932 /** 6933 * CJK Ideograph, FIFTH code point 6934 */ 6935 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 6936 6937 /** 6938 * CJK Ideograph, Sixth code point 6939 */ 6940 private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 6941 6942 /** 6943 * CJK Ideograph, Seventh code point 6944 */ 6945 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 6946 6947 /** 6948 * CJK Ideograph, Eighth code point 6949 */ 6950 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 6951 6952 /** 6953 * CJK Ideograph, Nineth 
code point 6954 */ 6955 private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d; 6956 6957 /** 6958 * Application Program command code point 6959 */ 6960 private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F; 6961 6962 /** 6963 * Unit separator code point 6964 */ 6965 private static final int UNIT_SEPARATOR_ = 0x001F; 6966 6967 /** 6968 * Delete code point 6969 */ 6970 private static final int DELETE_ = 0x007F; 6971 6972 /** 6973 * Han digit characters 6974 */ 6975 private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96f6; 6976 private static final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58f9; 6977 private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8cb3; 6978 private static final int CJK_IDEOGRAPH_COMPLEX_THREE_ = 0x53c3; 6979 private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_ = 0x8086; 6980 private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_ = 0x4f0d; 6981 private static final int CJK_IDEOGRAPH_COMPLEX_SIX_ = 0x9678; 6982 private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_ = 0x67d2; 6983 private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_ = 0x634c; 6984 private static final int CJK_IDEOGRAPH_COMPLEX_NINE_ = 0x7396; 6985 private static final int CJK_IDEOGRAPH_TEN_ = 0x5341; 6986 private static final int CJK_IDEOGRAPH_COMPLEX_TEN_ = 0x62fe; 6987 private static final int CJK_IDEOGRAPH_HUNDRED_ = 0x767e; 6988 private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_ = 0x4f70; 6989 private static final int CJK_IDEOGRAPH_THOUSAND_ = 0x5343; 6990 private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf; 6991 private static final int CJK_IDEOGRAPH_TEN_THOUSAND_ = 0x824c; 6992 private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_ = 0x5104; 6993 6994 // private constructor ----------------------------------------------- 6995 ///CLOVER:OFF 6996 /** 6997 * Private constructor to prevent instantiation 6998 */ UCharacter()6999 private UCharacter() 7000 { 7001 } 7002 ///CLOVER:ON 7003 } 7004