1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 ******************************************************************************* 5 * Copyright (C) 1996-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.lang; 11 12 import java.lang.ref.SoftReference; 13 import java.util.HashMap; 14 import java.util.Iterator; 15 import java.util.Locale; 16 import java.util.Map; 17 18 import com.ibm.icu.impl.CaseMapImpl; 19 import com.ibm.icu.impl.EmojiProps; 20 import com.ibm.icu.impl.IllegalIcuArgumentException; 21 import com.ibm.icu.impl.Trie2; 22 import com.ibm.icu.impl.UBiDiProps; 23 import com.ibm.icu.impl.UCaseProps; 24 import com.ibm.icu.impl.UCharacterName; 25 import com.ibm.icu.impl.UCharacterNameChoice; 26 import com.ibm.icu.impl.UCharacterProperty; 27 import com.ibm.icu.impl.UCharacterUtility; 28 import com.ibm.icu.impl.UPropertyAliases; 29 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 30 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; 31 import com.ibm.icu.text.BreakIterator; 32 import com.ibm.icu.text.Normalizer2; 33 import com.ibm.icu.util.RangeValueIterator; 34 import com.ibm.icu.util.ULocale; 35 import com.ibm.icu.util.ValueIterator; 36 import com.ibm.icu.util.VersionInfo; 37 38 /** 39 * {@icuenhanced java.lang.Character}.{@icu _usage_} 40 * 41 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 42 * These extensions provide support for more Unicode properties. 43 * Each ICU release supports the latest version of Unicode available at that time. 44 * 45 * <p>For some time before Java 5 added support for supplementary Unicode code points, 46 * the ICU UCharacter class and many other ICU classes already supported them. 
47 * Some UCharacter methods and constants were widened slightly differently than 48 * how the Character class methods and constants were widened later. 49 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 50 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 51 * 52 * <p>Code points are represented in these API using ints. While it would be 53 * more convenient in Java to have a separate primitive datatype for them, 54 * ints suffice in the meantime. 55 * 56 * <p>To use this class please add the jar file name icu4j.jar to the 57 * class path, since it contains data files which supply the information used 58 * by this file.<br> 59 * E.g. In Windows <br> 60 * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/ucharacter.jar</code>.<br> 61 * Otherwise, another method would be to copy the files uprops.dat and 62 * unames.icu from the icu4j source subdirectory 63 * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 64 * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>. 65 * 66 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 67 * properties, the main differences between UCharacter and Character are: 68 * <ul> 69 * <li> UCharacter is not designed to be a char wrapper and does not have 70 * APIs that involve management of that single char.<br> 71 * These include: 72 * <ul> 73 * <li> char charValue(), 74 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 75 * </ul> 76 * <li> UCharacter does not include Character APIs that are deprecated, nor 77 * does it include the Java-specific character information, such as 78 * boolean isJavaIdentifierPart(char ch). 79 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 80 * values '10' - '35'. UCharacter also does this in digit and 81 * getNumericValue, to adhere to the java semantics of these 82 * methods. 
New methods unicodeDigit, and 83 * getUnicodeNumericValue do not treat the above code points 84 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 85 * </ul> 86 * <p> 87 * Further detail on differences can be determined using the program 88 * <a href= 89 * "https://github.com/unicode-org/icu/blob/main/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java"> 90 * com.ibm.icu.dev.test.lang.UCharacterCompare</a> 91 * <p> 92 * In addition to Java compatibility functions, which calculate derived properties, 93 * this API provides low-level access to the Unicode Character Database. 94 * <p> 95 * Unicode assigns each code point (not just assigned character) values for 96 * many properties. 97 * Most of them are simple boolean flags, or constants from a small enumerated list. 98 * For some properties, values are strings or other relatively more complex types. 99 * <p> 100 * For more information see 101 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 102 * (http://www.unicode.org/ucd/) 103 * and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU 104 * User Guide chapter on Properties</a> 105 * (https://unicode-org.github.io/icu/userguide/strings/properties). 106 * <p> 107 * There are also functions that provide easy migration from C/POSIX functions 108 * like isblank(). Their use is generally discouraged because the C/POSIX 109 * standards do not define their semantics beyond the ASCII range, which means 110 * that different implementations exhibit very different behavior. 111 * Instead, Unicode properties should be used directly. 112 * <p> 113 * There are also only a few, broad C/POSIX character classes, and they tend 114 * to be used for conflicting purposes. 
For example, the "isalpha()" class 115 * is sometimes used to determine word boundaries, while a more sophisticated 116 * approach would at least distinguish initial letters from continuation 117 * characters (the latter including combining marks). 118 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 119 * Another example: There is no "istitle()" class for titlecase characters. 120 * <p> 121 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 122 * ICU implements them according to the Standard Recommendations in 123 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 124 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 125 * <p> 126 * API access for C/POSIX character classes is as follows: 127 * <pre>{@code 128 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 129 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 130 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 131 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 132 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 133 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 134 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 135 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 136 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 137 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 138 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 139 * - cntrl: getType(c)==CONTROL 140 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 141 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 142 * <p> 143 * The C/POSIX character classes are also available in UnicodeSet patterns, 144 * using patterns like [:graph:] or \p{graph}. 145 * 146 * <p>{@icunote} There are several ICU (and Java) whitespace functions. 
147 * Comparison:<ul> 148 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 149 * most of general categories "Z" (separators) + most whitespace ISO controls 150 * (including no-break spaces, but excluding IS1..IS4) 151 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 152 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 153 * 154 * <p> 155 * This class is not subclassable. 156 * 157 * @author Syn Wee Quek 158 * @stable ICU 2.1 159 * @see com.ibm.icu.lang.UCharacterEnums 160 */ 161 162 public final class UCharacter implements ECharacterCategory, ECharacterDirection 163 { 164 /** 165 * Lead surrogate bitmask 166 */ 167 private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00; 168 169 /** 170 * Trail surrogate bitmask 171 */ 172 private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00; 173 174 /** 175 * Lead surrogate bits 176 */ 177 private static final int LEAD_SURROGATE_BITS = 0xD800; 178 179 /** 180 * Trail surrogate bits 181 */ 182 private static final int TRAIL_SURROGATE_BITS = 0xDC00; 183 184 private static final int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000); 185 186 // public inner classes ---------------------------------------------- 187 188 /** 189 * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_} 190 * 191 * A family of character subsets representing the character blocks in the 192 * Unicode specification, generated from Unicode Data file Blocks.txt. 193 * Character blocks generally define characters used for a specific script 194 * or purpose. A character is contained by at most one Unicode block. 195 * 196 * {@icunote} All fields named XXX_ID are specific to ICU. 
197 * 198 * @stable ICU 2.4 199 */ 200 public static final class UnicodeBlock extends Character.Subset 201 { 202 // block id corresponding to icu4c ----------------------------------- 203 204 /** 205 * @stable ICU 2.4 206 */ 207 public static final int INVALID_CODE_ID = -1; 208 /** 209 * @stable ICU 2.4 210 */ 211 public static final int BASIC_LATIN_ID = 1; 212 /** 213 * @stable ICU 2.4 214 */ 215 public static final int LATIN_1_SUPPLEMENT_ID = 2; 216 /** 217 * @stable ICU 2.4 218 */ 219 public static final int LATIN_EXTENDED_A_ID = 3; 220 /** 221 * @stable ICU 2.4 222 */ 223 public static final int LATIN_EXTENDED_B_ID = 4; 224 /** 225 * @stable ICU 2.4 226 */ 227 public static final int IPA_EXTENSIONS_ID = 5; 228 /** 229 * @stable ICU 2.4 230 */ 231 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 232 /** 233 * @stable ICU 2.4 234 */ 235 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 236 /** 237 * Unicode 3.2 renames this block to "Greek and Coptic". 238 * @stable ICU 2.4 239 */ 240 public static final int GREEK_ID = 8; 241 /** 242 * @stable ICU 2.4 243 */ 244 public static final int CYRILLIC_ID = 9; 245 /** 246 * @stable ICU 2.4 247 */ 248 public static final int ARMENIAN_ID = 10; 249 /** 250 * @stable ICU 2.4 251 */ 252 public static final int HEBREW_ID = 11; 253 /** 254 * @stable ICU 2.4 255 */ 256 public static final int ARABIC_ID = 12; 257 /** 258 * @stable ICU 2.4 259 */ 260 public static final int SYRIAC_ID = 13; 261 /** 262 * @stable ICU 2.4 263 */ 264 public static final int THAANA_ID = 14; 265 /** 266 * @stable ICU 2.4 267 */ 268 public static final int DEVANAGARI_ID = 15; 269 /** 270 * @stable ICU 2.4 271 */ 272 public static final int BENGALI_ID = 16; 273 /** 274 * @stable ICU 2.4 275 */ 276 public static final int GURMUKHI_ID = 17; 277 /** 278 * @stable ICU 2.4 279 */ 280 public static final int GUJARATI_ID = 18; 281 /** 282 * @stable ICU 2.4 283 */ 284 public static final int ORIYA_ID = 19; 285 /** 286 * @stable ICU 2.4 287 */ 
288 public static final int TAMIL_ID = 20; 289 /** 290 * @stable ICU 2.4 291 */ 292 public static final int TELUGU_ID = 21; 293 /** 294 * @stable ICU 2.4 295 */ 296 public static final int KANNADA_ID = 22; 297 /** 298 * @stable ICU 2.4 299 */ 300 public static final int MALAYALAM_ID = 23; 301 /** 302 * @stable ICU 2.4 303 */ 304 public static final int SINHALA_ID = 24; 305 /** 306 * @stable ICU 2.4 307 */ 308 public static final int THAI_ID = 25; 309 /** 310 * @stable ICU 2.4 311 */ 312 public static final int LAO_ID = 26; 313 /** 314 * @stable ICU 2.4 315 */ 316 public static final int TIBETAN_ID = 27; 317 /** 318 * @stable ICU 2.4 319 */ 320 public static final int MYANMAR_ID = 28; 321 /** 322 * @stable ICU 2.4 323 */ 324 public static final int GEORGIAN_ID = 29; 325 /** 326 * @stable ICU 2.4 327 */ 328 public static final int HANGUL_JAMO_ID = 30; 329 /** 330 * @stable ICU 2.4 331 */ 332 public static final int ETHIOPIC_ID = 31; 333 /** 334 * @stable ICU 2.4 335 */ 336 public static final int CHEROKEE_ID = 32; 337 /** 338 * @stable ICU 2.4 339 */ 340 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 341 /** 342 * @stable ICU 2.4 343 */ 344 public static final int OGHAM_ID = 34; 345 /** 346 * @stable ICU 2.4 347 */ 348 public static final int RUNIC_ID = 35; 349 /** 350 * @stable ICU 2.4 351 */ 352 public static final int KHMER_ID = 36; 353 /** 354 * @stable ICU 2.4 355 */ 356 public static final int MONGOLIAN_ID = 37; 357 /** 358 * @stable ICU 2.4 359 */ 360 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 361 /** 362 * @stable ICU 2.4 363 */ 364 public static final int GREEK_EXTENDED_ID = 39; 365 /** 366 * @stable ICU 2.4 367 */ 368 public static final int GENERAL_PUNCTUATION_ID = 40; 369 /** 370 * @stable ICU 2.4 371 */ 372 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 373 /** 374 * @stable ICU 2.4 375 */ 376 public static final int CURRENCY_SYMBOLS_ID = 42; 377 /** 378 * Unicode 3.2 renames this block to "Combining 
Diacritical Marks for 379 * Symbols". 380 * @stable ICU 2.4 381 */ 382 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 383 /** 384 * @stable ICU 2.4 385 */ 386 public static final int LETTERLIKE_SYMBOLS_ID = 44; 387 /** 388 * @stable ICU 2.4 389 */ 390 public static final int NUMBER_FORMS_ID = 45; 391 /** 392 * @stable ICU 2.4 393 */ 394 public static final int ARROWS_ID = 46; 395 /** 396 * @stable ICU 2.4 397 */ 398 public static final int MATHEMATICAL_OPERATORS_ID = 47; 399 /** 400 * @stable ICU 2.4 401 */ 402 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 403 /** 404 * @stable ICU 2.4 405 */ 406 public static final int CONTROL_PICTURES_ID = 49; 407 /** 408 * @stable ICU 2.4 409 */ 410 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 411 /** 412 * @stable ICU 2.4 413 */ 414 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 415 /** 416 * @stable ICU 2.4 417 */ 418 public static final int BOX_DRAWING_ID = 52; 419 /** 420 * @stable ICU 2.4 421 */ 422 public static final int BLOCK_ELEMENTS_ID = 53; 423 /** 424 * @stable ICU 2.4 425 */ 426 public static final int GEOMETRIC_SHAPES_ID = 54; 427 /** 428 * @stable ICU 2.4 429 */ 430 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 431 /** 432 * @stable ICU 2.4 433 */ 434 public static final int DINGBATS_ID = 56; 435 /** 436 * @stable ICU 2.4 437 */ 438 public static final int BRAILLE_PATTERNS_ID = 57; 439 /** 440 * @stable ICU 2.4 441 */ 442 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 443 /** 444 * @stable ICU 2.4 445 */ 446 public static final int KANGXI_RADICALS_ID = 59; 447 /** 448 * @stable ICU 2.4 449 */ 450 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 451 /** 452 * @stable ICU 2.4 453 */ 454 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 455 /** 456 * @stable ICU 2.4 457 */ 458 public static final int HIRAGANA_ID = 62; 459 /** 460 * @stable ICU 2.4 461 */ 462 public static final int KATAKANA_ID = 63; 463 /** 464 * 
@stable ICU 2.4 465 */ 466 public static final int BOPOMOFO_ID = 64; 467 /** 468 * @stable ICU 2.4 469 */ 470 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 471 /** 472 * @stable ICU 2.4 473 */ 474 public static final int KANBUN_ID = 66; 475 /** 476 * @stable ICU 2.4 477 */ 478 public static final int BOPOMOFO_EXTENDED_ID = 67; 479 /** 480 * @stable ICU 2.4 481 */ 482 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 483 /** 484 * @stable ICU 2.4 485 */ 486 public static final int CJK_COMPATIBILITY_ID = 69; 487 /** 488 * @stable ICU 2.4 489 */ 490 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 491 /** 492 * @stable ICU 2.4 493 */ 494 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 495 /** 496 * @stable ICU 2.4 497 */ 498 public static final int YI_SYLLABLES_ID = 72; 499 /** 500 * @stable ICU 2.4 501 */ 502 public static final int YI_RADICALS_ID = 73; 503 /** 504 * @stable ICU 2.4 505 */ 506 public static final int HANGUL_SYLLABLES_ID = 74; 507 /** 508 * @stable ICU 2.4 509 */ 510 public static final int HIGH_SURROGATES_ID = 75; 511 /** 512 * @stable ICU 2.4 513 */ 514 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 515 /** 516 * @stable ICU 2.4 517 */ 518 public static final int LOW_SURROGATES_ID = 77; 519 /** 520 * Same as public static final int PRIVATE_USE. 521 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 522 * and multiple code point ranges had this block. 523 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 524 * and adds separate blocks for the supplementary PUAs. 525 * @stable ICU 2.4 526 */ 527 public static final int PRIVATE_USE_AREA_ID = 78; 528 /** 529 * Same as public static final int PRIVATE_USE_AREA. 530 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 531 * and multiple code point ranges had this block. 
532 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 533 * and adds separate blocks for the supplementary PUAs. 534 * @stable ICU 2.4 535 */ 536 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 537 /** 538 * @stable ICU 2.4 539 */ 540 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 541 /** 542 * @stable ICU 2.4 543 */ 544 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 545 /** 546 * @stable ICU 2.4 547 */ 548 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 549 /** 550 * @stable ICU 2.4 551 */ 552 public static final int COMBINING_HALF_MARKS_ID = 82; 553 /** 554 * @stable ICU 2.4 555 */ 556 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 557 /** 558 * @stable ICU 2.4 559 */ 560 public static final int SMALL_FORM_VARIANTS_ID = 84; 561 /** 562 * @stable ICU 2.4 563 */ 564 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 565 /** 566 * @stable ICU 2.4 567 */ 568 public static final int SPECIALS_ID = 86; 569 /** 570 * @stable ICU 2.4 571 */ 572 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 573 /** 574 * @stable ICU 2.4 575 */ 576 public static final int OLD_ITALIC_ID = 88; 577 /** 578 * @stable ICU 2.4 579 */ 580 public static final int GOTHIC_ID = 89; 581 /** 582 * @stable ICU 2.4 583 */ 584 public static final int DESERET_ID = 90; 585 /** 586 * @stable ICU 2.4 587 */ 588 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 589 /** 590 * @stable ICU 2.4 591 */ 592 public static final int MUSICAL_SYMBOLS_ID = 92; 593 /** 594 * @stable ICU 2.4 595 */ 596 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 597 /** 598 * @stable ICU 2.4 599 */ 600 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 601 /** 602 * @stable ICU 2.4 603 */ 604 public static final int 605 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 606 /** 607 * @stable ICU 2.4 608 */ 609 public static final int TAGS_ID = 96; 610 611 // New blocks in Unicode 
3.2 612 613 /** 614 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 615 * @stable ICU 2.4 616 */ 617 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 618 /** 619 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 620 * @stable ICU 3.0 621 */ 622 623 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 624 /** 625 * @stable ICU 2.4 626 */ 627 public static final int TAGALOG_ID = 98; 628 /** 629 * @stable ICU 2.4 630 */ 631 public static final int HANUNOO_ID = 99; 632 /** 633 * @stable ICU 2.4 634 */ 635 public static final int BUHID_ID = 100; 636 /** 637 * @stable ICU 2.4 638 */ 639 public static final int TAGBANWA_ID = 101; 640 /** 641 * @stable ICU 2.4 642 */ 643 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 644 /** 645 * @stable ICU 2.4 646 */ 647 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 648 /** 649 * @stable ICU 2.4 650 */ 651 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 652 /** 653 * @stable ICU 2.4 654 */ 655 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 656 /** 657 * @stable ICU 2.4 658 */ 659 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 660 /** 661 * @stable ICU 2.4 662 */ 663 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 664 /** 665 * @stable ICU 2.4 666 */ 667 public static final int VARIATION_SELECTORS_ID = 108; 668 /** 669 * @stable ICU 2.4 670 */ 671 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 672 /** 673 * @stable ICU 2.4 674 */ 675 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 676 677 /** 678 * @stable ICU 2.6 679 */ 680 public static final int LIMBU_ID = 111; /*[1900]*/ 681 /** 682 * @stable ICU 2.6 683 */ 684 public static final int TAI_LE_ID = 112; /*[1950]*/ 685 /** 686 * @stable ICU 2.6 687 */ 688 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 689 /** 690 * @stable ICU 2.6 691 */ 692 public static 
final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 693 /** 694 * @stable ICU 2.6 695 */ 696 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 697 /** 698 * @stable ICU 2.6 699 */ 700 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 701 /** 702 * @stable ICU 2.6 703 */ 704 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 705 /** 706 * @stable ICU 2.6 707 */ 708 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 709 /** 710 * @stable ICU 2.6 711 */ 712 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 713 /** 714 * @stable ICU 2.6 715 */ 716 public static final int UGARITIC_ID = 120; /*[10380]*/ 717 /** 718 * @stable ICU 2.6 719 */ 720 public static final int SHAVIAN_ID = 121; /*[10450]*/ 721 /** 722 * @stable ICU 2.6 723 */ 724 public static final int OSMANYA_ID = 122; /*[10480]*/ 725 /** 726 * @stable ICU 2.6 727 */ 728 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 729 /** 730 * @stable ICU 2.6 731 */ 732 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 733 /** 734 * @stable ICU 2.6 735 */ 736 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 737 738 /* New blocks in Unicode 4.1 */ 739 740 /** 741 * @stable ICU 3.4 742 */ 743 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 744 745 /** 746 * @stable ICU 3.4 747 */ 748 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 749 750 /** 751 * @stable ICU 3.4 752 */ 753 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 754 755 /** 756 * @stable ICU 3.4 757 */ 758 public static final int BUGINESE_ID = 129; /*[1A00]*/ 759 760 /** 761 * @stable ICU 3.4 762 */ 763 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 764 765 /** 766 * @stable ICU 3.4 767 */ 768 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 769 770 /** 771 * @stable ICU 3.4 772 */ 773 public static final int 
COPTIC_ID = 132; /*[2C80]*/ 774 775 /** 776 * @stable ICU 3.4 777 */ 778 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 779 780 /** 781 * @stable ICU 3.4 782 */ 783 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 784 785 /** 786 * @stable ICU 3.4 787 */ 788 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 789 790 /** 791 * @stable ICU 3.4 792 */ 793 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 794 795 /** 796 * @stable ICU 3.4 797 */ 798 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 799 800 /** 801 * @stable ICU 3.4 802 */ 803 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 804 805 /** 806 * @stable ICU 3.4 807 */ 808 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 809 810 /** 811 * @stable ICU 3.4 812 */ 813 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 814 815 /** 816 * @stable ICU 3.4 817 */ 818 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 819 820 /** 821 * @stable ICU 3.4 822 */ 823 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 824 825 /** 826 * @stable ICU 3.4 827 */ 828 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 829 830 /** 831 * @stable ICU 3.4 832 */ 833 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 834 835 /** 836 * @stable ICU 3.4 837 */ 838 public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 839 840 /* New blocks in Unicode 5.0 */ 841 842 /** 843 * @stable ICU 3.6 844 */ 845 public static final int NKO_ID = 146; /*[07C0]*/ 846 /** 847 * @stable ICU 3.6 848 */ 849 public static final int BALINESE_ID = 147; /*[1B00]*/ 850 /** 851 * @stable ICU 3.6 852 */ 853 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 854 /** 855 * @stable ICU 3.6 856 */ 857 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 858 /** 859 * @stable ICU 3.6 860 */ 861 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 862 /** 863 * @stable ICU 3.6 864 */ 865 public 
static final int PHOENICIAN_ID = 151; /*[10900]*/ 866 /** 867 * @stable ICU 3.6 868 */ 869 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 870 /** 871 * @stable ICU 3.6 872 */ 873 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 874 /** 875 * @stable ICU 3.6 876 */ 877 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 878 879 /** 880 * @stable ICU 4.0 881 */ 882 public static final int SUNDANESE_ID = 155; /* [1B80] */ 883 884 /** 885 * @stable ICU 4.0 886 */ 887 public static final int LEPCHA_ID = 156; /* [1C00] */ 888 889 /** 890 * @stable ICU 4.0 891 */ 892 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 893 894 /** 895 * @stable ICU 4.0 896 */ 897 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 898 899 /** 900 * @stable ICU 4.0 901 */ 902 public static final int VAI_ID = 159; /* [A500] */ 903 904 /** 905 * @stable ICU 4.0 906 */ 907 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 908 909 /** 910 * @stable ICU 4.0 911 */ 912 public static final int SAURASHTRA_ID = 161; /* [A880] */ 913 914 /** 915 * @stable ICU 4.0 916 */ 917 public static final int KAYAH_LI_ID = 162; /* [A900] */ 918 919 /** 920 * @stable ICU 4.0 921 */ 922 public static final int REJANG_ID = 163; /* [A930] */ 923 924 /** 925 * @stable ICU 4.0 926 */ 927 public static final int CHAM_ID = 164; /* [AA00] */ 928 929 /** 930 * @stable ICU 4.0 931 */ 932 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 933 934 /** 935 * @stable ICU 4.0 936 */ 937 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 938 939 /** 940 * @stable ICU 4.0 941 */ 942 public static final int LYCIAN_ID = 167; /* [10280] */ 943 944 /** 945 * @stable ICU 4.0 946 */ 947 public static final int CARIAN_ID = 168; /* [102A0] */ 948 949 /** 950 * @stable ICU 4.0 951 */ 952 public static final int LYDIAN_ID = 169; /* [10920] */ 953 954 /** 955 * @stable ICU 4.0 956 */ 957 public static final int 
MAHJONG_TILES_ID = 170; /* [1F000] */ 958 959 /** 960 * @stable ICU 4.0 961 */ 962 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 963 964 /* New blocks in Unicode 5.2 */ 965 966 /** @stable ICU 4.4 */ 967 public static final int SAMARITAN_ID = 172; /*[0800]*/ 968 /** @stable ICU 4.4 */ 969 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 970 /** @stable ICU 4.4 */ 971 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 972 /** @stable ICU 4.4 */ 973 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 974 /** @stable ICU 4.4 */ 975 public static final int LISU_ID = 176; /*[A4D0]*/ 976 /** @stable ICU 4.4 */ 977 public static final int BAMUM_ID = 177; /*[A6A0]*/ 978 /** @stable ICU 4.4 */ 979 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 980 /** @stable ICU 4.4 */ 981 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 982 /** @stable ICU 4.4 */ 983 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 984 /** @stable ICU 4.4 */ 985 public static final int JAVANESE_ID = 181; /*[A980]*/ 986 /** @stable ICU 4.4 */ 987 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 988 /** @stable ICU 4.4 */ 989 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 990 /** @stable ICU 4.4 */ 991 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 992 /** @stable ICU 4.4 */ 993 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 994 /** @stable ICU 4.4 */ 995 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 996 /** @stable ICU 4.4 */ 997 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 998 /** @stable ICU 4.4 */ 999 public static final int AVESTAN_ID = 188; /*[10B00]*/ 1000 /** @stable ICU 4.4 */ 1001 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 1002 /** @stable ICU 4.4 */ 1003 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 1004 /** @stable ICU 4.4 */ 1005 
public static final int OLD_TURKIC_ID = 191; /*[10C00]*/ 1006 /** @stable ICU 4.4 */ 1007 public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ 1008 /** @stable ICU 4.4 */ 1009 public static final int KAITHI_ID = 193; /*[11080]*/ 1010 /** @stable ICU 4.4 */ 1011 public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ 1012 /** @stable ICU 4.4 */ 1013 public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ 1014 /** @stable ICU 4.4 */ 1015 public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ 1016 /** @stable ICU 4.4 */ 1017 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ 1018 1019 /* New blocks in Unicode 6.0 */ 1020 1021 /** @stable ICU 4.6 */ 1022 public static final int MANDAIC_ID = 198; /*[0840]*/ 1023 /** @stable ICU 4.6 */ 1024 public static final int BATAK_ID = 199; /*[1BC0]*/ 1025 /** @stable ICU 4.6 */ 1026 public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/ 1027 /** @stable ICU 4.6 */ 1028 public static final int BRAHMI_ID = 201; /*[11000]*/ 1029 /** @stable ICU 4.6 */ 1030 public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/ 1031 /** @stable ICU 4.6 */ 1032 public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/ 1033 /** @stable ICU 4.6 */ 1034 public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/ 1035 /** @stable ICU 4.6 */ 1036 public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/ 1037 /** @stable ICU 4.6 */ 1038 public static final int EMOTICONS_ID = 206; /*[1F600]*/ 1039 /** @stable ICU 4.6 */ 1040 public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/ 1041 /** @stable ICU 4.6 */ 1042 public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/ 1043 /** @stable ICU 4.6 */ 1044 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/ 1045 1046 /* New blocks in Unicode 6.1 */ 1047 1048 /** @stable ICU 49 */ 1049 public static final int 
ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/ 1050 /** @stable ICU 49 */ 1051 public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/ 1052 /** @stable ICU 49 */ 1053 public static final int CHAKMA_ID = 212; /*[11100]*/ 1054 /** @stable ICU 49 */ 1055 public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/ 1056 /** @stable ICU 49 */ 1057 public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/ 1058 /** @stable ICU 49 */ 1059 public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/ 1060 /** @stable ICU 49 */ 1061 public static final int MIAO_ID = 216; /*[16F00]*/ 1062 /** @stable ICU 49 */ 1063 public static final int SHARADA_ID = 217; /*[11180]*/ 1064 /** @stable ICU 49 */ 1065 public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/ 1066 /** @stable ICU 49 */ 1067 public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/ 1068 /** @stable ICU 49 */ 1069 public static final int TAKRI_ID = 220; /*[11680]*/ 1070 1071 /* New blocks in Unicode 7.0 */ 1072 1073 /** @stable ICU 54 */ 1074 public static final int BASSA_VAH_ID = 221; /*[16AD0]*/ 1075 /** @stable ICU 54 */ 1076 public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/ 1077 /** @stable ICU 54 */ 1078 public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/ 1079 /** @stable ICU 54 */ 1080 public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/ 1081 /** @stable ICU 54 */ 1082 public static final int DUPLOYAN_ID = 225; /*[1BC00]*/ 1083 /** @stable ICU 54 */ 1084 public static final int ELBASAN_ID = 226; /*[10500]*/ 1085 /** @stable ICU 54 */ 1086 public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/ 1087 /** @stable ICU 54 */ 1088 public static final int GRANTHA_ID = 228; /*[11300]*/ 1089 /** @stable ICU 54 */ 1090 public static final int KHOJKI_ID = 229; /*[11200]*/ 1091 /** @stable ICU 54 */ 1092 public static final int KHUDAWADI_ID = 230; /*[112B0]*/ 1093 /** @stable ICU 54 */ 1094 public 
static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/ 1095 /** @stable ICU 54 */ 1096 public static final int LINEAR_A_ID = 232; /*[10600]*/ 1097 /** @stable ICU 54 */ 1098 public static final int MAHAJANI_ID = 233; /*[11150]*/ 1099 /** @stable ICU 54 */ 1100 public static final int MANICHAEAN_ID = 234; /*[10AC0]*/ 1101 /** @stable ICU 54 */ 1102 public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/ 1103 /** @stable ICU 54 */ 1104 public static final int MODI_ID = 236; /*[11600]*/ 1105 /** @stable ICU 54 */ 1106 public static final int MRO_ID = 237; /*[16A40]*/ 1107 /** @stable ICU 54 */ 1108 public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/ 1109 /** @stable ICU 54 */ 1110 public static final int NABATAEAN_ID = 239; /*[10880]*/ 1111 /** @stable ICU 54 */ 1112 public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/ 1113 /** @stable ICU 54 */ 1114 public static final int OLD_PERMIC_ID = 241; /*[10350]*/ 1115 /** @stable ICU 54 */ 1116 public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/ 1117 /** @stable ICU 54 */ 1118 public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/ 1119 /** @stable ICU 54 */ 1120 public static final int PALMYRENE_ID = 244; /*[10860]*/ 1121 /** @stable ICU 54 */ 1122 public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/ 1123 /** @stable ICU 54 */ 1124 public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/ 1125 /** @stable ICU 54 */ 1126 public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/ 1127 /** @stable ICU 54 */ 1128 public static final int SIDDHAM_ID = 248; /*[11580]*/ 1129 /** @stable ICU 54 */ 1130 public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/ 1131 /** @stable ICU 54 */ 1132 public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/ 1133 /** @stable ICU 54 */ 1134 public static final int TIRHUTA_ID = 251; /*[11480]*/ 1135 /** @stable ICU 54 */ 1136 public static final int WARANG_CITI_ID = 252; /*[118A0]*/ 1137 1138 /* New blocks in 
Unicode 8.0 */ 1139 1140 /** @stable ICU 56 */ 1141 public static final int AHOM_ID = 253; /*[11700]*/ 1142 /** @stable ICU 56 */ 1143 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 1144 /** @stable ICU 56 */ 1145 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 1146 /** @stable ICU 56 */ 1147 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 1148 /** @stable ICU 56 */ 1149 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 1150 /** @stable ICU 56 */ 1151 public static final int HATRAN_ID = 258; /*[108E0]*/ 1152 /** @stable ICU 56 */ 1153 public static final int MULTANI_ID = 259; /*[11280]*/ 1154 /** @stable ICU 56 */ 1155 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 1156 /** @stable ICU 56 */ 1157 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 1158 /** @stable ICU 56 */ 1159 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 1160 1161 /* New blocks in Unicode 9.0 */ 1162 1163 /** @stable ICU 58 */ 1164 public static final int ADLAM_ID = 263; /*[1E900]*/ 1165 /** @stable ICU 58 */ 1166 public static final int BHAIKSUKI_ID = 264; /*[11C00]*/ 1167 /** @stable ICU 58 */ 1168 public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/ 1169 /** @stable ICU 58 */ 1170 public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/ 1171 /** @stable ICU 58 */ 1172 public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/ 1173 /** @stable ICU 58 */ 1174 public static final int MARCHEN_ID = 268; /*[11C70]*/ 1175 /** @stable ICU 58 */ 1176 public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/ 1177 /** @stable ICU 58 */ 1178 public static final int NEWA_ID = 270; /*[11400]*/ 1179 /** @stable ICU 58 */ 1180 public static final int OSAGE_ID = 271; /*[104B0]*/ 1181 /** @stable ICU 58 */ 1182 public static final int TANGUT_ID = 272; /*[17000]*/ 1183 /** @stable ICU 58 */ 1184 public 
static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/ 1185 1186 // New blocks in Unicode 10.0 1187 1188 /** @stable ICU 60 */ 1189 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/ 1190 /** @stable ICU 60 */ 1191 public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/ 1192 /** @stable ICU 60 */ 1193 public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/ 1194 /** @stable ICU 60 */ 1195 public static final int NUSHU_ID = 277; /*[1B170]*/ 1196 /** @stable ICU 60 */ 1197 public static final int SOYOMBO_ID = 278; /*[11A50]*/ 1198 /** @stable ICU 60 */ 1199 public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/ 1200 /** @stable ICU 60 */ 1201 public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/ 1202 1203 // New blocks in Unicode 11.0 1204 1205 /** @stable ICU 62 */ 1206 public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/ 1207 /** @stable ICU 62 */ 1208 public static final int DOGRA_ID = 282; /*[11800]*/ 1209 /** @stable ICU 62 */ 1210 public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/ 1211 /** @stable ICU 62 */ 1212 public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/ 1213 /** @stable ICU 62 */ 1214 public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/ 1215 /** @stable ICU 62 */ 1216 public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/ 1217 /** @stable ICU 62 */ 1218 public static final int MAKASAR_ID = 287; /*[11EE0]*/ 1219 /** @stable ICU 62 */ 1220 public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/ 1221 /** @stable ICU 62 */ 1222 public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/ 1223 /** @stable ICU 62 */ 1224 public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/ 1225 /** @stable ICU 62 */ 1226 public static final int SOGDIAN_ID = 291; /*[10F30]*/ 1227 1228 // New blocks in Unicode 12.0 1229 1230 /** @stable ICU 64 */ 1231 public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/ 1232 /** @stable ICU 64 */ 1233 
public static final int ELYMAIC_ID = 293; /*[10FE0]*/ 1234 /** @stable ICU 64 */ 1235 public static final int NANDINAGARI_ID = 294; /*[119A0]*/ 1236 /** @stable ICU 64 */ 1237 public static final int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/ 1238 /** @stable ICU 64 */ 1239 public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/ 1240 /** @stable ICU 64 */ 1241 public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/ 1242 /** @stable ICU 64 */ 1243 public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/ 1244 /** @stable ICU 64 */ 1245 public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/ 1246 /** @stable ICU 64 */ 1247 public static final int WANCHO_ID = 300; /*[1E2C0]*/ 1248 1249 // New blocks in Unicode 13.0 1250 1251 /** @stable ICU 66 */ 1252 public static final int CHORASMIAN_ID = 301; /*[10FB0]*/ 1253 /** @stable ICU 66 */ 1254 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/ 1255 /** @stable ICU 66 */ 1256 public static final int DIVES_AKURU_ID = 303; /*[11900]*/ 1257 /** @stable ICU 66 */ 1258 public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/ 1259 /** @stable ICU 66 */ 1260 public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/ 1261 /** @stable ICU 66 */ 1262 public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/ 1263 /** @stable ICU 66 */ 1264 public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/ 1265 /** @stable ICU 66 */ 1266 public static final int YEZIDI_ID = 308; /*[10E80]*/ 1267 1268 // New blocks in Unicode 14.0 1269 1270 /** @stable ICU 70 */ 1271 public static final int ARABIC_EXTENDED_B_ID = 309; /*[0870]*/ 1272 /** @stable ICU 70 */ 1273 public static final int CYPRO_MINOAN_ID = 310; /*[12F90]*/ 1274 /** @stable ICU 70 */ 1275 public static final int ETHIOPIC_EXTENDED_B_ID = 311; /*[1E7E0]*/ 1276 /** @stable ICU 70 */ 1277 public static final int KANA_EXTENDED_B_ID = 312; /*[1AFF0]*/ 1278 /** @stable ICU 
70 */ 1279 public static final int LATIN_EXTENDED_F_ID = 313; /*[10780]*/ 1280 /** @stable ICU 70 */ 1281 public static final int LATIN_EXTENDED_G_ID = 314; /*[1DF00]*/ 1282 /** @stable ICU 70 */ 1283 public static final int OLD_UYGHUR_ID = 315; /*[10F70]*/ 1284 /** @stable ICU 70 */ 1285 public static final int TANGSA_ID = 316; /*[16A70]*/ 1286 /** @stable ICU 70 */ 1287 public static final int TOTO_ID = 317; /*[1E290]*/ 1288 /** @stable ICU 70 */ 1289 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID = 318; /*[11AB0]*/ 1290 /** @stable ICU 70 */ 1291 public static final int VITHKUQI_ID = 319; /*[10570]*/ 1292 /** @stable ICU 70 */ 1293 public static final int ZNAMENNY_MUSICAL_NOTATION_ID = 320; /*[1CF00]*/ 1294 1295 // New blocks in Unicode 15.0 1296 1297 /** @stable ICU 72 */ 1298 public static final int ARABIC_EXTENDED_C_ID = 321; /*[10EC0]*/ 1299 /** @stable ICU 72 */ 1300 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID = 322; /*[31350]*/ 1301 /** @stable ICU 72 */ 1302 public static final int CYRILLIC_EXTENDED_D_ID = 323; /*[1E030]*/ 1303 /** @stable ICU 72 */ 1304 public static final int DEVANAGARI_EXTENDED_A_ID = 324; /*[11B00]*/ 1305 /** @stable ICU 72 */ 1306 public static final int KAKTOVIK_NUMERALS_ID = 325; /*[1D2C0]*/ 1307 /** @stable ICU 72 */ 1308 public static final int KAWI_ID = 326; /*[11F00]*/ 1309 /** @stable ICU 72 */ 1310 public static final int NAG_MUNDARI_ID = 327; /*[1E4D0]*/ 1311 1312 /** 1313 * One more than the highest normal UnicodeBlock value. 1314 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 1315 * 1316 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
1317 */ 1318 @Deprecated 1319 public static final int COUNT = 328; 1320 1321 // blocks objects --------------------------------------------------- 1322 1323 /** 1324 * Array of UnicodeBlocks, for easy access in getInstance(int) 1325 */ 1326 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1327 1328 /** 1329 * @stable ICU 2.6 1330 */ 1331 public static final UnicodeBlock NO_BLOCK 1332 = new UnicodeBlock("NO_BLOCK", 0); 1333 1334 /** 1335 * @stable ICU 2.4 1336 */ 1337 public static final UnicodeBlock BASIC_LATIN 1338 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1339 /** 1340 * @stable ICU 2.4 1341 */ 1342 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1343 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1344 /** 1345 * @stable ICU 2.4 1346 */ 1347 public static final UnicodeBlock LATIN_EXTENDED_A 1348 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1349 /** 1350 * @stable ICU 2.4 1351 */ 1352 public static final UnicodeBlock LATIN_EXTENDED_B 1353 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1354 /** 1355 * @stable ICU 2.4 1356 */ 1357 public static final UnicodeBlock IPA_EXTENSIONS 1358 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1359 /** 1360 * @stable ICU 2.4 1361 */ 1362 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1363 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1364 /** 1365 * @stable ICU 2.4 1366 */ 1367 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1368 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1369 /** 1370 * Unicode 3.2 renames this block to "Greek and Coptic". 
1371 * @stable ICU 2.4 1372 */ 1373 public static final UnicodeBlock GREEK 1374 = new UnicodeBlock("GREEK", GREEK_ID); 1375 /** 1376 * @stable ICU 2.4 1377 */ 1378 public static final UnicodeBlock CYRILLIC 1379 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1380 /** 1381 * @stable ICU 2.4 1382 */ 1383 public static final UnicodeBlock ARMENIAN 1384 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1385 /** 1386 * @stable ICU 2.4 1387 */ 1388 public static final UnicodeBlock HEBREW 1389 = new UnicodeBlock("HEBREW", HEBREW_ID); 1390 /** 1391 * @stable ICU 2.4 1392 */ 1393 public static final UnicodeBlock ARABIC 1394 = new UnicodeBlock("ARABIC", ARABIC_ID); 1395 /** 1396 * @stable ICU 2.4 1397 */ 1398 public static final UnicodeBlock SYRIAC 1399 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1400 /** 1401 * @stable ICU 2.4 1402 */ 1403 public static final UnicodeBlock THAANA 1404 = new UnicodeBlock("THAANA", THAANA_ID); 1405 /** 1406 * @stable ICU 2.4 1407 */ 1408 public static final UnicodeBlock DEVANAGARI 1409 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1410 /** 1411 * @stable ICU 2.4 1412 */ 1413 public static final UnicodeBlock BENGALI 1414 = new UnicodeBlock("BENGALI", BENGALI_ID); 1415 /** 1416 * @stable ICU 2.4 1417 */ 1418 public static final UnicodeBlock GURMUKHI 1419 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1420 /** 1421 * @stable ICU 2.4 1422 */ 1423 public static final UnicodeBlock GUJARATI 1424 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1425 /** 1426 * @stable ICU 2.4 1427 */ 1428 public static final UnicodeBlock ORIYA 1429 = new UnicodeBlock("ORIYA", ORIYA_ID); 1430 /** 1431 * @stable ICU 2.4 1432 */ 1433 public static final UnicodeBlock TAMIL 1434 = new UnicodeBlock("TAMIL", TAMIL_ID); 1435 /** 1436 * @stable ICU 2.4 1437 */ 1438 public static final UnicodeBlock TELUGU 1439 = new UnicodeBlock("TELUGU", TELUGU_ID); 1440 /** 1441 * @stable ICU 2.4 1442 */ 1443 public static final UnicodeBlock KANNADA 1444 = new UnicodeBlock("KANNADA", KANNADA_ID); 1445 /** 
1446 * @stable ICU 2.4 1447 */ 1448 public static final UnicodeBlock MALAYALAM 1449 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1450 /** 1451 * @stable ICU 2.4 1452 */ 1453 public static final UnicodeBlock SINHALA 1454 = new UnicodeBlock("SINHALA", SINHALA_ID); 1455 /** 1456 * @stable ICU 2.4 1457 */ 1458 public static final UnicodeBlock THAI 1459 = new UnicodeBlock("THAI", THAI_ID); 1460 /** 1461 * @stable ICU 2.4 1462 */ 1463 public static final UnicodeBlock LAO 1464 = new UnicodeBlock("LAO", LAO_ID); 1465 /** 1466 * @stable ICU 2.4 1467 */ 1468 public static final UnicodeBlock TIBETAN 1469 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1470 /** 1471 * @stable ICU 2.4 1472 */ 1473 public static final UnicodeBlock MYANMAR 1474 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1475 /** 1476 * @stable ICU 2.4 1477 */ 1478 public static final UnicodeBlock GEORGIAN 1479 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1480 /** 1481 * @stable ICU 2.4 1482 */ 1483 public static final UnicodeBlock HANGUL_JAMO 1484 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1485 /** 1486 * @stable ICU 2.4 1487 */ 1488 public static final UnicodeBlock ETHIOPIC 1489 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1490 /** 1491 * @stable ICU 2.4 1492 */ 1493 public static final UnicodeBlock CHEROKEE 1494 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1495 /** 1496 * @stable ICU 2.4 1497 */ 1498 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1499 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1500 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1501 /** 1502 * @stable ICU 2.4 1503 */ 1504 public static final UnicodeBlock OGHAM 1505 = new UnicodeBlock("OGHAM", OGHAM_ID); 1506 /** 1507 * @stable ICU 2.4 1508 */ 1509 public static final UnicodeBlock RUNIC 1510 = new UnicodeBlock("RUNIC", RUNIC_ID); 1511 /** 1512 * @stable ICU 2.4 1513 */ 1514 public static final UnicodeBlock KHMER 1515 = new UnicodeBlock("KHMER", KHMER_ID); 1516 /** 1517 * @stable ICU 2.4 1518 */ 1519 
public static final UnicodeBlock MONGOLIAN 1520 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1521 /** 1522 * @stable ICU 2.4 1523 */ 1524 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1525 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1526 /** 1527 * @stable ICU 2.4 1528 */ 1529 public static final UnicodeBlock GREEK_EXTENDED 1530 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1531 /** 1532 * @stable ICU 2.4 1533 */ 1534 public static final UnicodeBlock GENERAL_PUNCTUATION 1535 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1536 /** 1537 * @stable ICU 2.4 1538 */ 1539 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1540 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1541 /** 1542 * @stable ICU 2.4 1543 */ 1544 public static final UnicodeBlock CURRENCY_SYMBOLS 1545 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1546 /** 1547 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1548 * Symbols". 
1549 * @stable ICU 2.4 1550 */ 1551 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1552 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1553 /** 1554 * @stable ICU 2.4 1555 */ 1556 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1557 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1558 /** 1559 * @stable ICU 2.4 1560 */ 1561 public static final UnicodeBlock NUMBER_FORMS 1562 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1563 /** 1564 * @stable ICU 2.4 1565 */ 1566 public static final UnicodeBlock ARROWS 1567 = new UnicodeBlock("ARROWS", ARROWS_ID); 1568 /** 1569 * @stable ICU 2.4 1570 */ 1571 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1572 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1573 /** 1574 * @stable ICU 2.4 1575 */ 1576 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1577 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1578 /** 1579 * @stable ICU 2.4 1580 */ 1581 public static final UnicodeBlock CONTROL_PICTURES 1582 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1583 /** 1584 * @stable ICU 2.4 1585 */ 1586 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1587 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1588 /** 1589 * @stable ICU 2.4 1590 */ 1591 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1592 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1593 /** 1594 * @stable ICU 2.4 1595 */ 1596 public static final UnicodeBlock BOX_DRAWING 1597 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1598 /** 1599 * @stable ICU 2.4 1600 */ 1601 public static final UnicodeBlock BLOCK_ELEMENTS 1602 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1603 /** 1604 * @stable ICU 2.4 1605 */ 1606 public static final UnicodeBlock GEOMETRIC_SHAPES 1607 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1608 
/** 1609 * @stable ICU 2.4 1610 */ 1611 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1612 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1613 /** 1614 * @stable ICU 2.4 1615 */ 1616 public static final UnicodeBlock DINGBATS 1617 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1618 /** 1619 * @stable ICU 2.4 1620 */ 1621 public static final UnicodeBlock BRAILLE_PATTERNS 1622 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1623 /** 1624 * @stable ICU 2.4 1625 */ 1626 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1627 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1628 /** 1629 * @stable ICU 2.4 1630 */ 1631 public static final UnicodeBlock KANGXI_RADICALS 1632 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1633 /** 1634 * @stable ICU 2.4 1635 */ 1636 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1637 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1638 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1639 /** 1640 * @stable ICU 2.4 1641 */ 1642 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1643 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1644 /** 1645 * @stable ICU 2.4 1646 */ 1647 public static final UnicodeBlock HIRAGANA 1648 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1649 /** 1650 * @stable ICU 2.4 1651 */ 1652 public static final UnicodeBlock KATAKANA 1653 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1654 /** 1655 * @stable ICU 2.4 1656 */ 1657 public static final UnicodeBlock BOPOMOFO 1658 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1659 /** 1660 * @stable ICU 2.4 1661 */ 1662 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1663 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1664 /** 1665 * @stable ICU 2.4 1666 */ 1667 public static final UnicodeBlock KANBUN 1668 = new UnicodeBlock("KANBUN", KANBUN_ID); 1669 /** 1670 * @stable ICU 2.4 1671 */ 1672 
public static final UnicodeBlock BOPOMOFO_EXTENDED 1673 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1674 /** 1675 * @stable ICU 2.4 1676 */ 1677 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1678 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1679 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1680 /** 1681 * @stable ICU 2.4 1682 */ 1683 public static final UnicodeBlock CJK_COMPATIBILITY 1684 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1685 /** 1686 * @stable ICU 2.4 1687 */ 1688 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1689 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1690 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1691 /** 1692 * @stable ICU 2.4 1693 */ 1694 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1695 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1696 /** 1697 * @stable ICU 2.4 1698 */ 1699 public static final UnicodeBlock YI_SYLLABLES 1700 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1701 /** 1702 * @stable ICU 2.4 1703 */ 1704 public static final UnicodeBlock YI_RADICALS 1705 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1706 /** 1707 * @stable ICU 2.4 1708 */ 1709 public static final UnicodeBlock HANGUL_SYLLABLES 1710 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1711 /** 1712 * @stable ICU 2.4 1713 */ 1714 public static final UnicodeBlock HIGH_SURROGATES 1715 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1716 /** 1717 * @stable ICU 2.4 1718 */ 1719 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1720 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1721 /** 1722 * @stable ICU 2.4 1723 */ 1724 public static final UnicodeBlock LOW_SURROGATES 1725 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1726 /** 1727 * Same as public static final int PRIVATE_USE. 
1728 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1729 * and multiple code point ranges had this block. 1730 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1731 * and adds separate blocks for the supplementary PUAs. 1732 * @stable ICU 2.4 1733 */ 1734 public static final UnicodeBlock PRIVATE_USE_AREA 1735 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1736 /** 1737 * Same as public static final int PRIVATE_USE_AREA. 1738 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1739 * and multiple code point ranges had this block. 1740 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1741 * and adds separate blocks for the supplementary PUAs. 1742 * @stable ICU 2.4 1743 */ 1744 public static final UnicodeBlock PRIVATE_USE 1745 = PRIVATE_USE_AREA; 1746 /** 1747 * @stable ICU 2.4 1748 */ 1749 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1750 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1751 /** 1752 * @stable ICU 2.4 1753 */ 1754 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1755 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1756 /** 1757 * @stable ICU 2.4 1758 */ 1759 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1760 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1761 /** 1762 * @stable ICU 2.4 1763 */ 1764 public static final UnicodeBlock COMBINING_HALF_MARKS 1765 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1766 /** 1767 * @stable ICU 2.4 1768 */ 1769 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1770 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1771 /** 1772 * @stable ICU 2.4 1773 */ 1774 public static final UnicodeBlock SMALL_FORM_VARIANTS 1775 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1776 /** 1777 * @stable ICU 2.4 1778 */ 1779 
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1780 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1781 /** 1782 * @stable ICU 2.4 1783 */ 1784 public static final UnicodeBlock SPECIALS 1785 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1786 /** 1787 * @stable ICU 2.4 1788 */ 1789 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1790 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1791 /** 1792 * @stable ICU 2.4 1793 */ 1794 public static final UnicodeBlock OLD_ITALIC 1795 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1796 /** 1797 * @stable ICU 2.4 1798 */ 1799 public static final UnicodeBlock GOTHIC 1800 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1801 /** 1802 * @stable ICU 2.4 1803 */ 1804 public static final UnicodeBlock DESERET 1805 = new UnicodeBlock("DESERET", DESERET_ID); 1806 /** 1807 * @stable ICU 2.4 1808 */ 1809 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1810 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1811 /** 1812 * @stable ICU 2.4 1813 */ 1814 public static final UnicodeBlock MUSICAL_SYMBOLS 1815 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1816 /** 1817 * @stable ICU 2.4 1818 */ 1819 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1820 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1821 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1822 /** 1823 * @stable ICU 2.4 1824 */ 1825 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1826 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1827 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1828 /** 1829 * @stable ICU 2.4 1830 */ 1831 public static final UnicodeBlock 1832 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1833 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1834 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1835 /** 1836 * @stable ICU 2.4 1837 */ 1838 public static final UnicodeBlock TAGS 
1839 = new UnicodeBlock("TAGS", TAGS_ID); 1840 1841 // New blocks in Unicode 3.2 1842 1843 /** 1844 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1845 * @stable ICU 2.4 1846 */ 1847 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1848 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1849 /** 1850 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1851 * @stable ICU 3.0 1852 */ 1853 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1854 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1855 /** 1856 * @stable ICU 2.4 1857 */ 1858 public static final UnicodeBlock TAGALOG 1859 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1860 /** 1861 * @stable ICU 2.4 1862 */ 1863 public static final UnicodeBlock HANUNOO 1864 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1865 /** 1866 * @stable ICU 2.4 1867 */ 1868 public static final UnicodeBlock BUHID 1869 = new UnicodeBlock("BUHID", BUHID_ID); 1870 /** 1871 * @stable ICU 2.4 1872 */ 1873 public static final UnicodeBlock TAGBANWA 1874 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1875 /** 1876 * @stable ICU 2.4 1877 */ 1878 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1879 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1880 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1881 /** 1882 * @stable ICU 2.4 1883 */ 1884 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1885 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1886 /** 1887 * @stable ICU 2.4 1888 */ 1889 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1890 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1891 /** 1892 * @stable ICU 2.4 1893 */ 1894 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1895 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1896 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1897 /** 1898 * @stable ICU 2.4 1899 */ 1900 
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1901 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1902 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1903 /** 1904 * @stable ICU 2.4 1905 */ 1906 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1907 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1908 /** 1909 * @stable ICU 2.4 1910 */ 1911 public static final UnicodeBlock VARIATION_SELECTORS 1912 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1913 /** 1914 * @stable ICU 2.4 1915 */ 1916 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1917 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1918 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1919 /** 1920 * @stable ICU 2.4 1921 */ 1922 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1923 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1924 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1925 1926 /** 1927 * @stable ICU 2.6 1928 */ 1929 public static final UnicodeBlock LIMBU 1930 = new UnicodeBlock("LIMBU", LIMBU_ID); 1931 /** 1932 * @stable ICU 2.6 1933 */ 1934 public static final UnicodeBlock TAI_LE 1935 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1936 /** 1937 * @stable ICU 2.6 1938 */ 1939 public static final UnicodeBlock KHMER_SYMBOLS 1940 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1941 1942 /** 1943 * @stable ICU 2.6 1944 */ 1945 public static final UnicodeBlock PHONETIC_EXTENSIONS 1946 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1947 1948 /** 1949 * @stable ICU 2.6 1950 */ 1951 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1952 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1953 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1954 /** 1955 * @stable ICU 2.6 1956 */ 1957 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1958 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1959 /** 1960 * @stable ICU 2.6 
1961 */ 1962 public static final UnicodeBlock LINEAR_B_SYLLABARY 1963 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1964 /** 1965 * @stable ICU 2.6 1966 */ 1967 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1968 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1969 /** 1970 * @stable ICU 2.6 1971 */ 1972 public static final UnicodeBlock AEGEAN_NUMBERS 1973 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1974 /** 1975 * @stable ICU 2.6 1976 */ 1977 public static final UnicodeBlock UGARITIC 1978 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1979 /** 1980 * @stable ICU 2.6 1981 */ 1982 public static final UnicodeBlock SHAVIAN 1983 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1984 /** 1985 * @stable ICU 2.6 1986 */ 1987 public static final UnicodeBlock OSMANYA 1988 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1989 /** 1990 * @stable ICU 2.6 1991 */ 1992 public static final UnicodeBlock CYPRIOT_SYLLABARY 1993 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1994 /** 1995 * @stable ICU 2.6 1996 */ 1997 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1998 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1999 2000 /** 2001 * @stable ICU 2.6 2002 */ 2003 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 2004 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 2005 2006 /* New blocks in Unicode 4.1 */ 2007 2008 /** 2009 * @stable ICU 3.4 2010 */ 2011 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2012 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2013 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 2014 2015 /** 2016 * @stable ICU 3.4 2017 */ 2018 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2019 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 2020 2021 /** 2022 * @stable ICU 3.4 2023 */ 2024 public static final UnicodeBlock ARABIC_SUPPLEMENT = 2025 new 
UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 2026 2027 /** 2028 * @stable ICU 3.4 2029 */ 2030 public static final UnicodeBlock BUGINESE = 2031 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 2032 2033 /** 2034 * @stable ICU 3.4 2035 */ 2036 public static final UnicodeBlock CJK_STROKES = 2037 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 2038 2039 /** 2040 * @stable ICU 3.4 2041 */ 2042 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 2043 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 2044 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 2045 2046 /** 2047 * @stable ICU 3.4 2048 */ 2049 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 2050 2051 /** 2052 * @stable ICU 3.4 2053 */ 2054 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2055 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 2056 2057 /** 2058 * @stable ICU 3.4 2059 */ 2060 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 2061 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 2062 2063 /** 2064 * @stable ICU 3.4 2065 */ 2066 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2067 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 2068 2069 /** 2070 * @stable ICU 3.4 2071 */ 2072 public static final UnicodeBlock GLAGOLITIC = 2073 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 2074 2075 /** 2076 * @stable ICU 3.4 2077 */ 2078 public static final UnicodeBlock KHAROSHTHI = 2079 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 2080 2081 /** 2082 * @stable ICU 3.4 2083 */ 2084 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2085 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 2086 2087 /** 2088 * @stable ICU 3.4 2089 */ 2090 public static final UnicodeBlock NEW_TAI_LUE = 2091 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 
2092 2093 /** 2094 * @stable ICU 3.4 2095 */ 2096 public static final UnicodeBlock OLD_PERSIAN = 2097 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 2098 2099 /** 2100 * @stable ICU 3.4 2101 */ 2102 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 2103 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 2104 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 2105 2106 /** 2107 * @stable ICU 3.4 2108 */ 2109 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2110 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 2111 2112 /** 2113 * @stable ICU 3.4 2114 */ 2115 public static final UnicodeBlock SYLOTI_NAGRI = 2116 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 2117 2118 /** 2119 * @stable ICU 3.4 2120 */ 2121 public static final UnicodeBlock TIFINAGH = 2122 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 2123 2124 /** 2125 * @stable ICU 3.4 2126 */ 2127 public static final UnicodeBlock VERTICAL_FORMS = 2128 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 2129 2130 /** 2131 * @stable ICU 3.6 2132 */ 2133 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 2134 /** 2135 * @stable ICU 3.6 2136 */ 2137 public static final UnicodeBlock BALINESE = 2138 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 2139 /** 2140 * @stable ICU 3.6 2141 */ 2142 public static final UnicodeBlock LATIN_EXTENDED_C = 2143 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 2144 /** 2145 * @stable ICU 3.6 2146 */ 2147 public static final UnicodeBlock LATIN_EXTENDED_D = 2148 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 2149 /** 2150 * @stable ICU 3.6 2151 */ 2152 public static final UnicodeBlock PHAGS_PA = 2153 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 2154 /** 2155 * @stable ICU 3.6 2156 */ 2157 public static final UnicodeBlock PHOENICIAN = 2158 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); 
/*[10900]*/ 2159 /** 2160 * @stable ICU 3.6 2161 */ 2162 public static final UnicodeBlock CUNEIFORM = 2163 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 2164 /** 2165 * @stable ICU 3.6 2166 */ 2167 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2168 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2169 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 2170 /** 2171 * @stable ICU 3.6 2172 */ 2173 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2174 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 2175 2176 /** 2177 * @stable ICU 4.0 2178 */ 2179 public static final UnicodeBlock SUNDANESE = 2180 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 2181 2182 /** 2183 * @stable ICU 4.0 2184 */ 2185 public static final UnicodeBlock LEPCHA = 2186 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 2187 2188 /** 2189 * @stable ICU 4.0 2190 */ 2191 public static final UnicodeBlock OL_CHIKI = 2192 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 2193 2194 /** 2195 * @stable ICU 4.0 2196 */ 2197 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2198 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 2199 2200 /** 2201 * @stable ICU 4.0 2202 */ 2203 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 2204 2205 /** 2206 * @stable ICU 4.0 2207 */ 2208 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2209 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 2210 2211 /** 2212 * @stable ICU 4.0 2213 */ 2214 public static final UnicodeBlock SAURASHTRA = 2215 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 2216 2217 /** 2218 * @stable ICU 4.0 2219 */ 2220 public static final UnicodeBlock KAYAH_LI = 2221 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 2222 2223 /** 2224 * @stable ICU 4.0 2225 */ 2226 public static final UnicodeBlock REJANG = 2227 new UnicodeBlock("REJANG", 
REJANG_ID); /* [A930] */ 2228 2229 /** 2230 * @stable ICU 4.0 2231 */ 2232 public static final UnicodeBlock CHAM = 2233 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 2234 2235 /** 2236 * @stable ICU 4.0 2237 */ 2238 public static final UnicodeBlock ANCIENT_SYMBOLS = 2239 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 2240 2241 /** 2242 * @stable ICU 4.0 2243 */ 2244 public static final UnicodeBlock PHAISTOS_DISC = 2245 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 2246 2247 /** 2248 * @stable ICU 4.0 2249 */ 2250 public static final UnicodeBlock LYCIAN = 2251 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 2252 2253 /** 2254 * @stable ICU 4.0 2255 */ 2256 public static final UnicodeBlock CARIAN = 2257 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 2258 2259 /** 2260 * @stable ICU 4.0 2261 */ 2262 public static final UnicodeBlock LYDIAN = 2263 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 2264 2265 /** 2266 * @stable ICU 4.0 2267 */ 2268 public static final UnicodeBlock MAHJONG_TILES = 2269 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 2270 2271 /** 2272 * @stable ICU 4.0 2273 */ 2274 public static final UnicodeBlock DOMINO_TILES = 2275 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 2276 2277 /* New blocks in Unicode 5.2 */ 2278 2279 /** @stable ICU 4.4 */ 2280 public static final UnicodeBlock SAMARITAN = 2281 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 2282 /** @stable ICU 4.4 */ 2283 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 2284 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 2285 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 2286 /** @stable ICU 4.4 */ 2287 public static final UnicodeBlock TAI_THAM = 2288 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 2289 /** @stable ICU 4.4 */ 2290 public static final UnicodeBlock VEDIC_EXTENSIONS = 2291 new 
UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 2292 /** @stable ICU 4.4 */ 2293 public static final UnicodeBlock LISU = 2294 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 2295 /** @stable ICU 4.4 */ 2296 public static final UnicodeBlock BAMUM = 2297 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 2298 /** @stable ICU 4.4 */ 2299 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2300 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 2301 /** @stable ICU 4.4 */ 2302 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2303 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 2304 /** @stable ICU 4.4 */ 2305 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2306 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 2307 /** @stable ICU 4.4 */ 2308 public static final UnicodeBlock JAVANESE = 2309 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 2310 /** @stable ICU 4.4 */ 2311 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2312 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 2313 /** @stable ICU 4.4 */ 2314 public static final UnicodeBlock TAI_VIET = 2315 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 2316 /** @stable ICU 4.4 */ 2317 public static final UnicodeBlock MEETEI_MAYEK = 2318 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 2319 /** @stable ICU 4.4 */ 2320 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2321 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 2322 /** @stable ICU 4.4 */ 2323 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2324 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 2325 /** @stable ICU 4.4 */ 2326 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2327 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 2328 /** @stable ICU 4.4 */ 2329 public static final 
UnicodeBlock AVESTAN = 2330 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 2331 /** @stable ICU 4.4 */ 2332 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2333 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 2334 /** @stable ICU 4.4 */ 2335 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2336 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 2337 /** @stable ICU 4.4 */ 2338 public static final UnicodeBlock OLD_TURKIC = 2339 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 2340 /** @stable ICU 4.4 */ 2341 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2342 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 2343 /** @stable ICU 4.4 */ 2344 public static final UnicodeBlock KAITHI = 2345 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 2346 /** @stable ICU 4.4 */ 2347 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2348 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 2349 /** @stable ICU 4.4 */ 2350 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2351 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2352 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 2353 /** @stable ICU 4.4 */ 2354 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2355 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2356 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 2357 /** @stable ICU 4.4 */ 2358 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2359 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2360 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 2361 2362 /* New blocks in Unicode 6.0 */ 2363 2364 /** @stable ICU 4.6 */ 2365 public static final UnicodeBlock MANDAIC = 2366 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 2367 /** @stable ICU 4.6 */ 2368 public static final UnicodeBlock BATAK = 2369 new UnicodeBlock("BATAK", BATAK_ID); 
/*[1BC0]*/ 2370 /** @stable ICU 4.6 */ 2371 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2372 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 2373 /** @stable ICU 4.6 */ 2374 public static final UnicodeBlock BRAHMI = 2375 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 2376 /** @stable ICU 4.6 */ 2377 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2378 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 2379 /** @stable ICU 4.6 */ 2380 public static final UnicodeBlock KANA_SUPPLEMENT = 2381 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 2382 /** @stable ICU 4.6 */ 2383 public static final UnicodeBlock PLAYING_CARDS = 2384 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 2385 /** @stable ICU 4.6 */ 2386 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2387 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2388 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 2389 /** @stable ICU 4.6 */ 2390 public static final UnicodeBlock EMOTICONS = 2391 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 2392 /** @stable ICU 4.6 */ 2393 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2394 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 2395 /** @stable ICU 4.6 */ 2396 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2397 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 2398 /** @stable ICU 4.6 */ 2399 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2400 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2401 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 2402 2403 /* New blocks in Unicode 6.1 */ 2404 2405 /** @stable ICU 49 */ 2406 public static final UnicodeBlock ARABIC_EXTENDED_A = 2407 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 2408 /** @stable ICU 49 */ 2409 public static final UnicodeBlock 
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2410 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 2411 /** @stable ICU 49 */ 2412 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 2413 /** @stable ICU 49 */ 2414 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2415 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 2416 /** @stable ICU 49 */ 2417 public static final UnicodeBlock MEROITIC_CURSIVE = 2418 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 2419 /** @stable ICU 49 */ 2420 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2421 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 2422 /** @stable ICU 49 */ 2423 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 2424 /** @stable ICU 49 */ 2425 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 2426 /** @stable ICU 49 */ 2427 public static final UnicodeBlock SORA_SOMPENG = 2428 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 2429 /** @stable ICU 49 */ 2430 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2431 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 2432 /** @stable ICU 49 */ 2433 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 2434 2435 /* New blocks in Unicode 7.0 */ 2436 2437 /** @stable ICU 54 */ 2438 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 2439 /** @stable ICU 54 */ 2440 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2441 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 2442 /** @stable ICU 54 */ 2443 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2444 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); 
/*[102E0]*/ 2445 /** @stable ICU 54 */ 2446 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2447 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 2448 /** @stable ICU 54 */ 2449 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 2450 /** @stable ICU 54 */ 2451 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 2452 /** @stable ICU 54 */ 2453 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2454 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 2455 /** @stable ICU 54 */ 2456 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 2457 /** @stable ICU 54 */ 2458 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 2459 /** @stable ICU 54 */ 2460 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 2461 /** @stable ICU 54 */ 2462 public static final UnicodeBlock LATIN_EXTENDED_E = 2463 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 2464 /** @stable ICU 54 */ 2465 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 2466 /** @stable ICU 54 */ 2467 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 2468 /** @stable ICU 54 */ 2469 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 2470 /** @stable ICU 54 */ 2471 public static final UnicodeBlock MENDE_KIKAKUI = 2472 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 2473 /** @stable ICU 54 */ 2474 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 2475 /** @stable ICU 54 */ 2476 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); 
/*[16A40]*/ 2477 /** @stable ICU 54 */ 2478 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2479 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 2480 /** @stable ICU 54 */ 2481 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 2482 /** @stable ICU 54 */ 2483 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2484 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 2485 /** @stable ICU 54 */ 2486 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 2487 /** @stable ICU 54 */ 2488 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2489 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 2490 /** @stable ICU 54 */ 2491 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 2492 /** @stable ICU 54 */ 2493 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 2494 /** @stable ICU 54 */ 2495 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 2496 /** @stable ICU 54 */ 2497 public static final UnicodeBlock PSALTER_PAHLAVI = 2498 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 2499 /** @stable ICU 54 */ 2500 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2501 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 2502 /** @stable ICU 54 */ 2503 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 2504 /** @stable ICU 54 */ 2505 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2506 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 2507 /** @stable ICU 54 */ 2508 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2509 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 
SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 2510 /** @stable ICU 54 */ 2511 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 2512 /** @stable ICU 54 */ 2513 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 2514 2515 /* New blocks in Unicode 8.0 */ 2516 2517 /** @stable ICU 56 */ 2518 public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/ 2519 /** @stable ICU 56 */ 2520 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2521 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/ 2522 /** @stable ICU 56 */ 2523 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2524 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/ 2525 /** @stable ICU 56 */ 2526 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2527 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2528 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/ 2529 /** @stable ICU 56 */ 2530 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2531 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/ 2532 /** @stable ICU 56 */ 2533 public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/ 2534 /** @stable ICU 56 */ 2535 public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/ 2536 /** @stable ICU 56 */ 2537 public static final UnicodeBlock OLD_HUNGARIAN = 2538 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/ 2539 /** @stable ICU 56 */ 2540 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2541 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2542 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2543 /** @stable ICU 56 */ 2544 public static final UnicodeBlock SUTTON_SIGNWRITING = 2545 new UnicodeBlock("SUTTON_SIGNWRITING", 
SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2546 2547 /* New blocks in Unicode 9.0 */ 2548 2549 /** @stable ICU 58 */ 2550 public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/ 2551 /** @stable ICU 58 */ 2552 public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/ 2553 /** @stable ICU 58 */ 2554 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2555 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/ 2556 /** @stable ICU 58 */ 2557 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 2558 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/ 2559 /** @stable ICU 58 */ 2560 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 2561 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/ 2562 /** @stable ICU 58 */ 2563 public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/ 2564 /** @stable ICU 58 */ 2565 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 2566 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/ 2567 /** @stable ICU 58 */ 2568 public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/ 2569 /** @stable ICU 58 */ 2570 public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/ 2571 /** @stable ICU 58 */ 2572 public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/ 2573 /** @stable ICU 58 */ 2574 public static final UnicodeBlock TANGUT_COMPONENTS = 2575 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/ 2576 2577 // New blocks in Unicode 10.0 2578 2579 /** @stable ICU 60 */ 2580 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 2581 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/ 2582 /** 
@stable ICU 60 */ 2583 public static final UnicodeBlock KANA_EXTENDED_A = 2584 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/ 2585 /** @stable ICU 60 */ 2586 public static final UnicodeBlock MASARAM_GONDI = 2587 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/ 2588 /** @stable ICU 60 */ 2589 public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/ 2590 /** @stable ICU 60 */ 2591 public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/ 2592 /** @stable ICU 60 */ 2593 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 2594 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/ 2595 /** @stable ICU 60 */ 2596 public static final UnicodeBlock ZANABAZAR_SQUARE = 2597 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/ 2598 2599 // New blocks in Unicode 11.0 2600 2601 /** @stable ICU 62 */ 2602 public static final UnicodeBlock CHESS_SYMBOLS = 2603 new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/ 2604 /** @stable ICU 62 */ 2605 public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/ 2606 /** @stable ICU 62 */ 2607 public static final UnicodeBlock GEORGIAN_EXTENDED = 2608 new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/ 2609 /** @stable ICU 62 */ 2610 public static final UnicodeBlock GUNJALA_GONDI = 2611 new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/ 2612 /** @stable ICU 62 */ 2613 public static final UnicodeBlock HANIFI_ROHINGYA = 2614 new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/ 2615 /** @stable ICU 62 */ 2616 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 2617 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/ 2618 /** @stable ICU 62 */ 2619 public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/ 2620 /** @stable ICU 62 */ 2621 public 
static final UnicodeBlock MAYAN_NUMERALS =
                new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock MEDEFAIDRIN =
                new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock OLD_SOGDIAN =
                new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/

        // New blocks in Unicode 12.0
        // NOTE(review): the trailing /*[XXXX]*/ comments look like each block's first
        // code point - confirm against the Unicode Blocks.txt data file.

        /** @stable ICU 64 */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
                new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock NANDINAGARI =
                new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
                new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
                new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock SMALL_KANA_EXTENSION =
                new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
                new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock TAMIL_SUPPLEMENT =
                new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/

        // New blocks in Unicode 13.0

        /** @stable ICU 66 */
        public static final UnicodeBlock CHORASMIAN =
                new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock DIVES_AKURU =
                new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
                new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock LISU_SUPPLEMENT =
                new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
                new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock TANGUT_SUPPLEMENT =
                new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/

        // New blocks in Unicode 14.0

        /** @stable ICU 70 */
        public static final UnicodeBlock ARABIC_EXTENDED_B =
                new UnicodeBlock("ARABIC_EXTENDED_B", ARABIC_EXTENDED_B_ID); /*[0870]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock CYPRO_MINOAN =
                new UnicodeBlock("CYPRO_MINOAN", CYPRO_MINOAN_ID); /*[12F90]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
                new UnicodeBlock("ETHIOPIC_EXTENDED_B", ETHIOPIC_EXTENDED_B_ID); /*[1E7E0]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock KANA_EXTENDED_B =
                new UnicodeBlock("KANA_EXTENDED_B", KANA_EXTENDED_B_ID); /*[1AFF0]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock LATIN_EXTENDED_F =
                new UnicodeBlock("LATIN_EXTENDED_F", LATIN_EXTENDED_F_ID); /*[10780]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock LATIN_EXTENDED_G =
                new UnicodeBlock("LATIN_EXTENDED_G", LATIN_EXTENDED_G_ID); /*[1DF00]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock OLD_UYGHUR =
                new UnicodeBlock("OLD_UYGHUR", OLD_UYGHUR_ID); /*[10F70]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock TANGSA = new UnicodeBlock("TANGSA", TANGSA_ID); /*[16A70]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock TOTO = new UnicodeBlock("TOTO", TOTO_ID); /*[1E290]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
                new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
                        UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID); /*[11AB0]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock VITHKUQI =
                new UnicodeBlock("VITHKUQI", VITHKUQI_ID); /*[10570]*/
        /** @stable ICU 70 */
        public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
                new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
                        ZNAMENNY_MUSICAL_NOTATION_ID); /*[1CF00]*/

        // New blocks in Unicode 15.0

        /** @stable ICU 72 */
        public static final UnicodeBlock ARABIC_EXTENDED_C =
                new UnicodeBlock("ARABIC_EXTENDED_C", ARABIC_EXTENDED_C_ID); /*[10EC0]*/
        /** @stable ICU 72 */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID); /*[31350]*/
        /** @stable ICU 72 */
        public static final UnicodeBlock CYRILLIC_EXTENDED_D =
                new UnicodeBlock("CYRILLIC_EXTENDED_D",
CYRILLIC_EXTENDED_D_ID); /*[1E030]*/ 2738 /** @stable ICU 72 */ 2739 public static final UnicodeBlock DEVANAGARI_EXTENDED_A = 2740 new UnicodeBlock("DEVANAGARI_EXTENDED_A", DEVANAGARI_EXTENDED_A_ID); /*[11B00]*/ 2741 /** @stable ICU 72 */ 2742 public static final UnicodeBlock KAKTOVIK_NUMERALS = 2743 new UnicodeBlock("KAKTOVIK_NUMERALS", KAKTOVIK_NUMERALS_ID); /*[1D2C0]*/ 2744 /** @stable ICU 72 */ 2745 public static final UnicodeBlock KAWI = new UnicodeBlock("KAWI", KAWI_ID); /*[11F00]*/ 2746 /** @stable ICU 72 */ 2747 public static final UnicodeBlock NAG_MUNDARI = 2748 new UnicodeBlock("NAG_MUNDARI", NAG_MUNDARI_ID); /*[1E4D0]*/ 2749 2750 /** 2751 * @stable ICU 2.4 2752 */ 2753 public static final UnicodeBlock INVALID_CODE 2754 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2755 2756 static { 2757 for (int blockId = 0; blockId < COUNT; ++blockId) { 2758 if (BLOCKS_[blockId] == null) { 2759 throw new java.lang.IllegalStateException( 2760 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2761 } 2762 } 2763 } 2764 2765 // public methods -------------------------------------------------- 2766 2767 /** 2768 * {@icu} Returns the only instance of the UnicodeBlock with the argument ID. 2769 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2770 * @param id UnicodeBlock ID 2771 * @return the only instance of the UnicodeBlock with the argument ID 2772 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2773 * returned. 2774 * @stable ICU 2.4 2775 */ getInstance(int id)2776 public static UnicodeBlock getInstance(int id) 2777 { 2778 if (id >= 0 && id < BLOCKS_.length) { 2779 return BLOCKS_[id]; 2780 } 2781 return INVALID_CODE; 2782 } 2783 2784 /** 2785 * Returns the Unicode allocation block that contains the code point, 2786 * or null if the code point is not a member of a defined block. 
2787 * @param ch code point to be tested 2788 * @return the Unicode allocation block that contains the code point 2789 * @stable ICU 2.4 2790 */ of(int ch)2791 public static UnicodeBlock of(int ch) 2792 { 2793 if (ch > MAX_VALUE) { 2794 return INVALID_CODE; 2795 } 2796 2797 return UnicodeBlock.getInstance( 2798 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2799 } 2800 2801 /** 2802 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2803 * Returns the Unicode block with the given name. {@icunote} Unlike 2804 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2805 * against the official UCD name and the Java block name 2806 * (ignoring case). 2807 * @param blockName the name of the block to match 2808 * @return the UnicodeBlock with that name 2809 * @throws IllegalArgumentException if the blockName could not be matched 2810 * @stable ICU 3.0 2811 */ forName(String blockName)2812 public static final UnicodeBlock forName(String blockName) { 2813 Map<String, UnicodeBlock> m = null; 2814 if (mref != null) { 2815 m = mref.get(); 2816 } 2817 if (m == null) { 2818 m = new HashMap<>(BLOCKS_.length); 2819 for (int i = 0; i < BLOCKS_.length; ++i) { 2820 UnicodeBlock b = BLOCKS_[i]; 2821 String name = trimBlockName( 2822 getPropertyValueName(UProperty.BLOCK, b.getID(), 2823 UProperty.NameChoice.LONG)); 2824 m.put(name, b); 2825 } 2826 mref = new SoftReference<>(m); 2827 } 2828 UnicodeBlock b = m.get(trimBlockName(blockName)); 2829 if (b == null) { 2830 throw new IllegalArgumentException(); 2831 } 2832 return b; 2833 } 2834 private static SoftReference<Map<String, UnicodeBlock>> mref; 2835 trimBlockName(String name)2836 private static String trimBlockName(String name) { 2837 String upper = name.toUpperCase(Locale.ENGLISH); 2838 StringBuilder result = new StringBuilder(upper.length()); 2839 for (int i = 0; i < upper.length(); i++) { 2840 char c = upper.charAt(i); 2841 if (c != ' ' && c != '_' 
&& c != '-') { 2842 result.append(c); 2843 } 2844 } 2845 return result.toString(); 2846 } 2847 2848 /** 2849 * {icu} Returns the type ID of this Unicode block 2850 * @return integer type ID of this Unicode block 2851 * @stable ICU 2.4 2852 */ getID()2853 public int getID() 2854 { 2855 return m_id_; 2856 } 2857 2858 // private data members --------------------------------------------- 2859 2860 /** 2861 * Identification code for this UnicodeBlock 2862 */ 2863 private int m_id_; 2864 2865 // private constructor ---------------------------------------------- 2866 2867 /** 2868 * UnicodeBlock constructor 2869 * @param name name of this UnicodeBlock 2870 * @param id unique id of this UnicodeBlock 2871 * @exception NullPointerException if name is <code>null</code> 2872 */ UnicodeBlock(String name, int id)2873 private UnicodeBlock(String name, int id) 2874 { 2875 super(name); 2876 m_id_ = id; 2877 if (id >= 0) { 2878 BLOCKS_[id] = this; 2879 } 2880 } 2881 } 2882 2883 /** 2884 * East Asian Width constants. 2885 * @see UProperty#EAST_ASIAN_WIDTH 2886 * @see UCharacter#getIntPropertyValue 2887 * @stable ICU 2.4 2888 */ 2889 public static interface EastAsianWidth 2890 { 2891 /** 2892 * @stable ICU 2.4 2893 */ 2894 public static final int NEUTRAL = 0; 2895 /** 2896 * @stable ICU 2.4 2897 */ 2898 public static final int AMBIGUOUS = 1; 2899 /** 2900 * @stable ICU 2.4 2901 */ 2902 public static final int HALFWIDTH = 2; 2903 /** 2904 * @stable ICU 2.4 2905 */ 2906 public static final int FULLWIDTH = 3; 2907 /** 2908 * @stable ICU 2.4 2909 */ 2910 public static final int NARROW = 4; 2911 /** 2912 * @stable ICU 2.4 2913 */ 2914 public static final int WIDE = 5; 2915 /** 2916 * One more than the highest normal EastAsianWidth value. 2917 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH). 2918 * 2919 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
*/
        @Deprecated
        public static final int COUNT = 6;
    }

    /**
     * Decomposition Type constants.
     * @see UProperty#DECOMPOSITION_TYPE
     * @stable ICU 2.4
     */
    public static interface DecompositionType
    {
        /** @stable ICU 2.4 */
        public static final int NONE = 0;
        /** @stable ICU 2.4 */
        public static final int CANONICAL = 1;
        /** @stable ICU 2.4 */
        public static final int COMPAT = 2;
        /** @stable ICU 2.4 */
        public static final int CIRCLE = 3;
        /** @stable ICU 2.4 */
        public static final int FINAL = 4;
        /** @stable ICU 2.4 */
        public static final int FONT = 5;
        /** @stable ICU 2.4 */
        public static final int FRACTION = 6;
        /** @stable ICU 2.4 */
        public static final int INITIAL = 7;
        /** @stable ICU 2.4 */
        public static final int ISOLATED = 8;
        /** @stable ICU 2.4 */
        public static final int MEDIAL = 9;
        /** @stable ICU 2.4 */
        public static final int NARROW = 10;
        /** @stable ICU 2.4 */
        public static final int NOBREAK = 11;
        /** @stable ICU 2.4 */
        public static final int SMALL = 12;
        /** @stable ICU 2.4 */
        public static final int SQUARE = 13;
        /** @stable ICU 2.4 */
        public static final int SUB = 14;
        /** @stable ICU 2.4 */
        public static final int SUPER = 15;
        /** @stable ICU 2.4 */
        public static final int VERTICAL = 16;
        /** @stable ICU 2.4 */
        public static final int WIDE = 17;
        /**
         * One more than the highest normal DecompositionType value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 18;
    }

    /**
     * Joining Type constants.
     * @see UProperty#JOINING_TYPE
     * @stable ICU 2.4
     */
    public static interface JoiningType
    {
        /** @stable ICU 2.4 */
        public static final int NON_JOINING = 0;
        /** @stable ICU 2.4 */
        public static final int JOIN_CAUSING = 1;
        /** @stable ICU 2.4 */
        public static final int DUAL_JOINING = 2;
        /** @stable ICU 2.4 */
        public static final int LEFT_JOINING = 3;
        /** @stable ICU 2.4 */
        public static final int RIGHT_JOINING = 4;
        /** @stable ICU 2.4 */
        public static final int TRANSPARENT = 5;
        /**
         * One more than the highest normal JoiningType value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 6;
    }

    /**
     * Joining Group constants.
* @see UProperty#JOINING_GROUP
     * @stable ICU 2.4
     */
    public static interface JoiningGroup
    {
        /** @stable ICU 2.4 */
        public static final int NO_JOINING_GROUP = 0;
        /** @stable ICU 2.4 */
        public static final int AIN = 1;
        /** @stable ICU 2.4 */
        public static final int ALAPH = 2;
        /** @stable ICU 2.4 */
        public static final int ALEF = 3;
        /** @stable ICU 2.4 */
        public static final int BEH = 4;
        /** @stable ICU 2.4 */
        public static final int BETH = 5;
        /** @stable ICU 2.4 */
        public static final int DAL = 6;
        /** @stable ICU 2.4 */
        public static final int DALATH_RISH = 7;
        /** @stable ICU 2.4 */
        public static final int E = 8;
        /** @stable ICU 2.4 */
        public static final int FEH = 9;
        /** @stable ICU 2.4 */
        public static final int FINAL_SEMKATH = 10;
        /** @stable ICU 2.4 */
        public static final int GAF = 11;
        /** @stable ICU 2.4 */
        public static final int GAMAL = 12;
        /** @stable ICU 2.4 */
        public static final int HAH = 13;
        /** @stable ICU 4.6 */
        public static final int TEH_MARBUTA_GOAL = 14;
        /**
         * Same value as {@link #TEH_MARBUTA_GOAL}.
         * @stable ICU 2.4
         */
        public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
        /** @stable ICU 2.4 */
        public static final int HE = 15;
        /** @stable ICU 2.4 */
        public static final int HEH = 16;
        /** @stable ICU 2.4 */
        public static final int HEH_GOAL = 17;
        /** @stable ICU 2.4 */
        public static final int HETH = 18;
        /** @stable ICU 2.4 */
        public static final int KAF = 19;
        /** @stable ICU 2.4 */
        public static final int KAPH = 20;
        /** @stable ICU 2.4 */
        public static final int KNOTTED_HEH = 21;
        /** @stable ICU 2.4 */
        public static final int LAM = 22;
        /** @stable ICU 2.4 */
        public static final int LAMADH = 23;
        /** @stable ICU 2.4 */
        public static final int MEEM = 24;
        /** @stable ICU 2.4 */
        public static final int MIM = 25;
        /** @stable ICU 2.4 */
        public static final int NOON = 26;
        /** @stable ICU 2.4 */
        public static final int NUN = 27;
        /** @stable ICU 2.4 */
        public static final int PE = 28;
        /** @stable ICU 2.4 */
        public static final int QAF = 29;
        /** @stable ICU 2.4 */
        public static final int QAPH = 30;
        /** @stable ICU 2.4 */
        public static final int REH = 31;
        /** @stable ICU 2.4 */
        public static final int REVERSED_PE = 32;
        /** @stable ICU 2.4 */
        public static final int SAD = 33;
        /** @stable ICU 2.4 */
        public static final int SADHE = 34;
        /** @stable ICU 2.4 */
        public static final int SEEN = 35;
        /** @stable ICU 2.4 */
        public static final int SEMKATH = 36;
        /** @stable ICU 2.4 */
        public static final int SHIN = 37;
        /** @stable ICU 2.4 */
        public static final int SWASH_KAF = 38;
        /** @stable ICU 2.4 */
        public static final int SYRIAC_WAW = 39;
        /** @stable ICU 2.4 */
        public static final int TAH = 40;
        /** @stable ICU 2.4 */
        public static final int TAW = 41;
        /** @stable ICU 2.4 */
        public static final int TEH_MARBUTA = 42;
        /** @stable ICU 2.4 */
        public static final int TETH = 43;
        /** @stable ICU 2.4 */
        public static final int WAW = 44;
        /** @stable ICU 2.4 */
        public static final int YEH = 45;
        /** @stable ICU 2.4 */
        public static final int YEH_BARREE = 46;
        /** @stable ICU 2.4 */
        public static final int YEH_WITH_TAIL = 47;
        /** @stable ICU 2.4 */
        public static final int YUDH = 48;
        /** @stable ICU 2.4 */
        public static final int YUDH_HE = 49;
        /** @stable ICU 2.4 */
        public static final int ZAIN = 50;
        /** @stable ICU 2.6 */
        public static final int FE = 51;
        /** @stable ICU 2.6 */
        public static final int KHAPH = 52;
        /** @stable ICU 2.6 */
        public static final int ZHAIN = 53;
        /** @stable ICU 4.0 */
        public static final int BURUSHASKI_YEH_BARREE = 54;
        /** @stable ICU 4.4 */
        public static final int FARSI_YEH = 55;
        /** @stable ICU 4.4 */
        public static final int NYA = 56;
        /** @stable ICU 49 */
        public static final int ROHINGYA_YEH = 57;

        /** @stable ICU 54 */
        public static final int MANICHAEAN_ALEPH = 58;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_AYIN = 59;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_BETH = 60;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_DALETH = 61;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_DHAMEDH = 62;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_FIVE = 63;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_GIMEL = 64;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_HETH = 65;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_HUNDRED = 66;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_KAPH = 67;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_LAMEDH = 68;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_MEM = 69;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_NUN = 70;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_ONE = 71;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_PE = 72;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_QOPH = 73;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_RESH = 74;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_SADHE = 75;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_SAMEKH = 76;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_TAW = 77;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_TEN = 78;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_TETH = 79;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_THAMEDH = 80;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_TWENTY = 81;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_WAW = 82;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_YODH = 83;
        /** @stable ICU 54 */
        public static final int MANICHAEAN_ZAYIN = 84;
        /** @stable ICU 54 */
        public static final int STRAIGHT_WAW = 85;

        /** @stable ICU 58 */
        public static final int AFRICAN_FEH = 86;
        /** @stable ICU 58 */
        public static final int AFRICAN_NOON = 87;
        /** @stable ICU 58 */
        public static final int AFRICAN_QAF = 88;

        /** @stable ICU 60 */
        public static final int MALAYALAM_BHA = 89;
        /** @stable ICU 60 */
        public static final int MALAYALAM_JA = 90;
        /** @stable ICU 60 */
        public static final int MALAYALAM_LLA = 91;
        /** @stable ICU 60 */
        public static final int MALAYALAM_LLLA = 92;
        /** @stable ICU 60 */
        public static final int MALAYALAM_NGA = 93;
        /** @stable ICU 60 */
        public static final int MALAYALAM_NNA = 94;
        /** @stable ICU 60 */
        public static final int MALAYALAM_NNNA = 95;
        /** @stable ICU 60 */
        public static final int MALAYALAM_NYA = 96;
        /** @stable ICU 60 */
        public static final int MALAYALAM_RA = 97;
        /** @stable ICU 60 */
        public static final int MALAYALAM_SSA = 98;
        /** @stable ICU 60 */
        public static final int MALAYALAM_TTA = 99;

        /** @stable ICU 62 */
        public static final int HANIFI_ROHINGYA_KINNA_YA = 100;
        /** @stable ICU 62 */
        public static final int HANIFI_ROHINGYA_PA = 101;

        /** @stable ICU 70 */
        public static final int THIN_YEH = 102;
        /** @stable ICU 70 */
        public static final int VERTICAL_TAIL = 103;

        /**
         * One more than the highest normal JoiningGroup value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 104;
    }

    /**
     * Grapheme Cluster Break constants.
     * @see UProperty#GRAPHEME_CLUSTER_BREAK
     * @stable ICU 3.4
     */
    public static interface GraphemeClusterBreak {
        /** @stable ICU 3.4 */
        public static final int OTHER = 0;
        /** @stable ICU 3.4 */
        public static final int CONTROL = 1;
        /** @stable ICU 3.4 */
        public static final int CR = 2;
        /** @stable ICU 3.4 */
        public static final int EXTEND = 3;
        /** @stable ICU 3.4 */
        public static final int L = 4;
        /** @stable ICU 3.4 */
        public static final int LF = 5;
        /** @stable ICU 3.4 */
        public static final int LV = 6;
        /** @stable ICU 3.4 */
        public static final int LVT = 7;
        /** @stable ICU 3.4 */
        public static final int T = 8;
        /** @stable ICU 3.4 */
        public static final int V = 9;
        /** @stable ICU 4.0 */
        public static final int SPACING_MARK = 10;
        /** @stable ICU 4.0 */
        public static final int PREPEND = 11;
/** @stable ICU 50 */
        public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
        /** @stable ICU 58 */
        public static final int E_BASE = 13;          /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
        /** @stable ICU 58 */
        public static final int E_BASE_GAZ = 14;      /*[EBG]*/
        /** @stable ICU 58 */
        public static final int E_MODIFIER = 15;      /*[EM]*/
        /** @stable ICU 58 */
        public static final int GLUE_AFTER_ZWJ = 16;  /*[GAZ]*/
        /** @stable ICU 58 */
        public static final int ZWJ = 17;             /*[ZWJ]*/

        /**
         * One more than the highest normal GraphemeClusterBreak value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 18;
    }

    /**
     * Word Break constants.
     * @see UProperty#WORD_BREAK
     * @stable ICU 3.4
     */
    public static interface WordBreak {
        /** @stable ICU 3.8 */
        public static final int OTHER = 0;
        /** @stable ICU 3.8 */
        public static final int ALETTER = 1;
        /** @stable ICU 3.8 */
        public static final int FORMAT = 2;
        /** @stable ICU 3.8 */
        public static final int KATAKANA = 3;
        /** @stable ICU 3.8 */
        public static final int MIDLETTER = 4;
        /** @stable ICU 3.8 */
        public static final int MIDNUM = 5;
        /** @stable ICU 3.8 */
        public static final int NUMERIC = 6;
        /** @stable ICU 3.8 */
        public static final int EXTENDNUMLET = 7;
        /** @stable ICU 4.0 */
        public static final int CR = 8;
        /** @stable ICU 4.0 */
        public static final int EXTEND = 9;
        /** @stable ICU 4.0 */
        public static final int LF = 10;
        /** @stable ICU 4.0 */
        public static final int MIDNUMLET = 11;
        /** @stable ICU 4.0 */
        public static final int NEWLINE = 12;
        /** @stable ICU 50 */
        public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
        /** @stable ICU 52 */
        public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
        /** @stable ICU 52 */
        public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
        /** @stable ICU 52 */
        public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
        /** @stable ICU 58 */
        public static final int E_BASE = 17;           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
        /** @stable ICU 58 */
        public static final int E_BASE_GAZ = 18;       /*[EBG]*/
        /** @stable ICU 58 */
        public static final int E_MODIFIER = 19;       /*[EM]*/
        /** @stable ICU 58 */
        public static final int GLUE_AFTER_ZWJ = 20;   /*[GAZ]*/
        /** @stable ICU 58 */
        public static final int ZWJ = 21;              /*[ZWJ]*/
        /** @stable ICU 62 */
        public static final int WSEGSPACE = 22;        /*[WSEGSPACE]*/
        /**
         * One more than the highest normal WordBreak value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 23;
    }

    /**
     * Sentence Break constants.
     * @see UProperty#SENTENCE_BREAK
     * @stable ICU 3.4
     */
    public static interface SentenceBreak {
        /** @stable ICU 3.8 */
        public static final int OTHER = 0;
        /** @stable ICU 3.8 */
        public static final int ATERM = 1;
        /** @stable ICU 3.8 */
        public static final int CLOSE = 2;
        /** @stable ICU 3.8 */
        public static final int FORMAT = 3;
        /** @stable ICU 3.8 */
        public static final int LOWER = 4;
        /** @stable ICU 3.8 */
        public static final int NUMERIC = 5;
        /** @stable ICU 3.8 */
        public static final int OLETTER = 6;
        /** @stable ICU 3.8 */
        public static final int SEP = 7;
        /** @stable ICU 3.8 */
        public static final int SP = 8;
        /** @stable ICU 3.8 */
        public static final int STERM = 9;
        /** @stable ICU 3.8 */
        public static final int UPPER = 10;
        /** @stable ICU 4.0 */
        public static final int CR = 11;
        /** @stable ICU 4.0 */
        public static final int EXTEND = 12;
        /** @stable ICU 4.0 */
        public static final int LF = 13;
        /** @stable ICU 4.0 */
        public static final int SCONTINUE = 14;
        /**
         * One more than the highest normal SentenceBreak value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 15;
    }

    /**
     * Line Break constants.
* @see UProperty#LINE_BREAK
     * @stable ICU 2.4
     */
    public static interface LineBreak
    {
        /** @stable ICU 2.4 */
        public static final int UNKNOWN = 0;
        /** @stable ICU 2.4 */
        public static final int AMBIGUOUS = 1;
        /** @stable ICU 2.4 */
        public static final int ALPHABETIC = 2;
        /** @stable ICU 2.4 */
        public static final int BREAK_BOTH = 3;
        /** @stable ICU 2.4 */
        public static final int BREAK_AFTER = 4;
        /** @stable ICU 2.4 */
        public static final int BREAK_BEFORE = 5;
        /** @stable ICU 2.4 */
        public static final int MANDATORY_BREAK = 6;
        /** @stable ICU 2.4 */
        public static final int CONTINGENT_BREAK = 7;
        /** @stable ICU 2.4 */
        public static final int CLOSE_PUNCTUATION = 8;
        /** @stable ICU 2.4 */
        public static final int COMBINING_MARK = 9;
        /** @stable ICU 2.4 */
        public static final int CARRIAGE_RETURN = 10;
        /** @stable ICU 2.4 */
        public static final int EXCLAMATION = 11;
        /** @stable ICU 2.4 */
        public static final int GLUE = 12;
        /** @stable ICU 2.4 */
        public static final int HYPHEN = 13;
        /** @stable ICU 2.4 */
        public static final int IDEOGRAPHIC = 14;
        /**
         * Misspelled name; has the same value as {@link #INSEPARABLE}.
         * @see #INSEPARABLE
         * @stable ICU 2.4
         */
        public static final int INSEPERABLE = 15;
        /**
         * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
         * @stable ICU 3.0
         */
        public static final int INSEPARABLE = 15;
        /** @stable ICU 2.4 */
        public static final int INFIX_NUMERIC = 16;
        /** @stable ICU 2.4 */
        public static final int LINE_FEED = 17;
        /** @stable ICU 2.4 */
        public static final int NONSTARTER = 18;
        /** @stable ICU 2.4 */
        public static final int NUMERIC = 19;
        /** @stable ICU 2.4 */
        public static final int OPEN_PUNCTUATION = 20;
        /** @stable ICU 2.4 */
        public static final int POSTFIX_NUMERIC = 21;
        /** @stable ICU 2.4 */
        public static final int PREFIX_NUMERIC = 22;
        /** @stable ICU 2.4 */
        public static final int QUOTATION = 23;
        /** @stable ICU 2.4 */
        public static final int COMPLEX_CONTEXT = 24;
        /** @stable ICU 2.4 */
        public static final int SURROGATE = 25;
        /** @stable ICU 2.4 */
        public static final int SPACE = 26;
        /** @stable ICU 2.4 */
        public static final int BREAK_SYMBOLS = 27;
        /** @stable ICU 2.4 */
        public static final int ZWSPACE = 28;
        /** @stable ICU 2.6 */
        public static final int NEXT_LINE = 29;       /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
        /** @stable ICU 2.6 */
        public static final int WORD_JOINER = 30;      /*[WJ]*/
        /** @stable ICU 3.4 */
        public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
        /** @stable ICU 3.4 */
        public static final int H3 = 32;
        /** @stable ICU 3.4 */
        public static final int JL = 33;
        /** @stable ICU 3.4 */
        public static final int JT = 34;
        /** @stable ICU 3.4 */
        public static final int JV = 35;
        /** @stable ICU 4.4 */
        public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
        /** @stable ICU 49 */
        public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
        /** @stable ICU 49 */
        public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
        /** @stable ICU 50 */
        public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
        /** @stable ICU 58 */
        public static final int E_BASE = 40;  /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
        /** @stable ICU 58 */
        public static final int E_MODIFIER = 41;  /*[EM]*/
        /** @stable ICU 58 */
        public static final int ZWJ = 42;  /*[ZWJ]*/
        /**
         * One more than the highest normal LineBreak value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 43;
    }

    /**
     * Numeric Type constants.
     * @see UProperty#NUMERIC_TYPE
     * @stable ICU 2.4
     */
    public static interface NumericType
    {
        /** @stable ICU 2.4 */
        public static final int NONE = 0;
        /** @stable ICU 2.4 */
        public static final int DECIMAL = 1;
        /** @stable ICU 2.4 */
        public static final int DIGIT = 2;
        /** @stable ICU 2.4 */
        public static final int NUMERIC = 3;
        /**
         * One more than the highest normal NumericType value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 4;
    }

    /**
     * Hangul Syllable Type constants.
*
     * <p>The trailing bracketed comments on the constants give the short
     * alias of each value.
     *
     * @see UProperty#HANGUL_SYLLABLE_TYPE
     * @stable ICU 2.6
     */
    public static interface HangulSyllableType
    {
        /**
         * Hangul syllable type "not applicable" (short alias NA).
         * NOTE(review): the trailing "See note" comment refers to a note that is
         * not visible in this part of the file.
         * @stable ICU 2.6
         */
        public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/
        /**
         * Hangul syllable type "leading jamo" (short alias L).
         * @stable ICU 2.6
         */
        public static final int LEADING_JAMO = 1; /*[L]*/
        /**
         * Hangul syllable type "vowel jamo" (short alias V).
         * @stable ICU 2.6
         */
        public static final int VOWEL_JAMO = 2; /*[V]*/
        /**
         * Hangul syllable type "trailing jamo" (short alias T).
         * @stable ICU 2.6
         */
        public static final int TRAILING_JAMO = 3; /*[T]*/
        /**
         * Hangul syllable type "LV syllable" (short alias LV).
         * @stable ICU 2.6
         */
        public static final int LV_SYLLABLE = 4; /*[LV]*/
        /**
         * Hangul syllable type "LVT syllable" (short alias LVT).
         * @stable ICU 2.6
         */
        public static final int LVT_SYLLABLE = 5; /*[LVT]*/
        /**
         * One more than the highest normal HangulSyllableType value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
         */
        @Deprecated
        public static final int COUNT = 6;
    }

    /**
     * Bidi Paired Bracket Type constants.
     *
     * <p>The values are consecutive ints starting at 0.
     *
     * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
     * @stable ICU 52
     */
    public static interface BidiPairedBracketType {
        /**
         * Not a paired bracket.
         * @stable ICU 52
         */
        public static final int NONE = 0;
        /**
         * Open paired bracket.
         * @stable ICU 52
         */
        public static final int OPEN = 1;
        /**
         * Close paired bracket.
         * @stable ICU 52
         */
        public static final int CLOSE = 2;
        /**
         * One more than the highest normal BidiPairedBracketType value.
         * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
         *
         * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
        @Deprecated
        public static final int COUNT = 3;
    }

    /**
     * Indic Positional Category constants.
     *
     * <p>The values are consecutive ints starting at 0. Unlike the older
     * constant interfaces in this class, no {@code COUNT} constant is declared here.
     *
     * @see UProperty#INDIC_POSITIONAL_CATEGORY
     * @stable ICU 63
     */
    public static interface IndicPositionalCategory {
        /** @stable ICU 63 */
        public static final int NA = 0;
        /** @stable ICU 63 */
        public static final int BOTTOM = 1;
        /** @stable ICU 63 */
        public static final int BOTTOM_AND_LEFT = 2;
        /** @stable ICU 63 */
        public static final int BOTTOM_AND_RIGHT = 3;
        /** @stable ICU 63 */
        public static final int LEFT = 4;
        /** @stable ICU 63 */
        public static final int LEFT_AND_RIGHT = 5;
        /** @stable ICU 63 */
        public static final int OVERSTRUCK = 6;
        /** @stable ICU 63 */
        public static final int RIGHT = 7;
        /** @stable ICU 63 */
        public static final int TOP = 8;
        /** @stable ICU 63 */
        public static final int TOP_AND_BOTTOM = 9;
        /** @stable ICU 63 */
        public static final int TOP_AND_BOTTOM_AND_RIGHT = 10;
        /** @stable ICU 63 */
        public static final int TOP_AND_LEFT = 11;
        /** @stable ICU 63 */
        public static final int TOP_AND_LEFT_AND_RIGHT = 12;
        /** @stable ICU 63 */
        public static final int TOP_AND_RIGHT = 13;
        /** @stable ICU 63 */
        public static final int VISUAL_ORDER_LEFT = 14;
        /**
         * Tagged ICU 66 while the constants above are tagged ICU 63; presumably
         * appended after them (rather than inserted alphabetically) so that the
         * existing numeric values stayed unchanged.
         * @stable ICU 66
         */
        public static final int TOP_AND_BOTTOM_AND_LEFT = 15;
    }

    /**
     * Indic Syllabic Category constants.
*
     * <p>The values are consecutive ints from 0 ({@link #OTHER}) to
     * 35 ({@link #VOWEL_INDEPENDENT}); after {@code OTHER} the names are in
     * alphabetical order. No {@code COUNT} constant is declared here.
     *
     * @see UProperty#INDIC_SYLLABIC_CATEGORY
     * @stable ICU 63
     */
    public static interface IndicSyllabicCategory {
        /** @stable ICU 63 */
        public static final int OTHER = 0;
        /** @stable ICU 63 */
        public static final int AVAGRAHA = 1;
        /** @stable ICU 63 */
        public static final int BINDU = 2;
        /** @stable ICU 63 */
        public static final int BRAHMI_JOINING_NUMBER = 3;
        /** @stable ICU 63 */
        public static final int CANTILLATION_MARK = 4;
        /** @stable ICU 63 */
        public static final int CONSONANT = 5;
        /** @stable ICU 63 */
        public static final int CONSONANT_DEAD = 6;
        /** @stable ICU 63 */
        public static final int CONSONANT_FINAL = 7;
        /** @stable ICU 63 */
        public static final int CONSONANT_HEAD_LETTER = 8;
        /** @stable ICU 63 */
        public static final int CONSONANT_INITIAL_POSTFIXED = 9;
        /** @stable ICU 63 */
        public static final int CONSONANT_KILLER = 10;
        /** @stable ICU 63 */
        public static final int CONSONANT_MEDIAL = 11;
        /** @stable ICU 63 */
        public static final int CONSONANT_PLACEHOLDER = 12;
        /** @stable ICU 63 */
        public static final int CONSONANT_PRECEDING_REPHA = 13;
        /** @stable ICU 63 */
        public static final int CONSONANT_PREFIXED = 14;
        /** @stable ICU 63 */
        public static final int CONSONANT_SUBJOINED = 15;
        /** @stable ICU 63 */
        public static final int CONSONANT_SUCCEEDING_REPHA = 16;
        /** @stable ICU 63 */
        public static final int CONSONANT_WITH_STACKER = 17;
        /** @stable ICU 63 */
        public static final int GEMINATION_MARK = 18;
        /** @stable ICU 63 */
        public static final int INVISIBLE_STACKER = 19;
        /** @stable ICU 63 */
        public static final int JOINER = 20;
        /** @stable ICU 63 */
        public static final int MODIFYING_LETTER = 21;
        /** @stable ICU 63 */
        public static final int NON_JOINER = 22;
        /** @stable ICU 63 */
        public static final int NUKTA = 23;
        /** @stable ICU 63 */
        public static final int NUMBER = 24;
        /** @stable ICU 63 */
        public static final int NUMBER_JOINER = 25;
        /** @stable ICU 63 */
        public static final int PURE_KILLER = 26;
        /** @stable ICU 63 */
        public static final int REGISTER_SHIFTER = 27;
        /** @stable ICU 63 */
        public static final int SYLLABLE_MODIFIER = 28;
        /** @stable ICU 63 */
        public static final int TONE_LETTER = 29;
        /** @stable ICU 63 */
        public static final int TONE_MARK = 30;
        /** @stable ICU 63 */
        public static final int VIRAMA = 31;
        /** @stable ICU 63 */
        public static final int VISARGA = 32;
        /** @stable ICU 63 */
        public static final int VOWEL = 33;
        /** @stable ICU 63 */
        public static final int VOWEL_DEPENDENT = 34;
        /** @stable ICU 63 */
        public static final int VOWEL_INDEPENDENT = 35;
    }

    /**
     * Vertical Orientation constants.
     *
     * <p>The values are consecutive ints starting at 0; no {@code COUNT}
     * constant is declared here.
     *
     * @see UProperty#VERTICAL_ORIENTATION
     * @stable ICU 63
     */
    public static interface VerticalOrientation {
        /** @stable ICU 63 */
        public static final int ROTATED = 0;
        /** @stable ICU 63 */
        public static final int TRANSFORMED_ROTATED = 1;
        /** @stable ICU 63 */
        public static final int TRANSFORMED_UPRIGHT = 2;
        /** @stable ICU 63 */
        public static final int UPRIGHT = 3;
    }

    // public data members -----------------------------------------------

    /**
     * The lowest Unicode code point value, constant 0.
     * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
     *
     * @stable ICU 2.1
     */
    public static final int MIN_VALUE = Character.MIN_CODE_POINT;

    /**
     * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
     * Same as {@link Character#MAX_CODE_POINT}.
*
     * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
     * which is still a char with the value U+FFFF.
     *
     * @stable ICU 2.1
     */
    public static final int MAX_VALUE = Character.MAX_CODE_POINT;

    /**
     * The minimum value for Supplementary code points, constant U+10000.
     * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
     *
     * @stable ICU 2.1
     */
    public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;

    /**
     * Unicode value used when translating into Unicode encoding form and there
     * is no existing character. The value is U+FFFD.
     * @stable ICU 2.1
     */
    public static final int REPLACEMENT_CHAR = '\uFFFD';

    /**
     * Special value that is returned by getUnicodeNumericValue(int) when no
     * numeric value is defined for a code point.
     * The sentinel is the double value -123456789.
     * @stable ICU 2.4
     * @see #getUnicodeNumericValue
     */
    public static final double NO_NUMERIC_VALUE = -123456789;

    /**
     * Compatibility constant for Java Character's MIN_RADIX.
     * Same value as {@link Character#MIN_RADIX}.
     * @stable ICU 3.4
     */
    public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;

    /**
     * Compatibility constant for Java Character's MAX_RADIX.
     * Same value as {@link Character#MAX_RADIX}.
     * @stable ICU 3.4
     */
    public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;

    /**
     * Do not lowercase non-initial parts of words when titlecasing.
     * Option bit for titlecasing APIs that take an options bit set.
     *
     * By default, titlecasing will titlecase the first cased character
     * of a word and lowercase all other characters.
     * With this option, the other characters will not be modified.
*
     * <p>This option occupies bit 0x100 of the options bit set.
     *
     * @see #toTitleCase
     * @stable ICU 3.8
     */
    public static final int TITLECASE_NO_LOWERCASE = 0x100;

    /**
     * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
     * titlecase exactly the characters at breaks from the iterator.
     * Option bit for titlecasing APIs that take an options bit set.
     *
     * By default, titlecasing will take each break iterator index,
     * adjust it by looking for the next cased character, and titlecase that one.
     * Other characters are lowercased.
     *
     * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
     *
     * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
     * #29, "Text Boundaries." Between each pair of word boundaries, find the first
     * cased character F. If F exists, map F to default_title(F); then map each
     * subsequent character C to default_lower(C).
     *
     * <p>This option occupies bit 0x200 of the options bit set, so it can be
     * combined with {@link #TITLECASE_NO_LOWERCASE} using bitwise OR.
     *
     * @see #toTitleCase
     * @see #TITLECASE_NO_LOWERCASE
     * @stable ICU 3.8
     */
    public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;

    // public methods ----------------------------------------------------

    /**
     * Returns the numeric value of a decimal digit code point.
     * <br>This method observes the semantics of
     * <code>java.lang.Character.digit()</code>.  Note that this
     * will return positive values for code points for which isDigit
     * returns false, just like java.lang.Character.
     * <br><em>Semantic Change:</em> In release 1.3.1 and
     * prior, this did not treat the European letters as having a
     * digit value, and also treated numeric letters and other numbers as
     * digits.
     * This has been changed to conform to the java semantics.
     * <br>A code point is a valid digit if and only if:
     * <ul>
     *   <li>ch is a decimal digit or one of the european letters, and
     *   <li>the value of ch is less than the specified radix.
4174 * </ul> 4175 * @param ch the code point to query 4176 * @param radix the radix 4177 * @return the numeric value represented by the code point in the 4178 * specified radix, or -1 if the code point is not a decimal digit 4179 * or if its value is too large for the radix 4180 * @stable ICU 2.1 4181 */ digit(int ch, int radix)4182 public static int digit(int ch, int radix) 4183 { 4184 if (2 <= radix && radix <= 36) { 4185 int value = digit(ch); 4186 if (value < 0) { 4187 // ch is not a decimal digit, try latin letters 4188 value = UCharacterProperty.getEuropeanDigit(ch); 4189 } 4190 return (value < radix) ? value : -1; 4191 } else { 4192 return -1; // invalid radix 4193 } 4194 } 4195 4196 /** 4197 * Returnss the numeric value of a decimal digit code point. 4198 * <br>This is a convenience overload of <code>digit(int, int)</code> 4199 * that provides a decimal radix. 4200 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 4201 * treated numeric letters and other numbers as digits. This has 4202 * been changed to conform to the java semantics. 4203 * @param ch the code point to query 4204 * @return the numeric value represented by the code point, 4205 * or -1 if the code point is not a decimal digit or if its 4206 * value is too large for a decimal radix 4207 * @stable ICU 2.1 4208 */ digit(int ch)4209 public static int digit(int ch) 4210 { 4211 return UCharacterProperty.INSTANCE.digit(ch); 4212 } 4213 4214 /** 4215 * Returns the numeric value of the code point as a nonnegative 4216 * integer. 4217 * <br>If the code point does not have a numeric value, then -1 is returned. 4218 * <br> 4219 * If the code point has a numeric value that cannot be represented as a 4220 * nonnegative integer (for example, a fractional value), then -2 is 4221 * returned. 
4222 * @param ch the code point to query 4223 * @return the numeric value of the code point, or -1 if it has no numeric 4224 * value, or -2 if it has a numeric value that cannot be represented as a 4225 * nonnegative integer 4226 * @stable ICU 2.1 4227 */ getNumericValue(int ch)4228 public static int getNumericValue(int ch) 4229 { 4230 return UCharacterProperty.INSTANCE.getNumericValue(ch); 4231 } 4232 4233 /** 4234 * {@icu} Returns the numeric value for a Unicode code point as defined in the 4235 * Unicode Character Database. 4236 * <p>A "double" return type is necessary because some numeric values are 4237 * fractions, negative, or too large for int. 4238 * <p>For characters without any numeric values in the Unicode Character 4239 * Database, this function will return NO_NUMERIC_VALUE. 4240 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 4241 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 4242 * return type int and returns -1 when the argument ch does not have a 4243 * corresponding numeric value. This has been changed to synch with ICU4C 4244 * 4245 * This corresponds to the ICU4C function u_getNumericValue. 4246 * @param ch Code point to get the numeric value for. 4247 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 4248 * @stable ICU 2.4 4249 */ getUnicodeNumericValue(int ch)4250 public static double getUnicodeNumericValue(int ch) 4251 { 4252 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 4253 } 4254 4255 /** 4256 * Compatibility override of Java deprecated method. This 4257 * method will always remain deprecated. 4258 * Same as java.lang.Character.isSpace(). 4259 * @param ch the code point 4260 * @return true if the code point is a space character as 4261 * defined by java.lang.Character.isSpace. 
4262 * @deprecated ICU 3.4 (Java) 4263 */ 4264 @Deprecated isSpace(int ch)4265 public static boolean isSpace(int ch) { 4266 return ch <= 0x20 && 4267 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 4268 } 4269 4270 /** 4271 * Returns a value indicating a code point's Unicode category. 4272 * Up-to-date Unicode implementation of java.lang.Character.getType() 4273 * except for the above mentioned code points that had their category 4274 * changed.<br> 4275 * Return results are constants from the interface 4276 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 4277 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 4278 * those returned by java.lang.Character.getType. UCharacterCategory values 4279 * match the ones used in ICU4C, while java.lang.Character type 4280 * values, though similar, skip the value 17. 4281 * @param ch code point whose type is to be determined 4282 * @return category which is a value of UCharacterCategory 4283 * @stable ICU 2.1 4284 */ getType(int ch)4285 public static int getType(int ch) 4286 { 4287 return UCharacterProperty.INSTANCE.getType(ch); 4288 } 4289 4290 /** 4291 * Determines if a code point has a defined meaning in the up-to-date 4292 * Unicode standard. 4293 * E.g. supplementary code points though allocated space are not defined in 4294 * Unicode yet.<br> 4295 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 4296 * @param ch code point to be determined if it is defined in the most 4297 * current version of Unicode 4298 * @return true if this code point is defined in unicode 4299 * @stable ICU 2.1 4300 */ isDefined(int ch)4301 public static boolean isDefined(int ch) 4302 { 4303 return getType(ch) != 0; 4304 } 4305 4306 /** 4307 * Determines if a code point is a Java digit. 4308 * <br>This method observes the semantics of 4309 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 4310 * digits only. 
4311 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 4312 * numeric letters and other numbers as digits. 4313 * This has been changed to conform to the java semantics. 4314 * @param ch code point to query 4315 * @return true if this code point is a digit 4316 * @stable ICU 2.1 4317 */ isDigit(int ch)4318 public static boolean isDigit(int ch) 4319 { 4320 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 4321 } 4322 4323 /** 4324 * Determines if the specified code point is an ISO control character. 4325 * A code point is considered to be an ISO control character if it is in 4326 * the range \u0000 through \u001F or in the range \u007F through 4327 * \u009F.<br> 4328 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 4329 * @param ch code point to determine if it is an ISO control character 4330 * @return true if code point is a ISO control character 4331 * @stable ICU 2.1 4332 */ isISOControl(int ch)4333 public static boolean isISOControl(int ch) 4334 { 4335 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 4336 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 4337 } 4338 4339 /** 4340 * Determines if the specified code point is a letter. 4341 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 4342 * @param ch code point to determine if it is a letter 4343 * @return true if code point is a letter 4344 * @stable ICU 2.1 4345 */ isLetter(int ch)4346 public static boolean isLetter(int ch) 4347 { 4348 // if props == 0, it will just fall through and return false 4349 return ((1 << getType(ch)) 4350 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4351 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4352 | (1 << UCharacterCategory.TITLECASE_LETTER) 4353 | (1 << UCharacterCategory.MODIFIER_LETTER) 4354 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 4355 } 4356 4357 /** 4358 * Determines if the specified code point is a letter or digit. 
4359 * {@icunote} This method, unlike java.lang.Character does not regard the ascii 4360 * characters 'A' - 'Z' and 'a' - 'z' as digits. 4361 * @param ch code point to determine if it is a letter or a digit 4362 * @return true if code point is a letter or a digit 4363 * @stable ICU 2.1 4364 */ isLetterOrDigit(int ch)4365 public static boolean isLetterOrDigit(int ch) 4366 { 4367 return ((1 << getType(ch)) 4368 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4369 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4370 | (1 << UCharacterCategory.TITLECASE_LETTER) 4371 | (1 << UCharacterCategory.MODIFIER_LETTER) 4372 | (1 << UCharacterCategory.OTHER_LETTER) 4373 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 4374 } 4375 4376 /** 4377 * Compatibility override of Java deprecated method. This 4378 * method will always remain deprecated. Delegates to 4379 * java.lang.Character.isJavaIdentifierStart. 4380 * @param cp the code point 4381 * @return true if the code point can start a java identifier. 4382 * @deprecated ICU 3.4 (Java) 4383 */ 4384 @Deprecated isJavaLetter(int cp)4385 public static boolean isJavaLetter(int cp) { 4386 return isJavaIdentifierStart(cp); 4387 } 4388 4389 /** 4390 * Compatibility override of Java deprecated method. This 4391 * method will always remain deprecated. Delegates to 4392 * java.lang.Character.isJavaIdentifierPart. 4393 * @param cp the code point 4394 * @return true if the code point can continue a java identifier. 4395 * @deprecated ICU 3.4 (Java) 4396 */ 4397 @Deprecated isJavaLetterOrDigit(int cp)4398 public static boolean isJavaLetterOrDigit(int cp) { 4399 return isJavaIdentifierPart(cp); 4400 } 4401 4402 /** 4403 * Compatibility override of Java method, delegates to 4404 * java.lang.Character.isJavaIdentifierStart. 4405 * @param cp the code point 4406 * @return true if the code point can start a java identifier. 
4407 * @stable ICU 3.4 4408 */ isJavaIdentifierStart(int cp)4409 public static boolean isJavaIdentifierStart(int cp) { 4410 // note, downcast to char for jdk 1.4 compatibility 4411 return java.lang.Character.isJavaIdentifierStart((char)cp); 4412 } 4413 4414 /** 4415 * Compatibility override of Java method, delegates to 4416 * java.lang.Character.isJavaIdentifierPart. 4417 * @param cp the code point 4418 * @return true if the code point can continue a java identifier. 4419 * @stable ICU 3.4 4420 */ isJavaIdentifierPart(int cp)4421 public static boolean isJavaIdentifierPart(int cp) { 4422 // note, downcast to char for jdk 1.4 compatibility 4423 return java.lang.Character.isJavaIdentifierPart((char)cp); 4424 } 4425 4426 /** 4427 * Determines if the specified code point is a lowercase character. 4428 * UnicodeData only contains case mappings for code points where they are 4429 * one-to-one mappings; it also omits information about context-sensitive 4430 * case mappings.<br> For more information about Unicode case mapping 4431 * please refer to the 4432 * <a href=https://www.unicode.org/reports/tr21/>Technical report 4433 * #21</a>.<br> 4434 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 4435 * @param ch code point to determine if it is in lowercase 4436 * @return true if code point is a lowercase character 4437 * @stable ICU 2.1 4438 */ isLowerCase(int ch)4439 public static boolean isLowerCase(int ch) 4440 { 4441 // if props == 0, it will just fall through and return false 4442 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 4443 } 4444 4445 /** 4446 * Determines if the specified code point is a white space character. 4447 * A code point is considered to be an whitespace character if and only 4448 * if it satisfies one of the following criteria: 4449 * <ul> 4450 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 4451 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 
4452 * <li> It is \u0009, HORIZONTAL TABULATION. 4453 * <li> It is \u000A, LINE FEED. 4454 * <li> It is \u000B, VERTICAL TABULATION. 4455 * <li> It is \u000C, FORM FEED. 4456 * <li> It is \u000D, CARRIAGE RETURN. 4457 * <li> It is \u001C, FILE SEPARATOR. 4458 * <li> It is \u001D, GROUP SEPARATOR. 4459 * <li> It is \u001E, RECORD SEPARATOR. 4460 * <li> It is \u001F, UNIT SEPARATOR. 4461 * </ul> 4462 * 4463 * This API tries to sync with the semantics of Java's 4464 * java.lang.Character.isWhitespace(), but it may not return 4465 * the exact same results because of the Unicode version 4466 * difference. 4467 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 4468 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 4469 * See http://www.unicode.org/versions/Unicode4.0.1/ 4470 * @param ch code point to determine if it is a white space 4471 * @return true if the specified code point is a white space character 4472 * @stable ICU 2.1 4473 */ isWhitespace(int ch)4474 public static boolean isWhitespace(int ch) 4475 { 4476 // exclude no-break spaces 4477 // if props == 0, it will just fall through and return false 4478 return ((1 << getType(ch)) & 4479 ((1 << UCharacterCategory.SPACE_SEPARATOR) 4480 | (1 << UCharacterCategory.LINE_SEPARATOR) 4481 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 4482 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 4483 // TAB VT LF FF CR FS GS RS US NL are all control characters 4484 // that are white spaces. 4485 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 4486 } 4487 4488 /** 4489 * Determines if the specified code point is a Unicode specified space 4490 * character, i.e. if code point is in the category Zs, Zl and Zp. 4491 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
4492 * @param ch code point to determine if it is a space 4493 * @return true if the specified code point is a space character 4494 * @stable ICU 2.1 4495 */ isSpaceChar(int ch)4496 public static boolean isSpaceChar(int ch) 4497 { 4498 // if props == 0, it will just fall through and return false 4499 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 4500 | (1 << UCharacterCategory.LINE_SEPARATOR) 4501 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 4502 != 0; 4503 } 4504 4505 /** 4506 * Determines if the specified code point is a titlecase character. 4507 * UnicodeData only contains case mappings for code points where they are 4508 * one-to-one mappings; it also omits information about context-sensitive 4509 * case mappings.<br> 4510 * For more information about Unicode case mapping please refer to the 4511 * <a href=https://www.unicode.org/reports/tr21/> 4512 * Technical report #21</a>.<br> 4513 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 4514 * @param ch code point to determine if it is in title case 4515 * @return true if the specified code point is a titlecase character 4516 * @stable ICU 2.1 4517 */ isTitleCase(int ch)4518 public static boolean isTitleCase(int ch) 4519 { 4520 // if props == 0, it will just fall through and return false 4521 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 4522 } 4523 4524 /** 4525 * Determines if the specified code point may be any part of a Unicode 4526 * identifier other than the starting character. 
4527 * A code point may be part of a Unicode identifier if and only if it is 4528 * one of the following: 4529 * <ul> 4530 * <li> Lu Uppercase letter 4531 * <li> Ll Lowercase letter 4532 * <li> Lt Titlecase letter 4533 * <li> Lm Modifier letter 4534 * <li> Lo Other letter 4535 * <li> Nl Letter number 4536 * <li> Pc Connecting punctuation character 4537 * <li> Nd decimal number 4538 * <li> Mc Spacing combining mark 4539 * <li> Mn Non-spacing mark 4540 * <li> Cf formatting code 4541 * </ul> 4542 * Up-to-date Unicode implementation of 4543 * java.lang.Character.isUnicodeIdentifierPart().<br> 4544 * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>. 4545 * @param ch code point to determine if is can be part of a Unicode 4546 * identifier 4547 * @return true if code point is any character belonging a unicode 4548 * identifier suffix after the first character 4549 * @stable ICU 2.1 4550 */ isUnicodeIdentifierPart(int ch)4551 public static boolean isUnicodeIdentifierPart(int ch) 4552 { 4553 // if props == 0, it will just fall through and return false 4554 // cat == format 4555 return ((1 << getType(ch)) 4556 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4557 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4558 | (1 << UCharacterCategory.TITLECASE_LETTER) 4559 | (1 << UCharacterCategory.MODIFIER_LETTER) 4560 | (1 << UCharacterCategory.OTHER_LETTER) 4561 | (1 << UCharacterCategory.LETTER_NUMBER) 4562 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 4563 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 4564 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 4565 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 4566 || isIdentifierIgnorable(ch); 4567 } 4568 4569 /** 4570 * Determines if the specified code point is permissible as the first 4571 * character in a Unicode identifier. 
4572 * A code point may start a Unicode identifier if it is of type either 4573 * <ul> 4574 * <li> Lu Uppercase letter 4575 * <li> Ll Lowercase letter 4576 * <li> Lt Titlecase letter 4577 * <li> Lm Modifier letter 4578 * <li> Lo Other letter 4579 * <li> Nl Letter number 4580 * </ul> 4581 * Up-to-date Unicode implementation of 4582 * java.lang.Character.isUnicodeIdentifierStart().<br> 4583 * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>. 4584 * @param ch code point to determine if it can start a Unicode identifier 4585 * @return true if code point is the first character belonging a unicode 4586 * identifier 4587 * @stable ICU 2.1 4588 */ isUnicodeIdentifierStart(int ch)4589 public static boolean isUnicodeIdentifierStart(int ch) 4590 { 4591 /*int cat = getType(ch);*/ 4592 // if props == 0, it will just fall through and return false 4593 return ((1 << getType(ch)) 4594 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4595 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4596 | (1 << UCharacterCategory.TITLECASE_LETTER) 4597 | (1 << UCharacterCategory.MODIFIER_LETTER) 4598 | (1 << UCharacterCategory.OTHER_LETTER) 4599 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 4600 } 4601 4602 /** 4603 * Determines if the specified code point should be regarded as an 4604 * ignorable character in a Java identifier. 4605 * A character is Java-identifier-ignorable if it has the general category 4606 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 4607 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 4608 * Up-to-date Unicode implementation of 4609 * java.lang.Character.isIdentifierIgnorable().<br> 4610 * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>. 4611 * <p>Note that Unicode just recommends to ignore Cf (format controls). 4612 * @param ch code point to be determined if it can be ignored in a Unicode 4613 * identifier. 
4614 * @return true if the code point is ignorable 4615 * @stable ICU 2.1 4616 */ isIdentifierIgnorable(int ch)4617 public static boolean isIdentifierIgnorable(int ch) 4618 { 4619 // see java.lang.Character.isIdentifierIgnorable() on range of 4620 // ignorable characters. 4621 if (ch <= 0x9f) { 4622 return isISOControl(ch) 4623 && !((ch >= 0x9 && ch <= 0xd) 4624 || (ch >= 0x1c && ch <= 0x1f)); 4625 } 4626 return getType(ch) == UCharacterCategory.FORMAT; 4627 } 4628 4629 /** 4630 * Determines if the specified code point is an uppercase character. 4631 * UnicodeData only contains case mappings for code point where they are 4632 * one-to-one mappings; it also omits information about context-sensitive 4633 * case mappings.<br> 4634 * For language specific case conversion behavior, use 4635 * toUpperCase(locale, str). <br> 4636 * For example, the case conversion for dot-less i and dotted I in Turkish, 4637 * or for final sigma in Greek. 4638 * For more information about Unicode case mapping please refer to the 4639 * <a href=https://www.unicode.org/reports/tr21/> 4640 * Technical report #21</a>.<br> 4641 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 4642 * @param ch code point to determine if it is in uppercase 4643 * @return true if the code point is an uppercase character 4644 * @stable ICU 2.1 4645 */ isUpperCase(int ch)4646 public static boolean isUpperCase(int ch) 4647 { 4648 // if props == 0, it will just fall through and return false 4649 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 4650 } 4651 4652 /** 4653 * The given code point is mapped to its lowercase equivalent; if the code 4654 * point has no lowercase equivalent, the code point itself is returned. 4655 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 4656 * 4657 * <p>This function only returns the simple, single-code point case mapping. 
4658 * Full case mappings should be used whenever possible because they produce 4659 * better results by working on whole strings. 4660 * They take into account the string context and the language and can map 4661 * to a result string with a different length as appropriate. 4662 * Full case mappings are applied by the case mapping functions 4663 * that take String parameters rather than code points (int). 4664 * See also the User Guide chapter on C/POSIX migration: 4665 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4666 * 4667 * @param ch code point whose lowercase equivalent is to be retrieved 4668 * @return the lowercase equivalent code point 4669 * @stable ICU 2.1 4670 */ toLowerCase(int ch)4671 public static int toLowerCase(int ch) { 4672 return UCaseProps.INSTANCE.tolower(ch); 4673 } 4674 4675 /** 4676 * Converts argument code point and returns a String object representing 4677 * the code point's value in UTF-16 format. 4678 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 4679 * 4680 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 4681 * 4682 * @param ch code point 4683 * @return string representation of the code point, null if code point is not 4684 * defined in unicode 4685 * @stable ICU 2.1 4686 */ toString(int ch)4687 public static String toString(int ch) 4688 { 4689 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4690 return null; 4691 } 4692 4693 if (ch < SUPPLEMENTARY_MIN_VALUE) { 4694 return String.valueOf((char)ch); 4695 } 4696 4697 return new String(Character.toChars(ch)); 4698 } 4699 4700 /** 4701 * Converts the code point argument to titlecase. 4702 * If no titlecase is available, the uppercase is returned. If no uppercase 4703 * is available, the code point itself is returned. 4704 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4705 * 4706 * <p>This function only returns the simple, single-code point case mapping. 
4707 * Full case mappings should be used whenever possible because they produce 4708 * better results by working on whole strings. 4709 * They take into account the string context and the language and can map 4710 * to a result string with a different length as appropriate. 4711 * Full case mappings are applied by the case mapping functions 4712 * that take String parameters rather than code points (int). 4713 * See also the User Guide chapter on C/POSIX migration: 4714 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4715 * 4716 * @param ch code point whose title case is to be retrieved 4717 * @return titlecase code point 4718 * @stable ICU 2.1 4719 */ toTitleCase(int ch)4720 public static int toTitleCase(int ch) { 4721 return UCaseProps.INSTANCE.totitle(ch); 4722 } 4723 4724 /** 4725 * Converts the character argument to uppercase. 4726 * If no uppercase is available, the character itself is returned. 4727 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4728 * 4729 * <p>This function only returns the simple, single-code point case mapping. 4730 * Full case mappings should be used whenever possible because they produce 4731 * better results by working on whole strings. 4732 * They take into account the string context and the language and can map 4733 * to a result string with a different length as appropriate. 4734 * Full case mappings are applied by the case mapping functions 4735 * that take String parameters rather than code points (int). 
4736 * See also the User Guide chapter on C/POSIX migration: 4737 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4738 * 4739 * @param ch code point whose uppercase is to be retrieved 4740 * @return uppercase code point 4741 * @stable ICU 2.1 4742 */ toUpperCase(int ch)4743 public static int toUpperCase(int ch) { 4744 return UCaseProps.INSTANCE.toupper(ch); 4745 } 4746 4747 // extra methods not in java.lang.Character -------------------------- 4748 4749 /** 4750 * {@icu} Determines if the code point is a supplementary character. 4751 * A code point is a supplementary character if and only if it is greater 4752 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4753 * @param ch code point to be determined if it is in the supplementary 4754 * plane 4755 * @return true if code point is a supplementary character 4756 * @stable ICU 2.1 4757 */ isSupplementary(int ch)4758 public static boolean isSupplementary(int ch) 4759 { 4760 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4761 ch <= UCharacter.MAX_VALUE; 4762 } 4763 4764 /** 4765 * {@icu} Determines if the code point is in the BMP plane. 4766 * @param ch code point to be determined if it is not a supplementary 4767 * character 4768 * @return true if code point is not a supplementary character 4769 * @stable ICU 2.1 4770 */ isBMP(int ch)4771 public static boolean isBMP(int ch) 4772 { 4773 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4774 } 4775 4776 /** 4777 * {@icu} Determines whether the specified code point is a printable character 4778 * according to the Unicode standard. 
4779 * @param ch code point to be determined if it is printable 4780 * @return true if the code point is a printable character 4781 * @stable ICU 2.1 4782 */ isPrintable(int ch)4783 public static boolean isPrintable(int ch) 4784 { 4785 int cat = getType(ch); 4786 // if props == 0, it will just fall through and return false 4787 return (cat != UCharacterCategory.UNASSIGNED && 4788 cat != UCharacterCategory.CONTROL && 4789 cat != UCharacterCategory.FORMAT && 4790 cat != UCharacterCategory.PRIVATE_USE && 4791 cat != UCharacterCategory.SURROGATE && 4792 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4793 } 4794 4795 /** 4796 * {@icu} Determines whether the specified code point is of base form. 4797 * A code point of base form does not graphically combine with preceding 4798 * characters, and is neither a control nor a format character. 4799 * @param ch code point to be determined if it is of base form 4800 * @return true if the code point is of base form 4801 * @stable ICU 2.1 4802 */ isBaseForm(int ch)4803 public static boolean isBaseForm(int ch) 4804 { 4805 int cat = getType(ch); 4806 // if props == 0, it will just fall through and return false 4807 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4808 cat == UCharacterCategory.OTHER_NUMBER || 4809 cat == UCharacterCategory.LETTER_NUMBER || 4810 cat == UCharacterCategory.UPPERCASE_LETTER || 4811 cat == UCharacterCategory.LOWERCASE_LETTER || 4812 cat == UCharacterCategory.TITLECASE_LETTER || 4813 cat == UCharacterCategory.MODIFIER_LETTER || 4814 cat == UCharacterCategory.OTHER_LETTER || 4815 cat == UCharacterCategory.NON_SPACING_MARK || 4816 cat == UCharacterCategory.ENCLOSING_MARK || 4817 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4818 } 4819 4820 /** 4821 * {@icu} Returns the Bidirection property of a code point. 
4822 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4823 * property.<br> 4824 * Result returned belongs to the interface 4825 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4826 * @param ch the code point to be determined its direction 4827 * @return direction constant from UCharacterDirection. 4828 * @stable ICU 2.1 4829 */ getDirection(int ch)4830 public static int getDirection(int ch) 4831 { 4832 return UBiDiProps.INSTANCE.getClass(ch); 4833 } 4834 4835 /** 4836 * Determines whether the code point has the "mirrored" property. 4837 * This property is set for characters that are commonly used in 4838 * Right-To-Left contexts and need to be displayed with a "mirrored" 4839 * glyph. 4840 * @param ch code point whose mirror is to be determined 4841 * @return true if the code point has the "mirrored" property 4842 * @stable ICU 2.1 4843 */ isMirrored(int ch)4844 public static boolean isMirrored(int ch) 4845 { 4846 return UBiDiProps.INSTANCE.isMirrored(ch); 4847 } 4848 4849 /** 4850 * {@icu} Maps the specified code point to a "mirror-image" code point. 4851 * For code points with the "mirrored" property, implementations sometimes 4852 * need a "poor man's" mapping to another code point such that the default 4853 * glyph may serve as the mirror-image of the default glyph of the 4854 * specified code point.<br> 4855 * This is useful for text conversion to and from codepages with visual 4856 * order, and for displays without glyph selection capabilities. 4857 * @param ch code point whose mirror is to be retrieved 4858 * @return another code point that may serve as a mirror-image substitute, 4859 * or ch itself if there is no such mapping or ch does not have the 4860 * "mirrored" property 4861 * @stable ICU 2.1 4862 */ getMirror(int ch)4863 public static int getMirror(int ch) 4864 { 4865 return UBiDiProps.INSTANCE.getMirror(ch); 4866 } 4867 4868 /** 4869 * {@icu} Maps the specified character to its paired bracket character. 
4870 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4871 * Otherwise c itself is returned. 4872 * See http://www.unicode.org/reports/tr9/ 4873 * 4874 * @param c the code point to be mapped 4875 * @return the paired bracket code point, 4876 * or c itself if there is no such mapping 4877 * (Bidi_Paired_Bracket_Type=None) 4878 * 4879 * @see UProperty#BIDI_PAIRED_BRACKET 4880 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4881 * @see #getMirror(int) 4882 * @stable ICU 52 4883 */ getBidiPairedBracket(int c)4884 public static int getBidiPairedBracket(int c) { 4885 return UBiDiProps.INSTANCE.getPairedBracket(c); 4886 } 4887 4888 /** 4889 * {@icu} Returns the combining class of the argument codepoint 4890 * @param ch code point whose combining is to be retrieved 4891 * @return the combining class of the codepoint 4892 * @stable ICU 2.1 4893 */ getCombiningClass(int ch)4894 public static int getCombiningClass(int ch) 4895 { 4896 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4897 } 4898 4899 /** 4900 * {@icu} A code point is illegal if and only if 4901 * <ul> 4902 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4903 * <li> A surrogate value, 0xD800 to 0xDFFF 4904 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4905 * </ul> 4906 * Note: legal does not mean that it is assigned in this version of Unicode. 4907 * @param ch code point to determine if it is a legal code point by itself 4908 * @return true if and only if legal. 4909 * @stable ICU 2.1 4910 */ isLegal(int ch)4911 public static boolean isLegal(int ch) 4912 { 4913 if (ch < MIN_VALUE) { 4914 return false; 4915 } 4916 if (ch < Character.MIN_SURROGATE) { 4917 return true; 4918 } 4919 if (ch <= Character.MAX_SURROGATE) { 4920 return false; 4921 } 4922 if (UCharacterUtility.isNonCharacter(ch)) { 4923 return false; 4924 } 4925 return (ch <= MAX_VALUE); 4926 } 4927 4928 /** 4929 * {@icu} A string is legal iff all its code points are legal. 
4930 * A code point is illegal if and only if 4931 * <ul> 4932 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4933 * <li> A surrogate value, 0xD800 to 0xDFFF 4934 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4935 * </ul> 4936 * Note: legal does not mean that it is assigned in this version of Unicode. 4937 * @param str containing code points to examin 4938 * @return true if and only if legal. 4939 * @stable ICU 2.1 4940 */ isLegal(String str)4941 public static boolean isLegal(String str) 4942 { 4943 int size = str.length(); 4944 int codepoint; 4945 for (int i = 0; i < size; i += Character.charCount(codepoint)) 4946 { 4947 codepoint = str.codePointAt(i); 4948 if (!isLegal(codepoint)) { 4949 return false; 4950 } 4951 } 4952 return true; 4953 } 4954 4955 /** 4956 * {@icu} Returns the version of Unicode data used. 4957 * @return the unicode version number used 4958 * @stable ICU 2.1 4959 */ getUnicodeVersion()4960 public static VersionInfo getUnicodeVersion() 4961 { 4962 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 4963 } 4964 4965 /** 4966 * {@icu} Returns the most current Unicode name of the argument code point, or 4967 * null if the character is unassigned or outside the range 4968 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4969 * <br> 4970 * Note calling any methods related to code point names, e.g. get*Name*() 4971 * incurs a one-time initialization cost to construct the name tables. 
4972 * @param ch the code point for which to get the name 4973 * @return most current Unicode name 4974 * @stable ICU 2.1 4975 */ getName(int ch)4976 public static String getName(int ch) 4977 { 4978 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 4979 } 4980 4981 /** 4982 * {@icu} Returns the names for each of the characters in a string 4983 * @param s string to format 4984 * @param separator string to go between names 4985 * @return string of names 4986 * @stable ICU 3.8 4987 */ getName(String s, String separator)4988 public static String getName(String s, String separator) { 4989 if (s.length() == 1) { // handle common case 4990 return getName(s.charAt(0)); 4991 } 4992 int cp; 4993 StringBuilder sb = new StringBuilder(); 4994 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 4995 cp = s.codePointAt(i); 4996 if (i != 0) sb.append(separator); 4997 sb.append(UCharacter.getName(cp)); 4998 } 4999 return sb.toString(); 5000 } 5001 5002 /** 5003 * {@icu} Returns null. 5004 * Used to return the Unicode_1_Name property value which was of little practical value. 5005 * @param ch the code point for which to get the name 5006 * @return null 5007 * @deprecated ICU 49 5008 */ 5009 @Deprecated getName1_0(int ch)5010 public static String getName1_0(int ch) 5011 { 5012 return null; 5013 } 5014 5015 /** 5016 * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and 5017 * getName1_0(int), this method will return a name even for codepoints that 5018 * are not assigned a name in UnicodeData.txt. 5019 * 5020 * <p>The names are returned in the following order. 5021 * <ul> 5022 * <li> Most current Unicode name if there is any 5023 * <li> Unicode 1.0 name if there is any 5024 * <li> Extended name in the form of 5025 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 5026 * </ul> 5027 * Note calling any methods related to code point names, e.g. 
get*Name*() 5028 * incurs a one-time initialization cost to construct the name tables. 5029 * @param ch the code point for which to get the name 5030 * @return a name for the argument codepoint 5031 * @stable ICU 2.6 5032 */ getExtendedName(int ch)5033 public static String getExtendedName(int ch) { 5034 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 5035 } 5036 5037 /** 5038 * {@icu} Returns the corrected name from NameAliases.txt if there is one. 5039 * Returns null if the character is unassigned or outside the range 5040 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 5041 * <br> 5042 * Note calling any methods related to code point names, e.g. get*Name*() 5043 * incurs a one-time initialization cost to construct the name tables. 5044 * @param ch the code point for which to get the name alias 5045 * @return Unicode name alias, or null 5046 * @stable ICU 4.4 5047 */ getNameAlias(int ch)5048 public static String getNameAlias(int ch) 5049 { 5050 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 5051 } 5052 5053 /** 5054 * {@icu} Returns null. 5055 * Used to return the ISO 10646 comment for a character. 5056 * The Unicode ISO_Comment property is deprecated and has no values. 5057 * 5058 * @param ch The code point for which to get the ISO comment. 5059 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 5060 * @return null 5061 * @deprecated ICU 49 5062 */ 5063 @Deprecated getISOComment(int ch)5064 public static String getISOComment(int ch) 5065 { 5066 return null; 5067 } 5068 5069 /** 5070 * {@icu} <p>Finds a Unicode code point by its most current Unicode name and 5071 * return its code point value. All Unicode names are in uppercase. 5072 * Note calling any methods related to code point names, e.g. get*Name*() 5073 * incurs a one-time initialization cost to construct the name tables. 
5074 * @param name most current Unicode character name whose code point is to 5075 * be returned 5076 * @return code point or -1 if name is not found 5077 * @stable ICU 2.1 5078 */ getCharFromName(String name)5079 public static int getCharFromName(String name){ 5080 return UCharacterName.INSTANCE.getCharFromName( 5081 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 5082 } 5083 5084 /** 5085 * {@icu} Returns -1. 5086 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 5087 * its code point value. 5088 * @param name Unicode 1.0 code point name whose code point is to be 5089 * returned 5090 * @return -1 5091 * @deprecated ICU 49 5092 * @see #getName1_0(int) 5093 */ 5094 @Deprecated getCharFromName1_0(String name)5095 public static int getCharFromName1_0(String name){ 5096 return -1; 5097 } 5098 5099 /** 5100 * {@icu} <p>Find a Unicode character by either its name and return its code 5101 * point value. All Unicode names are in uppercase. 5102 * Extended names are all lowercase except for numbers and are contained 5103 * within angle brackets. 5104 * The names are searched in the following order 5105 * <ul> 5106 * <li> Most current Unicode name if there is any 5107 * <li> Unicode 1.0 name if there is any 5108 * <li> Extended name in the form of 5109 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 5110 * </ul> 5111 * Note calling any methods related to code point names, e.g. get*Name*() 5112 * incurs a one-time initialization cost to construct the name tables. 5113 * @param name codepoint name 5114 * @return code point associated with the name or -1 if the name is not 5115 * found. 
5116 * @stable ICU 2.6 5117 */ getCharFromExtendedName(String name)5118 public static int getCharFromExtendedName(String name){ 5119 return UCharacterName.INSTANCE.getCharFromName( 5120 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 5121 } 5122 5123 /** 5124 * {@icu} <p>Find a Unicode character by its corrected name alias and return 5125 * its code point value. All Unicode names are in uppercase. 5126 * Note calling any methods related to code point names, e.g. get*Name*() 5127 * incurs a one-time initialization cost to construct the name tables. 5128 * @param name Unicode name alias whose code point is to be returned 5129 * @return code point or -1 if name is not found 5130 * @stable ICU 4.4 5131 */ getCharFromNameAlias(String name)5132 public static int getCharFromNameAlias(String name){ 5133 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 5134 } 5135 5136 /** 5137 * {@icu} Return the Unicode name for a given property, as given in the 5138 * Unicode database file PropertyAliases.txt. Most properties 5139 * have more than one name. The nameChoice determines which one 5140 * is returned. 5141 * 5142 * In addition, this function maps the property 5143 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 5144 * "General_Category_Mask". These names are not in 5145 * PropertyAliases.txt. 5146 * 5147 * @param property UProperty selector. 5148 * 5149 * @param nameChoice UProperty.NameChoice selector for which name 5150 * to get. All properties have a long name. Most have a short 5151 * name, but some do not. Unicode allows for additional names; if 5152 * present these will be returned by UProperty.NameChoice.LONG + i, 5153 * where i=1, 2,... 5154 * 5155 * @return a name, or null if Unicode explicitly defines no name 5156 * ("n/a") for a given property/nameChoice. If a given nameChoice 5157 * throws an exception, then all larger values of nameChoice will 5158 * throw an exception. 
If null is returned for a given 5159 * nameChoice, then other nameChoice values may return non-null 5160 * results. 5161 * 5162 * @exception IllegalArgumentException thrown if property or 5163 * nameChoice are invalid. 5164 * 5165 * @see UProperty 5166 * @see UProperty.NameChoice 5167 * @stable ICU 2.4 5168 */ getPropertyName(int property, int nameChoice)5169 public static String getPropertyName(int property, 5170 int nameChoice) { 5171 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 5172 } 5173 5174 /** 5175 * {@icu} Return the UProperty selector for a given property name, as 5176 * specified in the Unicode database file PropertyAliases.txt. 5177 * Short, long, and any other variants are recognized. 5178 * 5179 * In addition, this function maps the synthetic names "gcm" / 5180 * "General_Category_Mask" to the property 5181 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 5182 * PropertyAliases.txt. 5183 * 5184 * @param propertyAlias the property name to be matched. The name 5185 * is compared using "loose matching" as described in 5186 * PropertyAliases.txt. 5187 * 5188 * @return a UProperty enum. 5189 * 5190 * @exception IllegalArgumentException thrown if propertyAlias 5191 * is not recognized. 5192 * 5193 * @see UProperty 5194 * @stable ICU 2.4 5195 */ getPropertyEnum(CharSequence propertyAlias)5196 public static int getPropertyEnum(CharSequence propertyAlias) { 5197 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 5198 if (propEnum == UProperty.UNDEFINED) { 5199 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 5200 } 5201 return propEnum; 5202 } 5203 5204 /** 5205 * {@icu} Return the Unicode name for a given property value, as given in 5206 * the Unicode database file PropertyValueAliases.txt. Most 5207 * values have more than one name. The nameChoice determines 5208 * which one is returned. 
5209 * 5210 * Note: Some of the names in PropertyValueAliases.txt can only be 5211 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 5212 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 5213 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 5214 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 5215 * 5216 * @param property UProperty selector constant. 5217 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 5218 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 5219 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 5220 * If out of range, null is returned. 5221 * 5222 * @param value selector for a value for the given property. In 5223 * general, valid values range from 0 up to some maximum. There 5224 * are a few exceptions: (1.) UProperty.BLOCK values begin at the 5225 * non-zero value BASIC_LATIN.getID(). (2.) 5226 * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous 5227 * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values 5228 * are mask values produced by left-shifting 1 by 5229 * UCharacter.getType(). This allows grouped categories such as 5230 * [:L:] to be represented. Mask values are non-contiguous. 5231 * 5232 * @param nameChoice UProperty.NameChoice selector for which name 5233 * to get. All values have a long name. Most have a short name, 5234 * but some do not. Unicode allows for additional names; if 5235 * present these will be returned by UProperty.NameChoice.LONG + i, 5236 * where i=1, 2,... 5237 * 5238 * @return a name, or null if Unicode explicitly defines no name 5239 * ("n/a") for a given property/value/nameChoice. If a given 5240 * nameChoice throws an exception, then all larger values of 5241 * nameChoice will throw an exception. If null is returned for a 5242 * given nameChoice, then other nameChoice values may return 5243 * non-null results. 
5244 * 5245 * @exception IllegalArgumentException thrown if property, value, 5246 * or nameChoice are invalid. 5247 * 5248 * @see UProperty 5249 * @see UProperty.NameChoice 5250 * @stable ICU 2.4 5251 */ getPropertyValueName(int property, int value, int nameChoice)5252 public static String getPropertyValueName(int property, 5253 int value, 5254 int nameChoice) 5255 { 5256 if ((property == UProperty.CANONICAL_COMBINING_CLASS 5257 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 5258 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 5259 && value >= UCharacter.getIntPropertyMinValue( 5260 UProperty.CANONICAL_COMBINING_CLASS) 5261 && value <= UCharacter.getIntPropertyMaxValue( 5262 UProperty.CANONICAL_COMBINING_CLASS) 5263 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 5264 // this is hard coded for the valid cc 5265 // because PropertyValueAliases.txt does not contain all of them 5266 try { 5267 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 5268 nameChoice); 5269 } 5270 catch (IllegalArgumentException e) { 5271 return null; 5272 } 5273 } 5274 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 5275 } 5276 5277 /** 5278 * {@icu} Return the property value integer for a given value name, as 5279 * specified in the Unicode database file PropertyValueAliases.txt. 5280 * Short, long, and any other variants are recognized. 5281 * 5282 * Note: Some of the names in PropertyValueAliases.txt will only be 5283 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 5284 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 5285 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 5286 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 5287 * 5288 * @param property UProperty selector constant. 
5289 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 5290 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 5291 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 5292 * Only these properties can be enumerated. 5293 * 5294 * @param valueAlias the value name to be matched. The name is 5295 * compared using "loose matching" as described in 5296 * PropertyValueAliases.txt. 5297 * 5298 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 5299 * values are mask values produced by left-shifting 1 by 5300 * UCharacter.getType(). This allows grouped categories such as 5301 * [:L:] to be represented. 5302 * 5303 * @see UProperty 5304 * @throws IllegalArgumentException if property is not a valid UProperty 5305 * selector or valueAlias is not a value of this property 5306 * @stable ICU 2.4 5307 */ getPropertyValueEnum(int property, CharSequence valueAlias)5308 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 5309 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 5310 if (propEnum == UProperty.UNDEFINED) { 5311 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 5312 } 5313 return propEnum; 5314 } 5315 5316 /** 5317 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 5318 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 5319 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 5320 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 5321 * @internal 5322 * @deprecated This API is ICU internal only. 
5323 */ 5324 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)5325 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 5326 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 5327 } 5328 5329 5330 /** 5331 * {@icu} Returns a code point corresponding to the two surrogate code units. 5332 * 5333 * @param lead the lead unit 5334 * (In ICU 2.1-69 the type of both parameters was <code>char</code>.) 5335 * @param trail the trail unit 5336 * @return code point if lead and trail form a valid surrogate pair. 5337 * @exception IllegalArgumentException thrown when the code units do 5338 * not form a valid surrogate pair 5339 * @stable ICU 70 5340 * @see #toCodePoint(int, int) 5341 */ getCodePoint(int lead, int trail)5342 public static int getCodePoint(int lead, int trail) 5343 { 5344 if (isHighSurrogate(lead) && isLowSurrogate(trail)) { 5345 return toCodePoint(lead, trail); 5346 } 5347 throw new IllegalArgumentException("Not a valid surrogate pair"); 5348 } 5349 5350 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 5351 /** 5352 * {@icu} Returns a code point corresponding to the two surrogate code units. 5353 * 5354 * @param lead the lead char 5355 * @param trail the trail char 5356 * @return code point if surrogate characters are valid. 5357 * @exception IllegalArgumentException thrown when the code units do 5358 * not form a valid code point 5359 * @stable ICU 2.1 5360 */ getCodePoint(char lead, char trail)5361 public static int getCodePoint(char lead, char trail) 5362 { 5363 return getCodePoint((int) lead, (int) trail); 5364 } 5365 // END Android patch: Keep the `char` version on Android. See ICU-21655 5366 5367 /** 5368 * {@icu} Returns the code point corresponding to the BMP code point. 5369 * 5370 * @param char16 the BMP code point 5371 * @return code point if argument is a valid character. 
5372 * @exception IllegalArgumentException thrown when char16 is not a valid 5373 * code point 5374 * @stable ICU 2.1 5375 */ getCodePoint(char char16)5376 public static int getCodePoint(char char16) 5377 { 5378 if (UCharacter.isLegal(char16)) { 5379 return char16; 5380 } 5381 throw new IllegalArgumentException("Illegal codepoint"); 5382 } 5383 5384 /** 5385 * Returns the uppercase version of the argument string. 5386 * Casing is dependent on the default locale and context-sensitive. 5387 * @param str source string to be performed on 5388 * @return uppercase version of the argument string 5389 * @stable ICU 2.1 5390 */ toUpperCase(String str)5391 public static String toUpperCase(String str) 5392 { 5393 return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str); 5394 } 5395 5396 /** 5397 * Returns the lowercase version of the argument string. 5398 * Casing is dependent on the default locale and context-sensitive 5399 * @param str source string to be performed on 5400 * @return lowercase version of the argument string 5401 * @stable ICU 2.1 5402 */ toLowerCase(String str)5403 public static String toLowerCase(String str) 5404 { 5405 return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str); 5406 } 5407 5408 /** 5409 * <p>Returns the titlecase version of the argument string. 5410 * <p>Position for titlecasing is determined by the argument break 5411 * iterator, hence the user can customize his break iterator for 5412 * a specialized titlecasing. In this case only the forward iteration 5413 * needs to be implemented. 5414 * If the break iterator passed in is null, the default Unicode algorithm 5415 * will be used to determine the titlecase positions. 5416 * 5417 * <p>Only positions returned by the break iterator will be title cased, 5418 * character in between the positions will all be in lower case. 
5419 * <p>Casing is dependent on the default locale and context-sensitive 5420 * @param str source string to be performed on 5421 * @param breakiter break iterator to determine the positions in which 5422 * the character should be title cased. 5423 * @return titlecase version of the argument string 5424 * @stable ICU 2.6 5425 */ toTitleCase(String str, BreakIterator breakiter)5426 public static String toTitleCase(String str, BreakIterator breakiter) 5427 { 5428 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 5429 } 5430 getDefaultCaseLocale()5431 private static int getDefaultCaseLocale() { 5432 return UCaseProps.getCaseLocale(Locale.getDefault()); 5433 } 5434 getCaseLocale(Locale locale)5435 private static int getCaseLocale(Locale locale) { 5436 if (locale == null) { 5437 locale = Locale.getDefault(); 5438 } 5439 return UCaseProps.getCaseLocale(locale); 5440 } 5441 getCaseLocale(ULocale locale)5442 private static int getCaseLocale(ULocale locale) { 5443 if (locale == null) { 5444 locale = ULocale.getDefault(); 5445 } 5446 return UCaseProps.getCaseLocale(locale); 5447 } 5448 5449 /** 5450 * Returns the uppercase version of the argument string. 5451 * Casing is dependent on the argument locale and context-sensitive. 5452 * @param locale which string is to be converted in 5453 * @param str source string to be performed on 5454 * @return uppercase version of the argument string 5455 * @stable ICU 2.1 5456 */ toUpperCase(Locale locale, String str)5457 public static String toUpperCase(Locale locale, String str) 5458 { 5459 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5460 } 5461 5462 /** 5463 * Returns the uppercase version of the argument string. 5464 * Casing is dependent on the argument locale and context-sensitive. 
5465 * @param locale which string is to be converted in 5466 * @param str source string to be performed on 5467 * @return uppercase version of the argument string 5468 * @stable ICU 3.2 5469 */ toUpperCase(ULocale locale, String str)5470 public static String toUpperCase(ULocale locale, String str) { 5471 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5472 } 5473 5474 /** 5475 * Returns the lowercase version of the argument string. 5476 * Casing is dependent on the argument locale and context-sensitive 5477 * @param locale which string is to be converted in 5478 * @param str source string to be performed on 5479 * @return lowercase version of the argument string 5480 * @stable ICU 2.1 5481 */ toLowerCase(Locale locale, String str)5482 public static String toLowerCase(Locale locale, String str) 5483 { 5484 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5485 } 5486 5487 /** 5488 * Returns the lowercase version of the argument string. 5489 * Casing is dependent on the argument locale and context-sensitive 5490 * @param locale which string is to be converted in 5491 * @param str source string to be performed on 5492 * @return lowercase version of the argument string 5493 * @stable ICU 3.2 5494 */ toLowerCase(ULocale locale, String str)5495 public static String toLowerCase(ULocale locale, String str) { 5496 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5497 } 5498 5499 /** 5500 * <p>Returns the titlecase version of the argument string. 5501 * <p>Position for titlecasing is determined by the argument break 5502 * iterator, hence the user can customize his break iterator for 5503 * a specialized titlecasing. In this case only the forward iteration 5504 * needs to be implemented. 5505 * If the break iterator passed in is null, the default Unicode algorithm 5506 * will be used to determine the titlecase positions. 
5507 * 5508 * <p>Only positions returned by the break iterator will be title cased, 5509 * character in between the positions will all be in lower case. 5510 * <p>Casing is dependent on the argument locale and context-sensitive 5511 * @param locale which string is to be converted in 5512 * @param str source string to be performed on 5513 * @param breakiter break iterator to determine the positions in which 5514 * the character should be title cased. 5515 * @return titlecase version of the argument string 5516 * @stable ICU 2.6 5517 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)5518 public static String toTitleCase(Locale locale, String str, 5519 BreakIterator breakiter) 5520 { 5521 return toTitleCase(locale, str, breakiter, 0); 5522 } 5523 5524 /** 5525 * <p>Returns the titlecase version of the argument string. 5526 * <p>Position for titlecasing is determined by the argument break 5527 * iterator, hence the user can customize his break iterator for 5528 * a specialized titlecasing. In this case only the forward iteration 5529 * needs to be implemented. 5530 * If the break iterator passed in is null, the default Unicode algorithm 5531 * will be used to determine the titlecase positions. 5532 * 5533 * <p>Only positions returned by the break iterator will be title cased, 5534 * character in between the positions will all be in lower case. 5535 * <p>Casing is dependent on the argument locale and context-sensitive 5536 * @param locale which string is to be converted in 5537 * @param str source string to be performed on 5538 * @param titleIter break iterator to determine the positions in which 5539 * the character should be title cased. 
5540 * @return titlecase version of the argument string 5541 * @stable ICU 3.2 5542 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)5543 public static String toTitleCase(ULocale locale, String str, 5544 BreakIterator titleIter) { 5545 return toTitleCase(locale, str, titleIter, 0); 5546 } 5547 5548 /** 5549 * <p>Returns the titlecase version of the argument string. 5550 * <p>Position for titlecasing is determined by the argument break 5551 * iterator, hence the user can customize his break iterator for 5552 * a specialized titlecasing. In this case only the forward iteration 5553 * needs to be implemented. 5554 * If the break iterator passed in is null, the default Unicode algorithm 5555 * will be used to determine the titlecase positions. 5556 * 5557 * <p>Only positions returned by the break iterator will be title cased, 5558 * character in between the positions will all be in lower case. 5559 * <p>Casing is dependent on the argument locale and context-sensitive 5560 * @param locale which string is to be converted in 5561 * @param str source string to be performed on 5562 * @param titleIter break iterator to determine the positions in which 5563 * the character should be title cased. 
5564 * @param options bit set to modify the titlecasing operation 5565 * @return titlecase version of the argument string 5566 * @stable ICU 3.8 5567 * @see #TITLECASE_NO_LOWERCASE 5568 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5569 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5570 public static String toTitleCase(ULocale locale, String str, 5571 BreakIterator titleIter, int options) { 5572 if (titleIter == null && locale == null) { 5573 locale = ULocale.getDefault(); 5574 } 5575 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5576 titleIter.setText(str); 5577 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5578 } 5579 5580 /** 5581 * {@icu} <p>Returns the titlecase version of the argument string. 5582 * <p>Position for titlecasing is determined by the argument break 5583 * iterator, hence the user can customize his break iterator for 5584 * a specialized titlecasing. In this case only the forward iteration 5585 * needs to be implemented. 5586 * If the break iterator passed in is null, the default Unicode algorithm 5587 * will be used to determine the titlecase positions. 5588 * 5589 * <p>Only positions returned by the break iterator will be title cased, 5590 * character in between the positions will all be in lower case. 5591 * <p>Casing is dependent on the argument locale and context-sensitive 5592 * @param locale which string is to be converted in 5593 * @param str source string to be performed on 5594 * @param titleIter break iterator to determine the positions in which 5595 * the character should be title cased. 
5596 * @param options bit set to modify the titlecasing operation 5597 * @return titlecase version of the argument string 5598 * @see #TITLECASE_NO_LOWERCASE 5599 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5600 * @stable ICU 54 5601 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5602 public static String toTitleCase(Locale locale, String str, 5603 BreakIterator titleIter, 5604 int options) { 5605 if (titleIter == null && locale == null) { 5606 locale = Locale.getDefault(); 5607 } 5608 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5609 titleIter.setText(str); 5610 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5611 } 5612 5613 /** 5614 * {@icu} The given character is mapped to its case folding equivalent according 5615 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5616 * folding equivalent, the character itself is returned. 5617 * 5618 * <p>This function only returns the simple, single-code point case mapping. 5619 * Full case mappings should be used whenever possible because they produce 5620 * better results by working on whole strings. 5621 * They can map to a result string with a different length as appropriate. 5622 * Full case mappings are applied by the case mapping functions 5623 * that take String parameters rather than code points (int). 5624 * See also the User Guide chapter on C/POSIX migration: 5625 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 5626 * 5627 * @param ch the character to be converted 5628 * @param defaultmapping Indicates whether the default mappings defined in 5629 * CaseFolding.txt are to be used, otherwise the 5630 * mappings for dotted I and dotless i marked with 5631 * 'T' in CaseFolding.txt are included. 5632 * @return the case folding equivalent of the character, if 5633 * any; otherwise the character itself. 
5634 * @see #foldCase(String, boolean) 5635 * @stable ICU 2.1 5636 */ foldCase(int ch, boolean defaultmapping)5637 public static int foldCase(int ch, boolean defaultmapping) { 5638 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5639 } 5640 5641 /** 5642 * {@icu} The given string is mapped to its case folding equivalent according to 5643 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5644 * folding equivalent, the character itself is returned. 5645 * "Full", multiple-code point case folding mappings are returned here. 5646 * For "simple" single-code point mappings use the API 5647 * foldCase(int ch, boolean defaultmapping). 5648 * @param str the String to be converted 5649 * @param defaultmapping Indicates whether the default mappings defined in 5650 * CaseFolding.txt are to be used, otherwise the 5651 * mappings for dotted I and dotless i marked with 5652 * 'T' in CaseFolding.txt are included. 5653 * @return the case folding equivalent of the character, if 5654 * any; otherwise the character itself. 5655 * @see #foldCase(int, boolean) 5656 * @stable ICU 2.1 5657 */ foldCase(String str, boolean defaultmapping)5658 public static String foldCase(String str, boolean defaultmapping) { 5659 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5660 } 5661 5662 /** 5663 * {@icu} Option value for case folding: use default mappings defined in 5664 * CaseFolding.txt. 5665 * @stable ICU 2.6 5666 */ 5667 public static final int FOLD_CASE_DEFAULT = 0x0000; 5668 /** 5669 * {@icu} Option value for case folding: 5670 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 5671 * and dotless i appropriately for Turkic languages (tr, az). 5672 * 5673 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 5674 * are to be included for default mappings and 5675 * excluded for the Turkic-specific mappings. 
5676 * 5677 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 5678 * are to be excluded for default mappings and 5679 * included for the Turkic-specific mappings. 5680 * 5681 * @stable ICU 2.6 5682 */ 5683 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 5684 5685 /** 5686 * {@icu} The given character is mapped to its case folding equivalent according 5687 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5688 * folding equivalent, the character itself is returned. 5689 * 5690 * <p>This function only returns the simple, single-code point case mapping. 5691 * Full case mappings should be used whenever possible because they produce 5692 * better results by working on whole strings. 5693 * They can map to a result string with a different length as appropriate. 5694 * Full case mappings are applied by the case mapping functions 5695 * that take String parameters rather than code points (int). 5696 * See also the User Guide chapter on C/POSIX migration: 5697 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 5698 * 5699 * @param ch the character to be converted 5700 * @param options A bit set for special processing. Currently the recognised options 5701 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5702 * @return the case folding equivalent of the character, if any; otherwise the 5703 * character itself. 5704 * @see #foldCase(String, boolean) 5705 * @stable ICU 2.6 5706 */ foldCase(int ch, int options)5707 public static int foldCase(int ch, int options) { 5708 return UCaseProps.INSTANCE.fold(ch, options); 5709 } 5710 5711 /** 5712 * {@icu} The given string is mapped to its case folding equivalent according to 5713 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5714 * folding equivalent, the character itself is returned. 5715 * "Full", multiple-code point case folding mappings are returned here. 
5716 * For "simple" single-code point mappings use the API 5717 * foldCase(int ch, boolean defaultmapping). 5718 * @param str the String to be converted 5719 * @param options A bit set for special processing. Currently the recognised options 5720 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5721 * @return the case folding equivalent of the character, if any; otherwise the 5722 * character itself. 5723 * @see #foldCase(int, boolean) 5724 * @stable ICU 2.6 5725 */ foldCase(String str, int options)5726 public static final String foldCase(String str, int options) { 5727 return CaseMapImpl.fold(options, str); 5728 } 5729 5730 /** 5731 * {@icu} Returns the numeric value of a Han character. 5732 * 5733 * <p>This returns the value of Han 'numeric' code points, 5734 * including those for zero, ten, hundred, thousand, ten thousand, 5735 * and hundred million. 5736 * This includes both the standard and 'checkwriting' 5737 * characters, the 'big circle' zero character, and the standard 5738 * zero character. 5739 * 5740 * <p>Note: The Unicode Standard has numeric values for more 5741 * Han characters recognized by this method 5742 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 5743 * and a {@link com.ibm.icu.text.NumberFormat} can be used with 5744 * a Chinese {@link com.ibm.icu.text.NumberingSystem}. 5745 * 5746 * @param ch code point to query 5747 * @return value if it is a Han 'numeric character,' otherwise return -1. 
5748 * @stable ICU 2.4 5749 */ getHanNumericValue(int ch)5750 public static int getHanNumericValue(int ch) 5751 { 5752 switch(ch) 5753 { 5754 case IDEOGRAPHIC_NUMBER_ZERO_ : 5755 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 5756 return 0; // Han Zero 5757 case CJK_IDEOGRAPH_FIRST_ : 5758 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5759 return 1; // Han One 5760 case CJK_IDEOGRAPH_SECOND_ : 5761 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5762 return 2; // Han Two 5763 case CJK_IDEOGRAPH_THIRD_ : 5764 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5765 return 3; // Han Three 5766 case CJK_IDEOGRAPH_FOURTH_ : 5767 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5768 return 4; // Han Four 5769 case CJK_IDEOGRAPH_FIFTH_ : 5770 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5771 return 5; // Han Five 5772 case CJK_IDEOGRAPH_SIXTH_ : 5773 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5774 return 6; // Han Six 5775 case CJK_IDEOGRAPH_SEVENTH_ : 5776 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5777 return 7; // Han Seven 5778 case CJK_IDEOGRAPH_EIGHTH_ : 5779 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5780 return 8; // Han Eight 5781 case CJK_IDEOGRAPH_NINETH_ : 5782 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5783 return 9; // Han Nine 5784 case CJK_IDEOGRAPH_TEN_ : 5785 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5786 return 10; 5787 case CJK_IDEOGRAPH_HUNDRED_ : 5788 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5789 return 100; 5790 case CJK_IDEOGRAPH_THOUSAND_ : 5791 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5792 return 1000; 5793 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5794 return 10000; 5795 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5796 return 100000000; 5797 } 5798 return -1; // no value 5799 } 5800 5801 /** 5802 * {@icu} <p>Returns an iterator for character types, iterating over codepoints. 
5803 * <p>Example of use:<br> 5804 * <pre> 5805 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5806 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5807 * while (iterator.next(element)) { 5808 * System.out.println("Codepoint \\u" + 5809 * Integer.toHexString(element.start) + 5810 * " to codepoint \\u" + 5811 * Integer.toHexString(element.limit - 1) + 5812 * " has the character type " + 5813 * element.value); 5814 * } 5815 * </pre> 5816 * @return an iterator 5817 * @stable ICU 2.6 5818 */ getTypeIterator()5819 public static RangeValueIterator getTypeIterator() 5820 { 5821 return new UCharacterTypeIterator(); 5822 } 5823 5824 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()5825 UCharacterTypeIterator() { 5826 reset(); 5827 } 5828 5829 // implements RangeValueIterator 5830 @Override next(Element element)5831 public boolean next(Element element) { 5832 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5833 element.start=range.startCodePoint; 5834 element.limit=range.endCodePoint+1; 5835 element.value=range.value; 5836 return true; 5837 } else { 5838 return false; 5839 } 5840 } 5841 5842 // implements RangeValueIterator 5843 @Override reset()5844 public void reset() { 5845 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5846 } 5847 5848 private Iterator<Trie2.Range> trieIterator; 5849 private Trie2.Range range; 5850 5851 private static final class MaskType implements Trie2.ValueMapper { 5852 // Extracts the general category ("character type") from the trie value. 5853 @Override map(int value)5854 public int map(int value) { 5855 return value & UCharacterProperty.TYPE_MASK; 5856 } 5857 } 5858 private static final MaskType MASK_TYPE=new MaskType(); 5859 } 5860 5861 /** 5862 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 
5863 * <p>This API only gets the iterator for the modern, most up-to-date 5864 * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or 5865 * for extended names use getExtendedNameIterator(). 5866 * <p>Example of use:<br> 5867 * <pre> 5868 * ValueIterator iterator = UCharacter.getNameIterator(); 5869 * ValueIterator.Element element = new ValueIterator.Element(); 5870 * while (iterator.next(element)) { 5871 * System.out.println("Codepoint \\u" + 5872 * Integer.toHexString(element.codepoint) + 5873 * " has the name " + (String)element.value); 5874 * } 5875 * </pre> 5876 * <p>The maximal range which the name iterator iterates is from 5877 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 5878 * @return an iterator 5879 * @stable ICU 2.6 5880 */ getNameIterator()5881 public static ValueIterator getNameIterator(){ 5882 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5883 UCharacterNameChoice.UNICODE_CHAR_NAME); 5884 } 5885 5886 /** 5887 * {@icu} Returns an empty iterator. 5888 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 5889 * @return an empty iterator 5890 * @deprecated ICU 49 5891 * @see #getName1_0(int) 5892 */ 5893 @Deprecated getName1_0Iterator()5894 public static ValueIterator getName1_0Iterator(){ 5895 return new DummyValueIterator(); 5896 } 5897 5898 private static final class DummyValueIterator implements ValueIterator { 5899 @Override next(Element element)5900 public boolean next(Element element) { return false; } 5901 @Override reset()5902 public void reset() {} 5903 @Override setRange(int start, int limit)5904 public void setRange(int start, int limit) {} 5905 } 5906 5907 /** 5908 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5909 * <p>This API only gets the iterator for the extended names. 5910 * For modern, most up-to-date Unicode names use getNameIterator() or 5911 * for older 1.0 Unicode names use get1_0NameIterator(). 
5912 * <p>Example of use:<br> 5913 * <pre> 5914 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5915 * ValueIterator.Element element = new ValueIterator.Element(); 5916 * while (iterator.next(element)) { 5917 * System.out.println("Codepoint \\u" + 5918 * Integer.toHexString(element.codepoint) + 5919 * " has the name " + (String)element.value); 5920 * } 5921 * </pre> 5922 * <p>The maximal range which the name iterator iterates is from 5923 * @return an iterator 5924 * @stable ICU 2.6 5925 */ getExtendedNameIterator()5926 public static ValueIterator getExtendedNameIterator(){ 5927 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5928 UCharacterNameChoice.EXTENDED_CHAR_NAME); 5929 } 5930 5931 /** 5932 * {@icu} Returns the "age" of the code point. 5933 * <p>The "age" is the Unicode version when the code point was first 5934 * designated (as a non-character or for Private Use) or assigned a 5935 * character. 5936 * <p>This can be useful to avoid emitting code points to receiving 5937 * processes that do not accept newer characters. 5938 * <p>The data is from the UCD file DerivedAge.txt. 5939 * @param ch The code point. 5940 * @return the Unicode version number 5941 * @stable ICU 2.6 5942 */ getAge(int ch)5943 public static VersionInfo getAge(int ch) 5944 { 5945 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5946 throw new IllegalArgumentException("Codepoint out of bounds"); 5947 } 5948 return UCharacterProperty.INSTANCE.getAge(ch); 5949 } 5950 5951 /** 5952 * {@icu} Check a binary Unicode property for a code point. 5953 * <p>Unicode, especially in version 3.2, defines many more properties 5954 * than the original set in UnicodeData.txt. 5955 * <p>This API is intended to reflect Unicode properties as defined in 5956 * the Unicode Character Database (UCD) and Unicode Technical Reports 5957 * (UTR). 5958 * <p>For details about the properties see 5959 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 
5960 * <p>For names of Unicode properties see the UCD file 5961 * PropertyAliases.txt. 5962 * <p>This API does not check the validity of the codepoint. 5963 * <p>Important: If ICU is built with UCD files from Unicode versions 5964 * below 3.2, then properties marked with "new" are not or 5965 * not fully available. 5966 * @param ch code point to test. 5967 * @param property selector constant from com.ibm.icu.lang.UProperty, 5968 * identifies which binary property to check. 5969 * @return true or false according to the binary Unicode property value 5970 * for ch. Also false if property is out of bounds or if the 5971 * Unicode version does not have data for the property at all, or 5972 * not for this code point. 5973 * @see com.ibm.icu.lang.UProperty 5974 * @see CharacterProperties#getBinaryPropertySet(int) 5975 * @stable ICU 2.6 5976 */ hasBinaryProperty(int ch, int property)5977 public static boolean hasBinaryProperty(int ch, int property) 5978 { 5979 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5980 } 5981 5982 /** 5983 * {@icu} Returns true if the property is true for the string. 5984 * Same as {@link #hasBinaryProperty(int, int)} 5985 * if the string contains exactly one code point. 5986 * 5987 * <p>Most properties apply only to single code points. 5988 * <a href="https://www.unicode.org/reports/tr51/#Emoji_Sets">UTS #51 Unicode Emoji</a> 5989 * defines several properties of strings. 5990 * 5991 * @param s String to test. 5992 * @param property UProperty selector constant, identifies which binary property to check. 5993 * Must be BINARY_START<=which<BINARY_LIMIT. 5994 * @return true or false according to the binary Unicode property value for the string. 5995 * Also false if <code>property</code> is out of bounds or if the Unicode version 5996 * does not have data for the property at all. 
5997 * 5998 * @see com.ibm.icu.lang.UProperty 5999 * @see CharacterProperties#getBinaryPropertySet(int) 6000 * @stable ICU 70 6001 */ hasBinaryProperty(CharSequence s, int property)6002 public static boolean hasBinaryProperty(CharSequence s, int property) { 6003 int length = s.length(); 6004 if (length == 1) { 6005 return hasBinaryProperty(s.charAt(0), property); // single code point 6006 } else if (length == 2) { 6007 // first code point 6008 int c = Character.codePointAt(s, 0); 6009 if (Character.charCount(c) == length) { 6010 return hasBinaryProperty(c, property); // single code point 6011 } 6012 } 6013 // Only call into EmojiProps for a relevant property, 6014 // so that we not unnecessarily try to load its data file. 6015 return UProperty.BASIC_EMOJI <= property && property <= UProperty.RGI_EMOJI && 6016 EmojiProps.INSTANCE.hasBinaryProperty(s, property); 6017 } 6018 6019 /** 6020 * {@icu} <p>Check if a code point has the Alphabetic Unicode property. 6021 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 6022 * <p>Different from UCharacter.isLetter(ch)! 6023 * @stable ICU 2.6 6024 * @param ch codepoint to be tested 6025 */ isUAlphabetic(int ch)6026 public static boolean isUAlphabetic(int ch) 6027 { 6028 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 6029 } 6030 6031 /** 6032 * {@icu} <p>Check if a code point has the Lowercase Unicode property. 6033 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 6034 * <p>This is different from UCharacter.isLowerCase(ch)! 6035 * @param ch codepoint to be tested 6036 * @stable ICU 2.6 6037 */ isULowercase(int ch)6038 public static boolean isULowercase(int ch) 6039 { 6040 return hasBinaryProperty(ch, UProperty.LOWERCASE); 6041 } 6042 6043 /** 6044 * {@icu} <p>Check if a code point has the Uppercase Unicode property. 6045 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 6046 * <p>This is different from UCharacter.isUpperCase(ch)! 
6047 * @param ch codepoint to be tested 6048 * @stable ICU 2.6 6049 */ isUUppercase(int ch)6050 public static boolean isUUppercase(int ch) 6051 { 6052 return hasBinaryProperty(ch, UProperty.UPPERCASE); 6053 } 6054 6055 /** 6056 * {@icu} <p>Check if a code point has the White_Space Unicode property. 6057 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 6058 * <p>This is different from both UCharacter.isSpace(ch) and 6059 * UCharacter.isWhitespace(ch)! 6060 * @param ch codepoint to be tested 6061 * @stable ICU 2.6 6062 */ isUWhiteSpace(int ch)6063 public static boolean isUWhiteSpace(int ch) 6064 { 6065 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 6066 } 6067 6068 /** 6069 * {@icu} Returns the property value for a Unicode property type of a code point. 6070 * Also returns binary and mask property values. 6071 * <p>Unicode, especially in version 3.2, defines many more properties than 6072 * the original set in UnicodeData.txt. 6073 * <p>The properties APIs are intended to reflect Unicode properties as 6074 * defined in the Unicode Character Database (UCD) and Unicode Technical 6075 * Reports (UTR). For details about the properties see 6076 * http://www.unicode.org/. 6077 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 6078 * 6079 * <pre> 6080 * Sample usage: 6081 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 6082 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 6083 * boolean b = (ideo == 1) ? true : false; 6084 * </pre> 6085 * @param ch code point to test. 6086 * @param type UProperty selector constant, identifies which binary 6087 * property to check. Must be 6088 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 6089 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 6090 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 
6091 * @return numeric value that is directly the property value or, 6092 * for enumerated properties, corresponds to the numeric value of 6093 * the enumerated constant of the respective property value type 6094 * ({@link ECharacterCategory}, {@link ECharacterDirection}, 6095 * {@link DecompositionType}, etc.). 6096 * Returns 0 or 1 (for false / true) for binary Unicode properties. 6097 * Returns a bit-mask for mask properties. 6098 * Returns 0 if 'type' is out of bounds or if the Unicode version 6099 * does not have data for the property at all, or not for this code 6100 * point. 6101 * @see UProperty 6102 * @see #hasBinaryProperty 6103 * @see #getIntPropertyMinValue 6104 * @see #getIntPropertyMaxValue 6105 * @see CharacterProperties#getIntPropertyMap(int) 6106 * @see #getUnicodeVersion 6107 * @stable ICU 2.4 6108 */ getIntPropertyValue(int ch, int type)6109 public static int getIntPropertyValue(int ch, int type) 6110 { 6111 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 6112 } 6113 /** 6114 * {@icu} Returns a string version of the property value. 6115 * @param propertyEnum The property enum value. 6116 * @param codepoint The codepoint value. 6117 * @param nameChoice The choice of the name. 6118 * @return value as string 6119 * @internal 6120 * @deprecated This API is ICU internal only. 
6121 */ 6122 @Deprecated 6123 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)6124 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 6125 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 6126 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 6127 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 6128 nameChoice); 6129 } 6130 if (propertyEnum == UProperty.NUMERIC_VALUE) { 6131 return String.valueOf(getUnicodeNumericValue(codepoint)); 6132 } 6133 // otherwise must be string property 6134 switch (propertyEnum) { 6135 case UProperty.AGE: return getAge(codepoint).toString(); 6136 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 6137 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 6138 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 6139 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 6140 case UProperty.NAME: return getName(codepoint); 6141 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 6142 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 6143 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 6144 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 6145 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 6146 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 6147 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 6148 } 6149 throw new IllegalArgumentException("Illegal Property Enum"); 6150 } 6151 ///CLOVER:ON 6152 6153 /** 6154 * {@icu} Returns the minimum value for an integer/binary Unicode property type. 
6155 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 6156 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 6157 * @param type UProperty selector constant, identifies which binary 6158 * property to check. Must be 6159 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 6160 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 6161 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 6162 * for a Unicode property. 0 if the property 6163 * selector 'type' is out of range. 6164 * @see UProperty 6165 * @see #hasBinaryProperty 6166 * @see #getUnicodeVersion 6167 * @see #getIntPropertyMaxValue 6168 * @see #getIntPropertyValue 6169 * @stable ICU 2.4 6170 */ getIntPropertyMinValue(int type)6171 public static int getIntPropertyMinValue(int type){ 6172 6173 return 0; // undefined; and: all other properties have a minimum value of 0 6174 } 6175 6176 6177 /** 6178 * {@icu} Returns the maximum value for an integer/binary Unicode property. 6179 * Can be used together with UCharacter.getIntPropertyMinValue(int) 6180 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 6181 * Examples for min/max values (for Unicode 3.2): 6182 * <ul> 6183 * <li> UProperty.BIDI_CLASS: 0/18 6184 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 6185 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 6186 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 6187 * </ul> 6188 * For undefined UProperty constant values, min/max values will be 0/-1. 6189 * @param type UProperty selector constant, identifies which binary 6190 * property to check. Must be 6191 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 6192 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 6193 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 6194 * property. <= 0 if the property selector 'type' is out of range. 
6195 * @see UProperty 6196 * @see #hasBinaryProperty 6197 * @see #getUnicodeVersion 6198 * @see #getIntPropertyMaxValue 6199 * @see #getIntPropertyValue 6200 * @stable ICU 2.4 6201 */ getIntPropertyMaxValue(int type)6202 public static int getIntPropertyMaxValue(int type) 6203 { 6204 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 6205 } 6206 6207 /** 6208 * Provide the java.lang.Character forDigit API, for convenience. 6209 * @stable ICU 3.0 6210 */ forDigit(int digit, int radix)6211 public static char forDigit(int digit, int radix) { 6212 return java.lang.Character.forDigit(digit, radix); 6213 } 6214 6215 // JDK 1.5 API coverage 6216 6217 /** 6218 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 6219 * 6220 * @stable ICU 3.0 6221 */ 6222 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 6223 6224 /** 6225 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 6226 * 6227 * @stable ICU 3.0 6228 */ 6229 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 6230 6231 /** 6232 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 6233 * 6234 * @stable ICU 3.0 6235 */ 6236 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 6237 6238 /** 6239 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 6240 * 6241 * @stable ICU 3.0 6242 */ 6243 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 6244 6245 /** 6246 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 6247 * 6248 * @stable ICU 3.0 6249 */ 6250 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 6251 6252 /** 6253 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 6254 * 6255 * @stable ICU 3.0 6256 */ 6257 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 6258 6259 /** 6260 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 
*
     * @stable ICU 3.0
     */
    public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;

    /**
     * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}.
     *
     * @stable ICU 3.0
     */
    public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;

    /**
     * Constant U+0000, same as {@link Character#MIN_CODE_POINT}.
     *
     * @stable ICU 3.0
     */
    public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;

    /**
     * Equivalent to {@link Character#isValidCodePoint}.
     *
     * @param cp the code point to check
     * @return true if cp lies in the range 0..{@link #MAX_CODE_POINT}, inclusive
     * @stable ICU 3.0
     */
    public static final boolean isValidCodePoint(int cp) {
        // Valid means "not below zero and not above the last code point".
        return !(cp < 0 || cp > MAX_CODE_POINT);
    }

    /**
     * Same as {@link Character#isSupplementaryCodePoint}.
     *
     * @param cp the code point to check
     * @return true if cp is a supplementary code point
     * @stable ICU 3.0
     */
    public static final boolean isSupplementaryCodePoint(int cp) {
        return Character.isSupplementaryCodePoint(cp);
    }

    /**
     * Same as {@link Character#isHighSurrogate},
     * except that the ICU version accepts <code>int</code> for code points.
     *
     * @param codePoint the code point to check
     *        (In ICU 3.0-69 the type of this parameter was <code>char</code>.)
     * @return true if codePoint is a high (lead) surrogate
     * @stable ICU 70
     */
    public static boolean isHighSurrogate(int codePoint) {
        // Mask the value and compare the result with the lead-surrogate bit pattern.
        return LEAD_SURROGATE_BITS == (codePoint & LEAD_SURROGATE_BITMASK);
    }

    // BEGIN Android patch: Keep the `char` version on Android.
See ICU-21655 6316 /** 6317 * Same as {@link Character#isHighSurrogate}, 6318 * 6319 * @param ch the char to check 6320 * @return true if ch is a high (lead) surrogate 6321 * @stable ICU 3.0 6322 */ isHighSurrogate(char ch)6323 public static boolean isHighSurrogate(char ch) { 6324 return isHighSurrogate((int) ch); 6325 } 6326 // END Android patch: Keep the `char` version on Android. See ICU-21655 6327 6328 /** 6329 * Same as {@link Character#isLowSurrogate}, 6330 * except that the ICU version accepts <code>int</code> for code points. 6331 * 6332 * @param codePoint the code point to check 6333 * (In ICU 3.0-69 the type of this parameter was <code>char</code>.) 6334 * @return true if codePoint is a low (trail) surrogate 6335 * @stable ICU 70 6336 */ isLowSurrogate(int codePoint)6337 public static boolean isLowSurrogate(int codePoint) { 6338 return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS; 6339 } 6340 6341 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 6342 /** 6343 * Same as {@link Character#isLowSurrogate}, 6344 * 6345 * @param ch the char to check 6346 * @return true if ch is a low (trail) surrogate 6347 * @stable ICU 3.0 6348 */ isLowSurrogate(char ch)6349 public static boolean isLowSurrogate(char ch) { 6350 return isLowSurrogate((int) ch); 6351 } 6352 // END Android patch: Keep the `char` version on Android. See ICU-21655 6353 6354 /** 6355 * Same as {@link Character#isSurrogatePair}, 6356 * except that the ICU version accepts <code>int</code> for code points. 6357 * 6358 * @param high the high (lead) unit 6359 * (In ICU 3.0-69 the type of both parameters was <code>char</code>.) 
6360 * @param low the low (trail) unit 6361 * @return true if high, low form a surrogate pair 6362 * @stable ICU 70 6363 */ isSurrogatePair(int high, int low)6364 public static final boolean isSurrogatePair(int high, int low) { 6365 return isHighSurrogate(high) && isLowSurrogate(low); 6366 } 6367 6368 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 6369 /** 6370 * Same as {@link Character#isSurrogatePair}. 6371 * 6372 * @param high the high (lead) char 6373 * @param low the low (trail) char 6374 * @return true if high, low form a surrogate pair 6375 * @stable ICU 3.0 6376 */ isSurrogatePair(char high, char low)6377 public static final boolean isSurrogatePair(char high, char low) { 6378 return isSurrogatePair((int) high, (int) low); 6379 } 6380 // END Android patch: Keep the `char` version on Android. See ICU-21655 6381 6382 /** 6383 * Same as {@link Character#charCount}. 6384 * Returns the number of chars needed to represent the code point (1 or 2). 6385 * This does not check the code point for validity. 6386 * 6387 * @param cp the code point to check 6388 * @return the number of chars needed to represent the code point 6389 * @stable ICU 3.0 6390 */ charCount(int cp)6391 public static int charCount(int cp) { 6392 return Character.charCount(cp); 6393 } 6394 6395 /** 6396 * Same as {@link Character#toCodePoint}, 6397 * except that the ICU version accepts <code>int</code> for code points. 6398 * Returns the code point represented by the two surrogate code units. 6399 * This does not check the surrogate pair for validity. 6400 * 6401 * @param high the high (lead) surrogate 6402 * (In ICU 3.0-69 the type of both parameters was <code>char</code>.) 
6403 * @param low the low (trail) surrogate 6404 * @return the code point formed by the surrogate pair 6405 * @stable ICU 70 6406 * @see #getCodePoint(int, int) 6407 */ toCodePoint(int high, int low)6408 public static final int toCodePoint(int high, int low) { 6409 // see ICU4C U16_GET_SUPPLEMENTARY() 6410 return (high << 10) + low - U16_SURROGATE_OFFSET; 6411 } 6412 6413 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 6414 /** 6415 * Same as {@link Character#toCodePoint}. 6416 * Returns the code point represented by the two surrogate code units. 6417 * This does not check the surrogate pair for validity. 6418 * 6419 * @param high the high (lead) surrogate 6420 * @param low the low (trail) surrogate 6421 * @return the code point formed by the surrogate pair 6422 * @stable ICU 3.0 6423 */ toCodePoint(char high, char low)6424 public static final int toCodePoint(char high, char low) { 6425 return toCodePoint((int) high, (int) low); 6426 } 6427 // END Android patch: Keep the `char` version on Android. See ICU-21655 6428 6429 /** 6430 * Same as {@link Character#codePointAt(CharSequence, int)}. 6431 * Returns the code point at index. 6432 * This examines only the characters at index and index+1. 6433 * 6434 * @param seq the characters to check 6435 * @param index the index of the first or only char forming the code point 6436 * @return the code point at the index 6437 * @stable ICU 3.0 6438 */ codePointAt(CharSequence seq, int index)6439 public static final int codePointAt(CharSequence seq, int index) { 6440 char c1 = seq.charAt(index++); 6441 if (isHighSurrogate(c1)) { 6442 if (index < seq.length()) { 6443 char c2 = seq.charAt(index); 6444 if (isLowSurrogate(c2)) { 6445 return toCodePoint(c1, c2); 6446 } 6447 } 6448 } 6449 return c1; 6450 } 6451 6452 /** 6453 * Same as {@link Character#codePointAt(char[], int)}. 6454 * Returns the code point at index. 6455 * This examines only the characters at index and index+1. 
6456 * 6457 * @param text the characters to check 6458 * @param index the index of the first or only char forming the code point 6459 * @return the code point at the index 6460 * @stable ICU 3.0 6461 */ codePointAt(char[] text, int index)6462 public static final int codePointAt(char[] text, int index) { 6463 char c1 = text[index++]; 6464 if (isHighSurrogate(c1)) { 6465 if (index < text.length) { 6466 char c2 = text[index]; 6467 if (isLowSurrogate(c2)) { 6468 return toCodePoint(c1, c2); 6469 } 6470 } 6471 } 6472 return c1; 6473 } 6474 6475 /** 6476 * Same as {@link Character#codePointAt(char[], int, int)}. 6477 * Returns the code point at index. 6478 * This examines only the characters at index and index+1. 6479 * 6480 * @param text the characters to check 6481 * @param index the index of the first or only char forming the code point 6482 * @param limit the limit of the valid text 6483 * @return the code point at the index 6484 * @stable ICU 3.0 6485 */ codePointAt(char[] text, int index, int limit)6486 public static final int codePointAt(char[] text, int index, int limit) { 6487 if (index >= limit || limit > text.length) { 6488 throw new IndexOutOfBoundsException(); 6489 } 6490 char c1 = text[index++]; 6491 if (isHighSurrogate(c1)) { 6492 if (index < limit) { 6493 char c2 = text[index]; 6494 if (isLowSurrogate(c2)) { 6495 return toCodePoint(c1, c2); 6496 } 6497 } 6498 } 6499 return c1; 6500 } 6501 6502 /** 6503 * Same as {@link Character#codePointBefore(CharSequence, int)}. 6504 * Return the code point before index. 6505 * This examines only the characters at index-1 and index-2. 
6506 * 6507 * @param seq the characters to check 6508 * @param index the index after the last or only char forming the code point 6509 * @return the code point before the index 6510 * @stable ICU 3.0 6511 */ codePointBefore(CharSequence seq, int index)6512 public static final int codePointBefore(CharSequence seq, int index) { 6513 char c2 = seq.charAt(--index); 6514 if (isLowSurrogate(c2)) { 6515 if (index > 0) { 6516 char c1 = seq.charAt(--index); 6517 if (isHighSurrogate(c1)) { 6518 return toCodePoint(c1, c2); 6519 } 6520 } 6521 } 6522 return c2; 6523 } 6524 6525 /** 6526 * Same as {@link Character#codePointBefore(char[], int)}. 6527 * Returns the code point before index. 6528 * This examines only the characters at index-1 and index-2. 6529 * 6530 * @param text the characters to check 6531 * @param index the index after the last or only char forming the code point 6532 * @return the code point before the index 6533 * @stable ICU 3.0 6534 */ codePointBefore(char[] text, int index)6535 public static final int codePointBefore(char[] text, int index) { 6536 char c2 = text[--index]; 6537 if (isLowSurrogate(c2)) { 6538 if (index > 0) { 6539 char c1 = text[--index]; 6540 if (isHighSurrogate(c1)) { 6541 return toCodePoint(c1, c2); 6542 } 6543 } 6544 } 6545 return c2; 6546 } 6547 6548 /** 6549 * Same as {@link Character#codePointBefore(char[], int, int)}. 6550 * Return the code point before index. 6551 * This examines only the characters at index-1 and index-2. 
6552 * 6553 * @param text the characters to check 6554 * @param index the index after the last or only char forming the code point 6555 * @param limit the start of the valid text 6556 * @return the code point before the index 6557 * @stable ICU 3.0 6558 */ codePointBefore(char[] text, int index, int limit)6559 public static final int codePointBefore(char[] text, int index, int limit) { 6560 if (index <= limit || limit < 0) { 6561 throw new IndexOutOfBoundsException(); 6562 } 6563 char c2 = text[--index]; 6564 if (isLowSurrogate(c2)) { 6565 if (index > limit) { 6566 char c1 = text[--index]; 6567 if (isHighSurrogate(c1)) { 6568 return toCodePoint(c1, c2); 6569 } 6570 } 6571 } 6572 return c2; 6573 } 6574 6575 /** 6576 * Same as {@link Character#toChars(int, char[], int)}. 6577 * Writes the chars representing the 6578 * code point into the destination at the given index. 6579 * 6580 * @param cp the code point to convert 6581 * @param dst the destination array into which to put the char(s) representing the code point 6582 * @param dstIndex the index at which to put the first (or only) char 6583 * @return the count of the number of chars written (1 or 2) 6584 * @throws IllegalArgumentException if cp is not a valid code point 6585 * @stable ICU 3.0 6586 */ toChars(int cp, char[] dst, int dstIndex)6587 public static final int toChars(int cp, char[] dst, int dstIndex) { 6588 return Character.toChars(cp, dst, dstIndex); 6589 } 6590 6591 /** 6592 * Same as {@link Character#toChars(int)}. 6593 * Returns a char array representing the code point. 
6594 * 6595 * @param cp the code point to convert 6596 * @return an array containing the char(s) representing the code point 6597 * @throws IllegalArgumentException if cp is not a valid code point 6598 * @stable ICU 3.0 6599 */ toChars(int cp)6600 public static final char[] toChars(int cp) { 6601 return Character.toChars(cp); 6602 } 6603 6604 /** 6605 * Equivalent to the {@link Character#getDirectionality(char)} method, for 6606 * convenience. Returns a byte representing the directionality of the 6607 * character. 6608 * 6609 * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns 6610 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 6611 * 6612 * {@icunote} The return value must be tested using the constants defined in {@link 6613 * UCharacterDirection} and its interface {@link 6614 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 6615 * defined by <code>java.lang.Character</code>. 6616 * @param cp the code point to check 6617 * @return the directionality of the code point 6618 * @see #getDirection 6619 * @stable ICU 3.0 6620 */ getDirectionality(int cp)6621 public static byte getDirectionality(int cp) 6622 { 6623 return (byte)getDirection(cp); 6624 } 6625 6626 /** 6627 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 6628 * method, for convenience. Counts the number of code points in the range 6629 * of text. 
6630 * @param text the characters to check 6631 * @param start the start of the range 6632 * @param limit the limit of the range 6633 * @return the number of code points in the range 6634 * @stable ICU 3.0 6635 */ codePointCount(CharSequence text, int start, int limit)6636 public static int codePointCount(CharSequence text, int start, int limit) { 6637 if (start < 0 || limit < start || limit > text.length()) { 6638 throw new IndexOutOfBoundsException("start (" + start + 6639 ") or limit (" + limit + 6640 ") invalid or out of range 0, " + text.length()); 6641 } 6642 6643 int len = limit - start; 6644 while (limit > start) { 6645 char ch = text.charAt(--limit); 6646 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6647 ch = text.charAt(--limit); 6648 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6649 --len; 6650 break; 6651 } 6652 } 6653 } 6654 return len; 6655 } 6656 6657 /** 6658 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 6659 * convenience. Counts the number of code points in the range of text. 
6660 * @param text the characters to check 6661 * @param start the start of the range 6662 * @param limit the limit of the range 6663 * @return the number of code points in the range 6664 * @stable ICU 3.0 6665 */ codePointCount(char[] text, int start, int limit)6666 public static int codePointCount(char[] text, int start, int limit) { 6667 if (start < 0 || limit < start || limit > text.length) { 6668 throw new IndexOutOfBoundsException("start (" + start + 6669 ") or limit (" + limit + 6670 ") invalid or out of range 0, " + text.length); 6671 } 6672 6673 int len = limit - start; 6674 while (limit > start) { 6675 char ch = text[--limit]; 6676 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6677 ch = text[--limit]; 6678 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6679 --len; 6680 break; 6681 } 6682 } 6683 } 6684 return len; 6685 } 6686 6687 /** 6688 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 6689 * method, for convenience. Adjusts the char index by a code point offset. 
6690 * @param text the characters to check 6691 * @param index the index to adjust 6692 * @param codePointOffset the number of code points by which to offset the index 6693 * @return the adjusted index 6694 * @stable ICU 3.0 6695 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)6696 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 6697 if (index < 0 || index > text.length()) { 6698 throw new IndexOutOfBoundsException("index ( " + index + 6699 ") out of range 0, " + text.length()); 6700 } 6701 6702 if (codePointOffset < 0) { 6703 while (++codePointOffset <= 0) { 6704 char ch = text.charAt(--index); 6705 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 6706 ch = text.charAt(--index); 6707 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6708 if (++codePointOffset > 0) { 6709 return index+1; 6710 } 6711 } 6712 } 6713 } 6714 } else { 6715 int limit = text.length(); 6716 while (--codePointOffset >= 0) { 6717 char ch = text.charAt(index++); 6718 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6719 ch = text.charAt(index++); 6720 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6721 if (--codePointOffset < 0) { 6722 return index-1; 6723 } 6724 } 6725 } 6726 } 6727 } 6728 6729 return index; 6730 } 6731 6732 /** 6733 * Equivalent to the 6734 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 6735 * method, for convenience. Adjusts the char index by a code point offset. 
6736 * @param text the characters to check 6737 * @param start the start of the range to check 6738 * @param count the length of the range to check 6739 * @param index the index to adjust 6740 * @param codePointOffset the number of code points by which to offset the index 6741 * @return the adjusted index 6742 * @stable ICU 3.0 6743 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6744 public static int offsetByCodePoints(char[] text, int start, int count, int index, 6745 int codePointOffset) { 6746 int limit = start + count; 6747 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 6748 throw new IndexOutOfBoundsException("index ( " + index + 6749 ") out of range " + start + 6750 ", " + limit + 6751 " in array 0, " + text.length); 6752 } 6753 6754 if (codePointOffset < 0) { 6755 while (++codePointOffset <= 0) { 6756 char ch = text[--index]; 6757 if (index < start) { 6758 throw new IndexOutOfBoundsException("index ( " + index + 6759 ") < start (" + start + 6760 ")"); 6761 } 6762 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 6763 ch = text[--index]; 6764 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6765 if (++codePointOffset > 0) { 6766 return index+1; 6767 } 6768 } 6769 } 6770 } 6771 } else { 6772 while (--codePointOffset >= 0) { 6773 char ch = text[index++]; 6774 if (index > limit) { 6775 throw new IndexOutOfBoundsException("index ( " + index + 6776 ") > limit (" + limit + 6777 ")"); 6778 } 6779 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6780 ch = text[index++]; 6781 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6782 if (--codePointOffset < 0) { 6783 return index-1; 6784 } 6785 } 6786 } 6787 } 6788 } 6789 6790 return index; 6791 } 6792 6793 // private variables ------------------------------------------------- 6794 6795 /** 6796 * To get the last character out from a data type 6797 */ 6798 private 
static final int LAST_CHAR_MASK_ = 0xFFFF; 6799 6800 // /** 6801 // * To get the last byte out from a data type 6802 // */ 6803 // private static final int LAST_BYTE_MASK_ = 0xFF; 6804 // 6805 // /** 6806 // * Shift 16 bits 6807 // */ 6808 // private static final int SHIFT_16_ = 16; 6809 // 6810 // /** 6811 // * Shift 24 bits 6812 // */ 6813 // private static final int SHIFT_24_ = 24; 6814 // 6815 // /** 6816 // * Decimal radix 6817 // */ 6818 // private static final int DECIMAL_RADIX_ = 10; 6819 6820 /** 6821 * No break space code point 6822 */ 6823 private static final int NO_BREAK_SPACE_ = 0xA0; 6824 6825 /** 6826 * Figure space code point 6827 */ 6828 private static final int FIGURE_SPACE_ = 0x2007; 6829 6830 /** 6831 * Narrow no break space code point 6832 */ 6833 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 6834 6835 /** 6836 * Ideographic number zero code point 6837 */ 6838 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 6839 6840 /** 6841 * CJK Ideograph, First code point 6842 */ 6843 private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; 6844 6845 /** 6846 * CJK Ideograph, Second code point 6847 */ 6848 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 6849 6850 /** 6851 * CJK Ideograph, Third code point 6852 */ 6853 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 6854 6855 /** 6856 * CJK Ideograph, Fourth code point 6857 */ 6858 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db; 6859 6860 /** 6861 * CJK Ideograph, FIFTH code point 6862 */ 6863 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 6864 6865 /** 6866 * CJK Ideograph, Sixth code point 6867 */ 6868 private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 6869 6870 /** 6871 * CJK Ideograph, Seventh code point 6872 */ 6873 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 6874 6875 /** 6876 * CJK Ideograph, Eighth code point 6877 */ 6878 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 6879 6880 /** 6881 * CJK Ideograph, Nineth 
* code point: U+4E5D, the numeral nine.
     * (The constant keeps its historical spelling because it is referenced by name.)
     */
    private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

    /**
     * Application Program command code point
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator code point
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete code point
     */
    private static final int DELETE_ = 0x007F;

    /**
     * Han digit characters
     */
    private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
    private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
    private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
    private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
    private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
    private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
    private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
    private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
    private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
    private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
    private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
    private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
    private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
    private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
    private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
    private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
    // U+842C is the ideograph for ten thousand. The previous value 0x824c was a
    // digit transposition naming an unrelated ideograph with no numeric value.
    private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x842c;
    private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;

    // private constructor -----------------------------------------------
    ///CLOVER:OFF
    /**
     * Private constructor to prevent instantiation
     */
    private UCharacter() {
    }
    ///CLOVER:ON
}