1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 ******************************************************************************* 5 * Copyright (C) 1996-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.lang; 11 12 import java.lang.ref.SoftReference; 13 import java.util.HashMap; 14 import java.util.Iterator; 15 import java.util.Locale; 16 import java.util.Map; 17 18 import com.ibm.icu.impl.CaseMapImpl; 19 import com.ibm.icu.impl.EmojiProps; 20 import com.ibm.icu.impl.IllegalIcuArgumentException; 21 import com.ibm.icu.impl.Trie2; 22 import com.ibm.icu.impl.UBiDiProps; 23 import com.ibm.icu.impl.UCaseProps; 24 import com.ibm.icu.impl.UCharacterName; 25 import com.ibm.icu.impl.UCharacterNameChoice; 26 import com.ibm.icu.impl.UCharacterProperty; 27 import com.ibm.icu.impl.UCharacterUtility; 28 import com.ibm.icu.impl.UPropertyAliases; 29 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 30 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; 31 import com.ibm.icu.text.BreakIterator; 32 import com.ibm.icu.text.Normalizer2; 33 import com.ibm.icu.util.RangeValueIterator; 34 import com.ibm.icu.util.ULocale; 35 import com.ibm.icu.util.ValueIterator; 36 import com.ibm.icu.util.VersionInfo; 37 38 /** 39 * {@icuenhanced java.lang.Character}.{@icu _usage_} 40 * 41 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 42 * These extensions provide support for more Unicode properties. 43 * Each ICU release supports the latest version of Unicode available at that time. 44 * 45 * <p>For some time before Java 5 added support for supplementary Unicode code points, 46 * The ICU UCharacter class and many other ICU classes already supported them. 
47 * Some UCharacter methods and constants were widened slightly differently than 48 * how the Character class methods and constants were widened later. 49 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 50 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 51 * 52 * <p>Code points are represented in these APIs using ints. While it would be 53 * more convenient in Java to have a separate primitive datatype for them, 54 * ints suffice in the meantime. 55 * 56 * <p>To use this class please add the jar file name icu4j.jar to the 57 * class path, since it contains data files which supply the information used 58 * by this file.<br> 59 * E.g. in Windows <br> 60 * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br> 61 * Otherwise, another method would be to copy the files uprops.dat and 62 * unames.icu from the icu4j source subdirectory 63 * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 64 * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>. 65 * 66 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 67 * properties, the main differences between UCharacter and Character are: 68 * <ul> 69 * <li> UCharacter is not designed to be a char wrapper and does not have 70 * APIs that involve management of that single char.<br> 71 * These include: 72 * <ul> 73 * <li> char charValue(), 74 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 75 * </ul> 76 * <li> UCharacter does not include Character APIs that are deprecated, nor 77 * does it include the Java-specific character information, such as 78 * boolean isJavaIdentifierPart(char ch). 79 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 80 * values '10' - '35'. UCharacter also does this in digit and 81 * getNumericValue, to adhere to the Java semantics of these 82 * methods. 
New methods unicodeDigit and 83 * getUnicodeNumericValue do not treat the above code points 84 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 85 * </ul> 86 * <p> 87 * Further detail on differences can be determined using the program 88 * <a href= 89 * "https://github.com/unicode-org/icu/blob/main/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java"> 90 * com.ibm.icu.dev.test.lang.UCharacterCompare</a> 91 * <p> 92 * In addition to Java compatibility functions, which calculate derived properties, 93 * this API provides low-level access to the Unicode Character Database. 94 * <p> 95 * Unicode assigns each code point (not just assigned characters) values for 96 * many properties. 97 * Most of them are simple boolean flags, or constants from a small enumerated list. 98 * For some properties, values are strings or other relatively more complex types. 99 * <p> 100 * For more information see 101 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 102 * (http://www.unicode.org/ucd/) 103 * and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU 104 * User Guide chapter on Properties</a> 105 * (https://unicode-org.github.io/icu/userguide/strings/properties). 106 * <p> 107 * There are also functions that provide easy migration from C/POSIX functions 108 * like isblank(). Their use is generally discouraged because the C/POSIX 109 * standards do not define their semantics beyond the ASCII range, which means 110 * that different implementations exhibit very different behavior. 111 * Instead, Unicode properties should be used directly. 112 * <p> 113 * There are also only a few, broad C/POSIX character classes, and they tend 114 * to be used for conflicting purposes. 
For example, the "isalpha()" class 115 * is sometimes used to determine word boundaries, while a more sophisticated 116 * approach would at least distinguish initial letters from continuation 117 * characters (the latter including combining marks). 118 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 119 * Another example: There is no "istitle()" class for titlecase characters. 120 * <p> 121 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 122 * ICU implements them according to the Standard Recommendations in 123 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 124 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 125 * <p> 126 * API access for C/POSIX character classes is as follows: 127 * <pre>{@code 128 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 129 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 130 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 131 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 132 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 133 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 134 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 135 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 136 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 137 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 138 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 139 * - cntrl: getType(c)==CONTROL 140 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 141 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 142 * <p> 143 * The C/POSIX character classes are also available in UnicodeSet patterns, 144 * using patterns like [:graph:] or \p{graph}. 145 * 146 * <p>{@icunote} There are several ICU (and Java) whitespace functions. 
147 * Comparison:<ul> 148 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 149 * most of general categories "Z" (separators) + most whitespace ISO controls 150 * (including no-break spaces, but excluding IS1..IS4) 151 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 152 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 153 * 154 * <p> 155 * This class is not subclassable. 156 * 157 * @author Syn Wee Quek 158 * @stable ICU 2.1 159 * @see com.ibm.icu.lang.UCharacterEnums 160 */ 161 162 public final class UCharacter implements ECharacterCategory, ECharacterDirection 163 { 164 /** 165 * Lead surrogate bitmask 166 */ 167 private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00; 168 169 /** 170 * Trail surrogate bitmask 171 */ 172 private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00; 173 174 /** 175 * Lead surrogate bits 176 */ 177 private static final int LEAD_SURROGATE_BITS = 0xD800; 178 179 /** 180 * Trail surrogate bits 181 */ 182 private static final int TRAIL_SURROGATE_BITS = 0xDC00; 183 184 private static final int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000); 185 186 // public inner classes ---------------------------------------------- 187 188 /** 189 * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_} 190 * 191 * A family of character subsets representing the character blocks in the 192 * Unicode specification, generated from Unicode Data file Blocks.txt. 193 * Character blocks generally define characters used for a specific script 194 * or purpose. A character is contained by at most one Unicode block. 195 * 196 * {@icunote} All fields named XXX_ID are specific to ICU. 
197 * 198 * @stable ICU 2.4 199 */ 200 public static final class UnicodeBlock extends Character.Subset 201 { 202 // block id corresponding to icu4c ----------------------------------- 203 204 /** 205 * @stable ICU 2.4 206 */ 207 public static final int INVALID_CODE_ID = -1; 208 /** 209 * @stable ICU 2.4 210 */ 211 public static final int BASIC_LATIN_ID = 1; 212 /** 213 * @stable ICU 2.4 214 */ 215 public static final int LATIN_1_SUPPLEMENT_ID = 2; 216 /** 217 * @stable ICU 2.4 218 */ 219 public static final int LATIN_EXTENDED_A_ID = 3; 220 /** 221 * @stable ICU 2.4 222 */ 223 public static final int LATIN_EXTENDED_B_ID = 4; 224 /** 225 * @stable ICU 2.4 226 */ 227 public static final int IPA_EXTENSIONS_ID = 5; 228 /** 229 * @stable ICU 2.4 230 */ 231 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 232 /** 233 * @stable ICU 2.4 234 */ 235 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 236 /** 237 * Unicode 3.2 renames this block to "Greek and Coptic". 238 * @stable ICU 2.4 239 */ 240 public static final int GREEK_ID = 8; 241 /** 242 * @stable ICU 2.4 243 */ 244 public static final int CYRILLIC_ID = 9; 245 /** 246 * @stable ICU 2.4 247 */ 248 public static final int ARMENIAN_ID = 10; 249 /** 250 * @stable ICU 2.4 251 */ 252 public static final int HEBREW_ID = 11; 253 /** 254 * @stable ICU 2.4 255 */ 256 public static final int ARABIC_ID = 12; 257 /** 258 * @stable ICU 2.4 259 */ 260 public static final int SYRIAC_ID = 13; 261 /** 262 * @stable ICU 2.4 263 */ 264 public static final int THAANA_ID = 14; 265 /** 266 * @stable ICU 2.4 267 */ 268 public static final int DEVANAGARI_ID = 15; 269 /** 270 * @stable ICU 2.4 271 */ 272 public static final int BENGALI_ID = 16; 273 /** 274 * @stable ICU 2.4 275 */ 276 public static final int GURMUKHI_ID = 17; 277 /** 278 * @stable ICU 2.4 279 */ 280 public static final int GUJARATI_ID = 18; 281 /** 282 * @stable ICU 2.4 283 */ 284 public static final int ORIYA_ID = 19; 285 /** 286 * @stable ICU 2.4 287 */ 
288 public static final int TAMIL_ID = 20; 289 /** 290 * @stable ICU 2.4 291 */ 292 public static final int TELUGU_ID = 21; 293 /** 294 * @stable ICU 2.4 295 */ 296 public static final int KANNADA_ID = 22; 297 /** 298 * @stable ICU 2.4 299 */ 300 public static final int MALAYALAM_ID = 23; 301 /** 302 * @stable ICU 2.4 303 */ 304 public static final int SINHALA_ID = 24; 305 /** 306 * @stable ICU 2.4 307 */ 308 public static final int THAI_ID = 25; 309 /** 310 * @stable ICU 2.4 311 */ 312 public static final int LAO_ID = 26; 313 /** 314 * @stable ICU 2.4 315 */ 316 public static final int TIBETAN_ID = 27; 317 /** 318 * @stable ICU 2.4 319 */ 320 public static final int MYANMAR_ID = 28; 321 /** 322 * @stable ICU 2.4 323 */ 324 public static final int GEORGIAN_ID = 29; 325 /** 326 * @stable ICU 2.4 327 */ 328 public static final int HANGUL_JAMO_ID = 30; 329 /** 330 * @stable ICU 2.4 331 */ 332 public static final int ETHIOPIC_ID = 31; 333 /** 334 * @stable ICU 2.4 335 */ 336 public static final int CHEROKEE_ID = 32; 337 /** 338 * @stable ICU 2.4 339 */ 340 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 341 /** 342 * @stable ICU 2.4 343 */ 344 public static final int OGHAM_ID = 34; 345 /** 346 * @stable ICU 2.4 347 */ 348 public static final int RUNIC_ID = 35; 349 /** 350 * @stable ICU 2.4 351 */ 352 public static final int KHMER_ID = 36; 353 /** 354 * @stable ICU 2.4 355 */ 356 public static final int MONGOLIAN_ID = 37; 357 /** 358 * @stable ICU 2.4 359 */ 360 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 361 /** 362 * @stable ICU 2.4 363 */ 364 public static final int GREEK_EXTENDED_ID = 39; 365 /** 366 * @stable ICU 2.4 367 */ 368 public static final int GENERAL_PUNCTUATION_ID = 40; 369 /** 370 * @stable ICU 2.4 371 */ 372 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 373 /** 374 * @stable ICU 2.4 375 */ 376 public static final int CURRENCY_SYMBOLS_ID = 42; 377 /** 378 * Unicode 3.2 renames this block to "Combining 
Diacritical Marks for 379 * Symbols". 380 * @stable ICU 2.4 381 */ 382 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 383 /** 384 * @stable ICU 2.4 385 */ 386 public static final int LETTERLIKE_SYMBOLS_ID = 44; 387 /** 388 * @stable ICU 2.4 389 */ 390 public static final int NUMBER_FORMS_ID = 45; 391 /** 392 * @stable ICU 2.4 393 */ 394 public static final int ARROWS_ID = 46; 395 /** 396 * @stable ICU 2.4 397 */ 398 public static final int MATHEMATICAL_OPERATORS_ID = 47; 399 /** 400 * @stable ICU 2.4 401 */ 402 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 403 /** 404 * @stable ICU 2.4 405 */ 406 public static final int CONTROL_PICTURES_ID = 49; 407 /** 408 * @stable ICU 2.4 409 */ 410 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 411 /** 412 * @stable ICU 2.4 413 */ 414 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 415 /** 416 * @stable ICU 2.4 417 */ 418 public static final int BOX_DRAWING_ID = 52; 419 /** 420 * @stable ICU 2.4 421 */ 422 public static final int BLOCK_ELEMENTS_ID = 53; 423 /** 424 * @stable ICU 2.4 425 */ 426 public static final int GEOMETRIC_SHAPES_ID = 54; 427 /** 428 * @stable ICU 2.4 429 */ 430 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 431 /** 432 * @stable ICU 2.4 433 */ 434 public static final int DINGBATS_ID = 56; 435 /** 436 * @stable ICU 2.4 437 */ 438 public static final int BRAILLE_PATTERNS_ID = 57; 439 /** 440 * @stable ICU 2.4 441 */ 442 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 443 /** 444 * @stable ICU 2.4 445 */ 446 public static final int KANGXI_RADICALS_ID = 59; 447 /** 448 * @stable ICU 2.4 449 */ 450 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 451 /** 452 * @stable ICU 2.4 453 */ 454 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 455 /** 456 * @stable ICU 2.4 457 */ 458 public static final int HIRAGANA_ID = 62; 459 /** 460 * @stable ICU 2.4 461 */ 462 public static final int KATAKANA_ID = 63; 463 /** 464 * 
@stable ICU 2.4 465 */ 466 public static final int BOPOMOFO_ID = 64; 467 /** 468 * @stable ICU 2.4 469 */ 470 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 471 /** 472 * @stable ICU 2.4 473 */ 474 public static final int KANBUN_ID = 66; 475 /** 476 * @stable ICU 2.4 477 */ 478 public static final int BOPOMOFO_EXTENDED_ID = 67; 479 /** 480 * @stable ICU 2.4 481 */ 482 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 483 /** 484 * @stable ICU 2.4 485 */ 486 public static final int CJK_COMPATIBILITY_ID = 69; 487 /** 488 * @stable ICU 2.4 489 */ 490 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 491 /** 492 * @stable ICU 2.4 493 */ 494 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 495 /** 496 * @stable ICU 2.4 497 */ 498 public static final int YI_SYLLABLES_ID = 72; 499 /** 500 * @stable ICU 2.4 501 */ 502 public static final int YI_RADICALS_ID = 73; 503 /** 504 * @stable ICU 2.4 505 */ 506 public static final int HANGUL_SYLLABLES_ID = 74; 507 /** 508 * @stable ICU 2.4 509 */ 510 public static final int HIGH_SURROGATES_ID = 75; 511 /** 512 * @stable ICU 2.4 513 */ 514 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 515 /** 516 * @stable ICU 2.4 517 */ 518 public static final int LOW_SURROGATES_ID = 77; 519 /** 520 * Same as public static final int PRIVATE_USE. 521 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 522 * and multiple code point ranges had this block. 523 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 524 * and adds separate blocks for the supplementary PUAs. 525 * @stable ICU 2.4 526 */ 527 public static final int PRIVATE_USE_AREA_ID = 78; 528 /** 529 * Same as public static final int PRIVATE_USE_AREA. 530 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 531 * and multiple code point ranges had this block. 
532 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 533 * and adds separate blocks for the supplementary PUAs. 534 * @stable ICU 2.4 535 */ 536 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 537 /** 538 * @stable ICU 2.4 539 */ 540 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 541 /** 542 * @stable ICU 2.4 543 */ 544 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 545 /** 546 * @stable ICU 2.4 547 */ 548 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 549 /** 550 * @stable ICU 2.4 551 */ 552 public static final int COMBINING_HALF_MARKS_ID = 82; 553 /** 554 * @stable ICU 2.4 555 */ 556 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 557 /** 558 * @stable ICU 2.4 559 */ 560 public static final int SMALL_FORM_VARIANTS_ID = 84; 561 /** 562 * @stable ICU 2.4 563 */ 564 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 565 /** 566 * @stable ICU 2.4 567 */ 568 public static final int SPECIALS_ID = 86; 569 /** 570 * @stable ICU 2.4 571 */ 572 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 573 /** 574 * @stable ICU 2.4 575 */ 576 public static final int OLD_ITALIC_ID = 88; 577 /** 578 * @stable ICU 2.4 579 */ 580 public static final int GOTHIC_ID = 89; 581 /** 582 * @stable ICU 2.4 583 */ 584 public static final int DESERET_ID = 90; 585 /** 586 * @stable ICU 2.4 587 */ 588 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 589 /** 590 * @stable ICU 2.4 591 */ 592 public static final int MUSICAL_SYMBOLS_ID = 92; 593 /** 594 * @stable ICU 2.4 595 */ 596 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 597 /** 598 * @stable ICU 2.4 599 */ 600 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 601 /** 602 * @stable ICU 2.4 603 */ 604 public static final int 605 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 606 /** 607 * @stable ICU 2.4 608 */ 609 public static final int TAGS_ID = 96; 610 611 // New blocks in Unicode 
3.2 612 613 /** 614 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 615 * @stable ICU 2.4 616 */ 617 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 618 /** 619 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 620 * @stable ICU 3.0 621 */ 622 623 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 624 /** 625 * @stable ICU 2.4 626 */ 627 public static final int TAGALOG_ID = 98; 628 /** 629 * @stable ICU 2.4 630 */ 631 public static final int HANUNOO_ID = 99; 632 /** 633 * @stable ICU 2.4 634 */ 635 public static final int BUHID_ID = 100; 636 /** 637 * @stable ICU 2.4 638 */ 639 public static final int TAGBANWA_ID = 101; 640 /** 641 * @stable ICU 2.4 642 */ 643 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 644 /** 645 * @stable ICU 2.4 646 */ 647 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 648 /** 649 * @stable ICU 2.4 650 */ 651 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 652 /** 653 * @stable ICU 2.4 654 */ 655 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 656 /** 657 * @stable ICU 2.4 658 */ 659 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 660 /** 661 * @stable ICU 2.4 662 */ 663 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 664 /** 665 * @stable ICU 2.4 666 */ 667 public static final int VARIATION_SELECTORS_ID = 108; 668 /** 669 * @stable ICU 2.4 670 */ 671 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 672 /** 673 * @stable ICU 2.4 674 */ 675 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 676 677 /** 678 * @stable ICU 2.6 679 */ 680 public static final int LIMBU_ID = 111; /*[1900]*/ 681 /** 682 * @stable ICU 2.6 683 */ 684 public static final int TAI_LE_ID = 112; /*[1950]*/ 685 /** 686 * @stable ICU 2.6 687 */ 688 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 689 /** 690 * @stable ICU 2.6 691 */ 692 public static 
final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 693 /** 694 * @stable ICU 2.6 695 */ 696 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 697 /** 698 * @stable ICU 2.6 699 */ 700 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 701 /** 702 * @stable ICU 2.6 703 */ 704 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 705 /** 706 * @stable ICU 2.6 707 */ 708 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 709 /** 710 * @stable ICU 2.6 711 */ 712 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 713 /** 714 * @stable ICU 2.6 715 */ 716 public static final int UGARITIC_ID = 120; /*[10380]*/ 717 /** 718 * @stable ICU 2.6 719 */ 720 public static final int SHAVIAN_ID = 121; /*[10450]*/ 721 /** 722 * @stable ICU 2.6 723 */ 724 public static final int OSMANYA_ID = 122; /*[10480]*/ 725 /** 726 * @stable ICU 2.6 727 */ 728 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 729 /** 730 * @stable ICU 2.6 731 */ 732 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 733 /** 734 * @stable ICU 2.6 735 */ 736 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 737 738 /* New blocks in Unicode 4.1 */ 739 740 /** 741 * @stable ICU 3.4 742 */ 743 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 744 745 /** 746 * @stable ICU 3.4 747 */ 748 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 749 750 /** 751 * @stable ICU 3.4 752 */ 753 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 754 755 /** 756 * @stable ICU 3.4 757 */ 758 public static final int BUGINESE_ID = 129; /*[1A00]*/ 759 760 /** 761 * @stable ICU 3.4 762 */ 763 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 764 765 /** 766 * @stable ICU 3.4 767 */ 768 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 769 770 /** 771 * @stable ICU 3.4 772 */ 773 public static final int 
COPTIC_ID = 132; /*[2C80]*/ 774 775 /** 776 * @stable ICU 3.4 777 */ 778 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 779 780 /** 781 * @stable ICU 3.4 782 */ 783 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 784 785 /** 786 * @stable ICU 3.4 787 */ 788 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 789 790 /** 791 * @stable ICU 3.4 792 */ 793 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 794 795 /** 796 * @stable ICU 3.4 797 */ 798 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 799 800 /** 801 * @stable ICU 3.4 802 */ 803 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 804 805 /** 806 * @stable ICU 3.4 807 */ 808 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 809 810 /** 811 * @stable ICU 3.4 812 */ 813 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 814 815 /** 816 * @stable ICU 3.4 817 */ 818 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 819 820 /** 821 * @stable ICU 3.4 822 */ 823 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 824 825 /** 826 * @stable ICU 3.4 827 */ 828 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 829 830 /** 831 * @stable ICU 3.4 832 */ 833 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 834 835 /** 836 * @stable ICU 3.4 837 */ 838 public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 839 840 /* New blocks in Unicode 5.0 */ 841 842 /** 843 * @stable ICU 3.6 844 */ 845 public static final int NKO_ID = 146; /*[07C0]*/ 846 /** 847 * @stable ICU 3.6 848 */ 849 public static final int BALINESE_ID = 147; /*[1B00]*/ 850 /** 851 * @stable ICU 3.6 852 */ 853 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 854 /** 855 * @stable ICU 3.6 856 */ 857 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 858 /** 859 * @stable ICU 3.6 860 */ 861 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 862 /** 863 * @stable ICU 3.6 864 */ 865 public 
static final int PHOENICIAN_ID = 151; /*[10900]*/ 866 /** 867 * @stable ICU 3.6 868 */ 869 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 870 /** 871 * @stable ICU 3.6 872 */ 873 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 874 /** 875 * @stable ICU 3.6 876 */ 877 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 878 879 /** 880 * @stable ICU 4.0 881 */ 882 public static final int SUNDANESE_ID = 155; /* [1B80] */ 883 884 /** 885 * @stable ICU 4.0 886 */ 887 public static final int LEPCHA_ID = 156; /* [1C00] */ 888 889 /** 890 * @stable ICU 4.0 891 */ 892 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 893 894 /** 895 * @stable ICU 4.0 896 */ 897 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 898 899 /** 900 * @stable ICU 4.0 901 */ 902 public static final int VAI_ID = 159; /* [A500] */ 903 904 /** 905 * @stable ICU 4.0 906 */ 907 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 908 909 /** 910 * @stable ICU 4.0 911 */ 912 public static final int SAURASHTRA_ID = 161; /* [A880] */ 913 914 /** 915 * @stable ICU 4.0 916 */ 917 public static final int KAYAH_LI_ID = 162; /* [A900] */ 918 919 /** 920 * @stable ICU 4.0 921 */ 922 public static final int REJANG_ID = 163; /* [A930] */ 923 924 /** 925 * @stable ICU 4.0 926 */ 927 public static final int CHAM_ID = 164; /* [AA00] */ 928 929 /** 930 * @stable ICU 4.0 931 */ 932 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 933 934 /** 935 * @stable ICU 4.0 936 */ 937 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 938 939 /** 940 * @stable ICU 4.0 941 */ 942 public static final int LYCIAN_ID = 167; /* [10280] */ 943 944 /** 945 * @stable ICU 4.0 946 */ 947 public static final int CARIAN_ID = 168; /* [102A0] */ 948 949 /** 950 * @stable ICU 4.0 951 */ 952 public static final int LYDIAN_ID = 169; /* [10920] */ 953 954 /** 955 * @stable ICU 4.0 956 */ 957 public static final int 
MAHJONG_TILES_ID = 170; /* [1F000] */ 958 959 /** 960 * @stable ICU 4.0 961 */ 962 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 963 964 /* New blocks in Unicode 5.2 */ 965 966 /** @stable ICU 4.4 */ 967 public static final int SAMARITAN_ID = 172; /*[0800]*/ 968 /** @stable ICU 4.4 */ 969 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 970 /** @stable ICU 4.4 */ 971 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 972 /** @stable ICU 4.4 */ 973 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 974 /** @stable ICU 4.4 */ 975 public static final int LISU_ID = 176; /*[A4D0]*/ 976 /** @stable ICU 4.4 */ 977 public static final int BAMUM_ID = 177; /*[A6A0]*/ 978 /** @stable ICU 4.4 */ 979 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 980 /** @stable ICU 4.4 */ 981 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 982 /** @stable ICU 4.4 */ 983 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 984 /** @stable ICU 4.4 */ 985 public static final int JAVANESE_ID = 181; /*[A980]*/ 986 /** @stable ICU 4.4 */ 987 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 988 /** @stable ICU 4.4 */ 989 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 990 /** @stable ICU 4.4 */ 991 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 992 /** @stable ICU 4.4 */ 993 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 994 /** @stable ICU 4.4 */ 995 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 996 /** @stable ICU 4.4 */ 997 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 998 /** @stable ICU 4.4 */ 999 public static final int AVESTAN_ID = 188; /*[10B00]*/ 1000 /** @stable ICU 4.4 */ 1001 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 1002 /** @stable ICU 4.4 */ 1003 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 1004 /** @stable ICU 4.4 */ 1005 
public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
// NOTE(review): the bracketed hex value trailing each ID looks like the first
// code point of the block -- confirm against the Unicode Blocks.txt data file.
/** @stable ICU 4.4 */
public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
/** @stable ICU 4.4 */
public static final int KAITHI_ID = 193; /*[11080]*/
/** @stable ICU 4.4 */
public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
/** @stable ICU 4.4 */
public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
/** @stable ICU 4.4 */
public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
/** @stable ICU 4.4 */
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/

/* New blocks in Unicode 6.0 (IDs 198-209) */

/** @stable ICU 4.6 */
public static final int MANDAIC_ID = 198; /*[0840]*/
/** @stable ICU 4.6 */
public static final int BATAK_ID = 199; /*[1BC0]*/
/** @stable ICU 4.6 */
public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
/** @stable ICU 4.6 */
public static final int BRAHMI_ID = 201; /*[11000]*/
/** @stable ICU 4.6 */
public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
/** @stable ICU 4.6 */
public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
/** @stable ICU 4.6 */
public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
/** @stable ICU 4.6 */
public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
/** @stable ICU 4.6 */
public static final int EMOTICONS_ID = 206; /*[1F600]*/
/** @stable ICU 4.6 */
public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
/** @stable ICU 4.6 */
public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
/** @stable ICU 4.6 */
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/

/* New blocks in Unicode 6.1 (IDs 210-220) */

/** @stable ICU 49 */
public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
/** @stable ICU 49 */
public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
/** @stable ICU 49 */
public static final int CHAKMA_ID = 212; /*[11100]*/
/** @stable ICU 49 */
public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
/** @stable ICU 49 */
public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
/** @stable ICU 49 */
public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
/** @stable ICU 49 */
public static final int MIAO_ID = 216; /*[16F00]*/
/** @stable ICU 49 */
public static final int SHARADA_ID = 217; /*[11180]*/
/** @stable ICU 49 */
public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
/** @stable ICU 49 */
public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
/** @stable ICU 49 */
public static final int TAKRI_ID = 220; /*[11680]*/

/* New blocks in Unicode 7.0 (IDs 221-252) */

/** @stable ICU 54 */
public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
/** @stable ICU 54 */
public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
/** @stable ICU 54 */
public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
/** @stable ICU 54 */
public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
/** @stable ICU 54 */
public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
/** @stable ICU 54 */
public static final int ELBASAN_ID = 226; /*[10500]*/
/** @stable ICU 54 */
public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
/** @stable ICU 54 */
public static final int GRANTHA_ID = 228; /*[11300]*/
/** @stable ICU 54 */
public static final int KHOJKI_ID = 229; /*[11200]*/
/** @stable ICU 54 */
public static final int KHUDAWADI_ID = 230; /*[112B0]*/
/** @stable ICU 54 */
public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
/** @stable ICU 54 */
public static final int LINEAR_A_ID = 232; /*[10600]*/
/** @stable ICU 54 */
public static final int MAHAJANI_ID = 233; /*[11150]*/
/** @stable ICU 54 */
public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
/** @stable ICU 54 */
public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
/** @stable ICU 54 */
public static final int MODI_ID = 236; /*[11600]*/
/** @stable ICU 54 */
public static final int MRO_ID = 237; /*[16A40]*/
/** @stable ICU 54 */
public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
/** @stable ICU 54 */
public static final int NABATAEAN_ID = 239; /*[10880]*/
/** @stable ICU 54 */
public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
/** @stable ICU 54 */
public static final int OLD_PERMIC_ID = 241; /*[10350]*/
/** @stable ICU 54 */
public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
/** @stable ICU 54 */
public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
/** @stable ICU 54 */
public static final int PALMYRENE_ID = 244; /*[10860]*/
/** @stable ICU 54 */
public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
/** @stable ICU 54 */
public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
/** @stable ICU 54 */
public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
/** @stable ICU 54 */
public static final int SIDDHAM_ID = 248; /*[11580]*/
/** @stable ICU 54 */
public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
/** @stable ICU 54 */
public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
/** @stable ICU 54 */
public static final int TIRHUTA_ID = 251; /*[11480]*/
/** @stable ICU 54 */
public static final int WARANG_CITI_ID = 252; /*[118A0]*/

/* New blocks in Unicode 8.0 (IDs 253-262) */

/** @stable ICU 56 */
public static final int AHOM_ID = 253; /*[11700]*/
/** @stable ICU 56 */
public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
/** @stable ICU 56 */
public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
/** @stable ICU 56 */
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
/** @stable ICU 56 */
public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
/** @stable ICU 56 */
public static final int HATRAN_ID = 258; /*[108E0]*/
/** @stable ICU 56 */
public static final int MULTANI_ID = 259; /*[11280]*/
/** @stable ICU 56 */
public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
/** @stable ICU 56 */
public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
/** @stable ICU 56 */
public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/

/* New blocks in Unicode 9.0 (IDs 263-273) */

/** @stable ICU 58 */
public static final int ADLAM_ID = 263; /*[1E900]*/
/** @stable ICU 58 */
public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
/** @stable ICU 58 */
public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
/** @stable ICU 58 */
public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
/** @stable ICU 58 */
public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
/** @stable ICU 58 */
public static final int MARCHEN_ID = 268; /*[11C70]*/
/** @stable ICU 58 */
public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
/** @stable ICU 58 */
public static final int NEWA_ID = 270; /*[11400]*/
/** @stable ICU 58 */
public static final int OSAGE_ID = 271; /*[104B0]*/
/** @stable ICU 58 */
public static final int TANGUT_ID = 272; /*[17000]*/
/** @stable ICU 58 */
public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/

// New blocks in Unicode 10.0 (IDs 274-280)

/** @stable ICU 60 */
public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/
/** @stable ICU 60 */
public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/
/** @stable ICU 60 */
public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/
/** @stable ICU 60 */
public static final int NUSHU_ID = 277; /*[1B170]*/
/** @stable ICU 60 */
public static final int SOYOMBO_ID = 278; /*[11A50]*/
/** @stable ICU 60 */
public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/
/** @stable ICU 60 */
public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/

// New blocks in Unicode 11.0 (IDs 281-291)

/** @stable ICU 62 */
public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/
/** @stable ICU 62 */
public static final int DOGRA_ID = 282; /*[11800]*/
/** @stable ICU 62 */
public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/
/** @stable ICU 62 */
public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/
/** @stable ICU 62 */
public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/
/** @stable ICU 62 */
public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/
/** @stable ICU 62 */
public static final int MAKASAR_ID = 287; /*[11EE0]*/
/** @stable ICU 62 */
public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/
/** @stable ICU 62 */
public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/
/** @stable ICU 62 */
public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/
/** @stable ICU 62 */
public static final int SOGDIAN_ID = 291; /*[10F30]*/

// New blocks in Unicode 12.0 (IDs 292-300)

/** @stable ICU 64 */
public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/
/** @stable ICU 64 */
public static final int ELYMAIC_ID = 293; /*[10FE0]*/ 1234 /** @stable ICU 64 */ 1235 public static final int NANDINAGARI_ID = 294; /*[119A0]*/ 1236 /** @stable ICU 64 */ 1237 public static final int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/ 1238 /** @stable ICU 64 */ 1239 public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/ 1240 /** @stable ICU 64 */ 1241 public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/ 1242 /** @stable ICU 64 */ 1243 public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/ 1244 /** @stable ICU 64 */ 1245 public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/ 1246 /** @stable ICU 64 */ 1247 public static final int WANCHO_ID = 300; /*[1E2C0]*/ 1248 1249 // New blocks in Unicode 13.0 1250 1251 /** @stable ICU 66 */ 1252 public static final int CHORASMIAN_ID = 301; /*[10FB0]*/ 1253 /** @stable ICU 66 */ 1254 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/ 1255 /** @stable ICU 66 */ 1256 public static final int DIVES_AKURU_ID = 303; /*[11900]*/ 1257 /** @stable ICU 66 */ 1258 public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/ 1259 /** @stable ICU 66 */ 1260 public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/ 1261 /** @stable ICU 66 */ 1262 public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/ 1263 /** @stable ICU 66 */ 1264 public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/ 1265 /** @stable ICU 66 */ 1266 public static final int YEZIDI_ID = 308; /*[10E80]*/ 1267 1268 // New blocks in Unicode 14.0 1269 1270 /** @stable ICU 70 */ 1271 public static final int ARABIC_EXTENDED_B_ID = 309; /*[0870]*/ 1272 /** @stable ICU 70 */ 1273 public static final int CYPRO_MINOAN_ID = 310; /*[12F90]*/ 1274 /** @stable ICU 70 */ 1275 public static final int ETHIOPIC_EXTENDED_B_ID = 311; /*[1E7E0]*/ 1276 /** @stable ICU 70 */ 1277 public static final int KANA_EXTENDED_B_ID = 312; /*[1AFF0]*/ 1278 /** @stable ICU 
70 */ 1279 public static final int LATIN_EXTENDED_F_ID = 313; /*[10780]*/ 1280 /** @stable ICU 70 */ 1281 public static final int LATIN_EXTENDED_G_ID = 314; /*[1DF00]*/ 1282 /** @stable ICU 70 */ 1283 public static final int OLD_UYGHUR_ID = 315; /*[10F70]*/ 1284 /** @stable ICU 70 */ 1285 public static final int TANGSA_ID = 316; /*[16A70]*/ 1286 /** @stable ICU 70 */ 1287 public static final int TOTO_ID = 317; /*[1E290]*/ 1288 /** @stable ICU 70 */ 1289 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID = 318; /*[11AB0]*/ 1290 /** @stable ICU 70 */ 1291 public static final int VITHKUQI_ID = 319; /*[10570]*/ 1292 /** @stable ICU 70 */ 1293 public static final int ZNAMENNY_MUSICAL_NOTATION_ID = 320; /*[1CF00]*/ 1294 1295 /** 1296 * One more than the highest normal UnicodeBlock value. 1297 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 1298 * 1299 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
1300 */ 1301 @Deprecated 1302 public static final int COUNT = 321; 1303 1304 // blocks objects --------------------------------------------------- 1305 1306 /** 1307 * Array of UnicodeBlocks, for easy access in getInstance(int) 1308 */ 1309 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1310 1311 /** 1312 * @stable ICU 2.6 1313 */ 1314 public static final UnicodeBlock NO_BLOCK 1315 = new UnicodeBlock("NO_BLOCK", 0); 1316 1317 /** 1318 * @stable ICU 2.4 1319 */ 1320 public static final UnicodeBlock BASIC_LATIN 1321 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1322 /** 1323 * @stable ICU 2.4 1324 */ 1325 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1326 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1327 /** 1328 * @stable ICU 2.4 1329 */ 1330 public static final UnicodeBlock LATIN_EXTENDED_A 1331 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1332 /** 1333 * @stable ICU 2.4 1334 */ 1335 public static final UnicodeBlock LATIN_EXTENDED_B 1336 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1337 /** 1338 * @stable ICU 2.4 1339 */ 1340 public static final UnicodeBlock IPA_EXTENSIONS 1341 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1342 /** 1343 * @stable ICU 2.4 1344 */ 1345 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1346 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1347 /** 1348 * @stable ICU 2.4 1349 */ 1350 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1351 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1352 /** 1353 * Unicode 3.2 renames this block to "Greek and Coptic". 
1354 * @stable ICU 2.4 1355 */ 1356 public static final UnicodeBlock GREEK 1357 = new UnicodeBlock("GREEK", GREEK_ID); 1358 /** 1359 * @stable ICU 2.4 1360 */ 1361 public static final UnicodeBlock CYRILLIC 1362 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1363 /** 1364 * @stable ICU 2.4 1365 */ 1366 public static final UnicodeBlock ARMENIAN 1367 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1368 /** 1369 * @stable ICU 2.4 1370 */ 1371 public static final UnicodeBlock HEBREW 1372 = new UnicodeBlock("HEBREW", HEBREW_ID); 1373 /** 1374 * @stable ICU 2.4 1375 */ 1376 public static final UnicodeBlock ARABIC 1377 = new UnicodeBlock("ARABIC", ARABIC_ID); 1378 /** 1379 * @stable ICU 2.4 1380 */ 1381 public static final UnicodeBlock SYRIAC 1382 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1383 /** 1384 * @stable ICU 2.4 1385 */ 1386 public static final UnicodeBlock THAANA 1387 = new UnicodeBlock("THAANA", THAANA_ID); 1388 /** 1389 * @stable ICU 2.4 1390 */ 1391 public static final UnicodeBlock DEVANAGARI 1392 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1393 /** 1394 * @stable ICU 2.4 1395 */ 1396 public static final UnicodeBlock BENGALI 1397 = new UnicodeBlock("BENGALI", BENGALI_ID); 1398 /** 1399 * @stable ICU 2.4 1400 */ 1401 public static final UnicodeBlock GURMUKHI 1402 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1403 /** 1404 * @stable ICU 2.4 1405 */ 1406 public static final UnicodeBlock GUJARATI 1407 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1408 /** 1409 * @stable ICU 2.4 1410 */ 1411 public static final UnicodeBlock ORIYA 1412 = new UnicodeBlock("ORIYA", ORIYA_ID); 1413 /** 1414 * @stable ICU 2.4 1415 */ 1416 public static final UnicodeBlock TAMIL 1417 = new UnicodeBlock("TAMIL", TAMIL_ID); 1418 /** 1419 * @stable ICU 2.4 1420 */ 1421 public static final UnicodeBlock TELUGU 1422 = new UnicodeBlock("TELUGU", TELUGU_ID); 1423 /** 1424 * @stable ICU 2.4 1425 */ 1426 public static final UnicodeBlock KANNADA 1427 = new UnicodeBlock("KANNADA", KANNADA_ID); 1428 /** 
1429 * @stable ICU 2.4 1430 */ 1431 public static final UnicodeBlock MALAYALAM 1432 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1433 /** 1434 * @stable ICU 2.4 1435 */ 1436 public static final UnicodeBlock SINHALA 1437 = new UnicodeBlock("SINHALA", SINHALA_ID); 1438 /** 1439 * @stable ICU 2.4 1440 */ 1441 public static final UnicodeBlock THAI 1442 = new UnicodeBlock("THAI", THAI_ID); 1443 /** 1444 * @stable ICU 2.4 1445 */ 1446 public static final UnicodeBlock LAO 1447 = new UnicodeBlock("LAO", LAO_ID); 1448 /** 1449 * @stable ICU 2.4 1450 */ 1451 public static final UnicodeBlock TIBETAN 1452 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1453 /** 1454 * @stable ICU 2.4 1455 */ 1456 public static final UnicodeBlock MYANMAR 1457 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1458 /** 1459 * @stable ICU 2.4 1460 */ 1461 public static final UnicodeBlock GEORGIAN 1462 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1463 /** 1464 * @stable ICU 2.4 1465 */ 1466 public static final UnicodeBlock HANGUL_JAMO 1467 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1468 /** 1469 * @stable ICU 2.4 1470 */ 1471 public static final UnicodeBlock ETHIOPIC 1472 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1473 /** 1474 * @stable ICU 2.4 1475 */ 1476 public static final UnicodeBlock CHEROKEE 1477 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1478 /** 1479 * @stable ICU 2.4 1480 */ 1481 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1482 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1483 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1484 /** 1485 * @stable ICU 2.4 1486 */ 1487 public static final UnicodeBlock OGHAM 1488 = new UnicodeBlock("OGHAM", OGHAM_ID); 1489 /** 1490 * @stable ICU 2.4 1491 */ 1492 public static final UnicodeBlock RUNIC 1493 = new UnicodeBlock("RUNIC", RUNIC_ID); 1494 /** 1495 * @stable ICU 2.4 1496 */ 1497 public static final UnicodeBlock KHMER 1498 = new UnicodeBlock("KHMER", KHMER_ID); 1499 /** 1500 * @stable ICU 2.4 1501 */ 1502 
public static final UnicodeBlock MONGOLIAN 1503 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1504 /** 1505 * @stable ICU 2.4 1506 */ 1507 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1508 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1509 /** 1510 * @stable ICU 2.4 1511 */ 1512 public static final UnicodeBlock GREEK_EXTENDED 1513 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1514 /** 1515 * @stable ICU 2.4 1516 */ 1517 public static final UnicodeBlock GENERAL_PUNCTUATION 1518 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1519 /** 1520 * @stable ICU 2.4 1521 */ 1522 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1523 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1524 /** 1525 * @stable ICU 2.4 1526 */ 1527 public static final UnicodeBlock CURRENCY_SYMBOLS 1528 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1529 /** 1530 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1531 * Symbols". 
1532 * @stable ICU 2.4 1533 */ 1534 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1535 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1536 /** 1537 * @stable ICU 2.4 1538 */ 1539 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1540 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1541 /** 1542 * @stable ICU 2.4 1543 */ 1544 public static final UnicodeBlock NUMBER_FORMS 1545 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1546 /** 1547 * @stable ICU 2.4 1548 */ 1549 public static final UnicodeBlock ARROWS 1550 = new UnicodeBlock("ARROWS", ARROWS_ID); 1551 /** 1552 * @stable ICU 2.4 1553 */ 1554 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1555 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1556 /** 1557 * @stable ICU 2.4 1558 */ 1559 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1560 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1561 /** 1562 * @stable ICU 2.4 1563 */ 1564 public static final UnicodeBlock CONTROL_PICTURES 1565 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1566 /** 1567 * @stable ICU 2.4 1568 */ 1569 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1570 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1571 /** 1572 * @stable ICU 2.4 1573 */ 1574 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1575 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1576 /** 1577 * @stable ICU 2.4 1578 */ 1579 public static final UnicodeBlock BOX_DRAWING 1580 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1581 /** 1582 * @stable ICU 2.4 1583 */ 1584 public static final UnicodeBlock BLOCK_ELEMENTS 1585 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1586 /** 1587 * @stable ICU 2.4 1588 */ 1589 public static final UnicodeBlock GEOMETRIC_SHAPES 1590 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1591 
/** 1592 * @stable ICU 2.4 1593 */ 1594 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1595 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1596 /** 1597 * @stable ICU 2.4 1598 */ 1599 public static final UnicodeBlock DINGBATS 1600 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1601 /** 1602 * @stable ICU 2.4 1603 */ 1604 public static final UnicodeBlock BRAILLE_PATTERNS 1605 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1606 /** 1607 * @stable ICU 2.4 1608 */ 1609 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1610 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1611 /** 1612 * @stable ICU 2.4 1613 */ 1614 public static final UnicodeBlock KANGXI_RADICALS 1615 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1616 /** 1617 * @stable ICU 2.4 1618 */ 1619 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1620 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1621 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1622 /** 1623 * @stable ICU 2.4 1624 */ 1625 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1626 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1627 /** 1628 * @stable ICU 2.4 1629 */ 1630 public static final UnicodeBlock HIRAGANA 1631 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1632 /** 1633 * @stable ICU 2.4 1634 */ 1635 public static final UnicodeBlock KATAKANA 1636 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1637 /** 1638 * @stable ICU 2.4 1639 */ 1640 public static final UnicodeBlock BOPOMOFO 1641 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1642 /** 1643 * @stable ICU 2.4 1644 */ 1645 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1646 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1647 /** 1648 * @stable ICU 2.4 1649 */ 1650 public static final UnicodeBlock KANBUN 1651 = new UnicodeBlock("KANBUN", KANBUN_ID); 1652 /** 1653 * @stable ICU 2.4 1654 */ 1655 
public static final UnicodeBlock BOPOMOFO_EXTENDED 1656 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1657 /** 1658 * @stable ICU 2.4 1659 */ 1660 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1661 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1662 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1663 /** 1664 * @stable ICU 2.4 1665 */ 1666 public static final UnicodeBlock CJK_COMPATIBILITY 1667 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1668 /** 1669 * @stable ICU 2.4 1670 */ 1671 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1672 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1673 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1674 /** 1675 * @stable ICU 2.4 1676 */ 1677 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1678 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1679 /** 1680 * @stable ICU 2.4 1681 */ 1682 public static final UnicodeBlock YI_SYLLABLES 1683 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1684 /** 1685 * @stable ICU 2.4 1686 */ 1687 public static final UnicodeBlock YI_RADICALS 1688 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1689 /** 1690 * @stable ICU 2.4 1691 */ 1692 public static final UnicodeBlock HANGUL_SYLLABLES 1693 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1694 /** 1695 * @stable ICU 2.4 1696 */ 1697 public static final UnicodeBlock HIGH_SURROGATES 1698 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1699 /** 1700 * @stable ICU 2.4 1701 */ 1702 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1703 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1704 /** 1705 * @stable ICU 2.4 1706 */ 1707 public static final UnicodeBlock LOW_SURROGATES 1708 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1709 /** 1710 * Same as public static final int PRIVATE_USE. 
1711 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1712 * and multiple code point ranges had this block. 1713 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1714 * and adds separate blocks for the supplementary PUAs. 1715 * @stable ICU 2.4 1716 */ 1717 public static final UnicodeBlock PRIVATE_USE_AREA 1718 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1719 /** 1720 * Same as public static final int PRIVATE_USE_AREA. 1721 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1722 * and multiple code point ranges had this block. 1723 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1724 * and adds separate blocks for the supplementary PUAs. 1725 * @stable ICU 2.4 1726 */ 1727 public static final UnicodeBlock PRIVATE_USE 1728 = PRIVATE_USE_AREA; 1729 /** 1730 * @stable ICU 2.4 1731 */ 1732 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1733 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1734 /** 1735 * @stable ICU 2.4 1736 */ 1737 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1738 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1739 /** 1740 * @stable ICU 2.4 1741 */ 1742 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1743 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1744 /** 1745 * @stable ICU 2.4 1746 */ 1747 public static final UnicodeBlock COMBINING_HALF_MARKS 1748 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1749 /** 1750 * @stable ICU 2.4 1751 */ 1752 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1753 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1754 /** 1755 * @stable ICU 2.4 1756 */ 1757 public static final UnicodeBlock SMALL_FORM_VARIANTS 1758 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1759 /** 1760 * @stable ICU 2.4 1761 */ 1762 
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1763 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1764 /** 1765 * @stable ICU 2.4 1766 */ 1767 public static final UnicodeBlock SPECIALS 1768 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1769 /** 1770 * @stable ICU 2.4 1771 */ 1772 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1773 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1774 /** 1775 * @stable ICU 2.4 1776 */ 1777 public static final UnicodeBlock OLD_ITALIC 1778 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1779 /** 1780 * @stable ICU 2.4 1781 */ 1782 public static final UnicodeBlock GOTHIC 1783 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1784 /** 1785 * @stable ICU 2.4 1786 */ 1787 public static final UnicodeBlock DESERET 1788 = new UnicodeBlock("DESERET", DESERET_ID); 1789 /** 1790 * @stable ICU 2.4 1791 */ 1792 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1793 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1794 /** 1795 * @stable ICU 2.4 1796 */ 1797 public static final UnicodeBlock MUSICAL_SYMBOLS 1798 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1799 /** 1800 * @stable ICU 2.4 1801 */ 1802 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1803 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1804 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1805 /** 1806 * @stable ICU 2.4 1807 */ 1808 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1809 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1810 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1811 /** 1812 * @stable ICU 2.4 1813 */ 1814 public static final UnicodeBlock 1815 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1816 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1817 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1818 /** 1819 * @stable ICU 2.4 1820 */ 1821 public static final UnicodeBlock TAGS 
1822 = new UnicodeBlock("TAGS", TAGS_ID); 1823 1824 // New blocks in Unicode 3.2 1825 1826 /** 1827 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1828 * @stable ICU 2.4 1829 */ 1830 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1831 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1832 /** 1833 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1834 * @stable ICU 3.0 1835 */ 1836 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1837 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1838 /** 1839 * @stable ICU 2.4 1840 */ 1841 public static final UnicodeBlock TAGALOG 1842 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1843 /** 1844 * @stable ICU 2.4 1845 */ 1846 public static final UnicodeBlock HANUNOO 1847 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1848 /** 1849 * @stable ICU 2.4 1850 */ 1851 public static final UnicodeBlock BUHID 1852 = new UnicodeBlock("BUHID", BUHID_ID); 1853 /** 1854 * @stable ICU 2.4 1855 */ 1856 public static final UnicodeBlock TAGBANWA 1857 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1858 /** 1859 * @stable ICU 2.4 1860 */ 1861 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1862 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1863 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1864 /** 1865 * @stable ICU 2.4 1866 */ 1867 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1868 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1869 /** 1870 * @stable ICU 2.4 1871 */ 1872 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1873 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1874 /** 1875 * @stable ICU 2.4 1876 */ 1877 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1878 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1879 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1880 /** 1881 * @stable ICU 2.4 1882 */ 1883 
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1884 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1885 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1886 /** 1887 * @stable ICU 2.4 1888 */ 1889 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1890 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1891 /** 1892 * @stable ICU 2.4 1893 */ 1894 public static final UnicodeBlock VARIATION_SELECTORS 1895 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1896 /** 1897 * @stable ICU 2.4 1898 */ 1899 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1900 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1901 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1902 /** 1903 * @stable ICU 2.4 1904 */ 1905 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1906 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1907 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1908 1909 /** 1910 * @stable ICU 2.6 1911 */ 1912 public static final UnicodeBlock LIMBU 1913 = new UnicodeBlock("LIMBU", LIMBU_ID); 1914 /** 1915 * @stable ICU 2.6 1916 */ 1917 public static final UnicodeBlock TAI_LE 1918 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1919 /** 1920 * @stable ICU 2.6 1921 */ 1922 public static final UnicodeBlock KHMER_SYMBOLS 1923 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1924 1925 /** 1926 * @stable ICU 2.6 1927 */ 1928 public static final UnicodeBlock PHONETIC_EXTENSIONS 1929 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1930 1931 /** 1932 * @stable ICU 2.6 1933 */ 1934 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1935 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1936 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1937 /** 1938 * @stable ICU 2.6 1939 */ 1940 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1941 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1942 /** 1943 * @stable ICU 2.6 
1944 */ 1945 public static final UnicodeBlock LINEAR_B_SYLLABARY 1946 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1947 /** 1948 * @stable ICU 2.6 1949 */ 1950 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1951 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1952 /** 1953 * @stable ICU 2.6 1954 */ 1955 public static final UnicodeBlock AEGEAN_NUMBERS 1956 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1957 /** 1958 * @stable ICU 2.6 1959 */ 1960 public static final UnicodeBlock UGARITIC 1961 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1962 /** 1963 * @stable ICU 2.6 1964 */ 1965 public static final UnicodeBlock SHAVIAN 1966 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1967 /** 1968 * @stable ICU 2.6 1969 */ 1970 public static final UnicodeBlock OSMANYA 1971 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1972 /** 1973 * @stable ICU 2.6 1974 */ 1975 public static final UnicodeBlock CYPRIOT_SYLLABARY 1976 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1977 /** 1978 * @stable ICU 2.6 1979 */ 1980 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1981 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1982 1983 /** 1984 * @stable ICU 2.6 1985 */ 1986 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1987 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1988 1989 /* New blocks in Unicode 4.1 */ 1990 1991 /** 1992 * @stable ICU 3.4 1993 */ 1994 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1995 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1996 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1997 1998 /** 1999 * @stable ICU 3.4 2000 */ 2001 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2002 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 2003 2004 /** 2005 * @stable ICU 3.4 2006 */ 2007 public static final UnicodeBlock ARABIC_SUPPLEMENT = 2008 new 
UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 2009 2010 /** 2011 * @stable ICU 3.4 2012 */ 2013 public static final UnicodeBlock BUGINESE = 2014 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 2015 2016 /** 2017 * @stable ICU 3.4 2018 */ 2019 public static final UnicodeBlock CJK_STROKES = 2020 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 2021 2022 /** 2023 * @stable ICU 3.4 2024 */ 2025 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 2026 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 2027 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 2028 2029 /** 2030 * @stable ICU 3.4 2031 */ 2032 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 2033 2034 /** 2035 * @stable ICU 3.4 2036 */ 2037 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2038 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 2039 2040 /** 2041 * @stable ICU 3.4 2042 */ 2043 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 2044 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 2045 2046 /** 2047 * @stable ICU 3.4 2048 */ 2049 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2050 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 2051 2052 /** 2053 * @stable ICU 3.4 2054 */ 2055 public static final UnicodeBlock GLAGOLITIC = 2056 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 2057 2058 /** 2059 * @stable ICU 3.4 2060 */ 2061 public static final UnicodeBlock KHAROSHTHI = 2062 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 2063 2064 /** 2065 * @stable ICU 3.4 2066 */ 2067 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2068 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 2069 2070 /** 2071 * @stable ICU 3.4 2072 */ 2073 public static final UnicodeBlock NEW_TAI_LUE = 2074 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 
2075 2076 /** 2077 * @stable ICU 3.4 2078 */ 2079 public static final UnicodeBlock OLD_PERSIAN = 2080 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 2081 2082 /** 2083 * @stable ICU 3.4 2084 */ 2085 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 2086 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 2087 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 2088 2089 /** 2090 * @stable ICU 3.4 2091 */ 2092 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2093 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 2094 2095 /** 2096 * @stable ICU 3.4 2097 */ 2098 public static final UnicodeBlock SYLOTI_NAGRI = 2099 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 2100 2101 /** 2102 * @stable ICU 3.4 2103 */ 2104 public static final UnicodeBlock TIFINAGH = 2105 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 2106 2107 /** 2108 * @stable ICU 3.4 2109 */ 2110 public static final UnicodeBlock VERTICAL_FORMS = 2111 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 2112 2113 /** 2114 * @stable ICU 3.6 2115 */ 2116 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 2117 /** 2118 * @stable ICU 3.6 2119 */ 2120 public static final UnicodeBlock BALINESE = 2121 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 2122 /** 2123 * @stable ICU 3.6 2124 */ 2125 public static final UnicodeBlock LATIN_EXTENDED_C = 2126 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 2127 /** 2128 * @stable ICU 3.6 2129 */ 2130 public static final UnicodeBlock LATIN_EXTENDED_D = 2131 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 2132 /** 2133 * @stable ICU 3.6 2134 */ 2135 public static final UnicodeBlock PHAGS_PA = 2136 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 2137 /** 2138 * @stable ICU 3.6 2139 */ 2140 public static final UnicodeBlock PHOENICIAN = 2141 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); 
/*[10900]*/ 2142 /** 2143 * @stable ICU 3.6 2144 */ 2145 public static final UnicodeBlock CUNEIFORM = 2146 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 2147 /** 2148 * @stable ICU 3.6 2149 */ 2150 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2151 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2152 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 2153 /** 2154 * @stable ICU 3.6 2155 */ 2156 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2157 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 2158 2159 /** 2160 * @stable ICU 4.0 2161 */ 2162 public static final UnicodeBlock SUNDANESE = 2163 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 2164 2165 /** 2166 * @stable ICU 4.0 2167 */ 2168 public static final UnicodeBlock LEPCHA = 2169 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 2170 2171 /** 2172 * @stable ICU 4.0 2173 */ 2174 public static final UnicodeBlock OL_CHIKI = 2175 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 2176 2177 /** 2178 * @stable ICU 4.0 2179 */ 2180 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2181 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 2182 2183 /** 2184 * @stable ICU 4.0 2185 */ 2186 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 2187 2188 /** 2189 * @stable ICU 4.0 2190 */ 2191 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2192 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 2193 2194 /** 2195 * @stable ICU 4.0 2196 */ 2197 public static final UnicodeBlock SAURASHTRA = 2198 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 2199 2200 /** 2201 * @stable ICU 4.0 2202 */ 2203 public static final UnicodeBlock KAYAH_LI = 2204 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 2205 2206 /** 2207 * @stable ICU 4.0 2208 */ 2209 public static final UnicodeBlock REJANG = 2210 new UnicodeBlock("REJANG", 
REJANG_ID); /* [A930] */ 2211 2212 /** 2213 * @stable ICU 4.0 2214 */ 2215 public static final UnicodeBlock CHAM = 2216 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 2217 2218 /** 2219 * @stable ICU 4.0 2220 */ 2221 public static final UnicodeBlock ANCIENT_SYMBOLS = 2222 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 2223 2224 /** 2225 * @stable ICU 4.0 2226 */ 2227 public static final UnicodeBlock PHAISTOS_DISC = 2228 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 2229 2230 /** 2231 * @stable ICU 4.0 2232 */ 2233 public static final UnicodeBlock LYCIAN = 2234 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 2235 2236 /** 2237 * @stable ICU 4.0 2238 */ 2239 public static final UnicodeBlock CARIAN = 2240 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 2241 2242 /** 2243 * @stable ICU 4.0 2244 */ 2245 public static final UnicodeBlock LYDIAN = 2246 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 2247 2248 /** 2249 * @stable ICU 4.0 2250 */ 2251 public static final UnicodeBlock MAHJONG_TILES = 2252 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 2253 2254 /** 2255 * @stable ICU 4.0 2256 */ 2257 public static final UnicodeBlock DOMINO_TILES = 2258 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 2259 2260 /* New blocks in Unicode 5.2 */ 2261 2262 /** @stable ICU 4.4 */ 2263 public static final UnicodeBlock SAMARITAN = 2264 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 2265 /** @stable ICU 4.4 */ 2266 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 2267 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 2268 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 2269 /** @stable ICU 4.4 */ 2270 public static final UnicodeBlock TAI_THAM = 2271 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 2272 /** @stable ICU 4.4 */ 2273 public static final UnicodeBlock VEDIC_EXTENSIONS = 2274 new 
UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 2275 /** @stable ICU 4.4 */ 2276 public static final UnicodeBlock LISU = 2277 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 2278 /** @stable ICU 4.4 */ 2279 public static final UnicodeBlock BAMUM = 2280 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 2281 /** @stable ICU 4.4 */ 2282 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2283 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 2284 /** @stable ICU 4.4 */ 2285 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2286 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 2287 /** @stable ICU 4.4 */ 2288 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2289 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 2290 /** @stable ICU 4.4 */ 2291 public static final UnicodeBlock JAVANESE = 2292 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 2293 /** @stable ICU 4.4 */ 2294 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2295 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 2296 /** @stable ICU 4.4 */ 2297 public static final UnicodeBlock TAI_VIET = 2298 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 2299 /** @stable ICU 4.4 */ 2300 public static final UnicodeBlock MEETEI_MAYEK = 2301 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 2302 /** @stable ICU 4.4 */ 2303 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2304 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 2305 /** @stable ICU 4.4 */ 2306 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2307 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 2308 /** @stable ICU 4.4 */ 2309 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2310 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 2311 /** @stable ICU 4.4 */ 2312 public static final 
UnicodeBlock AVESTAN = 2313 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 2314 /** @stable ICU 4.4 */ 2315 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2316 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 2317 /** @stable ICU 4.4 */ 2318 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2319 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 2320 /** @stable ICU 4.4 */ 2321 public static final UnicodeBlock OLD_TURKIC = 2322 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 2323 /** @stable ICU 4.4 */ 2324 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2325 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 2326 /** @stable ICU 4.4 */ 2327 public static final UnicodeBlock KAITHI = 2328 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 2329 /** @stable ICU 4.4 */ 2330 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2331 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 2332 /** @stable ICU 4.4 */ 2333 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2334 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2335 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 2336 /** @stable ICU 4.4 */ 2337 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2338 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2339 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 2340 /** @stable ICU 4.4 */ 2341 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2342 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2343 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 2344 2345 /* New blocks in Unicode 6.0 */ 2346 2347 /** @stable ICU 4.6 */ 2348 public static final UnicodeBlock MANDAIC = 2349 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 2350 /** @stable ICU 4.6 */ 2351 public static final UnicodeBlock BATAK = 2352 new UnicodeBlock("BATAK", BATAK_ID); 
/*[1BC0]*/ 2353 /** @stable ICU 4.6 */ 2354 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2355 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 2356 /** @stable ICU 4.6 */ 2357 public static final UnicodeBlock BRAHMI = 2358 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 2359 /** @stable ICU 4.6 */ 2360 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2361 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 2362 /** @stable ICU 4.6 */ 2363 public static final UnicodeBlock KANA_SUPPLEMENT = 2364 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 2365 /** @stable ICU 4.6 */ 2366 public static final UnicodeBlock PLAYING_CARDS = 2367 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 2368 /** @stable ICU 4.6 */ 2369 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2370 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2371 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 2372 /** @stable ICU 4.6 */ 2373 public static final UnicodeBlock EMOTICONS = 2374 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 2375 /** @stable ICU 4.6 */ 2376 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2377 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 2378 /** @stable ICU 4.6 */ 2379 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2380 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 2381 /** @stable ICU 4.6 */ 2382 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2383 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2384 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 2385 2386 /* New blocks in Unicode 6.1 */ 2387 2388 /** @stable ICU 49 */ 2389 public static final UnicodeBlock ARABIC_EXTENDED_A = 2390 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 2391 /** @stable ICU 49 */ 2392 public static final UnicodeBlock 
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2393 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 2394 /** @stable ICU 49 */ 2395 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 2396 /** @stable ICU 49 */ 2397 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2398 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 2399 /** @stable ICU 49 */ 2400 public static final UnicodeBlock MEROITIC_CURSIVE = 2401 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 2402 /** @stable ICU 49 */ 2403 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2404 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 2405 /** @stable ICU 49 */ 2406 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 2407 /** @stable ICU 49 */ 2408 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 2409 /** @stable ICU 49 */ 2410 public static final UnicodeBlock SORA_SOMPENG = 2411 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 2412 /** @stable ICU 49 */ 2413 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2414 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 2415 /** @stable ICU 49 */ 2416 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 2417 2418 /* New blocks in Unicode 7.0 */ 2419 2420 /** @stable ICU 54 */ 2421 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 2422 /** @stable ICU 54 */ 2423 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2424 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 2425 /** @stable ICU 54 */ 2426 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2427 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); 
/*[102E0]*/ 2428 /** @stable ICU 54 */ 2429 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2430 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 2431 /** @stable ICU 54 */ 2432 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 2433 /** @stable ICU 54 */ 2434 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 2435 /** @stable ICU 54 */ 2436 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2437 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 2438 /** @stable ICU 54 */ 2439 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 2440 /** @stable ICU 54 */ 2441 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 2442 /** @stable ICU 54 */ 2443 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 2444 /** @stable ICU 54 */ 2445 public static final UnicodeBlock LATIN_EXTENDED_E = 2446 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 2447 /** @stable ICU 54 */ 2448 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 2449 /** @stable ICU 54 */ 2450 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 2451 /** @stable ICU 54 */ 2452 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 2453 /** @stable ICU 54 */ 2454 public static final UnicodeBlock MENDE_KIKAKUI = 2455 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 2456 /** @stable ICU 54 */ 2457 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 2458 /** @stable ICU 54 */ 2459 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); 
/*[16A40]*/ 2460 /** @stable ICU 54 */ 2461 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2462 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 2463 /** @stable ICU 54 */ 2464 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 2465 /** @stable ICU 54 */ 2466 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2467 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 2468 /** @stable ICU 54 */ 2469 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 2470 /** @stable ICU 54 */ 2471 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2472 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 2473 /** @stable ICU 54 */ 2474 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 2475 /** @stable ICU 54 */ 2476 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 2477 /** @stable ICU 54 */ 2478 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 2479 /** @stable ICU 54 */ 2480 public static final UnicodeBlock PSALTER_PAHLAVI = 2481 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 2482 /** @stable ICU 54 */ 2483 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2484 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 2485 /** @stable ICU 54 */ 2486 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 2487 /** @stable ICU 54 */ 2488 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2489 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 2490 /** @stable ICU 54 */ 2491 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2492 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 
SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 2493 /** @stable ICU 54 */ 2494 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 2495 /** @stable ICU 54 */ 2496 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 2497 2498 /* New blocks in Unicode 8.0 */ 2499 2500 /** @stable ICU 56 */ 2501 public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/ 2502 /** @stable ICU 56 */ 2503 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2504 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/ 2505 /** @stable ICU 56 */ 2506 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2507 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/ 2508 /** @stable ICU 56 */ 2509 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2510 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2511 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/ 2512 /** @stable ICU 56 */ 2513 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2514 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/ 2515 /** @stable ICU 56 */ 2516 public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/ 2517 /** @stable ICU 56 */ 2518 public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/ 2519 /** @stable ICU 56 */ 2520 public static final UnicodeBlock OLD_HUNGARIAN = 2521 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/ 2522 /** @stable ICU 56 */ 2523 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2524 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2525 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2526 /** @stable ICU 56 */ 2527 public static final UnicodeBlock SUTTON_SIGNWRITING = 2528 new UnicodeBlock("SUTTON_SIGNWRITING", 
SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2529 2530 /* New blocks in Unicode 9.0 */ 2531 2532 /** @stable ICU 58 */ 2533 public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/ 2534 /** @stable ICU 58 */ 2535 public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/ 2536 /** @stable ICU 58 */ 2537 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2538 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/ 2539 /** @stable ICU 58 */ 2540 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 2541 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/ 2542 /** @stable ICU 58 */ 2543 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 2544 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/ 2545 /** @stable ICU 58 */ 2546 public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/ 2547 /** @stable ICU 58 */ 2548 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 2549 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/ 2550 /** @stable ICU 58 */ 2551 public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/ 2552 /** @stable ICU 58 */ 2553 public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/ 2554 /** @stable ICU 58 */ 2555 public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/ 2556 /** @stable ICU 58 */ 2557 public static final UnicodeBlock TANGUT_COMPONENTS = 2558 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/ 2559 2560 // New blocks in Unicode 10.0 2561 2562 /** @stable ICU 60 */ 2563 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 2564 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/ 2565 /** 
@stable ICU 60 */ 2566 public static final UnicodeBlock KANA_EXTENDED_A = 2567 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/ 2568 /** @stable ICU 60 */ 2569 public static final UnicodeBlock MASARAM_GONDI = 2570 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/ 2571 /** @stable ICU 60 */ 2572 public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/ 2573 /** @stable ICU 60 */ 2574 public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/ 2575 /** @stable ICU 60 */ 2576 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 2577 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/ 2578 /** @stable ICU 60 */ 2579 public static final UnicodeBlock ZANABAZAR_SQUARE = 2580 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/ 2581 2582 // New blocks in Unicode 11.0 2583 2584 /** @stable ICU 62 */ 2585 public static final UnicodeBlock CHESS_SYMBOLS = 2586 new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/ 2587 /** @stable ICU 62 */ 2588 public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/ 2589 /** @stable ICU 62 */ 2590 public static final UnicodeBlock GEORGIAN_EXTENDED = 2591 new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/ 2592 /** @stable ICU 62 */ 2593 public static final UnicodeBlock GUNJALA_GONDI = 2594 new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/ 2595 /** @stable ICU 62 */ 2596 public static final UnicodeBlock HANIFI_ROHINGYA = 2597 new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/ 2598 /** @stable ICU 62 */ 2599 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 2600 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/ 2601 /** @stable ICU 62 */ 2602 public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/ 2603 /** @stable ICU 62 */ 2604 public 
static final UnicodeBlock MAYAN_NUMERALS = 2605 new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/ 2606 /** @stable ICU 62 */ 2607 public static final UnicodeBlock MEDEFAIDRIN = 2608 new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/ 2609 /** @stable ICU 62 */ 2610 public static final UnicodeBlock OLD_SOGDIAN = 2611 new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/ 2612 /** @stable ICU 62 */ 2613 public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/ 2614 2615 // New blocks in Unicode 12.0 2616 2617 /** @stable ICU 64 */ 2618 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 2619 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/ 2620 /** @stable ICU 64 */ 2621 public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/ 2622 /** @stable ICU 64 */ 2623 public static final UnicodeBlock NANDINAGARI = 2624 new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/ 2625 /** @stable ICU 64 */ 2626 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 2627 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/ 2628 /** @stable ICU 64 */ 2629 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 2630 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/ 2631 /** @stable ICU 64 */ 2632 public static final UnicodeBlock SMALL_KANA_EXTENSION = 2633 new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/ 2634 /** @stable ICU 64 */ 2635 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 2636 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/ 2637 /** @stable ICU 64 */ 2638 public static final UnicodeBlock TAMIL_SUPPLEMENT = 2639 new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/ 2640 /** @stable ICU 64 */ 2641 
public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/ 2642 2643 // New blocks in Unicode 13.0 2644 2645 /** @stable ICU 66 */ 2646 public static final UnicodeBlock CHORASMIAN = 2647 new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/ 2648 /** @stable ICU 66 */ 2649 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 2650 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G", 2651 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/ 2652 /** @stable ICU 66 */ 2653 public static final UnicodeBlock DIVES_AKURU = 2654 new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/ 2655 /** @stable ICU 66 */ 2656 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 2657 new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/ 2658 /** @stable ICU 66 */ 2659 public static final UnicodeBlock LISU_SUPPLEMENT = 2660 new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/ 2661 /** @stable ICU 66 */ 2662 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING = 2663 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/ 2664 /** @stable ICU 66 */ 2665 public static final UnicodeBlock TANGUT_SUPPLEMENT = 2666 new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/ 2667 /** @stable ICU 66 */ 2668 public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/ 2669 2670 // New blocks in Unicode 14.0 2671 2672 /** @stable ICU 70 */ 2673 public static final UnicodeBlock ARABIC_EXTENDED_B = 2674 new UnicodeBlock("ARABIC_EXTENDED_B", ARABIC_EXTENDED_B_ID); /*[0870]*/ 2675 /** @stable ICU 70 */ 2676 public static final UnicodeBlock CYPRO_MINOAN = 2677 new UnicodeBlock("CYPRO_MINOAN", CYPRO_MINOAN_ID); /*[12F90]*/ 2678 /** @stable ICU 70 */ 2679 public static final UnicodeBlock ETHIOPIC_EXTENDED_B = 2680 new UnicodeBlock("ETHIOPIC_EXTENDED_B", ETHIOPIC_EXTENDED_B_ID); /*[1E7E0]*/ 2681 /** @stable ICU 
70 */ 2682 public static final UnicodeBlock KANA_EXTENDED_B = 2683 new UnicodeBlock("KANA_EXTENDED_B", KANA_EXTENDED_B_ID); /*[1AFF0]*/ 2684 /** @stable ICU 70 */ 2685 public static final UnicodeBlock LATIN_EXTENDED_F = 2686 new UnicodeBlock("LATIN_EXTENDED_F", LATIN_EXTENDED_F_ID); /*[10780]*/ 2687 /** @stable ICU 70 */ 2688 public static final UnicodeBlock LATIN_EXTENDED_G = 2689 new UnicodeBlock("LATIN_EXTENDED_G", LATIN_EXTENDED_G_ID); /*[1DF00]*/ 2690 /** @stable ICU 70 */ 2691 public static final UnicodeBlock OLD_UYGHUR = 2692 new UnicodeBlock("OLD_UYGHUR", OLD_UYGHUR_ID); /*[10F70]*/ 2693 /** @stable ICU 70 */ 2694 public static final UnicodeBlock TANGSA = new UnicodeBlock("TANGSA", TANGSA_ID); /*[16A70]*/ 2695 /** @stable ICU 70 */ 2696 public static final UnicodeBlock TOTO = new UnicodeBlock("TOTO", TOTO_ID); /*[1E290]*/ 2697 /** @stable ICU 70 */ 2698 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 2699 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A", 2700 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID); /*[11AB0]*/ 2701 /** @stable ICU 70 */ 2702 public static final UnicodeBlock VITHKUQI = 2703 new UnicodeBlock("VITHKUQI", VITHKUQI_ID); /*[10570]*/ 2704 /** @stable ICU 70 */ 2705 public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION = 2706 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION", 2707 ZNAMENNY_MUSICAL_NOTATION_ID); /*[1CF00]*/ 2708 2709 /** 2710 * @stable ICU 2.4 2711 */ 2712 public static final UnicodeBlock INVALID_CODE 2713 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2714 2715 static { 2716 for (int blockId = 0; blockId < COUNT; ++blockId) { 2717 if (BLOCKS_[blockId] == null) { 2718 throw new java.lang.IllegalStateException( 2719 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2720 } 2721 } 2722 } 2723 2724 // public methods -------------------------------------------------- 2725 2726 /** 2727 * {@icu} Returns the only instance of the UnicodeBlock with the 
argument ID. 2728 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2729 * @param id UnicodeBlock ID 2730 * @return the only instance of the UnicodeBlock with the argument ID 2731 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2732 * returned. 2733 * @stable ICU 2.4 2734 */ getInstance(int id)2735 public static UnicodeBlock getInstance(int id) 2736 { 2737 if (id >= 0 && id < BLOCKS_.length) { 2738 return BLOCKS_[id]; 2739 } 2740 return INVALID_CODE; 2741 } 2742 2743 /** 2744 * Returns the Unicode allocation block that contains the code point, 2745 * or null if the code point is not a member of a defined block. 2746 * @param ch code point to be tested 2747 * @return the Unicode allocation block that contains the code point 2748 * @stable ICU 2.4 2749 */ of(int ch)2750 public static UnicodeBlock of(int ch) 2751 { 2752 if (ch > MAX_VALUE) { 2753 return INVALID_CODE; 2754 } 2755 2756 return UnicodeBlock.getInstance( 2757 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2758 } 2759 2760 /** 2761 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2762 * Returns the Unicode block with the given name. {@icunote} Unlike 2763 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2764 * against the official UCD name and the Java block name 2765 * (ignoring case). 
2766 * @param blockName the name of the block to match 2767 * @return the UnicodeBlock with that name 2768 * @throws IllegalArgumentException if the blockName could not be matched 2769 * @stable ICU 3.0 2770 */ forName(String blockName)2771 public static final UnicodeBlock forName(String blockName) { 2772 Map<String, UnicodeBlock> m = null; 2773 if (mref != null) { 2774 m = mref.get(); 2775 } 2776 if (m == null) { 2777 m = new HashMap<>(BLOCKS_.length); 2778 for (int i = 0; i < BLOCKS_.length; ++i) { 2779 UnicodeBlock b = BLOCKS_[i]; 2780 String name = trimBlockName( 2781 getPropertyValueName(UProperty.BLOCK, b.getID(), 2782 UProperty.NameChoice.LONG)); 2783 m.put(name, b); 2784 } 2785 mref = new SoftReference<>(m); 2786 } 2787 UnicodeBlock b = m.get(trimBlockName(blockName)); 2788 if (b == null) { 2789 throw new IllegalArgumentException(); 2790 } 2791 return b; 2792 } 2793 private static SoftReference<Map<String, UnicodeBlock>> mref; 2794 trimBlockName(String name)2795 private static String trimBlockName(String name) { 2796 String upper = name.toUpperCase(Locale.ENGLISH); 2797 StringBuilder result = new StringBuilder(upper.length()); 2798 for (int i = 0; i < upper.length(); i++) { 2799 char c = upper.charAt(i); 2800 if (c != ' ' && c != '_' && c != '-') { 2801 result.append(c); 2802 } 2803 } 2804 return result.toString(); 2805 } 2806 2807 /** 2808 * {icu} Returns the type ID of this Unicode block 2809 * @return integer type ID of this Unicode block 2810 * @stable ICU 2.4 2811 */ getID()2812 public int getID() 2813 { 2814 return m_id_; 2815 } 2816 2817 // private data members --------------------------------------------- 2818 2819 /** 2820 * Identification code for this UnicodeBlock 2821 */ 2822 private int m_id_; 2823 2824 // private constructor ---------------------------------------------- 2825 2826 /** 2827 * UnicodeBlock constructor 2828 * @param name name of this UnicodeBlock 2829 * @param id unique id of this UnicodeBlock 2830 * @exception 
NullPointerException if name is <code>null</code> 2831 */ UnicodeBlock(String name, int id)2832 private UnicodeBlock(String name, int id) 2833 { 2834 super(name); 2835 m_id_ = id; 2836 if (id >= 0) { 2837 BLOCKS_[id] = this; 2838 } 2839 } 2840 } 2841 2842 /** 2843 * East Asian Width constants. 2844 * @see UProperty#EAST_ASIAN_WIDTH 2845 * @see UCharacter#getIntPropertyValue 2846 * @stable ICU 2.4 2847 */ 2848 public static interface EastAsianWidth 2849 { 2850 /** 2851 * @stable ICU 2.4 2852 */ 2853 public static final int NEUTRAL = 0; 2854 /** 2855 * @stable ICU 2.4 2856 */ 2857 public static final int AMBIGUOUS = 1; 2858 /** 2859 * @stable ICU 2.4 2860 */ 2861 public static final int HALFWIDTH = 2; 2862 /** 2863 * @stable ICU 2.4 2864 */ 2865 public static final int FULLWIDTH = 3; 2866 /** 2867 * @stable ICU 2.4 2868 */ 2869 public static final int NARROW = 4; 2870 /** 2871 * @stable ICU 2.4 2872 */ 2873 public static final int WIDE = 5; 2874 /** 2875 * One more than the highest normal EastAsianWidth value. 2876 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH). 2877 * 2878 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2879 */ 2880 @Deprecated 2881 public static final int COUNT = 6; 2882 } 2883 2884 /** 2885 * Decomposition Type constants. 
2886 * @see UProperty#DECOMPOSITION_TYPE 2887 * @stable ICU 2.4 2888 */ 2889 public static interface DecompositionType 2890 { 2891 /** 2892 * @stable ICU 2.4 2893 */ 2894 public static final int NONE = 0; 2895 /** 2896 * @stable ICU 2.4 2897 */ 2898 public static final int CANONICAL = 1; 2899 /** 2900 * @stable ICU 2.4 2901 */ 2902 public static final int COMPAT = 2; 2903 /** 2904 * @stable ICU 2.4 2905 */ 2906 public static final int CIRCLE = 3; 2907 /** 2908 * @stable ICU 2.4 2909 */ 2910 public static final int FINAL = 4; 2911 /** 2912 * @stable ICU 2.4 2913 */ 2914 public static final int FONT = 5; 2915 /** 2916 * @stable ICU 2.4 2917 */ 2918 public static final int FRACTION = 6; 2919 /** 2920 * @stable ICU 2.4 2921 */ 2922 public static final int INITIAL = 7; 2923 /** 2924 * @stable ICU 2.4 2925 */ 2926 public static final int ISOLATED = 8; 2927 /** 2928 * @stable ICU 2.4 2929 */ 2930 public static final int MEDIAL = 9; 2931 /** 2932 * @stable ICU 2.4 2933 */ 2934 public static final int NARROW = 10; 2935 /** 2936 * @stable ICU 2.4 2937 */ 2938 public static final int NOBREAK = 11; 2939 /** 2940 * @stable ICU 2.4 2941 */ 2942 public static final int SMALL = 12; 2943 /** 2944 * @stable ICU 2.4 2945 */ 2946 public static final int SQUARE = 13; 2947 /** 2948 * @stable ICU 2.4 2949 */ 2950 public static final int SUB = 14; 2951 /** 2952 * @stable ICU 2.4 2953 */ 2954 public static final int SUPER = 15; 2955 /** 2956 * @stable ICU 2.4 2957 */ 2958 public static final int VERTICAL = 16; 2959 /** 2960 * @stable ICU 2.4 2961 */ 2962 public static final int WIDE = 17; 2963 /** 2964 * One more than the highest normal DecompositionType value. 2965 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE). 2966 * 2967 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2968 */ 2969 @Deprecated 2970 public static final int COUNT = 18; 2971 } 2972 2973 /** 2974 * Joining Type constants. 
2975 * @see UProperty#JOINING_TYPE 2976 * @stable ICU 2.4 2977 */ 2978 public static interface JoiningType 2979 { 2980 /** 2981 * @stable ICU 2.4 2982 */ 2983 public static final int NON_JOINING = 0; 2984 /** 2985 * @stable ICU 2.4 2986 */ 2987 public static final int JOIN_CAUSING = 1; 2988 /** 2989 * @stable ICU 2.4 2990 */ 2991 public static final int DUAL_JOINING = 2; 2992 /** 2993 * @stable ICU 2.4 2994 */ 2995 public static final int LEFT_JOINING = 3; 2996 /** 2997 * @stable ICU 2.4 2998 */ 2999 public static final int RIGHT_JOINING = 4; 3000 /** 3001 * @stable ICU 2.4 3002 */ 3003 public static final int TRANSPARENT = 5; 3004 /** 3005 * One more than the highest normal JoiningType value. 3006 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE). 3007 * 3008 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3009 */ 3010 @Deprecated 3011 public static final int COUNT = 6; 3012 } 3013 3014 /** 3015 * Joining Group constants. 
3016 * @see UProperty#JOINING_GROUP 3017 * @stable ICU 2.4 3018 */ 3019 public static interface JoiningGroup 3020 { 3021 /** 3022 * @stable ICU 2.4 3023 */ 3024 public static final int NO_JOINING_GROUP = 0; 3025 /** 3026 * @stable ICU 2.4 3027 */ 3028 public static final int AIN = 1; 3029 /** 3030 * @stable ICU 2.4 3031 */ 3032 public static final int ALAPH = 2; 3033 /** 3034 * @stable ICU 2.4 3035 */ 3036 public static final int ALEF = 3; 3037 /** 3038 * @stable ICU 2.4 3039 */ 3040 public static final int BEH = 4; 3041 /** 3042 * @stable ICU 2.4 3043 */ 3044 public static final int BETH = 5; 3045 /** 3046 * @stable ICU 2.4 3047 */ 3048 public static final int DAL = 6; 3049 /** 3050 * @stable ICU 2.4 3051 */ 3052 public static final int DALATH_RISH = 7; 3053 /** 3054 * @stable ICU 2.4 3055 */ 3056 public static final int E = 8; 3057 /** 3058 * @stable ICU 2.4 3059 */ 3060 public static final int FEH = 9; 3061 /** 3062 * @stable ICU 2.4 3063 */ 3064 public static final int FINAL_SEMKATH = 10; 3065 /** 3066 * @stable ICU 2.4 3067 */ 3068 public static final int GAF = 11; 3069 /** 3070 * @stable ICU 2.4 3071 */ 3072 public static final int GAMAL = 12; 3073 /** 3074 * @stable ICU 2.4 3075 */ 3076 public static final int HAH = 13; 3077 /** @stable ICU 4.6 */ 3078 public static final int TEH_MARBUTA_GOAL = 14; 3079 /** 3080 * @stable ICU 2.4 3081 */ 3082 public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL; 3083 /** 3084 * @stable ICU 2.4 3085 */ 3086 public static final int HE = 15; 3087 /** 3088 * @stable ICU 2.4 3089 */ 3090 public static final int HEH = 16; 3091 /** 3092 * @stable ICU 2.4 3093 */ 3094 public static final int HEH_GOAL = 17; 3095 /** 3096 * @stable ICU 2.4 3097 */ 3098 public static final int HETH = 18; 3099 /** 3100 * @stable ICU 2.4 3101 */ 3102 public static final int KAF = 19; 3103 /** 3104 * @stable ICU 2.4 3105 */ 3106 public static final int KAPH = 20; 3107 /** 3108 * @stable ICU 2.4 3109 */ 3110 public static final int KNOTTED_HEH = 
21; 3111 /** 3112 * @stable ICU 2.4 3113 */ 3114 public static final int LAM = 22; 3115 /** 3116 * @stable ICU 2.4 3117 */ 3118 public static final int LAMADH = 23; 3119 /** 3120 * @stable ICU 2.4 3121 */ 3122 public static final int MEEM = 24; 3123 /** 3124 * @stable ICU 2.4 3125 */ 3126 public static final int MIM = 25; 3127 /** 3128 * @stable ICU 2.4 3129 */ 3130 public static final int NOON = 26; 3131 /** 3132 * @stable ICU 2.4 3133 */ 3134 public static final int NUN = 27; 3135 /** 3136 * @stable ICU 2.4 3137 */ 3138 public static final int PE = 28; 3139 /** 3140 * @stable ICU 2.4 3141 */ 3142 public static final int QAF = 29; 3143 /** 3144 * @stable ICU 2.4 3145 */ 3146 public static final int QAPH = 30; 3147 /** 3148 * @stable ICU 2.4 3149 */ 3150 public static final int REH = 31; 3151 /** 3152 * @stable ICU 2.4 3153 */ 3154 public static final int REVERSED_PE = 32; 3155 /** 3156 * @stable ICU 2.4 3157 */ 3158 public static final int SAD = 33; 3159 /** 3160 * @stable ICU 2.4 3161 */ 3162 public static final int SADHE = 34; 3163 /** 3164 * @stable ICU 2.4 3165 */ 3166 public static final int SEEN = 35; 3167 /** 3168 * @stable ICU 2.4 3169 */ 3170 public static final int SEMKATH = 36; 3171 /** 3172 * @stable ICU 2.4 3173 */ 3174 public static final int SHIN = 37; 3175 /** 3176 * @stable ICU 2.4 3177 */ 3178 public static final int SWASH_KAF = 38; 3179 /** 3180 * @stable ICU 2.4 3181 */ 3182 public static final int SYRIAC_WAW = 39; 3183 /** 3184 * @stable ICU 2.4 3185 */ 3186 public static final int TAH = 40; 3187 /** 3188 * @stable ICU 2.4 3189 */ 3190 public static final int TAW = 41; 3191 /** 3192 * @stable ICU 2.4 3193 */ 3194 public static final int TEH_MARBUTA = 42; 3195 /** 3196 * @stable ICU 2.4 3197 */ 3198 public static final int TETH = 43; 3199 /** 3200 * @stable ICU 2.4 3201 */ 3202 public static final int WAW = 44; 3203 /** 3204 * @stable ICU 2.4 3205 */ 3206 public static final int YEH = 45; 3207 /** 3208 * @stable ICU 2.4 3209 */ 3210 public 
static final int YEH_BARREE = 46; 3211 /** 3212 * @stable ICU 2.4 3213 */ 3214 public static final int YEH_WITH_TAIL = 47; 3215 /** 3216 * @stable ICU 2.4 3217 */ 3218 public static final int YUDH = 48; 3219 /** 3220 * @stable ICU 2.4 3221 */ 3222 public static final int YUDH_HE = 49; 3223 /** 3224 * @stable ICU 2.4 3225 */ 3226 public static final int ZAIN = 50; 3227 /** 3228 * @stable ICU 2.6 3229 */ 3230 public static final int FE = 51; 3231 /** 3232 * @stable ICU 2.6 3233 */ 3234 public static final int KHAPH = 52; 3235 /** 3236 * @stable ICU 2.6 3237 */ 3238 public static final int ZHAIN = 53; 3239 /** 3240 * @stable ICU 4.0 3241 */ 3242 public static final int BURUSHASKI_YEH_BARREE = 54; 3243 /** @stable ICU 4.4 */ 3244 public static final int FARSI_YEH = 55; 3245 /** @stable ICU 4.4 */ 3246 public static final int NYA = 56; 3247 /** @stable ICU 49 */ 3248 public static final int ROHINGYA_YEH = 57; 3249 3250 /** @stable ICU 54 */ 3251 public static final int MANICHAEAN_ALEPH = 58; 3252 /** @stable ICU 54 */ 3253 public static final int MANICHAEAN_AYIN = 59; 3254 /** @stable ICU 54 */ 3255 public static final int MANICHAEAN_BETH = 60; 3256 /** @stable ICU 54 */ 3257 public static final int MANICHAEAN_DALETH = 61; 3258 /** @stable ICU 54 */ 3259 public static final int MANICHAEAN_DHAMEDH = 62; 3260 /** @stable ICU 54 */ 3261 public static final int MANICHAEAN_FIVE = 63; 3262 /** @stable ICU 54 */ 3263 public static final int MANICHAEAN_GIMEL = 64; 3264 /** @stable ICU 54 */ 3265 public static final int MANICHAEAN_HETH = 65; 3266 /** @stable ICU 54 */ 3267 public static final int MANICHAEAN_HUNDRED = 66; 3268 /** @stable ICU 54 */ 3269 public static final int MANICHAEAN_KAPH = 67; 3270 /** @stable ICU 54 */ 3271 public static final int MANICHAEAN_LAMEDH = 68; 3272 /** @stable ICU 54 */ 3273 public static final int MANICHAEAN_MEM = 69; 3274 /** @stable ICU 54 */ 3275 public static final int MANICHAEAN_NUN = 70; 3276 /** @stable ICU 54 */ 3277 public static final 
int MANICHAEAN_ONE = 71; 3278 /** @stable ICU 54 */ 3279 public static final int MANICHAEAN_PE = 72; 3280 /** @stable ICU 54 */ 3281 public static final int MANICHAEAN_QOPH = 73; 3282 /** @stable ICU 54 */ 3283 public static final int MANICHAEAN_RESH = 74; 3284 /** @stable ICU 54 */ 3285 public static final int MANICHAEAN_SADHE = 75; 3286 /** @stable ICU 54 */ 3287 public static final int MANICHAEAN_SAMEKH = 76; 3288 /** @stable ICU 54 */ 3289 public static final int MANICHAEAN_TAW = 77; 3290 /** @stable ICU 54 */ 3291 public static final int MANICHAEAN_TEN = 78; 3292 /** @stable ICU 54 */ 3293 public static final int MANICHAEAN_TETH = 79; 3294 /** @stable ICU 54 */ 3295 public static final int MANICHAEAN_THAMEDH = 80; 3296 /** @stable ICU 54 */ 3297 public static final int MANICHAEAN_TWENTY = 81; 3298 /** @stable ICU 54 */ 3299 public static final int MANICHAEAN_WAW = 82; 3300 /** @stable ICU 54 */ 3301 public static final int MANICHAEAN_YODH = 83; 3302 /** @stable ICU 54 */ 3303 public static final int MANICHAEAN_ZAYIN = 84; 3304 /** @stable ICU 54 */ 3305 public static final int STRAIGHT_WAW = 85; 3306 3307 /** @stable ICU 58 */ 3308 public static final int AFRICAN_FEH = 86; 3309 /** @stable ICU 58 */ 3310 public static final int AFRICAN_NOON = 87; 3311 /** @stable ICU 58 */ 3312 public static final int AFRICAN_QAF = 88; 3313 3314 /** @stable ICU 60 */ 3315 public static final int MALAYALAM_BHA = 89; 3316 /** @stable ICU 60 */ 3317 public static final int MALAYALAM_JA = 90; 3318 /** @stable ICU 60 */ 3319 public static final int MALAYALAM_LLA = 91; 3320 /** @stable ICU 60 */ 3321 public static final int MALAYALAM_LLLA = 92; 3322 /** @stable ICU 60 */ 3323 public static final int MALAYALAM_NGA = 93; 3324 /** @stable ICU 60 */ 3325 public static final int MALAYALAM_NNA = 94; 3326 /** @stable ICU 60 */ 3327 public static final int MALAYALAM_NNNA = 95; 3328 /** @stable ICU 60 */ 3329 public static final int MALAYALAM_NYA = 96; 3330 /** @stable ICU 60 */ 3331 public 
static final int MALAYALAM_RA = 97; 3332 /** @stable ICU 60 */ 3333 public static final int MALAYALAM_SSA = 98; 3334 /** @stable ICU 60 */ 3335 public static final int MALAYALAM_TTA = 99; 3336 3337 /** @stable ICU 62 */ 3338 public static final int HANIFI_ROHINGYA_KINNA_YA = 100; 3339 /** @stable ICU 62 */ 3340 public static final int HANIFI_ROHINGYA_PA = 101; 3341 3342 /** @stable ICU 70 */ 3343 public static final int THIN_YEH = 102; 3344 /** @stable ICU 70 */ 3345 public static final int VERTICAL_TAIL = 103; 3346 3347 /** 3348 * One more than the highest normal JoiningGroup value. 3349 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup). 3350 * 3351 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3352 */ 3353 @Deprecated 3354 public static final int COUNT = 104; 3355 } 3356 3357 /** 3358 * Grapheme Cluster Break constants. 3359 * @see UProperty#GRAPHEME_CLUSTER_BREAK 3360 * @stable ICU 3.4 3361 */ 3362 public static interface GraphemeClusterBreak { 3363 /** 3364 * @stable ICU 3.4 3365 */ 3366 public static final int OTHER = 0; 3367 /** 3368 * @stable ICU 3.4 3369 */ 3370 public static final int CONTROL = 1; 3371 /** 3372 * @stable ICU 3.4 3373 */ 3374 public static final int CR = 2; 3375 /** 3376 * @stable ICU 3.4 3377 */ 3378 public static final int EXTEND = 3; 3379 /** 3380 * @stable ICU 3.4 3381 */ 3382 public static final int L = 4; 3383 /** 3384 * @stable ICU 3.4 3385 */ 3386 public static final int LF = 5; 3387 /** 3388 * @stable ICU 3.4 3389 */ 3390 public static final int LV = 6; 3391 /** 3392 * @stable ICU 3.4 3393 */ 3394 public static final int LVT = 7; 3395 /** 3396 * @stable ICU 3.4 3397 */ 3398 public static final int T = 8; 3399 /** 3400 * @stable ICU 3.4 3401 */ 3402 public static final int V = 9; 3403 /** 3404 * @stable ICU 4.0 3405 */ 3406 public static final int SPACING_MARK = 10; 3407 /** 3408 * @stable ICU 4.0 3409 */ 3410 public static final int PREPEND = 11; 
3411 /** @stable ICU 50 */ 3412 public static final int REGIONAL_INDICATOR = 12; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3413 /** @stable ICU 58 */ 3414 public static final int E_BASE = 13; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3415 /** @stable ICU 58 */ 3416 public static final int E_BASE_GAZ = 14; /*[EBG]*/ 3417 /** @stable ICU 58 */ 3418 public static final int E_MODIFIER = 15; /*[EM]*/ 3419 /** @stable ICU 58 */ 3420 public static final int GLUE_AFTER_ZWJ = 16; /*[GAZ]*/ 3421 /** @stable ICU 58 */ 3422 public static final int ZWJ = 17; /*[ZWJ]*/ 3423 3424 /** 3425 * One more than the highest normal GraphemeClusterBreak value. 3426 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK). 3427 * 3428 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3429 */ 3430 @Deprecated 3431 public static final int COUNT = 18; 3432 } 3433 3434 /** 3435 * Word Break constants. 3436 * @see UProperty#WORD_BREAK 3437 * @stable ICU 3.4 3438 */ 3439 public static interface WordBreak { 3440 /** 3441 * @stable ICU 3.8 3442 */ 3443 public static final int OTHER = 0; 3444 /** 3445 * @stable ICU 3.8 3446 */ 3447 public static final int ALETTER = 1; 3448 /** 3449 * @stable ICU 3.8 3450 */ 3451 public static final int FORMAT = 2; 3452 /** 3453 * @stable ICU 3.8 3454 */ 3455 public static final int KATAKANA = 3; 3456 /** 3457 * @stable ICU 3.8 3458 */ 3459 public static final int MIDLETTER = 4; 3460 /** 3461 * @stable ICU 3.8 3462 */ 3463 public static final int MIDNUM = 5; 3464 /** 3465 * @stable ICU 3.8 3466 */ 3467 public static final int NUMERIC = 6; 3468 /** 3469 * @stable ICU 3.8 3470 */ 3471 public static final int EXTENDNUMLET = 7; 3472 /** 3473 * @stable ICU 4.0 3474 */ 3475 public static final int CR = 8; 3476 /** 3477 * @stable ICU 4.0 3478 */ 3479 public static final int EXTEND = 9; 3480 /** 3481 * @stable ICU 4.0 3482 */ 3483 public static final int LF = 10; 3484 /** 3485 * 
@stable ICU 4.0 3486 */ 3487 public static final int MIDNUMLET = 11; 3488 /** 3489 * @stable ICU 4.0 3490 */ 3491 public static final int NEWLINE = 12; 3492 /** @stable ICU 50 */ 3493 public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3494 /** @stable ICU 52 */ 3495 public static final int HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 3496 /** @stable ICU 52 */ 3497 public static final int SINGLE_QUOTE = 15; /*[SQ]*/ 3498 /** @stable ICU 52 */ 3499 public static final int DOUBLE_QUOTE = 16; /*[DQ]*/ 3500 /** @stable ICU 58 */ 3501 public static final int E_BASE = 17; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3502 /** @stable ICU 58 */ 3503 public static final int E_BASE_GAZ = 18; /*[EBG]*/ 3504 /** @stable ICU 58 */ 3505 public static final int E_MODIFIER = 19; /*[EM]*/ 3506 /** @stable ICU 58 */ 3507 public static final int GLUE_AFTER_ZWJ = 20; /*[GAZ]*/ 3508 /** @stable ICU 58 */ 3509 public static final int ZWJ = 21; /*[ZWJ]*/ 3510 /** @stable ICU 62 */ 3511 public static final int WSEGSPACE = 22; /*[WSEGSPACE]*/ 3512 /** 3513 * One more than the highest normal WordBreak value. 3514 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK). 3515 * 3516 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3517 */ 3518 @Deprecated 3519 public static final int COUNT = 23; 3520 } 3521 3522 /** 3523 * Sentence Break constants. 
3524 * @see UProperty#SENTENCE_BREAK 3525 * @stable ICU 3.4 3526 */ 3527 public static interface SentenceBreak { 3528 /** 3529 * @stable ICU 3.8 3530 */ 3531 public static final int OTHER = 0; 3532 /** 3533 * @stable ICU 3.8 3534 */ 3535 public static final int ATERM = 1; 3536 /** 3537 * @stable ICU 3.8 3538 */ 3539 public static final int CLOSE = 2; 3540 /** 3541 * @stable ICU 3.8 3542 */ 3543 public static final int FORMAT = 3; 3544 /** 3545 * @stable ICU 3.8 3546 */ 3547 public static final int LOWER = 4; 3548 /** 3549 * @stable ICU 3.8 3550 */ 3551 public static final int NUMERIC = 5; 3552 /** 3553 * @stable ICU 3.8 3554 */ 3555 public static final int OLETTER = 6; 3556 /** 3557 * @stable ICU 3.8 3558 */ 3559 public static final int SEP = 7; 3560 /** 3561 * @stable ICU 3.8 3562 */ 3563 public static final int SP = 8; 3564 /** 3565 * @stable ICU 3.8 3566 */ 3567 public static final int STERM = 9; 3568 /** 3569 * @stable ICU 3.8 3570 */ 3571 public static final int UPPER = 10; 3572 /** 3573 * @stable ICU 4.0 3574 */ 3575 public static final int CR = 11; 3576 /** 3577 * @stable ICU 4.0 3578 */ 3579 public static final int EXTEND = 12; 3580 /** 3581 * @stable ICU 4.0 3582 */ 3583 public static final int LF = 13; 3584 /** 3585 * @stable ICU 4.0 3586 */ 3587 public static final int SCONTINUE = 14; 3588 /** 3589 * One more than the highest normal SentenceBreak value. 3590 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK). 3591 * 3592 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3593 */ 3594 @Deprecated 3595 public static final int COUNT = 15; 3596 } 3597 3598 /** 3599 * Line Break constants. 
3600 * @see UProperty#LINE_BREAK 3601 * @stable ICU 2.4 3602 */ 3603 public static interface LineBreak 3604 { 3605 /** 3606 * @stable ICU 2.4 3607 */ 3608 public static final int UNKNOWN = 0; 3609 /** 3610 * @stable ICU 2.4 3611 */ 3612 public static final int AMBIGUOUS = 1; 3613 /** 3614 * @stable ICU 2.4 3615 */ 3616 public static final int ALPHABETIC = 2; 3617 /** 3618 * @stable ICU 2.4 3619 */ 3620 public static final int BREAK_BOTH = 3; 3621 /** 3622 * @stable ICU 2.4 3623 */ 3624 public static final int BREAK_AFTER = 4; 3625 /** 3626 * @stable ICU 2.4 3627 */ 3628 public static final int BREAK_BEFORE = 5; 3629 /** 3630 * @stable ICU 2.4 3631 */ 3632 public static final int MANDATORY_BREAK = 6; 3633 /** 3634 * @stable ICU 2.4 3635 */ 3636 public static final int CONTINGENT_BREAK = 7; 3637 /** 3638 * @stable ICU 2.4 3639 */ 3640 public static final int CLOSE_PUNCTUATION = 8; 3641 /** 3642 * @stable ICU 2.4 3643 */ 3644 public static final int COMBINING_MARK = 9; 3645 /** 3646 * @stable ICU 2.4 3647 */ 3648 public static final int CARRIAGE_RETURN = 10; 3649 /** 3650 * @stable ICU 2.4 3651 */ 3652 public static final int EXCLAMATION = 11; 3653 /** 3654 * @stable ICU 2.4 3655 */ 3656 public static final int GLUE = 12; 3657 /** 3658 * @stable ICU 2.4 3659 */ 3660 public static final int HYPHEN = 13; 3661 /** 3662 * @stable ICU 2.4 3663 */ 3664 public static final int IDEOGRAPHIC = 14; 3665 /** 3666 * @see #INSEPARABLE 3667 * @stable ICU 2.4 3668 */ 3669 public static final int INSEPERABLE = 15; 3670 /** 3671 * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
3672 * @stable ICU 3.0 3673 */ 3674 public static final int INSEPARABLE = 15; 3675 /** 3676 * @stable ICU 2.4 3677 */ 3678 public static final int INFIX_NUMERIC = 16; 3679 /** 3680 * @stable ICU 2.4 3681 */ 3682 public static final int LINE_FEED = 17; 3683 /** 3684 * @stable ICU 2.4 3685 */ 3686 public static final int NONSTARTER = 18; 3687 /** 3688 * @stable ICU 2.4 3689 */ 3690 public static final int NUMERIC = 19; 3691 /** 3692 * @stable ICU 2.4 3693 */ 3694 public static final int OPEN_PUNCTUATION = 20; 3695 /** 3696 * @stable ICU 2.4 3697 */ 3698 public static final int POSTFIX_NUMERIC = 21; 3699 /** 3700 * @stable ICU 2.4 3701 */ 3702 public static final int PREFIX_NUMERIC = 22; 3703 /** 3704 * @stable ICU 2.4 3705 */ 3706 public static final int QUOTATION = 23; 3707 /** 3708 * @stable ICU 2.4 3709 */ 3710 public static final int COMPLEX_CONTEXT = 24; 3711 /** 3712 * @stable ICU 2.4 3713 */ 3714 public static final int SURROGATE = 25; 3715 /** 3716 * @stable ICU 2.4 3717 */ 3718 public static final int SPACE = 26; 3719 /** 3720 * @stable ICU 2.4 3721 */ 3722 public static final int BREAK_SYMBOLS = 27; 3723 /** 3724 * @stable ICU 2.4 3725 */ 3726 public static final int ZWSPACE = 28; 3727 /** 3728 * @stable ICU 2.6 3729 */ 3730 public static final int NEXT_LINE = 29; /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 3731 /** 3732 * @stable ICU 2.6 3733 */ 3734 public static final int WORD_JOINER = 30; /*[WJ]*/ 3735 /** 3736 * @stable ICU 3.4 3737 */ 3738 public static final int H2 = 31; /* from here on: new in Unicode 4.1/ICU 3.4 */ 3739 /** 3740 * @stable ICU 3.4 3741 */ 3742 public static final int H3 = 32; 3743 /** 3744 * @stable ICU 3.4 3745 */ 3746 public static final int JL = 33; 3747 /** 3748 * @stable ICU 3.4 3749 */ 3750 public static final int JT = 34; 3751 /** 3752 * @stable ICU 3.4 3753 */ 3754 public static final int JV = 35; 3755 /** @stable ICU 4.4 */ 3756 public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 
*/ 3757 /** @stable ICU 49 */ 3758 public static final int CONDITIONAL_JAPANESE_STARTER = 37; /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 3759 /** @stable ICU 49 */ 3760 public static final int HEBREW_LETTER = 38; /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 3761 /** @stable ICU 50 */ 3762 public static final int REGIONAL_INDICATOR = 39; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3763 /** @stable ICU 58 */ 3764 public static final int E_BASE = 40; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3765 /** @stable ICU 58 */ 3766 public static final int E_MODIFIER = 41; /*[EM]*/ 3767 /** @stable ICU 58 */ 3768 public static final int ZWJ = 42; /*[ZWJ]*/ 3769 /** 3770 * One more than the highest normal LineBreak value. 3771 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK). 3772 * 3773 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3774 */ 3775 @Deprecated 3776 public static final int COUNT = 43; 3777 } 3778 3779 /** 3780 * Numeric Type constants. 3781 * @see UProperty#NUMERIC_TYPE 3782 * @stable ICU 2.4 3783 */ 3784 public static interface NumericType 3785 { 3786 /** 3787 * @stable ICU 2.4 3788 */ 3789 public static final int NONE = 0; 3790 /** 3791 * @stable ICU 2.4 3792 */ 3793 public static final int DECIMAL = 1; 3794 /** 3795 * @stable ICU 2.4 3796 */ 3797 public static final int DIGIT = 2; 3798 /** 3799 * @stable ICU 2.4 3800 */ 3801 public static final int NUMERIC = 3; 3802 /** 3803 * One more than the highest normal NumericType value. 3804 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE). 3805 * 3806 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3807 */ 3808 @Deprecated 3809 public static final int COUNT = 4; 3810 } 3811 3812 /** 3813 * Hangul Syllable Type constants. 
3814 * 3815 * @see UProperty#HANGUL_SYLLABLE_TYPE 3816 * @stable ICU 2.6 3817 */ 3818 public static interface HangulSyllableType 3819 { 3820 /** 3821 * @stable ICU 2.6 3822 */ 3823 public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ 3824 /** 3825 * @stable ICU 2.6 3826 */ 3827 public static final int LEADING_JAMO = 1; /*[L]*/ 3828 /** 3829 * @stable ICU 2.6 3830 */ 3831 public static final int VOWEL_JAMO = 2; /*[V]*/ 3832 /** 3833 * @stable ICU 2.6 3834 */ 3835 public static final int TRAILING_JAMO = 3; /*[T]*/ 3836 /** 3837 * @stable ICU 2.6 3838 */ 3839 public static final int LV_SYLLABLE = 4; /*[LV]*/ 3840 /** 3841 * @stable ICU 2.6 3842 */ 3843 public static final int LVT_SYLLABLE = 5; /*[LVT]*/ 3844 /** 3845 * One more than the highest normal HangulSyllableType value. 3846 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE). 3847 * 3848 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3849 */ 3850 @Deprecated 3851 public static final int COUNT = 6; 3852 } 3853 3854 /** 3855 * Bidi Paired Bracket Type constants. 3856 * 3857 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3858 * @stable ICU 52 3859 */ 3860 public static interface BidiPairedBracketType { 3861 /** 3862 * Not a paired bracket. 3863 * @stable ICU 52 3864 */ 3865 public static final int NONE = 0; 3866 /** 3867 * Open paired bracket. 3868 * @stable ICU 52 3869 */ 3870 public static final int OPEN = 1; 3871 /** 3872 * Close paired bracket. 3873 * @stable ICU 52 3874 */ 3875 public static final int CLOSE = 2; 3876 /** 3877 * One more than the highest normal BidiPairedBracketType value. 3878 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE). 3879 * 3880 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
3881 */ 3882 @Deprecated 3883 public static final int COUNT = 3; 3884 } 3885 3886 /** 3887 * Indic Positional Category constants. 3888 * 3889 * @see UProperty#INDIC_POSITIONAL_CATEGORY 3890 * @stable ICU 63 3891 */ 3892 public static interface IndicPositionalCategory { 3893 /** @stable ICU 63 */ 3894 public static final int NA = 0; 3895 /** @stable ICU 63 */ 3896 public static final int BOTTOM = 1; 3897 /** @stable ICU 63 */ 3898 public static final int BOTTOM_AND_LEFT = 2; 3899 /** @stable ICU 63 */ 3900 public static final int BOTTOM_AND_RIGHT = 3; 3901 /** @stable ICU 63 */ 3902 public static final int LEFT = 4; 3903 /** @stable ICU 63 */ 3904 public static final int LEFT_AND_RIGHT = 5; 3905 /** @stable ICU 63 */ 3906 public static final int OVERSTRUCK = 6; 3907 /** @stable ICU 63 */ 3908 public static final int RIGHT = 7; 3909 /** @stable ICU 63 */ 3910 public static final int TOP = 8; 3911 /** @stable ICU 63 */ 3912 public static final int TOP_AND_BOTTOM = 9; 3913 /** @stable ICU 63 */ 3914 public static final int TOP_AND_BOTTOM_AND_RIGHT = 10; 3915 /** @stable ICU 63 */ 3916 public static final int TOP_AND_LEFT = 11; 3917 /** @stable ICU 63 */ 3918 public static final int TOP_AND_LEFT_AND_RIGHT = 12; 3919 /** @stable ICU 63 */ 3920 public static final int TOP_AND_RIGHT = 13; 3921 /** @stable ICU 63 */ 3922 public static final int VISUAL_ORDER_LEFT = 14; 3923 /** @stable ICU 66 */ 3924 public static final int TOP_AND_BOTTOM_AND_LEFT = 15; 3925 } 3926 3927 /** 3928 * Indic Syllabic Category constants. 
3929 * 3930 * @see UProperty#INDIC_SYLLABIC_CATEGORY 3931 * @stable ICU 63 3932 */ 3933 public static interface IndicSyllabicCategory { 3934 /** @stable ICU 63 */ 3935 public static final int OTHER = 0; 3936 /** @stable ICU 63 */ 3937 public static final int AVAGRAHA = 1; 3938 /** @stable ICU 63 */ 3939 public static final int BINDU = 2; 3940 /** @stable ICU 63 */ 3941 public static final int BRAHMI_JOINING_NUMBER = 3; 3942 /** @stable ICU 63 */ 3943 public static final int CANTILLATION_MARK = 4; 3944 /** @stable ICU 63 */ 3945 public static final int CONSONANT = 5; 3946 /** @stable ICU 63 */ 3947 public static final int CONSONANT_DEAD = 6; 3948 /** @stable ICU 63 */ 3949 public static final int CONSONANT_FINAL = 7; 3950 /** @stable ICU 63 */ 3951 public static final int CONSONANT_HEAD_LETTER = 8; 3952 /** @stable ICU 63 */ 3953 public static final int CONSONANT_INITIAL_POSTFIXED = 9; 3954 /** @stable ICU 63 */ 3955 public static final int CONSONANT_KILLER = 10; 3956 /** @stable ICU 63 */ 3957 public static final int CONSONANT_MEDIAL = 11; 3958 /** @stable ICU 63 */ 3959 public static final int CONSONANT_PLACEHOLDER = 12; 3960 /** @stable ICU 63 */ 3961 public static final int CONSONANT_PRECEDING_REPHA = 13; 3962 /** @stable ICU 63 */ 3963 public static final int CONSONANT_PREFIXED = 14; 3964 /** @stable ICU 63 */ 3965 public static final int CONSONANT_SUBJOINED = 15; 3966 /** @stable ICU 63 */ 3967 public static final int CONSONANT_SUCCEEDING_REPHA = 16; 3968 /** @stable ICU 63 */ 3969 public static final int CONSONANT_WITH_STACKER = 17; 3970 /** @stable ICU 63 */ 3971 public static final int GEMINATION_MARK = 18; 3972 /** @stable ICU 63 */ 3973 public static final int INVISIBLE_STACKER = 19; 3974 /** @stable ICU 63 */ 3975 public static final int JOINER = 20; 3976 /** @stable ICU 63 */ 3977 public static final int MODIFYING_LETTER = 21; 3978 /** @stable ICU 63 */ 3979 public static final int NON_JOINER = 22; 3980 /** @stable ICU 63 */ 3981 public static final int 
NUKTA = 23; 3982 /** @stable ICU 63 */ 3983 public static final int NUMBER = 24; 3984 /** @stable ICU 63 */ 3985 public static final int NUMBER_JOINER = 25; 3986 /** @stable ICU 63 */ 3987 public static final int PURE_KILLER = 26; 3988 /** @stable ICU 63 */ 3989 public static final int REGISTER_SHIFTER = 27; 3990 /** @stable ICU 63 */ 3991 public static final int SYLLABLE_MODIFIER = 28; 3992 /** @stable ICU 63 */ 3993 public static final int TONE_LETTER = 29; 3994 /** @stable ICU 63 */ 3995 public static final int TONE_MARK = 30; 3996 /** @stable ICU 63 */ 3997 public static final int VIRAMA = 31; 3998 /** @stable ICU 63 */ 3999 public static final int VISARGA = 32; 4000 /** @stable ICU 63 */ 4001 public static final int VOWEL = 33; 4002 /** @stable ICU 63 */ 4003 public static final int VOWEL_DEPENDENT = 34; 4004 /** @stable ICU 63 */ 4005 public static final int VOWEL_INDEPENDENT = 35; 4006 } 4007 4008 /** 4009 * Vertical Orientation constants. 4010 * 4011 * @see UProperty#VERTICAL_ORIENTATION 4012 * @stable ICU 63 4013 */ 4014 public static interface VerticalOrientation { 4015 /** @stable ICU 63 */ 4016 public static final int ROTATED = 0; 4017 /** @stable ICU 63 */ 4018 public static final int TRANSFORMED_ROTATED = 1; 4019 /** @stable ICU 63 */ 4020 public static final int TRANSFORMED_UPRIGHT = 2; 4021 /** @stable ICU 63 */ 4022 public static final int UPRIGHT = 3; 4023 } 4024 4025 // public data members ----------------------------------------------- 4026 4027 /** 4028 * The lowest Unicode code point value, constant 0. 4029 * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}. 4030 * 4031 * @stable ICU 2.1 4032 */ 4033 public static final int MIN_VALUE = Character.MIN_CODE_POINT; 4034 4035 /** 4036 * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits). 4037 * Same as {@link Character#MAX_CODE_POINT}. 
4038 * 4039 * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE} 4040 * which is still a char with the value U+FFFF. 4041 * 4042 * @stable ICU 2.1 4043 */ 4044 public static final int MAX_VALUE = Character.MAX_CODE_POINT; 4045 4046 /** 4047 * The minimum value for Supplementary code points, constant U+10000. 4048 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 4049 * 4050 * @stable ICU 2.1 4051 */ 4052 public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT; 4053 4054 /** 4055 * Unicode value used when translating into Unicode encoding form and there 4056 * is no existing character. 4057 * @stable ICU 2.1 4058 */ 4059 public static final int REPLACEMENT_CHAR = '\uFFFD'; 4060 4061 /** 4062 * Special value that is returned by getUnicodeNumericValue(int) when no 4063 * numeric value is defined for a code point. 4064 * @stable ICU 2.4 4065 * @see #getUnicodeNumericValue 4066 */ 4067 public static final double NO_NUMERIC_VALUE = -123456789; 4068 4069 /** 4070 * Compatibility constant for Java Character's MIN_RADIX. 4071 * @stable ICU 3.4 4072 */ 4073 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 4074 4075 /** 4076 * Compatibility constant for Java Character's MAX_RADIX. 4077 * @stable ICU 3.4 4078 */ 4079 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 4080 4081 /** 4082 * Do not lowercase non-initial parts of words when titlecasing. 4083 * Option bit for titlecasing APIs that take an options bit set. 4084 * 4085 * By default, titlecasing will titlecase the first cased character 4086 * of a word and lowercase all other characters. 4087 * With this option, the other characters will not be modified. 
*
     * @see #toTitleCase
     * @stable ICU 3.8
     */
    public static final int TITLECASE_NO_LOWERCASE = 0x100;

    /**
     * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
     * titlecase exactly the characters at breaks from the iterator.
     * Option bit for titlecasing APIs that take an options bit set.
     *
     * By default, titlecasing will take each break iterator index,
     * adjust it by looking for the next cased character, and titlecase that one.
     * Other characters are lowercased.
     *
     * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
     *
     * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
     * #29, "Text Boundaries." Between each pair of word boundaries, find the first
     * cased character F. If F exists, map F to default_title(F); then map each
     * subsequent character C to default_lower(C).
     *
     * @see #toTitleCase
     * @see #TITLECASE_NO_LOWERCASE
     * @stable ICU 3.8
     */
    public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;

    // public methods ----------------------------------------------------

    /**
     * Returns the numeric value of a decimal digit code point.
     * <br>This method observes the semantics of
     * <code>java.lang.Character.digit()</code>. Note that this
     * will return positive values for code points for which isDigit
     * returns false, just like java.lang.Character.
     * <br><em>Semantic Change:</em> In release 1.3.1 and
     * prior, this did not treat the European letters as having a
     * digit value, and also treated numeric letters and other numbers as
     * digits.
     * This has been changed to conform to the java semantics.
     * <br>A code point is a valid digit if and only if:
     * <ul>
     * <li>ch is a decimal digit or one of the European letters, and
     * <li>the value of ch is less than the specified radix.
4133 * </ul> 4134 * @param ch the code point to query 4135 * @param radix the radix 4136 * @return the numeric value represented by the code point in the 4137 * specified radix, or -1 if the code point is not a decimal digit 4138 * or if its value is too large for the radix 4139 * @stable ICU 2.1 4140 */ digit(int ch, int radix)4141 public static int digit(int ch, int radix) 4142 { 4143 if (2 <= radix && radix <= 36) { 4144 int value = digit(ch); 4145 if (value < 0) { 4146 // ch is not a decimal digit, try latin letters 4147 value = UCharacterProperty.getEuropeanDigit(ch); 4148 } 4149 return (value < radix) ? value : -1; 4150 } else { 4151 return -1; // invalid radix 4152 } 4153 } 4154 4155 /** 4156 * Returnss the numeric value of a decimal digit code point. 4157 * <br>This is a convenience overload of <code>digit(int, int)</code> 4158 * that provides a decimal radix. 4159 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 4160 * treated numeric letters and other numbers as digits. This has 4161 * been changed to conform to the java semantics. 4162 * @param ch the code point to query 4163 * @return the numeric value represented by the code point, 4164 * or -1 if the code point is not a decimal digit or if its 4165 * value is too large for a decimal radix 4166 * @stable ICU 2.1 4167 */ digit(int ch)4168 public static int digit(int ch) 4169 { 4170 return UCharacterProperty.INSTANCE.digit(ch); 4171 } 4172 4173 /** 4174 * Returns the numeric value of the code point as a nonnegative 4175 * integer. 4176 * <br>If the code point does not have a numeric value, then -1 is returned. 4177 * <br> 4178 * If the code point has a numeric value that cannot be represented as a 4179 * nonnegative integer (for example, a fractional value), then -2 is 4180 * returned. 
4181 * @param ch the code point to query 4182 * @return the numeric value of the code point, or -1 if it has no numeric 4183 * value, or -2 if it has a numeric value that cannot be represented as a 4184 * nonnegative integer 4185 * @stable ICU 2.1 4186 */ getNumericValue(int ch)4187 public static int getNumericValue(int ch) 4188 { 4189 return UCharacterProperty.INSTANCE.getNumericValue(ch); 4190 } 4191 4192 /** 4193 * {@icu} Returns the numeric value for a Unicode code point as defined in the 4194 * Unicode Character Database. 4195 * <p>A "double" return type is necessary because some numeric values are 4196 * fractions, negative, or too large for int. 4197 * <p>For characters without any numeric values in the Unicode Character 4198 * Database, this function will return NO_NUMERIC_VALUE. 4199 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 4200 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 4201 * return type int and returns -1 when the argument ch does not have a 4202 * corresponding numeric value. This has been changed to synch with ICU4C 4203 * 4204 * This corresponds to the ICU4C function u_getNumericValue. 4205 * @param ch Code point to get the numeric value for. 4206 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 4207 * @stable ICU 2.4 4208 */ getUnicodeNumericValue(int ch)4209 public static double getUnicodeNumericValue(int ch) 4210 { 4211 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 4212 } 4213 4214 /** 4215 * Compatibility override of Java deprecated method. This 4216 * method will always remain deprecated. 4217 * Same as java.lang.Character.isSpace(). 4218 * @param ch the code point 4219 * @return true if the code point is a space character as 4220 * defined by java.lang.Character.isSpace. 
4221 * @deprecated ICU 3.4 (Java) 4222 */ 4223 @Deprecated isSpace(int ch)4224 public static boolean isSpace(int ch) { 4225 return ch <= 0x20 && 4226 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 4227 } 4228 4229 /** 4230 * Returns a value indicating a code point's Unicode category. 4231 * Up-to-date Unicode implementation of java.lang.Character.getType() 4232 * except for the above mentioned code points that had their category 4233 * changed.<br> 4234 * Return results are constants from the interface 4235 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 4236 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 4237 * those returned by java.lang.Character.getType. UCharacterCategory values 4238 * match the ones used in ICU4C, while java.lang.Character type 4239 * values, though similar, skip the value 17. 4240 * @param ch code point whose type is to be determined 4241 * @return category which is a value of UCharacterCategory 4242 * @stable ICU 2.1 4243 */ getType(int ch)4244 public static int getType(int ch) 4245 { 4246 return UCharacterProperty.INSTANCE.getType(ch); 4247 } 4248 4249 /** 4250 * Determines if a code point has a defined meaning in the up-to-date 4251 * Unicode standard. 4252 * E.g. supplementary code points though allocated space are not defined in 4253 * Unicode yet.<br> 4254 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 4255 * @param ch code point to be determined if it is defined in the most 4256 * current version of Unicode 4257 * @return true if this code point is defined in unicode 4258 * @stable ICU 2.1 4259 */ isDefined(int ch)4260 public static boolean isDefined(int ch) 4261 { 4262 return getType(ch) != 0; 4263 } 4264 4265 /** 4266 * Determines if a code point is a Java digit. 4267 * <br>This method observes the semantics of 4268 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 4269 * digits only. 
4270 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 4271 * numeric letters and other numbers as digits. 4272 * This has been changed to conform to the java semantics. 4273 * @param ch code point to query 4274 * @return true if this code point is a digit 4275 * @stable ICU 2.1 4276 */ isDigit(int ch)4277 public static boolean isDigit(int ch) 4278 { 4279 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 4280 } 4281 4282 /** 4283 * Determines if the specified code point is an ISO control character. 4284 * A code point is considered to be an ISO control character if it is in 4285 * the range \u0000 through \u001F or in the range \u007F through 4286 * \u009F.<br> 4287 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 4288 * @param ch code point to determine if it is an ISO control character 4289 * @return true if code point is a ISO control character 4290 * @stable ICU 2.1 4291 */ isISOControl(int ch)4292 public static boolean isISOControl(int ch) 4293 { 4294 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 4295 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 4296 } 4297 4298 /** 4299 * Determines if the specified code point is a letter. 4300 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 4301 * @param ch code point to determine if it is a letter 4302 * @return true if code point is a letter 4303 * @stable ICU 2.1 4304 */ isLetter(int ch)4305 public static boolean isLetter(int ch) 4306 { 4307 // if props == 0, it will just fall through and return false 4308 return ((1 << getType(ch)) 4309 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4310 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4311 | (1 << UCharacterCategory.TITLECASE_LETTER) 4312 | (1 << UCharacterCategory.MODIFIER_LETTER) 4313 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 4314 } 4315 4316 /** 4317 * Determines if the specified code point is a letter or digit. 
4318 * {@icunote} This method, unlike java.lang.Character does not regard the ascii 4319 * characters 'A' - 'Z' and 'a' - 'z' as digits. 4320 * @param ch code point to determine if it is a letter or a digit 4321 * @return true if code point is a letter or a digit 4322 * @stable ICU 2.1 4323 */ isLetterOrDigit(int ch)4324 public static boolean isLetterOrDigit(int ch) 4325 { 4326 return ((1 << getType(ch)) 4327 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4328 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4329 | (1 << UCharacterCategory.TITLECASE_LETTER) 4330 | (1 << UCharacterCategory.MODIFIER_LETTER) 4331 | (1 << UCharacterCategory.OTHER_LETTER) 4332 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 4333 } 4334 4335 /** 4336 * Compatibility override of Java deprecated method. This 4337 * method will always remain deprecated. Delegates to 4338 * java.lang.Character.isJavaIdentifierStart. 4339 * @param cp the code point 4340 * @return true if the code point can start a java identifier. 4341 * @deprecated ICU 3.4 (Java) 4342 */ 4343 @Deprecated isJavaLetter(int cp)4344 public static boolean isJavaLetter(int cp) { 4345 return isJavaIdentifierStart(cp); 4346 } 4347 4348 /** 4349 * Compatibility override of Java deprecated method. This 4350 * method will always remain deprecated. Delegates to 4351 * java.lang.Character.isJavaIdentifierPart. 4352 * @param cp the code point 4353 * @return true if the code point can continue a java identifier. 4354 * @deprecated ICU 3.4 (Java) 4355 */ 4356 @Deprecated isJavaLetterOrDigit(int cp)4357 public static boolean isJavaLetterOrDigit(int cp) { 4358 return isJavaIdentifierPart(cp); 4359 } 4360 4361 /** 4362 * Compatibility override of Java method, delegates to 4363 * java.lang.Character.isJavaIdentifierStart. 4364 * @param cp the code point 4365 * @return true if the code point can start a java identifier. 
4366 * @stable ICU 3.4 4367 */ isJavaIdentifierStart(int cp)4368 public static boolean isJavaIdentifierStart(int cp) { 4369 // note, downcast to char for jdk 1.4 compatibility 4370 return java.lang.Character.isJavaIdentifierStart((char)cp); 4371 } 4372 4373 /** 4374 * Compatibility override of Java method, delegates to 4375 * java.lang.Character.isJavaIdentifierPart. 4376 * @param cp the code point 4377 * @return true if the code point can continue a java identifier. 4378 * @stable ICU 3.4 4379 */ isJavaIdentifierPart(int cp)4380 public static boolean isJavaIdentifierPart(int cp) { 4381 // note, downcast to char for jdk 1.4 compatibility 4382 return java.lang.Character.isJavaIdentifierPart((char)cp); 4383 } 4384 4385 /** 4386 * Determines if the specified code point is a lowercase character. 4387 * UnicodeData only contains case mappings for code points where they are 4388 * one-to-one mappings; it also omits information about context-sensitive 4389 * case mappings.<br> For more information about Unicode case mapping 4390 * please refer to the 4391 * <a href=https://www.unicode.org/reports/tr21/>Technical report 4392 * #21</a>.<br> 4393 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 4394 * @param ch code point to determine if it is in lowercase 4395 * @return true if code point is a lowercase character 4396 * @stable ICU 2.1 4397 */ isLowerCase(int ch)4398 public static boolean isLowerCase(int ch) 4399 { 4400 // if props == 0, it will just fall through and return false 4401 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 4402 } 4403 4404 /** 4405 * Determines if the specified code point is a white space character. 4406 * A code point is considered to be an whitespace character if and only 4407 * if it satisfies one of the following criteria: 4408 * <ul> 4409 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 4410 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 
4411 * <li> It is \u0009, HORIZONTAL TABULATION. 4412 * <li> It is \u000A, LINE FEED. 4413 * <li> It is \u000B, VERTICAL TABULATION. 4414 * <li> It is \u000C, FORM FEED. 4415 * <li> It is \u000D, CARRIAGE RETURN. 4416 * <li> It is \u001C, FILE SEPARATOR. 4417 * <li> It is \u001D, GROUP SEPARATOR. 4418 * <li> It is \u001E, RECORD SEPARATOR. 4419 * <li> It is \u001F, UNIT SEPARATOR. 4420 * </ul> 4421 * 4422 * This API tries to sync with the semantics of Java's 4423 * java.lang.Character.isWhitespace(), but it may not return 4424 * the exact same results because of the Unicode version 4425 * difference. 4426 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 4427 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 4428 * See http://www.unicode.org/versions/Unicode4.0.1/ 4429 * @param ch code point to determine if it is a white space 4430 * @return true if the specified code point is a white space character 4431 * @stable ICU 2.1 4432 */ isWhitespace(int ch)4433 public static boolean isWhitespace(int ch) 4434 { 4435 // exclude no-break spaces 4436 // if props == 0, it will just fall through and return false 4437 return ((1 << getType(ch)) & 4438 ((1 << UCharacterCategory.SPACE_SEPARATOR) 4439 | (1 << UCharacterCategory.LINE_SEPARATOR) 4440 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 4441 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 4442 // TAB VT LF FF CR FS GS RS US NL are all control characters 4443 // that are white spaces. 4444 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 4445 } 4446 4447 /** 4448 * Determines if the specified code point is a Unicode specified space 4449 * character, i.e. if code point is in the category Zs, Zl and Zp. 4450 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
4451 * @param ch code point to determine if it is a space 4452 * @return true if the specified code point is a space character 4453 * @stable ICU 2.1 4454 */ isSpaceChar(int ch)4455 public static boolean isSpaceChar(int ch) 4456 { 4457 // if props == 0, it will just fall through and return false 4458 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 4459 | (1 << UCharacterCategory.LINE_SEPARATOR) 4460 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 4461 != 0; 4462 } 4463 4464 /** 4465 * Determines if the specified code point is a titlecase character. 4466 * UnicodeData only contains case mappings for code points where they are 4467 * one-to-one mappings; it also omits information about context-sensitive 4468 * case mappings.<br> 4469 * For more information about Unicode case mapping please refer to the 4470 * <a href=https://www.unicode.org/reports/tr21/> 4471 * Technical report #21</a>.<br> 4472 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 4473 * @param ch code point to determine if it is in title case 4474 * @return true if the specified code point is a titlecase character 4475 * @stable ICU 2.1 4476 */ isTitleCase(int ch)4477 public static boolean isTitleCase(int ch) 4478 { 4479 // if props == 0, it will just fall through and return false 4480 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 4481 } 4482 4483 /** 4484 * Determines if the specified code point may be any part of a Unicode 4485 * identifier other than the starting character. 
4486 * A code point may be part of a Unicode identifier if and only if it is 4487 * one of the following: 4488 * <ul> 4489 * <li> Lu Uppercase letter 4490 * <li> Ll Lowercase letter 4491 * <li> Lt Titlecase letter 4492 * <li> Lm Modifier letter 4493 * <li> Lo Other letter 4494 * <li> Nl Letter number 4495 * <li> Pc Connecting punctuation character 4496 * <li> Nd decimal number 4497 * <li> Mc Spacing combining mark 4498 * <li> Mn Non-spacing mark 4499 * <li> Cf formatting code 4500 * </ul> 4501 * Up-to-date Unicode implementation of 4502 * java.lang.Character.isUnicodeIdentifierPart().<br> 4503 * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>. 4504 * @param ch code point to determine if is can be part of a Unicode 4505 * identifier 4506 * @return true if code point is any character belonging a unicode 4507 * identifier suffix after the first character 4508 * @stable ICU 2.1 4509 */ isUnicodeIdentifierPart(int ch)4510 public static boolean isUnicodeIdentifierPart(int ch) 4511 { 4512 // if props == 0, it will just fall through and return false 4513 // cat == format 4514 return ((1 << getType(ch)) 4515 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4516 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4517 | (1 << UCharacterCategory.TITLECASE_LETTER) 4518 | (1 << UCharacterCategory.MODIFIER_LETTER) 4519 | (1 << UCharacterCategory.OTHER_LETTER) 4520 | (1 << UCharacterCategory.LETTER_NUMBER) 4521 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 4522 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 4523 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 4524 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 4525 || isIdentifierIgnorable(ch); 4526 } 4527 4528 /** 4529 * Determines if the specified code point is permissible as the first 4530 * character in a Unicode identifier. 
4531 * A code point may start a Unicode identifier if it is of type either 4532 * <ul> 4533 * <li> Lu Uppercase letter 4534 * <li> Ll Lowercase letter 4535 * <li> Lt Titlecase letter 4536 * <li> Lm Modifier letter 4537 * <li> Lo Other letter 4538 * <li> Nl Letter number 4539 * </ul> 4540 * Up-to-date Unicode implementation of 4541 * java.lang.Character.isUnicodeIdentifierStart().<br> 4542 * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>. 4543 * @param ch code point to determine if it can start a Unicode identifier 4544 * @return true if code point is the first character belonging a unicode 4545 * identifier 4546 * @stable ICU 2.1 4547 */ isUnicodeIdentifierStart(int ch)4548 public static boolean isUnicodeIdentifierStart(int ch) 4549 { 4550 /*int cat = getType(ch);*/ 4551 // if props == 0, it will just fall through and return false 4552 return ((1 << getType(ch)) 4553 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4554 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4555 | (1 << UCharacterCategory.TITLECASE_LETTER) 4556 | (1 << UCharacterCategory.MODIFIER_LETTER) 4557 | (1 << UCharacterCategory.OTHER_LETTER) 4558 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 4559 } 4560 4561 /** 4562 * Determines if the specified code point should be regarded as an 4563 * ignorable character in a Java identifier. 4564 * A character is Java-identifier-ignorable if it has the general category 4565 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 4566 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 4567 * Up-to-date Unicode implementation of 4568 * java.lang.Character.isIdentifierIgnorable().<br> 4569 * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>. 4570 * <p>Note that Unicode just recommends to ignore Cf (format controls). 4571 * @param ch code point to be determined if it can be ignored in a Unicode 4572 * identifier. 
4573 * @return true if the code point is ignorable 4574 * @stable ICU 2.1 4575 */ isIdentifierIgnorable(int ch)4576 public static boolean isIdentifierIgnorable(int ch) 4577 { 4578 // see java.lang.Character.isIdentifierIgnorable() on range of 4579 // ignorable characters. 4580 if (ch <= 0x9f) { 4581 return isISOControl(ch) 4582 && !((ch >= 0x9 && ch <= 0xd) 4583 || (ch >= 0x1c && ch <= 0x1f)); 4584 } 4585 return getType(ch) == UCharacterCategory.FORMAT; 4586 } 4587 4588 /** 4589 * Determines if the specified code point is an uppercase character. 4590 * UnicodeData only contains case mappings for code point where they are 4591 * one-to-one mappings; it also omits information about context-sensitive 4592 * case mappings.<br> 4593 * For language specific case conversion behavior, use 4594 * toUpperCase(locale, str). <br> 4595 * For example, the case conversion for dot-less i and dotted I in Turkish, 4596 * or for final sigma in Greek. 4597 * For more information about Unicode case mapping please refer to the 4598 * <a href=https://www.unicode.org/reports/tr21/> 4599 * Technical report #21</a>.<br> 4600 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 4601 * @param ch code point to determine if it is in uppercase 4602 * @return true if the code point is an uppercase character 4603 * @stable ICU 2.1 4604 */ isUpperCase(int ch)4605 public static boolean isUpperCase(int ch) 4606 { 4607 // if props == 0, it will just fall through and return false 4608 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 4609 } 4610 4611 /** 4612 * The given code point is mapped to its lowercase equivalent; if the code 4613 * point has no lowercase equivalent, the code point itself is returned. 4614 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 4615 * 4616 * <p>This function only returns the simple, single-code point case mapping. 
4617 * Full case mappings should be used whenever possible because they produce 4618 * better results by working on whole strings. 4619 * They take into account the string context and the language and can map 4620 * to a result string with a different length as appropriate. 4621 * Full case mappings are applied by the case mapping functions 4622 * that take String parameters rather than code points (int). 4623 * See also the User Guide chapter on C/POSIX migration: 4624 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4625 * 4626 * @param ch code point whose lowercase equivalent is to be retrieved 4627 * @return the lowercase equivalent code point 4628 * @stable ICU 2.1 4629 */ toLowerCase(int ch)4630 public static int toLowerCase(int ch) { 4631 return UCaseProps.INSTANCE.tolower(ch); 4632 } 4633 4634 /** 4635 * Converts argument code point and returns a String object representing 4636 * the code point's value in UTF-16 format. 4637 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 4638 * 4639 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 4640 * 4641 * @param ch code point 4642 * @return string representation of the code point, null if code point is not 4643 * defined in unicode 4644 * @stable ICU 2.1 4645 */ toString(int ch)4646 public static String toString(int ch) 4647 { 4648 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4649 return null; 4650 } 4651 4652 if (ch < SUPPLEMENTARY_MIN_VALUE) { 4653 return String.valueOf((char)ch); 4654 } 4655 4656 return new String(Character.toChars(ch)); 4657 } 4658 4659 /** 4660 * Converts the code point argument to titlecase. 4661 * If no titlecase is available, the uppercase is returned. If no uppercase 4662 * is available, the code point itself is returned. 4663 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4664 * 4665 * <p>This function only returns the simple, single-code point case mapping. 
4666 * Full case mappings should be used whenever possible because they produce 4667 * better results by working on whole strings. 4668 * They take into account the string context and the language and can map 4669 * to a result string with a different length as appropriate. 4670 * Full case mappings are applied by the case mapping functions 4671 * that take String parameters rather than code points (int). 4672 * See also the User Guide chapter on C/POSIX migration: 4673 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4674 * 4675 * @param ch code point whose title case is to be retrieved 4676 * @return titlecase code point 4677 * @stable ICU 2.1 4678 */ toTitleCase(int ch)4679 public static int toTitleCase(int ch) { 4680 return UCaseProps.INSTANCE.totitle(ch); 4681 } 4682 4683 /** 4684 * Converts the character argument to uppercase. 4685 * If no uppercase is available, the character itself is returned. 4686 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4687 * 4688 * <p>This function only returns the simple, single-code point case mapping. 4689 * Full case mappings should be used whenever possible because they produce 4690 * better results by working on whole strings. 4691 * They take into account the string context and the language and can map 4692 * to a result string with a different length as appropriate. 4693 * Full case mappings are applied by the case mapping functions 4694 * that take String parameters rather than code points (int). 
4695 * See also the User Guide chapter on C/POSIX migration: 4696 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 4697 * 4698 * @param ch code point whose uppercase is to be retrieved 4699 * @return uppercase code point 4700 * @stable ICU 2.1 4701 */ toUpperCase(int ch)4702 public static int toUpperCase(int ch) { 4703 return UCaseProps.INSTANCE.toupper(ch); 4704 } 4705 4706 // extra methods not in java.lang.Character -------------------------- 4707 4708 /** 4709 * {@icu} Determines if the code point is a supplementary character. 4710 * A code point is a supplementary character if and only if it is greater 4711 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4712 * @param ch code point to be determined if it is in the supplementary 4713 * plane 4714 * @return true if code point is a supplementary character 4715 * @stable ICU 2.1 4716 */ isSupplementary(int ch)4717 public static boolean isSupplementary(int ch) 4718 { 4719 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4720 ch <= UCharacter.MAX_VALUE; 4721 } 4722 4723 /** 4724 * {@icu} Determines if the code point is in the BMP plane. 4725 * @param ch code point to be determined if it is not a supplementary 4726 * character 4727 * @return true if code point is not a supplementary character 4728 * @stable ICU 2.1 4729 */ isBMP(int ch)4730 public static boolean isBMP(int ch) 4731 { 4732 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4733 } 4734 4735 /** 4736 * {@icu} Determines whether the specified code point is a printable character 4737 * according to the Unicode standard. 
4738 * @param ch code point to be determined if it is printable 4739 * @return true if the code point is a printable character 4740 * @stable ICU 2.1 4741 */ isPrintable(int ch)4742 public static boolean isPrintable(int ch) 4743 { 4744 int cat = getType(ch); 4745 // if props == 0, it will just fall through and return false 4746 return (cat != UCharacterCategory.UNASSIGNED && 4747 cat != UCharacterCategory.CONTROL && 4748 cat != UCharacterCategory.FORMAT && 4749 cat != UCharacterCategory.PRIVATE_USE && 4750 cat != UCharacterCategory.SURROGATE && 4751 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4752 } 4753 4754 /** 4755 * {@icu} Determines whether the specified code point is of base form. 4756 * A code point of base form does not graphically combine with preceding 4757 * characters, and is neither a control nor a format character. 4758 * @param ch code point to be determined if it is of base form 4759 * @return true if the code point is of base form 4760 * @stable ICU 2.1 4761 */ isBaseForm(int ch)4762 public static boolean isBaseForm(int ch) 4763 { 4764 int cat = getType(ch); 4765 // if props == 0, it will just fall through and return false 4766 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4767 cat == UCharacterCategory.OTHER_NUMBER || 4768 cat == UCharacterCategory.LETTER_NUMBER || 4769 cat == UCharacterCategory.UPPERCASE_LETTER || 4770 cat == UCharacterCategory.LOWERCASE_LETTER || 4771 cat == UCharacterCategory.TITLECASE_LETTER || 4772 cat == UCharacterCategory.MODIFIER_LETTER || 4773 cat == UCharacterCategory.OTHER_LETTER || 4774 cat == UCharacterCategory.NON_SPACING_MARK || 4775 cat == UCharacterCategory.ENCLOSING_MARK || 4776 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4777 } 4778 4779 /** 4780 * {@icu} Returns the Bidirection property of a code point. 
4781 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4782 * property.<br> 4783 * Result returned belongs to the interface 4784 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4785 * @param ch the code point to be determined its direction 4786 * @return direction constant from UCharacterDirection. 4787 * @stable ICU 2.1 4788 */ getDirection(int ch)4789 public static int getDirection(int ch) 4790 { 4791 return UBiDiProps.INSTANCE.getClass(ch); 4792 } 4793 4794 /** 4795 * Determines whether the code point has the "mirrored" property. 4796 * This property is set for characters that are commonly used in 4797 * Right-To-Left contexts and need to be displayed with a "mirrored" 4798 * glyph. 4799 * @param ch code point whose mirror is to be determined 4800 * @return true if the code point has the "mirrored" property 4801 * @stable ICU 2.1 4802 */ isMirrored(int ch)4803 public static boolean isMirrored(int ch) 4804 { 4805 return UBiDiProps.INSTANCE.isMirrored(ch); 4806 } 4807 4808 /** 4809 * {@icu} Maps the specified code point to a "mirror-image" code point. 4810 * For code points with the "mirrored" property, implementations sometimes 4811 * need a "poor man's" mapping to another code point such that the default 4812 * glyph may serve as the mirror-image of the default glyph of the 4813 * specified code point.<br> 4814 * This is useful for text conversion to and from codepages with visual 4815 * order, and for displays without glyph selection capabilities. 4816 * @param ch code point whose mirror is to be retrieved 4817 * @return another code point that may serve as a mirror-image substitute, 4818 * or ch itself if there is no such mapping or ch does not have the 4819 * "mirrored" property 4820 * @stable ICU 2.1 4821 */ getMirror(int ch)4822 public static int getMirror(int ch) 4823 { 4824 return UBiDiProps.INSTANCE.getMirror(ch); 4825 } 4826 4827 /** 4828 * {@icu} Maps the specified character to its paired bracket character. 
4829 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4830 * Otherwise c itself is returned. 4831 * See http://www.unicode.org/reports/tr9/ 4832 * 4833 * @param c the code point to be mapped 4834 * @return the paired bracket code point, 4835 * or c itself if there is no such mapping 4836 * (Bidi_Paired_Bracket_Type=None) 4837 * 4838 * @see UProperty#BIDI_PAIRED_BRACKET 4839 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4840 * @see #getMirror(int) 4841 * @stable ICU 52 4842 */ getBidiPairedBracket(int c)4843 public static int getBidiPairedBracket(int c) { 4844 return UBiDiProps.INSTANCE.getPairedBracket(c); 4845 } 4846 4847 /** 4848 * {@icu} Returns the combining class of the argument codepoint 4849 * @param ch code point whose combining is to be retrieved 4850 * @return the combining class of the codepoint 4851 * @stable ICU 2.1 4852 */ getCombiningClass(int ch)4853 public static int getCombiningClass(int ch) 4854 { 4855 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4856 } 4857 4858 /** 4859 * {@icu} A code point is illegal if and only if 4860 * <ul> 4861 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4862 * <li> A surrogate value, 0xD800 to 0xDFFF 4863 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4864 * </ul> 4865 * Note: legal does not mean that it is assigned in this version of Unicode. 4866 * @param ch code point to determine if it is a legal code point by itself 4867 * @return true if and only if legal. 4868 * @stable ICU 2.1 4869 */ isLegal(int ch)4870 public static boolean isLegal(int ch) 4871 { 4872 if (ch < MIN_VALUE) { 4873 return false; 4874 } 4875 if (ch < Character.MIN_SURROGATE) { 4876 return true; 4877 } 4878 if (ch <= Character.MAX_SURROGATE) { 4879 return false; 4880 } 4881 if (UCharacterUtility.isNonCharacter(ch)) { 4882 return false; 4883 } 4884 return (ch <= MAX_VALUE); 4885 } 4886 4887 /** 4888 * {@icu} A string is legal iff all its code points are legal. 
4889 * A code point is illegal if and only if 4890 * <ul> 4891 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4892 * <li> A surrogate value, 0xD800 to 0xDFFF 4893 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4894 * </ul> 4895 * Note: legal does not mean that it is assigned in this version of Unicode. 4896 * @param str containing code points to examin 4897 * @return true if and only if legal. 4898 * @stable ICU 2.1 4899 */ isLegal(String str)4900 public static boolean isLegal(String str) 4901 { 4902 int size = str.length(); 4903 int codepoint; 4904 for (int i = 0; i < size; i += Character.charCount(codepoint)) 4905 { 4906 codepoint = str.codePointAt(i); 4907 if (!isLegal(codepoint)) { 4908 return false; 4909 } 4910 } 4911 return true; 4912 } 4913 4914 /** 4915 * {@icu} Returns the version of Unicode data used. 4916 * @return the unicode version number used 4917 * @stable ICU 2.1 4918 */ getUnicodeVersion()4919 public static VersionInfo getUnicodeVersion() 4920 { 4921 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 4922 } 4923 4924 /** 4925 * {@icu} Returns the most current Unicode name of the argument code point, or 4926 * null if the character is unassigned or outside the range 4927 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4928 * <br> 4929 * Note calling any methods related to code point names, e.g. get*Name*() 4930 * incurs a one-time initialization cost to construct the name tables. 
4931 * @param ch the code point for which to get the name 4932 * @return most current Unicode name 4933 * @stable ICU 2.1 4934 */ getName(int ch)4935 public static String getName(int ch) 4936 { 4937 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 4938 } 4939 4940 /** 4941 * {@icu} Returns the names for each of the characters in a string 4942 * @param s string to format 4943 * @param separator string to go between names 4944 * @return string of names 4945 * @stable ICU 3.8 4946 */ getName(String s, String separator)4947 public static String getName(String s, String separator) { 4948 if (s.length() == 1) { // handle common case 4949 return getName(s.charAt(0)); 4950 } 4951 int cp; 4952 StringBuilder sb = new StringBuilder(); 4953 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 4954 cp = s.codePointAt(i); 4955 if (i != 0) sb.append(separator); 4956 sb.append(UCharacter.getName(cp)); 4957 } 4958 return sb.toString(); 4959 } 4960 4961 /** 4962 * {@icu} Returns null. 4963 * Used to return the Unicode_1_Name property value which was of little practical value. 4964 * @param ch the code point for which to get the name 4965 * @return null 4966 * @deprecated ICU 49 4967 */ 4968 @Deprecated getName1_0(int ch)4969 public static String getName1_0(int ch) 4970 { 4971 return null; 4972 } 4973 4974 /** 4975 * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and 4976 * getName1_0(int), this method will return a name even for codepoints that 4977 * are not assigned a name in UnicodeData.txt. 4978 * 4979 * <p>The names are returned in the following order. 4980 * <ul> 4981 * <li> Most current Unicode name if there is any 4982 * <li> Unicode 1.0 name if there is any 4983 * <li> Extended name in the form of 4984 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 4985 * </ul> 4986 * Note calling any methods related to code point names, e.g. 
get*Name*() 4987 * incurs a one-time initialization cost to construct the name tables. 4988 * @param ch the code point for which to get the name 4989 * @return a name for the argument codepoint 4990 * @stable ICU 2.6 4991 */ getExtendedName(int ch)4992 public static String getExtendedName(int ch) { 4993 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 4994 } 4995 4996 /** 4997 * {@icu} Returns the corrected name from NameAliases.txt if there is one. 4998 * Returns null if the character is unassigned or outside the range 4999 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 5000 * <br> 5001 * Note calling any methods related to code point names, e.g. get*Name*() 5002 * incurs a one-time initialization cost to construct the name tables. 5003 * @param ch the code point for which to get the name alias 5004 * @return Unicode name alias, or null 5005 * @stable ICU 4.4 5006 */ getNameAlias(int ch)5007 public static String getNameAlias(int ch) 5008 { 5009 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 5010 } 5011 5012 /** 5013 * {@icu} Returns null. 5014 * Used to return the ISO 10646 comment for a character. 5015 * The Unicode ISO_Comment property is deprecated and has no values. 5016 * 5017 * @param ch The code point for which to get the ISO comment. 5018 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 5019 * @return null 5020 * @deprecated ICU 49 5021 */ 5022 @Deprecated getISOComment(int ch)5023 public static String getISOComment(int ch) 5024 { 5025 return null; 5026 } 5027 5028 /** 5029 * {@icu} <p>Finds a Unicode code point by its most current Unicode name and 5030 * return its code point value. All Unicode names are in uppercase. 5031 * Note calling any methods related to code point names, e.g. get*Name*() 5032 * incurs a one-time initialization cost to construct the name tables. 
5033 * @param name most current Unicode character name whose code point is to 5034 * be returned 5035 * @return code point or -1 if name is not found 5036 * @stable ICU 2.1 5037 */ getCharFromName(String name)5038 public static int getCharFromName(String name){ 5039 return UCharacterName.INSTANCE.getCharFromName( 5040 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 5041 } 5042 5043 /** 5044 * {@icu} Returns -1. 5045 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 5046 * its code point value. 5047 * @param name Unicode 1.0 code point name whose code point is to be 5048 * returned 5049 * @return -1 5050 * @deprecated ICU 49 5051 * @see #getName1_0(int) 5052 */ 5053 @Deprecated getCharFromName1_0(String name)5054 public static int getCharFromName1_0(String name){ 5055 return -1; 5056 } 5057 5058 /** 5059 * {@icu} <p>Find a Unicode character by either its name and return its code 5060 * point value. All Unicode names are in uppercase. 5061 * Extended names are all lowercase except for numbers and are contained 5062 * within angle brackets. 5063 * The names are searched in the following order 5064 * <ul> 5065 * <li> Most current Unicode name if there is any 5066 * <li> Unicode 1.0 name if there is any 5067 * <li> Extended name in the form of 5068 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 5069 * </ul> 5070 * Note calling any methods related to code point names, e.g. get*Name*() 5071 * incurs a one-time initialization cost to construct the name tables. 5072 * @param name codepoint name 5073 * @return code point associated with the name or -1 if the name is not 5074 * found. 
5075 * @stable ICU 2.6 5076 */ getCharFromExtendedName(String name)5077 public static int getCharFromExtendedName(String name){ 5078 return UCharacterName.INSTANCE.getCharFromName( 5079 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 5080 } 5081 5082 /** 5083 * {@icu} <p>Find a Unicode character by its corrected name alias and return 5084 * its code point value. All Unicode names are in uppercase. 5085 * Note calling any methods related to code point names, e.g. get*Name*() 5086 * incurs a one-time initialization cost to construct the name tables. 5087 * @param name Unicode name alias whose code point is to be returned 5088 * @return code point or -1 if name is not found 5089 * @stable ICU 4.4 5090 */ getCharFromNameAlias(String name)5091 public static int getCharFromNameAlias(String name){ 5092 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 5093 } 5094 5095 /** 5096 * {@icu} Return the Unicode name for a given property, as given in the 5097 * Unicode database file PropertyAliases.txt. Most properties 5098 * have more than one name. The nameChoice determines which one 5099 * is returned. 5100 * 5101 * In addition, this function maps the property 5102 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 5103 * "General_Category_Mask". These names are not in 5104 * PropertyAliases.txt. 5105 * 5106 * @param property UProperty selector. 5107 * 5108 * @param nameChoice UProperty.NameChoice selector for which name 5109 * to get. All properties have a long name. Most have a short 5110 * name, but some do not. Unicode allows for additional names; if 5111 * present these will be returned by UProperty.NameChoice.LONG + i, 5112 * where i=1, 2,... 5113 * 5114 * @return a name, or null if Unicode explicitly defines no name 5115 * ("n/a") for a given property/nameChoice. If a given nameChoice 5116 * throws an exception, then all larger values of nameChoice will 5117 * throw an exception. 
If null is returned for a given 5118 * nameChoice, then other nameChoice values may return non-null 5119 * results. 5120 * 5121 * @exception IllegalArgumentException thrown if property or 5122 * nameChoice are invalid. 5123 * 5124 * @see UProperty 5125 * @see UProperty.NameChoice 5126 * @stable ICU 2.4 5127 */ getPropertyName(int property, int nameChoice)5128 public static String getPropertyName(int property, 5129 int nameChoice) { 5130 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 5131 } 5132 5133 /** 5134 * {@icu} Return the UProperty selector for a given property name, as 5135 * specified in the Unicode database file PropertyAliases.txt. 5136 * Short, long, and any other variants are recognized. 5137 * 5138 * In addition, this function maps the synthetic names "gcm" / 5139 * "General_Category_Mask" to the property 5140 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 5141 * PropertyAliases.txt. 5142 * 5143 * @param propertyAlias the property name to be matched. The name 5144 * is compared using "loose matching" as described in 5145 * PropertyAliases.txt. 5146 * 5147 * @return a UProperty enum. 5148 * 5149 * @exception IllegalArgumentException thrown if propertyAlias 5150 * is not recognized. 5151 * 5152 * @see UProperty 5153 * @stable ICU 2.4 5154 */ getPropertyEnum(CharSequence propertyAlias)5155 public static int getPropertyEnum(CharSequence propertyAlias) { 5156 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 5157 if (propEnum == UProperty.UNDEFINED) { 5158 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 5159 } 5160 return propEnum; 5161 } 5162 5163 /** 5164 * {@icu} Return the Unicode name for a given property value, as given in 5165 * the Unicode database file PropertyValueAliases.txt. Most 5166 * values have more than one name. The nameChoice determines 5167 * which one is returned. 
5168 * 5169 * Note: Some of the names in PropertyValueAliases.txt can only be 5170 * retrieved using UProperty.GENERAL_CATEGORY_MASK, not 5171 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 5172 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 5173 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 5174 * 5175 * @param property UProperty selector constant. 5176 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 5177 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 5178 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 5179 * If out of range, null is returned. 5180 * 5181 * @param value selector for a value for the given property. In 5182 * general, valid values range from 0 up to some maximum. There 5183 * are a few exceptions: (1.) UProperty.BLOCK values begin at the 5184 * non-zero value BASIC_LATIN.getID(). (2.) 5185 * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous 5186 * and range from 0..240. (3.) UProperty.GENERAL_CATEGORY_MASK values 5187 * are mask values produced by left-shifting 1 by 5188 * UCharacter.getType(). This allows grouped categories such as 5189 * [:L:] to be represented. Mask values are non-contiguous. 5190 * 5191 * @param nameChoice UProperty.NameChoice selector for which name 5192 * to get. All values have a long name. Most have a short name, 5193 * but some do not. Unicode allows for additional names; if 5194 * present these will be returned by UProperty.NameChoice.LONG + i, 5195 * where i=1, 2,... 5196 * 5197 * @return a name, or null if Unicode explicitly defines no name 5198 * ("n/a") for a given property/value/nameChoice. If a given 5199 * nameChoice throws an exception, then all larger values of 5200 * nameChoice will throw an exception. If null is returned for a 5201 * given nameChoice, then other nameChoice values may return 5202 * non-null results. 
5203 * 5204 * @exception IllegalArgumentException thrown if property, value, 5205 * or nameChoice are invalid. 5206 * 5207 * @see UProperty 5208 * @see UProperty.NameChoice 5209 * @stable ICU 2.4 5210 */ getPropertyValueName(int property, int value, int nameChoice)5211 public static String getPropertyValueName(int property, 5212 int value, 5213 int nameChoice) 5214 { 5215 if ((property == UProperty.CANONICAL_COMBINING_CLASS 5216 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 5217 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 5218 && value >= UCharacter.getIntPropertyMinValue( 5219 UProperty.CANONICAL_COMBINING_CLASS) 5220 && value <= UCharacter.getIntPropertyMaxValue( 5221 UProperty.CANONICAL_COMBINING_CLASS) 5222 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 5223 // this is hard coded for the valid cc 5224 // because PropertyValueAliases.txt does not contain all of them 5225 try { 5226 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 5227 nameChoice); 5228 } 5229 catch (IllegalArgumentException e) { 5230 return null; 5231 } 5232 } 5233 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 5234 } 5235 5236 /** 5237 * {@icu} Return the property value integer for a given value name, as 5238 * specified in the Unicode database file PropertyValueAliases.txt. 5239 * Short, long, and any other variants are recognized. 5240 * 5241 * Note: Some of the names in PropertyValueAliases.txt will only be 5242 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 5243 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 5244 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 5245 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 5246 * 5247 * @param property UProperty selector constant. 
5248 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 5249 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 5250 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 5251 * Only these properties can be enumerated. 5252 * 5253 * @param valueAlias the value name to be matched. The name is 5254 * compared using "loose matching" as described in 5255 * PropertyValueAliases.txt. 5256 * 5257 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 5258 * values are mask values produced by left-shifting 1 by 5259 * UCharacter.getType(). This allows grouped categories such as 5260 * [:L:] to be represented. 5261 * 5262 * @see UProperty 5263 * @throws IllegalArgumentException if property is not a valid UProperty 5264 * selector or valueAlias is not a value of this property 5265 * @stable ICU 2.4 5266 */ getPropertyValueEnum(int property, CharSequence valueAlias)5267 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 5268 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 5269 if (propEnum == UProperty.UNDEFINED) { 5270 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 5271 } 5272 return propEnum; 5273 } 5274 5275 /** 5276 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 5277 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 5278 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 5279 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 5280 * @internal 5281 * @deprecated This API is ICU internal only. 
5282 */ 5283 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)5284 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 5285 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 5286 } 5287 5288 5289 /** 5290 * {@icu} Returns a code point corresponding to the two surrogate code units. 5291 * 5292 * @param lead the lead unit 5293 * (In ICU 2.1-69 the type of both parameters was <code>char</code>.) 5294 * @param trail the trail unit 5295 * @return code point if lead and trail form a valid surrogate pair. 5296 * @exception IllegalArgumentException thrown when the code units do 5297 * not form a valid surrogate pair 5298 * @stable ICU 70 5299 * @see #toCodePoint(int, int) 5300 */ getCodePoint(int lead, int trail)5301 public static int getCodePoint(int lead, int trail) 5302 { 5303 if (isHighSurrogate(lead) && isLowSurrogate(trail)) { 5304 return toCodePoint(lead, trail); 5305 } 5306 throw new IllegalArgumentException("Not a valid surrogate pair"); 5307 } 5308 5309 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 5310 /** 5311 * {@icu} Returns a code point corresponding to the two surrogate code units. 5312 * 5313 * @param lead the lead char 5314 * @param trail the trail char 5315 * @return code point if surrogate characters are valid. 5316 * @exception IllegalArgumentException thrown when the code units do 5317 * not form a valid code point 5318 * @stable ICU 2.1 5319 */ getCodePoint(char lead, char trail)5320 public static int getCodePoint(char lead, char trail) 5321 { 5322 return getCodePoint((int) lead, (int) trail); 5323 } 5324 // END Android patch: Keep the `char` version on Android. See ICU-21655 5325 5326 /** 5327 * {@icu} Returns the code point corresponding to the BMP code point. 5328 * 5329 * @param char16 the BMP code point 5330 * @return code point if argument is a valid character. 
5331 * @exception IllegalArgumentException thrown when char16 is not a valid 5332 * code point 5333 * @stable ICU 2.1 5334 */ getCodePoint(char char16)5335 public static int getCodePoint(char char16) 5336 { 5337 if (UCharacter.isLegal(char16)) { 5338 return char16; 5339 } 5340 throw new IllegalArgumentException("Illegal codepoint"); 5341 } 5342 5343 /** 5344 * Returns the uppercase version of the argument string. 5345 * Casing is dependent on the default locale and context-sensitive. 5346 * @param str source string to be performed on 5347 * @return uppercase version of the argument string 5348 * @stable ICU 2.1 5349 */ toUpperCase(String str)5350 public static String toUpperCase(String str) 5351 { 5352 return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str); 5353 } 5354 5355 /** 5356 * Returns the lowercase version of the argument string. 5357 * Casing is dependent on the default locale and context-sensitive 5358 * @param str source string to be performed on 5359 * @return lowercase version of the argument string 5360 * @stable ICU 2.1 5361 */ toLowerCase(String str)5362 public static String toLowerCase(String str) 5363 { 5364 return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str); 5365 } 5366 5367 /** 5368 * <p>Returns the titlecase version of the argument string. 5369 * <p>Position for titlecasing is determined by the argument break 5370 * iterator, hence the user can customize his break iterator for 5371 * a specialized titlecasing. In this case only the forward iteration 5372 * needs to be implemented. 5373 * If the break iterator passed in is null, the default Unicode algorithm 5374 * will be used to determine the titlecase positions. 5375 * 5376 * <p>Only positions returned by the break iterator will be title cased, 5377 * character in between the positions will all be in lower case. 
5378 * <p>Casing is dependent on the default locale and context-sensitive 5379 * @param str source string to be performed on 5380 * @param breakiter break iterator to determine the positions in which 5381 * the character should be title cased. 5382 * @return titlecase version of the argument string 5383 * @stable ICU 2.6 5384 */ toTitleCase(String str, BreakIterator breakiter)5385 public static String toTitleCase(String str, BreakIterator breakiter) 5386 { 5387 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 5388 } 5389 getDefaultCaseLocale()5390 private static int getDefaultCaseLocale() { 5391 return UCaseProps.getCaseLocale(Locale.getDefault()); 5392 } 5393 getCaseLocale(Locale locale)5394 private static int getCaseLocale(Locale locale) { 5395 if (locale == null) { 5396 locale = Locale.getDefault(); 5397 } 5398 return UCaseProps.getCaseLocale(locale); 5399 } 5400 getCaseLocale(ULocale locale)5401 private static int getCaseLocale(ULocale locale) { 5402 if (locale == null) { 5403 locale = ULocale.getDefault(); 5404 } 5405 return UCaseProps.getCaseLocale(locale); 5406 } 5407 5408 /** 5409 * Returns the uppercase version of the argument string. 5410 * Casing is dependent on the argument locale and context-sensitive. 5411 * @param locale which string is to be converted in 5412 * @param str source string to be performed on 5413 * @return uppercase version of the argument string 5414 * @stable ICU 2.1 5415 */ toUpperCase(Locale locale, String str)5416 public static String toUpperCase(Locale locale, String str) 5417 { 5418 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5419 } 5420 5421 /** 5422 * Returns the uppercase version of the argument string. 5423 * Casing is dependent on the argument locale and context-sensitive. 
5424 * @param locale which string is to be converted in 5425 * @param str source string to be performed on 5426 * @return uppercase version of the argument string 5427 * @stable ICU 3.2 5428 */ toUpperCase(ULocale locale, String str)5429 public static String toUpperCase(ULocale locale, String str) { 5430 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5431 } 5432 5433 /** 5434 * Returns the lowercase version of the argument string. 5435 * Casing is dependent on the argument locale and context-sensitive 5436 * @param locale which string is to be converted in 5437 * @param str source string to be performed on 5438 * @return lowercase version of the argument string 5439 * @stable ICU 2.1 5440 */ toLowerCase(Locale locale, String str)5441 public static String toLowerCase(Locale locale, String str) 5442 { 5443 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5444 } 5445 5446 /** 5447 * Returns the lowercase version of the argument string. 5448 * Casing is dependent on the argument locale and context-sensitive 5449 * @param locale which string is to be converted in 5450 * @param str source string to be performed on 5451 * @return lowercase version of the argument string 5452 * @stable ICU 3.2 5453 */ toLowerCase(ULocale locale, String str)5454 public static String toLowerCase(ULocale locale, String str) { 5455 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5456 } 5457 5458 /** 5459 * <p>Returns the titlecase version of the argument string. 5460 * <p>Position for titlecasing is determined by the argument break 5461 * iterator, hence the user can customize his break iterator for 5462 * a specialized titlecasing. In this case only the forward iteration 5463 * needs to be implemented. 5464 * If the break iterator passed in is null, the default Unicode algorithm 5465 * will be used to determine the titlecase positions. 
5466 * 5467 * <p>Only positions returned by the break iterator will be title cased, 5468 * character in between the positions will all be in lower case. 5469 * <p>Casing is dependent on the argument locale and context-sensitive 5470 * @param locale which string is to be converted in 5471 * @param str source string to be performed on 5472 * @param breakiter break iterator to determine the positions in which 5473 * the character should be title cased. 5474 * @return titlecase version of the argument string 5475 * @stable ICU 2.6 5476 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)5477 public static String toTitleCase(Locale locale, String str, 5478 BreakIterator breakiter) 5479 { 5480 return toTitleCase(locale, str, breakiter, 0); 5481 } 5482 5483 /** 5484 * <p>Returns the titlecase version of the argument string. 5485 * <p>Position for titlecasing is determined by the argument break 5486 * iterator, hence the user can customize his break iterator for 5487 * a specialized titlecasing. In this case only the forward iteration 5488 * needs to be implemented. 5489 * If the break iterator passed in is null, the default Unicode algorithm 5490 * will be used to determine the titlecase positions. 5491 * 5492 * <p>Only positions returned by the break iterator will be title cased, 5493 * character in between the positions will all be in lower case. 5494 * <p>Casing is dependent on the argument locale and context-sensitive 5495 * @param locale which string is to be converted in 5496 * @param str source string to be performed on 5497 * @param titleIter break iterator to determine the positions in which 5498 * the character should be title cased. 
5499 * @return titlecase version of the argument string 5500 * @stable ICU 3.2 5501 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)5502 public static String toTitleCase(ULocale locale, String str, 5503 BreakIterator titleIter) { 5504 return toTitleCase(locale, str, titleIter, 0); 5505 } 5506 5507 /** 5508 * <p>Returns the titlecase version of the argument string. 5509 * <p>Position for titlecasing is determined by the argument break 5510 * iterator, hence the user can customize his break iterator for 5511 * a specialized titlecasing. In this case only the forward iteration 5512 * needs to be implemented. 5513 * If the break iterator passed in is null, the default Unicode algorithm 5514 * will be used to determine the titlecase positions. 5515 * 5516 * <p>Only positions returned by the break iterator will be title cased, 5517 * character in between the positions will all be in lower case. 5518 * <p>Casing is dependent on the argument locale and context-sensitive 5519 * @param locale which string is to be converted in 5520 * @param str source string to be performed on 5521 * @param titleIter break iterator to determine the positions in which 5522 * the character should be title cased. 
5523 * @param options bit set to modify the titlecasing operation 5524 * @return titlecase version of the argument string 5525 * @stable ICU 3.8 5526 * @see #TITLECASE_NO_LOWERCASE 5527 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5528 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5529 public static String toTitleCase(ULocale locale, String str, 5530 BreakIterator titleIter, int options) { 5531 if (titleIter == null && locale == null) { 5532 locale = ULocale.getDefault(); 5533 } 5534 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5535 titleIter.setText(str); 5536 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5537 } 5538 5539 /** 5540 * {@icu} <p>Returns the titlecase version of the argument string. 5541 * <p>Position for titlecasing is determined by the argument break 5542 * iterator, hence the user can customize his break iterator for 5543 * a specialized titlecasing. In this case only the forward iteration 5544 * needs to be implemented. 5545 * If the break iterator passed in is null, the default Unicode algorithm 5546 * will be used to determine the titlecase positions. 5547 * 5548 * <p>Only positions returned by the break iterator will be title cased, 5549 * character in between the positions will all be in lower case. 5550 * <p>Casing is dependent on the argument locale and context-sensitive 5551 * @param locale which string is to be converted in 5552 * @param str source string to be performed on 5553 * @param titleIter break iterator to determine the positions in which 5554 * the character should be title cased. 
5555 * @param options bit set to modify the titlecasing operation 5556 * @return titlecase version of the argument string 5557 * @see #TITLECASE_NO_LOWERCASE 5558 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5559 * @stable ICU 54 5560 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5561 public static String toTitleCase(Locale locale, String str, 5562 BreakIterator titleIter, 5563 int options) { 5564 if (titleIter == null && locale == null) { 5565 locale = Locale.getDefault(); 5566 } 5567 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5568 titleIter.setText(str); 5569 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5570 } 5571 5572 /** 5573 * {@icu} The given character is mapped to its case folding equivalent according 5574 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5575 * folding equivalent, the character itself is returned. 5576 * 5577 * <p>This function only returns the simple, single-code point case mapping. 5578 * Full case mappings should be used whenever possible because they produce 5579 * better results by working on whole strings. 5580 * They can map to a result string with a different length as appropriate. 5581 * Full case mappings are applied by the case mapping functions 5582 * that take String parameters rather than code points (int). 5583 * See also the User Guide chapter on C/POSIX migration: 5584 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 5585 * 5586 * @param ch the character to be converted 5587 * @param defaultmapping Indicates whether the default mappings defined in 5588 * CaseFolding.txt are to be used, otherwise the 5589 * mappings for dotted I and dotless i marked with 5590 * 'T' in CaseFolding.txt are included. 5591 * @return the case folding equivalent of the character, if 5592 * any; otherwise the character itself. 
5593 * @see #foldCase(String, boolean) 5594 * @stable ICU 2.1 5595 */ foldCase(int ch, boolean defaultmapping)5596 public static int foldCase(int ch, boolean defaultmapping) { 5597 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5598 } 5599 5600 /** 5601 * {@icu} The given string is mapped to its case folding equivalent according to 5602 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5603 * folding equivalent, the character itself is returned. 5604 * "Full", multiple-code point case folding mappings are returned here. 5605 * For "simple" single-code point mappings use the API 5606 * foldCase(int ch, boolean defaultmapping). 5607 * @param str the String to be converted 5608 * @param defaultmapping Indicates whether the default mappings defined in 5609 * CaseFolding.txt are to be used, otherwise the 5610 * mappings for dotted I and dotless i marked with 5611 * 'T' in CaseFolding.txt are included. 5612 * @return the case folding equivalent of the character, if 5613 * any; otherwise the character itself. 5614 * @see #foldCase(int, boolean) 5615 * @stable ICU 2.1 5616 */ foldCase(String str, boolean defaultmapping)5617 public static String foldCase(String str, boolean defaultmapping) { 5618 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5619 } 5620 5621 /** 5622 * {@icu} Option value for case folding: use default mappings defined in 5623 * CaseFolding.txt. 5624 * @stable ICU 2.6 5625 */ 5626 public static final int FOLD_CASE_DEFAULT = 0x0000; 5627 /** 5628 * {@icu} Option value for case folding: 5629 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 5630 * and dotless i appropriately for Turkic languages (tr, az). 5631 * 5632 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 5633 * are to be included for default mappings and 5634 * excluded for the Turkic-specific mappings. 
5635 * 5636 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 5637 * are to be excluded for default mappings and 5638 * included for the Turkic-specific mappings. 5639 * 5640 * @stable ICU 2.6 5641 */ 5642 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 5643 5644 /** 5645 * {@icu} The given character is mapped to its case folding equivalent according 5646 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5647 * folding equivalent, the character itself is returned. 5648 * 5649 * <p>This function only returns the simple, single-code point case mapping. 5650 * Full case mappings should be used whenever possible because they produce 5651 * better results by working on whole strings. 5652 * They can map to a result string with a different length as appropriate. 5653 * Full case mappings are applied by the case mapping functions 5654 * that take String parameters rather than code points (int). 5655 * See also the User Guide chapter on C/POSIX migration: 5656 * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings 5657 * 5658 * @param ch the character to be converted 5659 * @param options A bit set for special processing. Currently the recognised options 5660 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5661 * @return the case folding equivalent of the character, if any; otherwise the 5662 * character itself. 5663 * @see #foldCase(String, boolean) 5664 * @stable ICU 2.6 5665 */ foldCase(int ch, int options)5666 public static int foldCase(int ch, int options) { 5667 return UCaseProps.INSTANCE.fold(ch, options); 5668 } 5669 5670 /** 5671 * {@icu} The given string is mapped to its case folding equivalent according to 5672 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5673 * folding equivalent, the character itself is returned. 5674 * "Full", multiple-code point case folding mappings are returned here. 
5675 * For "simple" single-code point mappings use the API 5676 * foldCase(int ch, boolean defaultmapping). 5677 * @param str the String to be converted 5678 * @param options A bit set for special processing. Currently the recognised options 5679 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5680 * @return the case folding equivalent of the character, if any; otherwise the 5681 * character itself. 5682 * @see #foldCase(int, boolean) 5683 * @stable ICU 2.6 5684 */ foldCase(String str, int options)5685 public static final String foldCase(String str, int options) { 5686 return CaseMapImpl.fold(options, str); 5687 } 5688 5689 /** 5690 * {@icu} Returns the numeric value of a Han character. 5691 * 5692 * <p>This returns the value of Han 'numeric' code points, 5693 * including those for zero, ten, hundred, thousand, ten thousand, 5694 * and hundred million. 5695 * This includes both the standard and 'checkwriting' 5696 * characters, the 'big circle' zero character, and the standard 5697 * zero character. 5698 * 5699 * <p>Note: The Unicode Standard has numeric values for more 5700 * Han characters recognized by this method 5701 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 5702 * and a {@link com.ibm.icu.text.NumberFormat} can be used with 5703 * a Chinese {@link com.ibm.icu.text.NumberingSystem}. 5704 * 5705 * @param ch code point to query 5706 * @return value if it is a Han 'numeric character,' otherwise return -1. 
5707 * @stable ICU 2.4 5708 */ getHanNumericValue(int ch)5709 public static int getHanNumericValue(int ch) 5710 { 5711 switch(ch) 5712 { 5713 case IDEOGRAPHIC_NUMBER_ZERO_ : 5714 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 5715 return 0; // Han Zero 5716 case CJK_IDEOGRAPH_FIRST_ : 5717 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5718 return 1; // Han One 5719 case CJK_IDEOGRAPH_SECOND_ : 5720 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5721 return 2; // Han Two 5722 case CJK_IDEOGRAPH_THIRD_ : 5723 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5724 return 3; // Han Three 5725 case CJK_IDEOGRAPH_FOURTH_ : 5726 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5727 return 4; // Han Four 5728 case CJK_IDEOGRAPH_FIFTH_ : 5729 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5730 return 5; // Han Five 5731 case CJK_IDEOGRAPH_SIXTH_ : 5732 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5733 return 6; // Han Six 5734 case CJK_IDEOGRAPH_SEVENTH_ : 5735 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5736 return 7; // Han Seven 5737 case CJK_IDEOGRAPH_EIGHTH_ : 5738 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5739 return 8; // Han Eight 5740 case CJK_IDEOGRAPH_NINETH_ : 5741 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5742 return 9; // Han Nine 5743 case CJK_IDEOGRAPH_TEN_ : 5744 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5745 return 10; 5746 case CJK_IDEOGRAPH_HUNDRED_ : 5747 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5748 return 100; 5749 case CJK_IDEOGRAPH_THOUSAND_ : 5750 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5751 return 1000; 5752 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5753 return 10000; 5754 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5755 return 100000000; 5756 } 5757 return -1; // no value 5758 } 5759 5760 /** 5761 * {@icu} <p>Returns an iterator for character types, iterating over codepoints. 
5762 * <p>Example of use:<br> 5763 * <pre> 5764 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5765 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5766 * while (iterator.next(element)) { 5767 * System.out.println("Codepoint \\u" + 5768 * Integer.toHexString(element.start) + 5769 * " to codepoint \\u" + 5770 * Integer.toHexString(element.limit - 1) + 5771 * " has the character type " + 5772 * element.value); 5773 * } 5774 * </pre> 5775 * @return an iterator 5776 * @stable ICU 2.6 5777 */ getTypeIterator()5778 public static RangeValueIterator getTypeIterator() 5779 { 5780 return new UCharacterTypeIterator(); 5781 } 5782 5783 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()5784 UCharacterTypeIterator() { 5785 reset(); 5786 } 5787 5788 // implements RangeValueIterator 5789 @Override next(Element element)5790 public boolean next(Element element) { 5791 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5792 element.start=range.startCodePoint; 5793 element.limit=range.endCodePoint+1; 5794 element.value=range.value; 5795 return true; 5796 } else { 5797 return false; 5798 } 5799 } 5800 5801 // implements RangeValueIterator 5802 @Override reset()5803 public void reset() { 5804 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5805 } 5806 5807 private Iterator<Trie2.Range> trieIterator; 5808 private Trie2.Range range; 5809 5810 private static final class MaskType implements Trie2.ValueMapper { 5811 // Extracts the general category ("character type") from the trie value. 5812 @Override map(int value)5813 public int map(int value) { 5814 return value & UCharacterProperty.TYPE_MASK; 5815 } 5816 } 5817 private static final MaskType MASK_TYPE=new MaskType(); 5818 } 5819 5820 /** 5821 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 
5822 * <p>This API only gets the iterator for the modern, most up-to-date 5823 * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or 5824 * for extended names use getExtendedNameIterator(). 5825 * <p>Example of use:<br> 5826 * <pre> 5827 * ValueIterator iterator = UCharacter.getNameIterator(); 5828 * ValueIterator.Element element = new ValueIterator.Element(); 5829 * while (iterator.next(element)) { 5830 * System.out.println("Codepoint \\u" + 5831 * Integer.toHexString(element.codepoint) + 5832 * " has the name " + (String)element.value); 5833 * } 5834 * </pre> 5835 * <p>The maximal range which the name iterator iterates is from 5836 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 5837 * @return an iterator 5838 * @stable ICU 2.6 5839 */ getNameIterator()5840 public static ValueIterator getNameIterator(){ 5841 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5842 UCharacterNameChoice.UNICODE_CHAR_NAME); 5843 } 5844 5845 /** 5846 * {@icu} Returns an empty iterator. 5847 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 5848 * @return an empty iterator 5849 * @deprecated ICU 49 5850 * @see #getName1_0(int) 5851 */ 5852 @Deprecated getName1_0Iterator()5853 public static ValueIterator getName1_0Iterator(){ 5854 return new DummyValueIterator(); 5855 } 5856 5857 private static final class DummyValueIterator implements ValueIterator { 5858 @Override next(Element element)5859 public boolean next(Element element) { return false; } 5860 @Override reset()5861 public void reset() {} 5862 @Override setRange(int start, int limit)5863 public void setRange(int start, int limit) {} 5864 } 5865 5866 /** 5867 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5868 * <p>This API only gets the iterator for the extended names. 5869 * For modern, most up-to-date Unicode names use getNameIterator() or 5870 * for older 1.0 Unicode names use get1_0NameIterator(). 
5871 * <p>Example of use:<br> 5872 * <pre> 5873 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5874 * ValueIterator.Element element = new ValueIterator.Element(); 5875 * while (iterator.next(element)) { 5876 * System.out.println("Codepoint \\u" + 5877 * Integer.toHexString(element.codepoint) + 5878 * " has the name " + (String)element.value); 5879 * } 5880 * </pre> 5881 * <p>The maximal range which the name iterator iterates is from 5882 * @return an iterator 5883 * @stable ICU 2.6 5884 */ getExtendedNameIterator()5885 public static ValueIterator getExtendedNameIterator(){ 5886 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5887 UCharacterNameChoice.EXTENDED_CHAR_NAME); 5888 } 5889 5890 /** 5891 * {@icu} Returns the "age" of the code point. 5892 * <p>The "age" is the Unicode version when the code point was first 5893 * designated (as a non-character or for Private Use) or assigned a 5894 * character. 5895 * <p>This can be useful to avoid emitting code points to receiving 5896 * processes that do not accept newer characters. 5897 * <p>The data is from the UCD file DerivedAge.txt. 5898 * @param ch The code point. 5899 * @return the Unicode version number 5900 * @stable ICU 2.6 5901 */ getAge(int ch)5902 public static VersionInfo getAge(int ch) 5903 { 5904 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5905 throw new IllegalArgumentException("Codepoint out of bounds"); 5906 } 5907 return UCharacterProperty.INSTANCE.getAge(ch); 5908 } 5909 5910 /** 5911 * {@icu} Check a binary Unicode property for a code point. 5912 * <p>Unicode, especially in version 3.2, defines many more properties 5913 * than the original set in UnicodeData.txt. 5914 * <p>This API is intended to reflect Unicode properties as defined in 5915 * the Unicode Character Database (UCD) and Unicode Technical Reports 5916 * (UTR). 5917 * <p>For details about the properties see 5918 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 
5919 * <p>For names of Unicode properties see the UCD file 5920 * PropertyAliases.txt. 5921 * <p>This API does not check the validity of the codepoint. 5922 * <p>Important: If ICU is built with UCD files from Unicode versions 5923 * below 3.2, then properties marked with "new" are not or 5924 * not fully available. 5925 * @param ch code point to test. 5926 * @param property selector constant from com.ibm.icu.lang.UProperty, 5927 * identifies which binary property to check. 5928 * @return true or false according to the binary Unicode property value 5929 * for ch. Also false if property is out of bounds or if the 5930 * Unicode version does not have data for the property at all, or 5931 * not for this code point. 5932 * @see com.ibm.icu.lang.UProperty 5933 * @see CharacterProperties#getBinaryPropertySet(int) 5934 * @stable ICU 2.6 5935 */ hasBinaryProperty(int ch, int property)5936 public static boolean hasBinaryProperty(int ch, int property) 5937 { 5938 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5939 } 5940 5941 /** 5942 * {@icu} Returns true if the property is true for the string. 5943 * Same as {@link #hasBinaryProperty(int, int)} 5944 * if the string contains exactly one code point. 5945 * 5946 * <p>Most properties apply only to single code points. 5947 * <a href="https://www.unicode.org/reports/tr51/#Emoji_Sets">UTS #51 Unicode Emoji</a> 5948 * defines several properties of strings. 5949 * 5950 * @param s String to test. 5951 * @param property UProperty selector constant, identifies which binary property to check. 5952 * Must be BINARY_START<=which<BINARY_LIMIT. 5953 * @return true or false according to the binary Unicode property value for the string. 5954 * Also false if <code>property</code> is out of bounds or if the Unicode version 5955 * does not have data for the property at all. 
5956 * 5957 * @see com.ibm.icu.lang.UProperty 5958 * @see CharacterProperties#getBinaryPropertySet(int) 5959 * @draft ICU 70 5960 */ hasBinaryProperty(CharSequence s, int property)5961 public static boolean hasBinaryProperty(CharSequence s, int property) { 5962 int length = s.length(); 5963 if (length == 1) { 5964 return hasBinaryProperty(s.charAt(0), property); // single code point 5965 } else if (length == 2) { 5966 // first code point 5967 int c = Character.codePointAt(s, 0); 5968 if (Character.charCount(c) == length) { 5969 return hasBinaryProperty(c, property); // single code point 5970 } 5971 } 5972 // Only call into EmojiProps for a relevant property, 5973 // so that we not unnecessarily try to load its data file. 5974 return UProperty.BASIC_EMOJI <= property && property <= UProperty.RGI_EMOJI && 5975 EmojiProps.INSTANCE.hasBinaryProperty(s, property); 5976 } 5977 5978 /** 5979 * {@icu} <p>Check if a code point has the Alphabetic Unicode property. 5980 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 5981 * <p>Different from UCharacter.isLetter(ch)! 5982 * @stable ICU 2.6 5983 * @param ch codepoint to be tested 5984 */ isUAlphabetic(int ch)5985 public static boolean isUAlphabetic(int ch) 5986 { 5987 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5988 } 5989 5990 /** 5991 * {@icu} <p>Check if a code point has the Lowercase Unicode property. 5992 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 5993 * <p>This is different from UCharacter.isLowerCase(ch)! 5994 * @param ch codepoint to be tested 5995 * @stable ICU 2.6 5996 */ isULowercase(int ch)5997 public static boolean isULowercase(int ch) 5998 { 5999 return hasBinaryProperty(ch, UProperty.LOWERCASE); 6000 } 6001 6002 /** 6003 * {@icu} <p>Check if a code point has the Uppercase Unicode property. 6004 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 6005 * <p>This is different from UCharacter.isUpperCase(ch)! 
6006 * @param ch codepoint to be tested 6007 * @stable ICU 2.6 6008 */ isUUppercase(int ch)6009 public static boolean isUUppercase(int ch) 6010 { 6011 return hasBinaryProperty(ch, UProperty.UPPERCASE); 6012 } 6013 6014 /** 6015 * {@icu} <p>Check if a code point has the White_Space Unicode property. 6016 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 6017 * <p>This is different from both UCharacter.isSpace(ch) and 6018 * UCharacter.isWhitespace(ch)! 6019 * @param ch codepoint to be tested 6020 * @stable ICU 2.6 6021 */ isUWhiteSpace(int ch)6022 public static boolean isUWhiteSpace(int ch) 6023 { 6024 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 6025 } 6026 6027 /** 6028 * {@icu} Returns the property value for a Unicode property type of a code point. 6029 * Also returns binary and mask property values. 6030 * <p>Unicode, especially in version 3.2, defines many more properties than 6031 * the original set in UnicodeData.txt. 6032 * <p>The properties APIs are intended to reflect Unicode properties as 6033 * defined in the Unicode Character Database (UCD) and Unicode Technical 6034 * Reports (UTR). For details about the properties see 6035 * http://www.unicode.org/. 6036 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 6037 * 6038 * <pre> 6039 * Sample usage: 6040 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 6041 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 6042 * boolean b = (ideo == 1) ? true : false; 6043 * </pre> 6044 * @param ch code point to test. 6045 * @param type UProperty selector constant, identifies which binary 6046 * property to check. Must be 6047 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 6048 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 6049 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 
6050 * @return numeric value that is directly the property value or, 6051 * for enumerated properties, corresponds to the numeric value of 6052 * the enumerated constant of the respective property value type 6053 * ({@link ECharacterCategory}, {@link ECharacterDirection}, 6054 * {@link DecompositionType}, etc.). 6055 * Returns 0 or 1 (for false / true) for binary Unicode properties. 6056 * Returns a bit-mask for mask properties. 6057 * Returns 0 if 'type' is out of bounds or if the Unicode version 6058 * does not have data for the property at all, or not for this code 6059 * point. 6060 * @see UProperty 6061 * @see #hasBinaryProperty 6062 * @see #getIntPropertyMinValue 6063 * @see #getIntPropertyMaxValue 6064 * @see CharacterProperties#getIntPropertyMap(int) 6065 * @see #getUnicodeVersion 6066 * @stable ICU 2.4 6067 */ getIntPropertyValue(int ch, int type)6068 public static int getIntPropertyValue(int ch, int type) 6069 { 6070 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 6071 } 6072 /** 6073 * {@icu} Returns a string version of the property value. 6074 * @param propertyEnum The property enum value. 6075 * @param codepoint The codepoint value. 6076 * @param nameChoice The choice of the name. 6077 * @return value as string 6078 * @internal 6079 * @deprecated This API is ICU internal only. 
6080 */ 6081 @Deprecated 6082 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)6083 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 6084 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 6085 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 6086 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 6087 nameChoice); 6088 } 6089 if (propertyEnum == UProperty.NUMERIC_VALUE) { 6090 return String.valueOf(getUnicodeNumericValue(codepoint)); 6091 } 6092 // otherwise must be string property 6093 switch (propertyEnum) { 6094 case UProperty.AGE: return getAge(codepoint).toString(); 6095 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 6096 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 6097 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 6098 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 6099 case UProperty.NAME: return getName(codepoint); 6100 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 6101 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 6102 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 6103 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 6104 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 6105 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 6106 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 6107 } 6108 throw new IllegalArgumentException("Illegal Property Enum"); 6109 } 6110 ///CLOVER:ON 6111 6112 /** 6113 * {@icu} Returns the minimum value for an integer/binary Unicode property type. 
6114 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 6115 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 6116 * @param type UProperty selector constant, identifies which binary 6117 * property to check. Must be 6118 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 6119 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 6120 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 6121 * for a Unicode property. 0 if the property 6122 * selector 'type' is out of range. 6123 * @see UProperty 6124 * @see #hasBinaryProperty 6125 * @see #getUnicodeVersion 6126 * @see #getIntPropertyMaxValue 6127 * @see #getIntPropertyValue 6128 * @stable ICU 2.4 6129 */ getIntPropertyMinValue(int type)6130 public static int getIntPropertyMinValue(int type){ 6131 6132 return 0; // undefined; and: all other properties have a minimum value of 0 6133 } 6134 6135 6136 /** 6137 * {@icu} Returns the maximum value for an integer/binary Unicode property. 6138 * Can be used together with UCharacter.getIntPropertyMinValue(int) 6139 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 6140 * Examples for min/max values (for Unicode 3.2): 6141 * <ul> 6142 * <li> UProperty.BIDI_CLASS: 0/18 6143 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 6144 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 6145 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 6146 * </ul> 6147 * For undefined UProperty constant values, min/max values will be 0/-1. 6148 * @param type UProperty selector constant, identifies which binary 6149 * property to check. Must be 6150 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 6151 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 6152 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 6153 * property. <= 0 if the property selector 'type' is out of range. 
6154 * @see UProperty 6155 * @see #hasBinaryProperty 6156 * @see #getUnicodeVersion 6157 * @see #getIntPropertyMaxValue 6158 * @see #getIntPropertyValue 6159 * @stable ICU 2.4 6160 */ getIntPropertyMaxValue(int type)6161 public static int getIntPropertyMaxValue(int type) 6162 { 6163 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 6164 } 6165 6166 /** 6167 * Provide the java.lang.Character forDigit API, for convenience. 6168 * @stable ICU 3.0 6169 */ forDigit(int digit, int radix)6170 public static char forDigit(int digit, int radix) { 6171 return java.lang.Character.forDigit(digit, radix); 6172 } 6173 6174 // JDK 1.5 API coverage 6175 6176 /** 6177 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 6178 * 6179 * @stable ICU 3.0 6180 */ 6181 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 6182 6183 /** 6184 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 6185 * 6186 * @stable ICU 3.0 6187 */ 6188 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 6189 6190 /** 6191 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 6192 * 6193 * @stable ICU 3.0 6194 */ 6195 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 6196 6197 /** 6198 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 6199 * 6200 * @stable ICU 3.0 6201 */ 6202 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 6203 6204 /** 6205 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 6206 * 6207 * @stable ICU 3.0 6208 */ 6209 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 6210 6211 /** 6212 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 6213 * 6214 * @stable ICU 3.0 6215 */ 6216 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 6217 6218 /** 6219 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 
6220 * 6221 * @stable ICU 3.0 6222 */ 6223 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 6224 6225 /** 6226 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 6227 * 6228 * @stable ICU 3.0 6229 */ 6230 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 6231 6232 /** 6233 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 6234 * 6235 * @stable ICU 3.0 6236 */ 6237 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 6238 6239 /** 6240 * Equivalent to {@link Character#isValidCodePoint}. 6241 * 6242 * @param cp the code point to check 6243 * @return true if cp is a valid code point 6244 * @stable ICU 3.0 6245 */ isValidCodePoint(int cp)6246 public static final boolean isValidCodePoint(int cp) { 6247 return cp >= 0 && cp <= MAX_CODE_POINT; 6248 } 6249 6250 /** 6251 * Same as {@link Character#isSupplementaryCodePoint}. 6252 * 6253 * @param cp the code point to check 6254 * @return true if cp is a supplementary code point 6255 * @stable ICU 3.0 6256 */ isSupplementaryCodePoint(int cp)6257 public static final boolean isSupplementaryCodePoint(int cp) { 6258 return Character.isSupplementaryCodePoint(cp); 6259 } 6260 6261 /** 6262 * Same as {@link Character#isHighSurrogate}, 6263 * except that the ICU version accepts <code>int</code> for code points. 6264 * 6265 * @param codePoint the code point to check 6266 * (In ICU 3.0-69 the type of this parameter was <code>char</code>.) 6267 * @return true if codePoint is a high (lead) surrogate 6268 * @stable ICU 70 6269 */ isHighSurrogate(int codePoint)6270 public static boolean isHighSurrogate(int codePoint) { 6271 return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS; 6272 } 6273 6274 // BEGIN Android patch: Keep the `char` version on Android. 
See ICU-21655 6275 /** 6276 * Same as {@link Character#isHighSurrogate}, 6277 * 6278 * @param ch the char to check 6279 * @return true if ch is a high (lead) surrogate 6280 * @stable ICU 3.0 6281 */ isHighSurrogate(char ch)6282 public static boolean isHighSurrogate(char ch) { 6283 return isHighSurrogate((int) ch); 6284 } 6285 // END Android patch: Keep the `char` version on Android. See ICU-21655 6286 6287 /** 6288 * Same as {@link Character#isLowSurrogate}, 6289 * except that the ICU version accepts <code>int</code> for code points. 6290 * 6291 * @param codePoint the code point to check 6292 * (In ICU 3.0-69 the type of this parameter was <code>char</code>.) 6293 * @return true if codePoint is a low (trail) surrogate 6294 * @stable ICU 70 6295 */ isLowSurrogate(int codePoint)6296 public static boolean isLowSurrogate(int codePoint) { 6297 return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS; 6298 } 6299 6300 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 6301 /** 6302 * Same as {@link Character#isLowSurrogate}, 6303 * 6304 * @param ch the char to check 6305 * @return true if ch is a low (trail) surrogate 6306 * @stable ICU 3.0 6307 */ isLowSurrogate(char ch)6308 public static boolean isLowSurrogate(char ch) { 6309 return isLowSurrogate((int) ch); 6310 } 6311 // END Android patch: Keep the `char` version on Android. See ICU-21655 6312 6313 /** 6314 * Same as {@link Character#isSurrogatePair}, 6315 * except that the ICU version accepts <code>int</code> for code points. 6316 * 6317 * @param high the high (lead) unit 6318 * (In ICU 3.0-69 the type of both parameters was <code>char</code>.) 
6319 * @param low the low (trail) unit 6320 * @return true if high, low form a surrogate pair 6321 * @stable ICU 70 6322 */ isSurrogatePair(int high, int low)6323 public static final boolean isSurrogatePair(int high, int low) { 6324 return isHighSurrogate(high) && isLowSurrogate(low); 6325 } 6326 6327 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 6328 /** 6329 * Same as {@link Character#isSurrogatePair}. 6330 * 6331 * @param high the high (lead) char 6332 * @param low the low (trail) char 6333 * @return true if high, low form a surrogate pair 6334 * @stable ICU 3.0 6335 */ isSurrogatePair(char high, char low)6336 public static final boolean isSurrogatePair(char high, char low) { 6337 return isSurrogatePair((int) high, (int) low); 6338 } 6339 // END Android patch: Keep the `char` version on Android. See ICU-21655 6340 6341 /** 6342 * Same as {@link Character#charCount}. 6343 * Returns the number of chars needed to represent the code point (1 or 2). 6344 * This does not check the code point for validity. 6345 * 6346 * @param cp the code point to check 6347 * @return the number of chars needed to represent the code point 6348 * @stable ICU 3.0 6349 */ charCount(int cp)6350 public static int charCount(int cp) { 6351 return Character.charCount(cp); 6352 } 6353 6354 /** 6355 * Same as {@link Character#toCodePoint}, 6356 * except that the ICU version accepts <code>int</code> for code points. 6357 * Returns the code point represented by the two surrogate code units. 6358 * This does not check the surrogate pair for validity. 6359 * 6360 * @param high the high (lead) surrogate 6361 * (In ICU 3.0-69 the type of both parameters was <code>char</code>.) 
6362 * @param low the low (trail) surrogate 6363 * @return the code point formed by the surrogate pair 6364 * @stable ICU 70 6365 * @see #getCodePoint(int, int) 6366 */ toCodePoint(int high, int low)6367 public static final int toCodePoint(int high, int low) { 6368 // see ICU4C U16_GET_SUPPLEMENTARY() 6369 return (high << 10) + low - U16_SURROGATE_OFFSET; 6370 } 6371 6372 // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655 6373 /** 6374 * Same as {@link Character#toCodePoint}. 6375 * Returns the code point represented by the two surrogate code units. 6376 * This does not check the surrogate pair for validity. 6377 * 6378 * @param high the high (lead) surrogate 6379 * @param low the low (trail) surrogate 6380 * @return the code point formed by the surrogate pair 6381 * @stable ICU 3.0 6382 */ toCodePoint(char high, char low)6383 public static final int toCodePoint(char high, char low) { 6384 return toCodePoint((int) high, (int) low); 6385 } 6386 // END Android patch: Keep the `char` version on Android. See ICU-21655 6387 6388 /** 6389 * Same as {@link Character#codePointAt(CharSequence, int)}. 6390 * Returns the code point at index. 6391 * This examines only the characters at index and index+1. 6392 * 6393 * @param seq the characters to check 6394 * @param index the index of the first or only char forming the code point 6395 * @return the code point at the index 6396 * @stable ICU 3.0 6397 */ codePointAt(CharSequence seq, int index)6398 public static final int codePointAt(CharSequence seq, int index) { 6399 char c1 = seq.charAt(index++); 6400 if (isHighSurrogate(c1)) { 6401 if (index < seq.length()) { 6402 char c2 = seq.charAt(index); 6403 if (isLowSurrogate(c2)) { 6404 return toCodePoint(c1, c2); 6405 } 6406 } 6407 } 6408 return c1; 6409 } 6410 6411 /** 6412 * Same as {@link Character#codePointAt(char[], int)}. 6413 * Returns the code point at index. 6414 * This examines only the characters at index and index+1. 
6415 * 6416 * @param text the characters to check 6417 * @param index the index of the first or only char forming the code point 6418 * @return the code point at the index 6419 * @stable ICU 3.0 6420 */ codePointAt(char[] text, int index)6421 public static final int codePointAt(char[] text, int index) { 6422 char c1 = text[index++]; 6423 if (isHighSurrogate(c1)) { 6424 if (index < text.length) { 6425 char c2 = text[index]; 6426 if (isLowSurrogate(c2)) { 6427 return toCodePoint(c1, c2); 6428 } 6429 } 6430 } 6431 return c1; 6432 } 6433 6434 /** 6435 * Same as {@link Character#codePointAt(char[], int, int)}. 6436 * Returns the code point at index. 6437 * This examines only the characters at index and index+1. 6438 * 6439 * @param text the characters to check 6440 * @param index the index of the first or only char forming the code point 6441 * @param limit the limit of the valid text 6442 * @return the code point at the index 6443 * @stable ICU 3.0 6444 */ codePointAt(char[] text, int index, int limit)6445 public static final int codePointAt(char[] text, int index, int limit) { 6446 if (index >= limit || limit > text.length) { 6447 throw new IndexOutOfBoundsException(); 6448 } 6449 char c1 = text[index++]; 6450 if (isHighSurrogate(c1)) { 6451 if (index < limit) { 6452 char c2 = text[index]; 6453 if (isLowSurrogate(c2)) { 6454 return toCodePoint(c1, c2); 6455 } 6456 } 6457 } 6458 return c1; 6459 } 6460 6461 /** 6462 * Same as {@link Character#codePointBefore(CharSequence, int)}. 6463 * Return the code point before index. 6464 * This examines only the characters at index-1 and index-2. 
6465 * 6466 * @param seq the characters to check 6467 * @param index the index after the last or only char forming the code point 6468 * @return the code point before the index 6469 * @stable ICU 3.0 6470 */ codePointBefore(CharSequence seq, int index)6471 public static final int codePointBefore(CharSequence seq, int index) { 6472 char c2 = seq.charAt(--index); 6473 if (isLowSurrogate(c2)) { 6474 if (index > 0) { 6475 char c1 = seq.charAt(--index); 6476 if (isHighSurrogate(c1)) { 6477 return toCodePoint(c1, c2); 6478 } 6479 } 6480 } 6481 return c2; 6482 } 6483 6484 /** 6485 * Same as {@link Character#codePointBefore(char[], int)}. 6486 * Returns the code point before index. 6487 * This examines only the characters at index-1 and index-2. 6488 * 6489 * @param text the characters to check 6490 * @param index the index after the last or only char forming the code point 6491 * @return the code point before the index 6492 * @stable ICU 3.0 6493 */ codePointBefore(char[] text, int index)6494 public static final int codePointBefore(char[] text, int index) { 6495 char c2 = text[--index]; 6496 if (isLowSurrogate(c2)) { 6497 if (index > 0) { 6498 char c1 = text[--index]; 6499 if (isHighSurrogate(c1)) { 6500 return toCodePoint(c1, c2); 6501 } 6502 } 6503 } 6504 return c2; 6505 } 6506 6507 /** 6508 * Same as {@link Character#codePointBefore(char[], int, int)}. 6509 * Return the code point before index. 6510 * This examines only the characters at index-1 and index-2. 
6511 * 6512 * @param text the characters to check 6513 * @param index the index after the last or only char forming the code point 6514 * @param limit the start of the valid text 6515 * @return the code point before the index 6516 * @stable ICU 3.0 6517 */ codePointBefore(char[] text, int index, int limit)6518 public static final int codePointBefore(char[] text, int index, int limit) { 6519 if (index <= limit || limit < 0) { 6520 throw new IndexOutOfBoundsException(); 6521 } 6522 char c2 = text[--index]; 6523 if (isLowSurrogate(c2)) { 6524 if (index > limit) { 6525 char c1 = text[--index]; 6526 if (isHighSurrogate(c1)) { 6527 return toCodePoint(c1, c2); 6528 } 6529 } 6530 } 6531 return c2; 6532 } 6533 6534 /** 6535 * Same as {@link Character#toChars(int, char[], int)}. 6536 * Writes the chars representing the 6537 * code point into the destination at the given index. 6538 * 6539 * @param cp the code point to convert 6540 * @param dst the destination array into which to put the char(s) representing the code point 6541 * @param dstIndex the index at which to put the first (or only) char 6542 * @return the count of the number of chars written (1 or 2) 6543 * @throws IllegalArgumentException if cp is not a valid code point 6544 * @stable ICU 3.0 6545 */ toChars(int cp, char[] dst, int dstIndex)6546 public static final int toChars(int cp, char[] dst, int dstIndex) { 6547 return Character.toChars(cp, dst, dstIndex); 6548 } 6549 6550 /** 6551 * Same as {@link Character#toChars(int)}. 6552 * Returns a char array representing the code point. 
6553 * 6554 * @param cp the code point to convert 6555 * @return an array containing the char(s) representing the code point 6556 * @throws IllegalArgumentException if cp is not a valid code point 6557 * @stable ICU 3.0 6558 */ toChars(int cp)6559 public static final char[] toChars(int cp) { 6560 return Character.toChars(cp); 6561 } 6562 6563 /** 6564 * Equivalent to the {@link Character#getDirectionality(char)} method, for 6565 * convenience. Returns a byte representing the directionality of the 6566 * character. 6567 * 6568 * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns 6569 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 6570 * 6571 * {@icunote} The return value must be tested using the constants defined in {@link 6572 * UCharacterDirection} and its interface {@link 6573 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 6574 * defined by <code>java.lang.Character</code>. 6575 * @param cp the code point to check 6576 * @return the directionality of the code point 6577 * @see #getDirection 6578 * @stable ICU 3.0 6579 */ getDirectionality(int cp)6580 public static byte getDirectionality(int cp) 6581 { 6582 return (byte)getDirection(cp); 6583 } 6584 6585 /** 6586 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 6587 * method, for convenience. Counts the number of code points in the range 6588 * of text. 
6589 * @param text the characters to check 6590 * @param start the start of the range 6591 * @param limit the limit of the range 6592 * @return the number of code points in the range 6593 * @stable ICU 3.0 6594 */ codePointCount(CharSequence text, int start, int limit)6595 public static int codePointCount(CharSequence text, int start, int limit) { 6596 if (start < 0 || limit < start || limit > text.length()) { 6597 throw new IndexOutOfBoundsException("start (" + start + 6598 ") or limit (" + limit + 6599 ") invalid or out of range 0, " + text.length()); 6600 } 6601 6602 int len = limit - start; 6603 while (limit > start) { 6604 char ch = text.charAt(--limit); 6605 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6606 ch = text.charAt(--limit); 6607 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6608 --len; 6609 break; 6610 } 6611 } 6612 } 6613 return len; 6614 } 6615 6616 /** 6617 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 6618 * convenience. Counts the number of code points in the range of text. 
6619 * @param text the characters to check 6620 * @param start the start of the range 6621 * @param limit the limit of the range 6622 * @return the number of code points in the range 6623 * @stable ICU 3.0 6624 */ codePointCount(char[] text, int start, int limit)6625 public static int codePointCount(char[] text, int start, int limit) { 6626 if (start < 0 || limit < start || limit > text.length) { 6627 throw new IndexOutOfBoundsException("start (" + start + 6628 ") or limit (" + limit + 6629 ") invalid or out of range 0, " + text.length); 6630 } 6631 6632 int len = limit - start; 6633 while (limit > start) { 6634 char ch = text[--limit]; 6635 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6636 ch = text[--limit]; 6637 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6638 --len; 6639 break; 6640 } 6641 } 6642 } 6643 return len; 6644 } 6645 6646 /** 6647 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 6648 * method, for convenience. Adjusts the char index by a code point offset. 
6649 * @param text the characters to check 6650 * @param index the index to adjust 6651 * @param codePointOffset the number of code points by which to offset the index 6652 * @return the adjusted index 6653 * @stable ICU 3.0 6654 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)6655 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 6656 if (index < 0 || index > text.length()) { 6657 throw new IndexOutOfBoundsException("index ( " + index + 6658 ") out of range 0, " + text.length()); 6659 } 6660 6661 if (codePointOffset < 0) { 6662 while (++codePointOffset <= 0) { 6663 char ch = text.charAt(--index); 6664 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 6665 ch = text.charAt(--index); 6666 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6667 if (++codePointOffset > 0) { 6668 return index+1; 6669 } 6670 } 6671 } 6672 } 6673 } else { 6674 int limit = text.length(); 6675 while (--codePointOffset >= 0) { 6676 char ch = text.charAt(index++); 6677 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6678 ch = text.charAt(index++); 6679 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6680 if (--codePointOffset < 0) { 6681 return index-1; 6682 } 6683 } 6684 } 6685 } 6686 } 6687 6688 return index; 6689 } 6690 6691 /** 6692 * Equivalent to the 6693 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 6694 * method, for convenience. Adjusts the char index by a code point offset. 
6695 * @param text the characters to check 6696 * @param start the start of the range to check 6697 * @param count the length of the range to check 6698 * @param index the index to adjust 6699 * @param codePointOffset the number of code points by which to offset the index 6700 * @return the adjusted index 6701 * @stable ICU 3.0 6702 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6703 public static int offsetByCodePoints(char[] text, int start, int count, int index, 6704 int codePointOffset) { 6705 int limit = start + count; 6706 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 6707 throw new IndexOutOfBoundsException("index ( " + index + 6708 ") out of range " + start + 6709 ", " + limit + 6710 " in array 0, " + text.length); 6711 } 6712 6713 if (codePointOffset < 0) { 6714 while (++codePointOffset <= 0) { 6715 char ch = text[--index]; 6716 if (index < start) { 6717 throw new IndexOutOfBoundsException("index ( " + index + 6718 ") < start (" + start + 6719 ")"); 6720 } 6721 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 6722 ch = text[--index]; 6723 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6724 if (++codePointOffset > 0) { 6725 return index+1; 6726 } 6727 } 6728 } 6729 } 6730 } else { 6731 while (--codePointOffset >= 0) { 6732 char ch = text[index++]; 6733 if (index > limit) { 6734 throw new IndexOutOfBoundsException("index ( " + index + 6735 ") > limit (" + limit + 6736 ")"); 6737 } 6738 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6739 ch = text[index++]; 6740 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6741 if (--codePointOffset < 0) { 6742 return index-1; 6743 } 6744 } 6745 } 6746 } 6747 } 6748 6749 return index; 6750 } 6751 6752 // private variables ------------------------------------------------- 6753 6754 /** 6755 * To get the last character out from a data type 6756 */ 6757 private 
static final int LAST_CHAR_MASK_ = 0xFFFF; 6758 6759 // /** 6760 // * To get the last byte out from a data type 6761 // */ 6762 // private static final int LAST_BYTE_MASK_ = 0xFF; 6763 // 6764 // /** 6765 // * Shift 16 bits 6766 // */ 6767 // private static final int SHIFT_16_ = 16; 6768 // 6769 // /** 6770 // * Shift 24 bits 6771 // */ 6772 // private static final int SHIFT_24_ = 24; 6773 // 6774 // /** 6775 // * Decimal radix 6776 // */ 6777 // private static final int DECIMAL_RADIX_ = 10; 6778 6779 /** 6780 * No break space code point 6781 */ 6782 private static final int NO_BREAK_SPACE_ = 0xA0; 6783 6784 /** 6785 * Figure space code point 6786 */ 6787 private static final int FIGURE_SPACE_ = 0x2007; 6788 6789 /** 6790 * Narrow no break space code point 6791 */ 6792 private static final int NARROW_NO_BREAK_SPACE_ = 0x202F; 6793 6794 /** 6795 * Ideographic number zero code point 6796 */ 6797 private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007; 6798 6799 /** 6800 * CJK Ideograph, First code point 6801 */ 6802 private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00; 6803 6804 /** 6805 * CJK Ideograph, Second code point 6806 */ 6807 private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c; 6808 6809 /** 6810 * CJK Ideograph, Third code point 6811 */ 6812 private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09; 6813 6814 /** 6815 * CJK Ideograph, Fourth code point 6816 */ 6817 private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db; 6818 6819 /** 6820 * CJK Ideograph, FIFTH code point 6821 */ 6822 private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94; 6823 6824 /** 6825 * CJK Ideograph, Sixth code point 6826 */ 6827 private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d; 6828 6829 /** 6830 * CJK Ideograph, Seventh code point 6831 */ 6832 private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03; 6833 6834 /** 6835 * CJK Ideograph, Eighth code point 6836 */ 6837 private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b; 6838 6839 /** 6840 * CJK Ideograph, Nineth 
code point 6841 */ 6842 private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d; 6843 6844 /** 6845 * Application Program command code point 6846 */ 6847 private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F; 6848 6849 /** 6850 * Unit separator code point 6851 */ 6852 private static final int UNIT_SEPARATOR_ = 0x001F; 6853 6854 /** 6855 * Delete code point 6856 */ 6857 private static final int DELETE_ = 0x007F; 6858 6859 /** 6860 * Han digit characters 6861 */ 6862 private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_ = 0x96f6; 6863 private static final int CJK_IDEOGRAPH_COMPLEX_ONE_ = 0x58f9; 6864 private static final int CJK_IDEOGRAPH_COMPLEX_TWO_ = 0x8cb3; 6865 private static final int CJK_IDEOGRAPH_COMPLEX_THREE_ = 0x53c3; 6866 private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_ = 0x8086; 6867 private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_ = 0x4f0d; 6868 private static final int CJK_IDEOGRAPH_COMPLEX_SIX_ = 0x9678; 6869 private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_ = 0x67d2; 6870 private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_ = 0x634c; 6871 private static final int CJK_IDEOGRAPH_COMPLEX_NINE_ = 0x7396; 6872 private static final int CJK_IDEOGRAPH_TEN_ = 0x5341; 6873 private static final int CJK_IDEOGRAPH_COMPLEX_TEN_ = 0x62fe; 6874 private static final int CJK_IDEOGRAPH_HUNDRED_ = 0x767e; 6875 private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_ = 0x4f70; 6876 private static final int CJK_IDEOGRAPH_THOUSAND_ = 0x5343; 6877 private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf; 6878 private static final int CJK_IDEOGRAPH_TEN_THOUSAND_ = 0x824c; 6879 private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_ = 0x5104; 6880 6881 // private constructor ----------------------------------------------- 6882 ///CLOVER:OFF 6883 /** 6884 * Private constructor to prevent instantiation 6885 */ UCharacter()6886 private UCharacter() 6887 { 6888 } 6889 ///CLOVER:ON 6890 } 6891