1 // © 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /** 4 ******************************************************************************* 5 * Copyright (C) 1996-2016, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9 10 package com.ibm.icu.lang; 11 12 import java.lang.ref.SoftReference; 13 import java.util.HashMap; 14 import java.util.Iterator; 15 import java.util.Locale; 16 import java.util.Map; 17 18 import com.ibm.icu.impl.CaseMapImpl; 19 import com.ibm.icu.impl.IllegalIcuArgumentException; 20 import com.ibm.icu.impl.Trie2; 21 import com.ibm.icu.impl.UBiDiProps; 22 import com.ibm.icu.impl.UCaseProps; 23 import com.ibm.icu.impl.UCharacterName; 24 import com.ibm.icu.impl.UCharacterNameChoice; 25 import com.ibm.icu.impl.UCharacterProperty; 26 import com.ibm.icu.impl.UCharacterUtility; 27 import com.ibm.icu.impl.UPropertyAliases; 28 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; 29 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; 30 import com.ibm.icu.text.BreakIterator; 31 import com.ibm.icu.text.Normalizer2; 32 import com.ibm.icu.util.RangeValueIterator; 33 import com.ibm.icu.util.ULocale; 34 import com.ibm.icu.util.ValueIterator; 35 import com.ibm.icu.util.VersionInfo; 36 37 /** 38 * {@icuenhanced java.lang.Character}.{@icu _usage_} 39 * 40 * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class. 41 * These extensions provide support for more Unicode properties. 42 * Each ICU release supports the latest version of Unicode available at that time. 43 * 44 * <p>For some time before Java 5 added support for supplementary Unicode code points, 45 * The ICU UCharacter class and many other ICU classes already supported them. 
46 * Some UCharacter methods and constants were widened slightly differently than 47 * how the Character class methods and constants were widened later. 48 * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF, 49 * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF. 50 * 51 * <p>Code points are represented in these APIs using ints. While it would be 52 * more convenient in Java to have a separate primitive datatype for them, 53 * ints suffice in the meantime. 54 * 55 * <p>To use this class please add the jar file name icu4j.jar to the 56 * class path, since it contains data files which supply the information used 57 * by this file.<br> 58 * E.g. In Windows <br> 59 * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br> 60 * Otherwise, another method would be to copy the files uprops.dat and 61 * unames.icu from the icu4j source subdirectory 62 * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory 63 * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>. 64 * 65 * <p>Aside from the additions for UTF-16 support, and the updated Unicode 66 * properties, the main differences between UCharacter and Character are: 67 * <ul> 68 * <li> UCharacter is not designed to be a char wrapper and does not have 69 * APIs that involve management of that single char.<br> 70 * These include: 71 * <ul> 72 * <li> char charValue(), 73 * <li> int compareTo(java.lang.Character, java.lang.Character), etc. 74 * </ul> 75 * <li> UCharacter does not include Character APIs that are deprecated, nor 76 * does it include the Java-specific character information, such as 77 * boolean isJavaIdentifierPart(char ch). 78 * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric 79 * values '10' - '35'. UCharacter also does this in digit and 80 * getNumericValue, to adhere to the Java semantics of these 81 * methods.
New methods unicodeDigit, and 82 * getUnicodeNumericValue do not treat the above code points 83 * as having numeric values. This is a semantic change from ICU4J 1.3.1. 84 * </ul> 85 * <p> 86 * Further detail on differences can be determined using the program 87 * <a href= 88 * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java"> 89 * com.ibm.icu.dev.test.lang.UCharacterCompare</a> 90 * <p> 91 * In addition to Java compatibility functions, which calculate derived properties, 92 * this API provides low-level access to the Unicode Character Database. 93 * <p> 94 * Unicode assigns each code point (not just assigned characters) values for 95 * many properties. 96 * Most of them are simple boolean flags, or constants from a small enumerated list. 97 * For some properties, values are strings or other relatively more complex types. 98 * <p> 99 * For more information see 100 * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a> 101 * (http://www.unicode.org/ucd/) 102 * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU 103 * User Guide chapter on Properties</a> 104 * (http://www.icu-project.org/userguide/properties.html). 105 * <p> 106 * There are also functions that provide easy migration from C/POSIX functions 107 * like isblank(). Their use is generally discouraged because the C/POSIX 108 * standards do not define their semantics beyond the ASCII range, which means 109 * that different implementations exhibit very different behavior. 110 * Instead, Unicode properties should be used directly. 111 * <p> 112 * There are also only a few, broad C/POSIX character classes, and they tend 113 * to be used for conflicting purposes. For example, the "isalpha()" class 114 * is sometimes used to determine word boundaries, while a more sophisticated 115 * approach would at least distinguish initial letters from continuation 116 * characters (the latter including combining marks).
117 * (In ICU, BreakIterator is the most sophisticated API for word boundaries.) 118 * Another example: There is no "istitle()" class for titlecase characters. 119 * <p> 120 * ICU 3.4 and later provides API access for all twelve C/POSIX character classes. 121 * ICU implements them according to the Standard Recommendations in 122 * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions 123 * (http://www.unicode.org/reports/tr18/#Compatibility_Properties). 124 * <p> 125 * API access for C/POSIX character classes is as follows: 126 * <pre>{@code 127 * - alpha: isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC) 128 * - lower: isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE) 129 * - upper: isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE) 130 * - punct: ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)| 131 * (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)| 132 * (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0 133 * - digit: isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER 134 * - xdigit: hasBinaryProperty(c, UProperty.POSIX_XDIGIT) 135 * - alnum: hasBinaryProperty(c, UProperty.POSIX_ALNUM) 136 * - space: isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE) 137 * - blank: hasBinaryProperty(c, UProperty.POSIX_BLANK) 138 * - cntrl: getType(c)==CONTROL 139 * - graph: hasBinaryProperty(c, UProperty.POSIX_GRAPH) 140 * - print: hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre> 141 * <p> 142 * The C/POSIX character classes are also available in UnicodeSet patterns, 143 * using patterns like [:graph:] or \p{graph}. 144 * 145 * <p>{@icunote} There are several ICU (and Java) whitespace functions. 
146 * Comparison:<ul> 147 * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property; 148 * most of general categories "Z" (separators) + most whitespace ISO controls 149 * (including no-break spaces, but excluding IS1..IS4) 150 * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces 151 * <li> isSpaceChar: just Z (including no-break spaces)</ul> 152 * 153 * <p> 154 * This class is not subclassable. 155 * 156 * @author Syn Wee Quek 157 * @stable ICU 2.1 158 * @see com.ibm.icu.lang.UCharacterEnums 159 */ 160 161 public final class UCharacter implements ECharacterCategory, ECharacterDirection 162 { 163 // public inner classes ---------------------------------------------- 164 165 /** 166 * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_} 167 * 168 * A family of character subsets representing the character blocks in the 169 * Unicode specification, generated from Unicode Data file Blocks.txt. 170 * Character blocks generally define characters used for a specific script 171 * or purpose. A character is contained by at most one Unicode block. 172 * 173 * {@icunote} All fields named XXX_ID are specific to ICU. 
174 * 175 * @stable ICU 2.4 176 */ 177 public static final class UnicodeBlock extends Character.Subset 178 { 179 // block id corresponding to icu4c ----------------------------------- 180 181 /** 182 * @stable ICU 2.4 183 */ 184 public static final int INVALID_CODE_ID = -1; 185 /** 186 * @stable ICU 2.4 187 */ 188 public static final int BASIC_LATIN_ID = 1; 189 /** 190 * @stable ICU 2.4 191 */ 192 public static final int LATIN_1_SUPPLEMENT_ID = 2; 193 /** 194 * @stable ICU 2.4 195 */ 196 public static final int LATIN_EXTENDED_A_ID = 3; 197 /** 198 * @stable ICU 2.4 199 */ 200 public static final int LATIN_EXTENDED_B_ID = 4; 201 /** 202 * @stable ICU 2.4 203 */ 204 public static final int IPA_EXTENSIONS_ID = 5; 205 /** 206 * @stable ICU 2.4 207 */ 208 public static final int SPACING_MODIFIER_LETTERS_ID = 6; 209 /** 210 * @stable ICU 2.4 211 */ 212 public static final int COMBINING_DIACRITICAL_MARKS_ID = 7; 213 /** 214 * Unicode 3.2 renames this block to "Greek and Coptic". 215 * @stable ICU 2.4 216 */ 217 public static final int GREEK_ID = 8; 218 /** 219 * @stable ICU 2.4 220 */ 221 public static final int CYRILLIC_ID = 9; 222 /** 223 * @stable ICU 2.4 224 */ 225 public static final int ARMENIAN_ID = 10; 226 /** 227 * @stable ICU 2.4 228 */ 229 public static final int HEBREW_ID = 11; 230 /** 231 * @stable ICU 2.4 232 */ 233 public static final int ARABIC_ID = 12; 234 /** 235 * @stable ICU 2.4 236 */ 237 public static final int SYRIAC_ID = 13; 238 /** 239 * @stable ICU 2.4 240 */ 241 public static final int THAANA_ID = 14; 242 /** 243 * @stable ICU 2.4 244 */ 245 public static final int DEVANAGARI_ID = 15; 246 /** 247 * @stable ICU 2.4 248 */ 249 public static final int BENGALI_ID = 16; 250 /** 251 * @stable ICU 2.4 252 */ 253 public static final int GURMUKHI_ID = 17; 254 /** 255 * @stable ICU 2.4 256 */ 257 public static final int GUJARATI_ID = 18; 258 /** 259 * @stable ICU 2.4 260 */ 261 public static final int ORIYA_ID = 19; 262 /** 263 * @stable ICU 2.4 264 */ 
265 public static final int TAMIL_ID = 20; 266 /** 267 * @stable ICU 2.4 268 */ 269 public static final int TELUGU_ID = 21; 270 /** 271 * @stable ICU 2.4 272 */ 273 public static final int KANNADA_ID = 22; 274 /** 275 * @stable ICU 2.4 276 */ 277 public static final int MALAYALAM_ID = 23; 278 /** 279 * @stable ICU 2.4 280 */ 281 public static final int SINHALA_ID = 24; 282 /** 283 * @stable ICU 2.4 284 */ 285 public static final int THAI_ID = 25; 286 /** 287 * @stable ICU 2.4 288 */ 289 public static final int LAO_ID = 26; 290 /** 291 * @stable ICU 2.4 292 */ 293 public static final int TIBETAN_ID = 27; 294 /** 295 * @stable ICU 2.4 296 */ 297 public static final int MYANMAR_ID = 28; 298 /** 299 * @stable ICU 2.4 300 */ 301 public static final int GEORGIAN_ID = 29; 302 /** 303 * @stable ICU 2.4 304 */ 305 public static final int HANGUL_JAMO_ID = 30; 306 /** 307 * @stable ICU 2.4 308 */ 309 public static final int ETHIOPIC_ID = 31; 310 /** 311 * @stable ICU 2.4 312 */ 313 public static final int CHEROKEE_ID = 32; 314 /** 315 * @stable ICU 2.4 316 */ 317 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33; 318 /** 319 * @stable ICU 2.4 320 */ 321 public static final int OGHAM_ID = 34; 322 /** 323 * @stable ICU 2.4 324 */ 325 public static final int RUNIC_ID = 35; 326 /** 327 * @stable ICU 2.4 328 */ 329 public static final int KHMER_ID = 36; 330 /** 331 * @stable ICU 2.4 332 */ 333 public static final int MONGOLIAN_ID = 37; 334 /** 335 * @stable ICU 2.4 336 */ 337 public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38; 338 /** 339 * @stable ICU 2.4 340 */ 341 public static final int GREEK_EXTENDED_ID = 39; 342 /** 343 * @stable ICU 2.4 344 */ 345 public static final int GENERAL_PUNCTUATION_ID = 40; 346 /** 347 * @stable ICU 2.4 348 */ 349 public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41; 350 /** 351 * @stable ICU 2.4 352 */ 353 public static final int CURRENCY_SYMBOLS_ID = 42; 354 /** 355 * Unicode 3.2 renames this block to "Combining 
Diacritical Marks for 356 * Symbols". 357 * @stable ICU 2.4 358 */ 359 public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43; 360 /** 361 * @stable ICU 2.4 362 */ 363 public static final int LETTERLIKE_SYMBOLS_ID = 44; 364 /** 365 * @stable ICU 2.4 366 */ 367 public static final int NUMBER_FORMS_ID = 45; 368 /** 369 * @stable ICU 2.4 370 */ 371 public static final int ARROWS_ID = 46; 372 /** 373 * @stable ICU 2.4 374 */ 375 public static final int MATHEMATICAL_OPERATORS_ID = 47; 376 /** 377 * @stable ICU 2.4 378 */ 379 public static final int MISCELLANEOUS_TECHNICAL_ID = 48; 380 /** 381 * @stable ICU 2.4 382 */ 383 public static final int CONTROL_PICTURES_ID = 49; 384 /** 385 * @stable ICU 2.4 386 */ 387 public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50; 388 /** 389 * @stable ICU 2.4 390 */ 391 public static final int ENCLOSED_ALPHANUMERICS_ID = 51; 392 /** 393 * @stable ICU 2.4 394 */ 395 public static final int BOX_DRAWING_ID = 52; 396 /** 397 * @stable ICU 2.4 398 */ 399 public static final int BLOCK_ELEMENTS_ID = 53; 400 /** 401 * @stable ICU 2.4 402 */ 403 public static final int GEOMETRIC_SHAPES_ID = 54; 404 /** 405 * @stable ICU 2.4 406 */ 407 public static final int MISCELLANEOUS_SYMBOLS_ID = 55; 408 /** 409 * @stable ICU 2.4 410 */ 411 public static final int DINGBATS_ID = 56; 412 /** 413 * @stable ICU 2.4 414 */ 415 public static final int BRAILLE_PATTERNS_ID = 57; 416 /** 417 * @stable ICU 2.4 418 */ 419 public static final int CJK_RADICALS_SUPPLEMENT_ID = 58; 420 /** 421 * @stable ICU 2.4 422 */ 423 public static final int KANGXI_RADICALS_ID = 59; 424 /** 425 * @stable ICU 2.4 426 */ 427 public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60; 428 /** 429 * @stable ICU 2.4 430 */ 431 public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61; 432 /** 433 * @stable ICU 2.4 434 */ 435 public static final int HIRAGANA_ID = 62; 436 /** 437 * @stable ICU 2.4 438 */ 439 public static final int KATAKANA_ID = 63; 440 /** 441 * 
@stable ICU 2.4 442 */ 443 public static final int BOPOMOFO_ID = 64; 444 /** 445 * @stable ICU 2.4 446 */ 447 public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65; 448 /** 449 * @stable ICU 2.4 450 */ 451 public static final int KANBUN_ID = 66; 452 /** 453 * @stable ICU 2.4 454 */ 455 public static final int BOPOMOFO_EXTENDED_ID = 67; 456 /** 457 * @stable ICU 2.4 458 */ 459 public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68; 460 /** 461 * @stable ICU 2.4 462 */ 463 public static final int CJK_COMPATIBILITY_ID = 69; 464 /** 465 * @stable ICU 2.4 466 */ 467 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70; 468 /** 469 * @stable ICU 2.4 470 */ 471 public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71; 472 /** 473 * @stable ICU 2.4 474 */ 475 public static final int YI_SYLLABLES_ID = 72; 476 /** 477 * @stable ICU 2.4 478 */ 479 public static final int YI_RADICALS_ID = 73; 480 /** 481 * @stable ICU 2.4 482 */ 483 public static final int HANGUL_SYLLABLES_ID = 74; 484 /** 485 * @stable ICU 2.4 486 */ 487 public static final int HIGH_SURROGATES_ID = 75; 488 /** 489 * @stable ICU 2.4 490 */ 491 public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76; 492 /** 493 * @stable ICU 2.4 494 */ 495 public static final int LOW_SURROGATES_ID = 77; 496 /** 497 * Same as public static final int PRIVATE_USE. 498 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 499 * and multiple code point ranges had this block. 500 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 501 * and adds separate blocks for the supplementary PUAs. 502 * @stable ICU 2.4 503 */ 504 public static final int PRIVATE_USE_AREA_ID = 78; 505 /** 506 * Same as public static final int PRIVATE_USE_AREA. 507 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 508 * and multiple code point ranges had this block. 
509 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 510 * and adds separate blocks for the supplementary PUAs. 511 * @stable ICU 2.4 512 */ 513 public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID; 514 /** 515 * @stable ICU 2.4 516 */ 517 public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79; 518 /** 519 * @stable ICU 2.4 520 */ 521 public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80; 522 /** 523 * @stable ICU 2.4 524 */ 525 public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81; 526 /** 527 * @stable ICU 2.4 528 */ 529 public static final int COMBINING_HALF_MARKS_ID = 82; 530 /** 531 * @stable ICU 2.4 532 */ 533 public static final int CJK_COMPATIBILITY_FORMS_ID = 83; 534 /** 535 * @stable ICU 2.4 536 */ 537 public static final int SMALL_FORM_VARIANTS_ID = 84; 538 /** 539 * @stable ICU 2.4 540 */ 541 public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85; 542 /** 543 * @stable ICU 2.4 544 */ 545 public static final int SPECIALS_ID = 86; 546 /** 547 * @stable ICU 2.4 548 */ 549 public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87; 550 /** 551 * @stable ICU 2.4 552 */ 553 public static final int OLD_ITALIC_ID = 88; 554 /** 555 * @stable ICU 2.4 556 */ 557 public static final int GOTHIC_ID = 89; 558 /** 559 * @stable ICU 2.4 560 */ 561 public static final int DESERET_ID = 90; 562 /** 563 * @stable ICU 2.4 564 */ 565 public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91; 566 /** 567 * @stable ICU 2.4 568 */ 569 public static final int MUSICAL_SYMBOLS_ID = 92; 570 /** 571 * @stable ICU 2.4 572 */ 573 public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93; 574 /** 575 * @stable ICU 2.4 576 */ 577 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94; 578 /** 579 * @stable ICU 2.4 580 */ 581 public static final int 582 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95; 583 /** 584 * @stable ICU 2.4 585 */ 586 public static final int TAGS_ID = 96; 587 588 // New blocks in Unicode 
3.2 589 590 /** 591 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 592 * @stable ICU 2.4 593 */ 594 public static final int CYRILLIC_SUPPLEMENTARY_ID = 97; 595 /** 596 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 597 * @stable ICU 3.0 598 */ 599 600 public static final int CYRILLIC_SUPPLEMENT_ID = 97; 601 /** 602 * @stable ICU 2.4 603 */ 604 public static final int TAGALOG_ID = 98; 605 /** 606 * @stable ICU 2.4 607 */ 608 public static final int HANUNOO_ID = 99; 609 /** 610 * @stable ICU 2.4 611 */ 612 public static final int BUHID_ID = 100; 613 /** 614 * @stable ICU 2.4 615 */ 616 public static final int TAGBANWA_ID = 101; 617 /** 618 * @stable ICU 2.4 619 */ 620 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102; 621 /** 622 * @stable ICU 2.4 623 */ 624 public static final int SUPPLEMENTAL_ARROWS_A_ID = 103; 625 /** 626 * @stable ICU 2.4 627 */ 628 public static final int SUPPLEMENTAL_ARROWS_B_ID = 104; 629 /** 630 * @stable ICU 2.4 631 */ 632 public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105; 633 /** 634 * @stable ICU 2.4 635 */ 636 public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106; 637 /** 638 * @stable ICU 2.4 639 */ 640 public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107; 641 /** 642 * @stable ICU 2.4 643 */ 644 public static final int VARIATION_SELECTORS_ID = 108; 645 /** 646 * @stable ICU 2.4 647 */ 648 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109; 649 /** 650 * @stable ICU 2.4 651 */ 652 public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110; 653 654 /** 655 * @stable ICU 2.6 656 */ 657 public static final int LIMBU_ID = 111; /*[1900]*/ 658 /** 659 * @stable ICU 2.6 660 */ 661 public static final int TAI_LE_ID = 112; /*[1950]*/ 662 /** 663 * @stable ICU 2.6 664 */ 665 public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/ 666 /** 667 * @stable ICU 2.6 668 */ 669 public static 
final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/ 670 /** 671 * @stable ICU 2.6 672 */ 673 public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/ 674 /** 675 * @stable ICU 2.6 676 */ 677 public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/ 678 /** 679 * @stable ICU 2.6 680 */ 681 public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/ 682 /** 683 * @stable ICU 2.6 684 */ 685 public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/ 686 /** 687 * @stable ICU 2.6 688 */ 689 public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/ 690 /** 691 * @stable ICU 2.6 692 */ 693 public static final int UGARITIC_ID = 120; /*[10380]*/ 694 /** 695 * @stable ICU 2.6 696 */ 697 public static final int SHAVIAN_ID = 121; /*[10450]*/ 698 /** 699 * @stable ICU 2.6 700 */ 701 public static final int OSMANYA_ID = 122; /*[10480]*/ 702 /** 703 * @stable ICU 2.6 704 */ 705 public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/ 706 /** 707 * @stable ICU 2.6 708 */ 709 public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/ 710 /** 711 * @stable ICU 2.6 712 */ 713 public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/ 714 715 /* New blocks in Unicode 4.1 */ 716 717 /** 718 * @stable ICU 3.4 719 */ 720 public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/ 721 722 /** 723 * @stable ICU 3.4 724 */ 725 public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/ 726 727 /** 728 * @stable ICU 3.4 729 */ 730 public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/ 731 732 /** 733 * @stable ICU 3.4 734 */ 735 public static final int BUGINESE_ID = 129; /*[1A00]*/ 736 737 /** 738 * @stable ICU 3.4 739 */ 740 public static final int CJK_STROKES_ID = 130; /*[31C0]*/ 741 742 /** 743 * @stable ICU 3.4 744 */ 745 public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/ 746 747 /** 748 * @stable ICU 3.4 749 */ 750 public static final int 
COPTIC_ID = 132; /*[2C80]*/ 751 752 /** 753 * @stable ICU 3.4 754 */ 755 public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/ 756 757 /** 758 * @stable ICU 3.4 759 */ 760 public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/ 761 762 /** 763 * @stable ICU 3.4 764 */ 765 public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/ 766 767 /** 768 * @stable ICU 3.4 769 */ 770 public static final int GLAGOLITIC_ID = 136; /*[2C00]*/ 771 772 /** 773 * @stable ICU 3.4 774 */ 775 public static final int KHAROSHTHI_ID = 137; /*[10A00]*/ 776 777 /** 778 * @stable ICU 3.4 779 */ 780 public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/ 781 782 /** 783 * @stable ICU 3.4 784 */ 785 public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/ 786 787 /** 788 * @stable ICU 3.4 789 */ 790 public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/ 791 792 /** 793 * @stable ICU 3.4 794 */ 795 public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/ 796 797 /** 798 * @stable ICU 3.4 799 */ 800 public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/ 801 802 /** 803 * @stable ICU 3.4 804 */ 805 public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/ 806 807 /** 808 * @stable ICU 3.4 809 */ 810 public static final int TIFINAGH_ID = 144; /*[2D30]*/ 811 812 /** 813 * @stable ICU 3.4 814 */ 815 public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/ 816 817 /* New blocks in Unicode 5.0 */ 818 819 /** 820 * @stable ICU 3.6 821 */ 822 public static final int NKO_ID = 146; /*[07C0]*/ 823 /** 824 * @stable ICU 3.6 825 */ 826 public static final int BALINESE_ID = 147; /*[1B00]*/ 827 /** 828 * @stable ICU 3.6 829 */ 830 public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/ 831 /** 832 * @stable ICU 3.6 833 */ 834 public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/ 835 /** 836 * @stable ICU 3.6 837 */ 838 public static final int PHAGS_PA_ID = 150; /*[A840]*/ 839 /** 840 * @stable ICU 3.6 841 */ 842 public 
static final int PHOENICIAN_ID = 151; /*[10900]*/ 843 /** 844 * @stable ICU 3.6 845 */ 846 public static final int CUNEIFORM_ID = 152; /*[12000]*/ 847 /** 848 * @stable ICU 3.6 849 */ 850 public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/ 851 /** 852 * @stable ICU 3.6 853 */ 854 public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/ 855 856 /** 857 * @stable ICU 4.0 858 */ 859 public static final int SUNDANESE_ID = 155; /* [1B80] */ 860 861 /** 862 * @stable ICU 4.0 863 */ 864 public static final int LEPCHA_ID = 156; /* [1C00] */ 865 866 /** 867 * @stable ICU 4.0 868 */ 869 public static final int OL_CHIKI_ID = 157; /* [1C50] */ 870 871 /** 872 * @stable ICU 4.0 873 */ 874 public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */ 875 876 /** 877 * @stable ICU 4.0 878 */ 879 public static final int VAI_ID = 159; /* [A500] */ 880 881 /** 882 * @stable ICU 4.0 883 */ 884 public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */ 885 886 /** 887 * @stable ICU 4.0 888 */ 889 public static final int SAURASHTRA_ID = 161; /* [A880] */ 890 891 /** 892 * @stable ICU 4.0 893 */ 894 public static final int KAYAH_LI_ID = 162; /* [A900] */ 895 896 /** 897 * @stable ICU 4.0 898 */ 899 public static final int REJANG_ID = 163; /* [A930] */ 900 901 /** 902 * @stable ICU 4.0 903 */ 904 public static final int CHAM_ID = 164; /* [AA00] */ 905 906 /** 907 * @stable ICU 4.0 908 */ 909 public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */ 910 911 /** 912 * @stable ICU 4.0 913 */ 914 public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */ 915 916 /** 917 * @stable ICU 4.0 918 */ 919 public static final int LYCIAN_ID = 167; /* [10280] */ 920 921 /** 922 * @stable ICU 4.0 923 */ 924 public static final int CARIAN_ID = 168; /* [102A0] */ 925 926 /** 927 * @stable ICU 4.0 928 */ 929 public static final int LYDIAN_ID = 169; /* [10920] */ 930 931 /** 932 * @stable ICU 4.0 933 */ 934 public static final int 
MAHJONG_TILES_ID = 170; /* [1F000] */ 935 936 /** 937 * @stable ICU 4.0 938 */ 939 public static final int DOMINO_TILES_ID = 171; /* [1F030] */ 940 941 /* New blocks in Unicode 5.2 */ 942 943 /** @stable ICU 4.4 */ 944 public static final int SAMARITAN_ID = 172; /*[0800]*/ 945 /** @stable ICU 4.4 */ 946 public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/ 947 /** @stable ICU 4.4 */ 948 public static final int TAI_THAM_ID = 174; /*[1A20]*/ 949 /** @stable ICU 4.4 */ 950 public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/ 951 /** @stable ICU 4.4 */ 952 public static final int LISU_ID = 176; /*[A4D0]*/ 953 /** @stable ICU 4.4 */ 954 public static final int BAMUM_ID = 177; /*[A6A0]*/ 955 /** @stable ICU 4.4 */ 956 public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/ 957 /** @stable ICU 4.4 */ 958 public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/ 959 /** @stable ICU 4.4 */ 960 public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/ 961 /** @stable ICU 4.4 */ 962 public static final int JAVANESE_ID = 181; /*[A980]*/ 963 /** @stable ICU 4.4 */ 964 public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/ 965 /** @stable ICU 4.4 */ 966 public static final int TAI_VIET_ID = 183; /*[AA80]*/ 967 /** @stable ICU 4.4 */ 968 public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/ 969 /** @stable ICU 4.4 */ 970 public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/ 971 /** @stable ICU 4.4 */ 972 public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/ 973 /** @stable ICU 4.4 */ 974 public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/ 975 /** @stable ICU 4.4 */ 976 public static final int AVESTAN_ID = 188; /*[10B00]*/ 977 /** @stable ICU 4.4 */ 978 public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/ 979 /** @stable ICU 4.4 */ 980 public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/ 981 /** @stable ICU 4.4 */ 982 public 
static final int OLD_TURKIC_ID = 191; /*[10C00]*/ 983 /** @stable ICU 4.4 */ 984 public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/ 985 /** @stable ICU 4.4 */ 986 public static final int KAITHI_ID = 193; /*[11080]*/ 987 /** @stable ICU 4.4 */ 988 public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/ 989 /** @stable ICU 4.4 */ 990 public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/ 991 /** @stable ICU 4.4 */ 992 public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/ 993 /** @stable ICU 4.4 */ 994 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/ 995 996 /* New blocks in Unicode 6.0 */ 997 998 /** @stable ICU 4.6 */ 999 public static final int MANDAIC_ID = 198; /*[0840]*/ 1000 /** @stable ICU 4.6 */ 1001 public static final int BATAK_ID = 199; /*[1BC0]*/ 1002 /** @stable ICU 4.6 */ 1003 public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/ 1004 /** @stable ICU 4.6 */ 1005 public static final int BRAHMI_ID = 201; /*[11000]*/ 1006 /** @stable ICU 4.6 */ 1007 public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/ 1008 /** @stable ICU 4.6 */ 1009 public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/ 1010 /** @stable ICU 4.6 */ 1011 public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/ 1012 /** @stable ICU 4.6 */ 1013 public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/ 1014 /** @stable ICU 4.6 */ 1015 public static final int EMOTICONS_ID = 206; /*[1F600]*/ 1016 /** @stable ICU 4.6 */ 1017 public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/ 1018 /** @stable ICU 4.6 */ 1019 public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/ 1020 /** @stable ICU 4.6 */ 1021 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/ 1022 1023 /* New blocks in Unicode 6.1 */ 1024 1025 /** @stable ICU 49 */ 1026 public static final int ARABIC_EXTENDED_A_ID = 210; 
/*[08A0]*/
// Block ids added for Unicode 6.1 (continued from above) and Unicode 7.0.
// Each *_ID constant is the int id of one Unicode block. The bracketed hex value in the
// trailing comment is the first code point of that block (see the Unicode Blocks.txt data file).
// Ids are handed out sequentially per Unicode version; they are not sorted by code point.
/** @stable ICU 49 */
public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
/** @stable ICU 49 */
public static final int CHAKMA_ID = 212; /*[11100]*/
/** @stable ICU 49 */
public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
/** @stable ICU 49 */
public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
/** @stable ICU 49 */
public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
/** @stable ICU 49 */
public static final int MIAO_ID = 216; /*[16F00]*/
/** @stable ICU 49 */
public static final int SHARADA_ID = 217; /*[11180]*/
/** @stable ICU 49 */
public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
/** @stable ICU 49 */
public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
/** @stable ICU 49 */
public static final int TAKRI_ID = 220; /*[11680]*/

/* New blocks in Unicode 7.0 */

/** @stable ICU 54 */
public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
/** @stable ICU 54 */
public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
/** @stable ICU 54 */
public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
/** @stable ICU 54 */
public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
/** @stable ICU 54 */
public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
/** @stable ICU 54 */
public static final int ELBASAN_ID = 226; /*[10500]*/
/** @stable ICU 54 */
public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
/** @stable ICU 54 */
public static final int GRANTHA_ID = 228; /*[11300]*/
/** @stable ICU 54 */
public static final int KHOJKI_ID = 229; /*[11200]*/
/** @stable ICU 54 */
public static final int KHUDAWADI_ID = 230; /*[112B0]*/
/** @stable ICU 54 */
public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
/** @stable ICU 54 */
public static final int LINEAR_A_ID = 232; /*[10600]*/
/** @stable ICU 54 */
public static final int MAHAJANI_ID = 233; /*[11150]*/
/** @stable ICU 54 */
public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
/** @stable ICU 54 */
public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
/** @stable ICU 54 */
public static final int MODI_ID = 236; /*[11600]*/
/** @stable ICU 54 */
public static final int MRO_ID = 237; /*[16A40]*/
/** @stable ICU 54 */
public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
/** @stable ICU 54 */
public static final int NABATAEAN_ID = 239; /*[10880]*/
/** @stable ICU 54 */
public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
/** @stable ICU 54 */
public static final int OLD_PERMIC_ID = 241; /*[10350]*/
/** @stable ICU 54 */
public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
/** @stable ICU 54 */
public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
/** @stable ICU 54 */
public static final int PALMYRENE_ID = 244; /*[10860]*/
/** @stable ICU 54 */
public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
/** @stable ICU 54 */
public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
/** @stable ICU 54 */
public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
/** @stable ICU 54 */
public static final int SIDDHAM_ID = 248; /*[11580]*/
/** @stable ICU 54 */
public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
/** @stable ICU 54 */
public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
/** @stable ICU 54 */
public static final int TIRHUTA_ID = 251; /*[11480]*/
/** @stable ICU 54 */
public static final int WARANG_CITI_ID = 252; /*[118A0]*/

/* New blocks in Unicode 8.0 */
1116 1117 /** @stable ICU 56 */ 1118 public static final int AHOM_ID = 253; /*[11700]*/ 1119 /** @stable ICU 56 */ 1120 public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/ 1121 /** @stable ICU 56 */ 1122 public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/ 1123 /** @stable ICU 56 */ 1124 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/ 1125 /** @stable ICU 56 */ 1126 public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/ 1127 /** @stable ICU 56 */ 1128 public static final int HATRAN_ID = 258; /*[108E0]*/ 1129 /** @stable ICU 56 */ 1130 public static final int MULTANI_ID = 259; /*[11280]*/ 1131 /** @stable ICU 56 */ 1132 public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/ 1133 /** @stable ICU 56 */ 1134 public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/ 1135 /** @stable ICU 56 */ 1136 public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/ 1137 1138 /* New blocks in Unicode 9.0 */ 1139 1140 /** @stable ICU 58 */ 1141 public static final int ADLAM_ID = 263; /*[1E900]*/ 1142 /** @stable ICU 58 */ 1143 public static final int BHAIKSUKI_ID = 264; /*[11C00]*/ 1144 /** @stable ICU 58 */ 1145 public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/ 1146 /** @stable ICU 58 */ 1147 public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/ 1148 /** @stable ICU 58 */ 1149 public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/ 1150 /** @stable ICU 58 */ 1151 public static final int MARCHEN_ID = 268; /*[11C70]*/ 1152 /** @stable ICU 58 */ 1153 public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/ 1154 /** @stable ICU 58 */ 1155 public static final int NEWA_ID = 270; /*[11400]*/ 1156 /** @stable ICU 58 */ 1157 public static final int OSAGE_ID = 271; /*[104B0]*/ 1158 /** @stable ICU 58 */ 1159 public static final int TANGUT_ID = 272; /*[17000]*/ 1160 /** @stable ICU 58 */ 1161 public static final int 
TANGUT_COMPONENTS_ID = 273; /*[18800]*/ 1162 1163 // New blocks in Unicode 10.0 1164 1165 /** @stable ICU 60 */ 1166 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/ 1167 /** @stable ICU 60 */ 1168 public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/ 1169 /** @stable ICU 60 */ 1170 public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/ 1171 /** @stable ICU 60 */ 1172 public static final int NUSHU_ID = 277; /*[1B170]*/ 1173 /** @stable ICU 60 */ 1174 public static final int SOYOMBO_ID = 278; /*[11A50]*/ 1175 /** @stable ICU 60 */ 1176 public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/ 1177 /** @stable ICU 60 */ 1178 public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/ 1179 1180 // New blocks in Unicode 11.0 1181 1182 /** @stable ICU 62 */ 1183 public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/ 1184 /** @stable ICU 62 */ 1185 public static final int DOGRA_ID = 282; /*[11800]*/ 1186 /** @stable ICU 62 */ 1187 public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/ 1188 /** @stable ICU 62 */ 1189 public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/ 1190 /** @stable ICU 62 */ 1191 public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/ 1192 /** @stable ICU 62 */ 1193 public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/ 1194 /** @stable ICU 62 */ 1195 public static final int MAKASAR_ID = 287; /*[11EE0]*/ 1196 /** @stable ICU 62 */ 1197 public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/ 1198 /** @stable ICU 62 */ 1199 public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/ 1200 /** @stable ICU 62 */ 1201 public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/ 1202 /** @stable ICU 62 */ 1203 public static final int SOGDIAN_ID = 291; /*[10F30]*/ 1204 1205 // New blocks in Unicode 12.0 1206 1207 /** @stable ICU 64 */ 1208 public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/ 1209 /** @stable ICU 64 */ 1210 public static final 
int ELYMAIC_ID = 293; /*[10FE0]*/ 1211 /** @stable ICU 64 */ 1212 public static final int NANDINAGARI_ID = 294; /*[119A0]*/ 1213 /** @stable ICU 64 */ 1214 public static final int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/ 1215 /** @stable ICU 64 */ 1216 public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/ 1217 /** @stable ICU 64 */ 1218 public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/ 1219 /** @stable ICU 64 */ 1220 public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/ 1221 /** @stable ICU 64 */ 1222 public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/ 1223 /** @stable ICU 64 */ 1224 public static final int WANCHO_ID = 300; /*[1E2C0]*/ 1225 1226 // New blocks in Unicode 13.0 1227 1228 /** @stable ICU 66 */ 1229 public static final int CHORASMIAN_ID = 301; /*[10FB0]*/ 1230 /** @stable ICU 66 */ 1231 public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/ 1232 /** @stable ICU 66 */ 1233 public static final int DIVES_AKURU_ID = 303; /*[11900]*/ 1234 /** @stable ICU 66 */ 1235 public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/ 1236 /** @stable ICU 66 */ 1237 public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/ 1238 /** @stable ICU 66 */ 1239 public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/ 1240 /** @stable ICU 66 */ 1241 public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/ 1242 /** @stable ICU 66 */ 1243 public static final int YEZIDI_ID = 308; /*[10E80]*/ 1244 1245 /** 1246 * One more than the highest normal UnicodeBlock value. 1247 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK). 1248 * 1249 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
1250 */ 1251 @Deprecated 1252 public static final int COUNT = 309; 1253 1254 // blocks objects --------------------------------------------------- 1255 1256 /** 1257 * Array of UnicodeBlocks, for easy access in getInstance(int) 1258 */ 1259 private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT]; 1260 1261 /** 1262 * @stable ICU 2.6 1263 */ 1264 public static final UnicodeBlock NO_BLOCK 1265 = new UnicodeBlock("NO_BLOCK", 0); 1266 1267 /** 1268 * @stable ICU 2.4 1269 */ 1270 public static final UnicodeBlock BASIC_LATIN 1271 = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID); 1272 /** 1273 * @stable ICU 2.4 1274 */ 1275 public static final UnicodeBlock LATIN_1_SUPPLEMENT 1276 = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID); 1277 /** 1278 * @stable ICU 2.4 1279 */ 1280 public static final UnicodeBlock LATIN_EXTENDED_A 1281 = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID); 1282 /** 1283 * @stable ICU 2.4 1284 */ 1285 public static final UnicodeBlock LATIN_EXTENDED_B 1286 = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID); 1287 /** 1288 * @stable ICU 2.4 1289 */ 1290 public static final UnicodeBlock IPA_EXTENSIONS 1291 = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID); 1292 /** 1293 * @stable ICU 2.4 1294 */ 1295 public static final UnicodeBlock SPACING_MODIFIER_LETTERS 1296 = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID); 1297 /** 1298 * @stable ICU 2.4 1299 */ 1300 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS 1301 = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID); 1302 /** 1303 * Unicode 3.2 renames this block to "Greek and Coptic". 
1304 * @stable ICU 2.4 1305 */ 1306 public static final UnicodeBlock GREEK 1307 = new UnicodeBlock("GREEK", GREEK_ID); 1308 /** 1309 * @stable ICU 2.4 1310 */ 1311 public static final UnicodeBlock CYRILLIC 1312 = new UnicodeBlock("CYRILLIC", CYRILLIC_ID); 1313 /** 1314 * @stable ICU 2.4 1315 */ 1316 public static final UnicodeBlock ARMENIAN 1317 = new UnicodeBlock("ARMENIAN", ARMENIAN_ID); 1318 /** 1319 * @stable ICU 2.4 1320 */ 1321 public static final UnicodeBlock HEBREW 1322 = new UnicodeBlock("HEBREW", HEBREW_ID); 1323 /** 1324 * @stable ICU 2.4 1325 */ 1326 public static final UnicodeBlock ARABIC 1327 = new UnicodeBlock("ARABIC", ARABIC_ID); 1328 /** 1329 * @stable ICU 2.4 1330 */ 1331 public static final UnicodeBlock SYRIAC 1332 = new UnicodeBlock("SYRIAC", SYRIAC_ID); 1333 /** 1334 * @stable ICU 2.4 1335 */ 1336 public static final UnicodeBlock THAANA 1337 = new UnicodeBlock("THAANA", THAANA_ID); 1338 /** 1339 * @stable ICU 2.4 1340 */ 1341 public static final UnicodeBlock DEVANAGARI 1342 = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID); 1343 /** 1344 * @stable ICU 2.4 1345 */ 1346 public static final UnicodeBlock BENGALI 1347 = new UnicodeBlock("BENGALI", BENGALI_ID); 1348 /** 1349 * @stable ICU 2.4 1350 */ 1351 public static final UnicodeBlock GURMUKHI 1352 = new UnicodeBlock("GURMUKHI", GURMUKHI_ID); 1353 /** 1354 * @stable ICU 2.4 1355 */ 1356 public static final UnicodeBlock GUJARATI 1357 = new UnicodeBlock("GUJARATI", GUJARATI_ID); 1358 /** 1359 * @stable ICU 2.4 1360 */ 1361 public static final UnicodeBlock ORIYA 1362 = new UnicodeBlock("ORIYA", ORIYA_ID); 1363 /** 1364 * @stable ICU 2.4 1365 */ 1366 public static final UnicodeBlock TAMIL 1367 = new UnicodeBlock("TAMIL", TAMIL_ID); 1368 /** 1369 * @stable ICU 2.4 1370 */ 1371 public static final UnicodeBlock TELUGU 1372 = new UnicodeBlock("TELUGU", TELUGU_ID); 1373 /** 1374 * @stable ICU 2.4 1375 */ 1376 public static final UnicodeBlock KANNADA 1377 = new UnicodeBlock("KANNADA", KANNADA_ID); 1378 /** 
1379 * @stable ICU 2.4 1380 */ 1381 public static final UnicodeBlock MALAYALAM 1382 = new UnicodeBlock("MALAYALAM", MALAYALAM_ID); 1383 /** 1384 * @stable ICU 2.4 1385 */ 1386 public static final UnicodeBlock SINHALA 1387 = new UnicodeBlock("SINHALA", SINHALA_ID); 1388 /** 1389 * @stable ICU 2.4 1390 */ 1391 public static final UnicodeBlock THAI 1392 = new UnicodeBlock("THAI", THAI_ID); 1393 /** 1394 * @stable ICU 2.4 1395 */ 1396 public static final UnicodeBlock LAO 1397 = new UnicodeBlock("LAO", LAO_ID); 1398 /** 1399 * @stable ICU 2.4 1400 */ 1401 public static final UnicodeBlock TIBETAN 1402 = new UnicodeBlock("TIBETAN", TIBETAN_ID); 1403 /** 1404 * @stable ICU 2.4 1405 */ 1406 public static final UnicodeBlock MYANMAR 1407 = new UnicodeBlock("MYANMAR", MYANMAR_ID); 1408 /** 1409 * @stable ICU 2.4 1410 */ 1411 public static final UnicodeBlock GEORGIAN 1412 = new UnicodeBlock("GEORGIAN", GEORGIAN_ID); 1413 /** 1414 * @stable ICU 2.4 1415 */ 1416 public static final UnicodeBlock HANGUL_JAMO 1417 = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID); 1418 /** 1419 * @stable ICU 2.4 1420 */ 1421 public static final UnicodeBlock ETHIOPIC 1422 = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID); 1423 /** 1424 * @stable ICU 2.4 1425 */ 1426 public static final UnicodeBlock CHEROKEE 1427 = new UnicodeBlock("CHEROKEE", CHEROKEE_ID); 1428 /** 1429 * @stable ICU 2.4 1430 */ 1431 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS 1432 = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1433 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID); 1434 /** 1435 * @stable ICU 2.4 1436 */ 1437 public static final UnicodeBlock OGHAM 1438 = new UnicodeBlock("OGHAM", OGHAM_ID); 1439 /** 1440 * @stable ICU 2.4 1441 */ 1442 public static final UnicodeBlock RUNIC 1443 = new UnicodeBlock("RUNIC", RUNIC_ID); 1444 /** 1445 * @stable ICU 2.4 1446 */ 1447 public static final UnicodeBlock KHMER 1448 = new UnicodeBlock("KHMER", KHMER_ID); 1449 /** 1450 * @stable ICU 2.4 1451 */ 1452 
public static final UnicodeBlock MONGOLIAN 1453 = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID); 1454 /** 1455 * @stable ICU 2.4 1456 */ 1457 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL 1458 = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID); 1459 /** 1460 * @stable ICU 2.4 1461 */ 1462 public static final UnicodeBlock GREEK_EXTENDED 1463 = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID); 1464 /** 1465 * @stable ICU 2.4 1466 */ 1467 public static final UnicodeBlock GENERAL_PUNCTUATION 1468 = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID); 1469 /** 1470 * @stable ICU 2.4 1471 */ 1472 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS 1473 = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID); 1474 /** 1475 * @stable ICU 2.4 1476 */ 1477 public static final UnicodeBlock CURRENCY_SYMBOLS 1478 = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID); 1479 /** 1480 * Unicode 3.2 renames this block to "Combining Diacritical Marks for 1481 * Symbols". 
1482 * @stable ICU 2.4 1483 */ 1484 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS 1485 = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID); 1486 /** 1487 * @stable ICU 2.4 1488 */ 1489 public static final UnicodeBlock LETTERLIKE_SYMBOLS 1490 = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID); 1491 /** 1492 * @stable ICU 2.4 1493 */ 1494 public static final UnicodeBlock NUMBER_FORMS 1495 = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID); 1496 /** 1497 * @stable ICU 2.4 1498 */ 1499 public static final UnicodeBlock ARROWS 1500 = new UnicodeBlock("ARROWS", ARROWS_ID); 1501 /** 1502 * @stable ICU 2.4 1503 */ 1504 public static final UnicodeBlock MATHEMATICAL_OPERATORS 1505 = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID); 1506 /** 1507 * @stable ICU 2.4 1508 */ 1509 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL 1510 = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID); 1511 /** 1512 * @stable ICU 2.4 1513 */ 1514 public static final UnicodeBlock CONTROL_PICTURES 1515 = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID); 1516 /** 1517 * @stable ICU 2.4 1518 */ 1519 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION 1520 = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID); 1521 /** 1522 * @stable ICU 2.4 1523 */ 1524 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS 1525 = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID); 1526 /** 1527 * @stable ICU 2.4 1528 */ 1529 public static final UnicodeBlock BOX_DRAWING 1530 = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID); 1531 /** 1532 * @stable ICU 2.4 1533 */ 1534 public static final UnicodeBlock BLOCK_ELEMENTS 1535 = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID); 1536 /** 1537 * @stable ICU 2.4 1538 */ 1539 public static final UnicodeBlock GEOMETRIC_SHAPES 1540 = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID); 1541 
/** 1542 * @stable ICU 2.4 1543 */ 1544 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS 1545 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID); 1546 /** 1547 * @stable ICU 2.4 1548 */ 1549 public static final UnicodeBlock DINGBATS 1550 = new UnicodeBlock("DINGBATS", DINGBATS_ID); 1551 /** 1552 * @stable ICU 2.4 1553 */ 1554 public static final UnicodeBlock BRAILLE_PATTERNS 1555 = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID); 1556 /** 1557 * @stable ICU 2.4 1558 */ 1559 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT 1560 = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID); 1561 /** 1562 * @stable ICU 2.4 1563 */ 1564 public static final UnicodeBlock KANGXI_RADICALS 1565 = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID); 1566 /** 1567 * @stable ICU 2.4 1568 */ 1569 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS 1570 = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1571 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID); 1572 /** 1573 * @stable ICU 2.4 1574 */ 1575 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION 1576 = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID); 1577 /** 1578 * @stable ICU 2.4 1579 */ 1580 public static final UnicodeBlock HIRAGANA 1581 = new UnicodeBlock("HIRAGANA", HIRAGANA_ID); 1582 /** 1583 * @stable ICU 2.4 1584 */ 1585 public static final UnicodeBlock KATAKANA 1586 = new UnicodeBlock("KATAKANA", KATAKANA_ID); 1587 /** 1588 * @stable ICU 2.4 1589 */ 1590 public static final UnicodeBlock BOPOMOFO 1591 = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID); 1592 /** 1593 * @stable ICU 2.4 1594 */ 1595 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO 1596 = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID); 1597 /** 1598 * @stable ICU 2.4 1599 */ 1600 public static final UnicodeBlock KANBUN 1601 = new UnicodeBlock("KANBUN", KANBUN_ID); 1602 /** 1603 * @stable ICU 2.4 1604 */ 1605 
public static final UnicodeBlock BOPOMOFO_EXTENDED 1606 = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID); 1607 /** 1608 * @stable ICU 2.4 1609 */ 1610 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS 1611 = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1612 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID); 1613 /** 1614 * @stable ICU 2.4 1615 */ 1616 public static final UnicodeBlock CJK_COMPATIBILITY 1617 = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID); 1618 /** 1619 * @stable ICU 2.4 1620 */ 1621 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A 1622 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1623 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID); 1624 /** 1625 * @stable ICU 2.4 1626 */ 1627 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS 1628 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID); 1629 /** 1630 * @stable ICU 2.4 1631 */ 1632 public static final UnicodeBlock YI_SYLLABLES 1633 = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID); 1634 /** 1635 * @stable ICU 2.4 1636 */ 1637 public static final UnicodeBlock YI_RADICALS 1638 = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID); 1639 /** 1640 * @stable ICU 2.4 1641 */ 1642 public static final UnicodeBlock HANGUL_SYLLABLES 1643 = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID); 1644 /** 1645 * @stable ICU 2.4 1646 */ 1647 public static final UnicodeBlock HIGH_SURROGATES 1648 = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID); 1649 /** 1650 * @stable ICU 2.4 1651 */ 1652 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES 1653 = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID); 1654 /** 1655 * @stable ICU 2.4 1656 */ 1657 public static final UnicodeBlock LOW_SURROGATES 1658 = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID); 1659 /** 1660 * Same as public static final int PRIVATE_USE. 
1661 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1662 * and multiple code point ranges had this block. 1663 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1664 * and adds separate blocks for the supplementary PUAs. 1665 * @stable ICU 2.4 1666 */ 1667 public static final UnicodeBlock PRIVATE_USE_AREA 1668 = new UnicodeBlock("PRIVATE_USE_AREA", 78); 1669 /** 1670 * Same as public static final int PRIVATE_USE_AREA. 1671 * Until Unicode 3.1.1; the corresponding block name was "Private Use"; 1672 * and multiple code point ranges had this block. 1673 * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" 1674 * and adds separate blocks for the supplementary PUAs. 1675 * @stable ICU 2.4 1676 */ 1677 public static final UnicodeBlock PRIVATE_USE 1678 = PRIVATE_USE_AREA; 1679 /** 1680 * @stable ICU 2.4 1681 */ 1682 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS 1683 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID); 1684 /** 1685 * @stable ICU 2.4 1686 */ 1687 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS 1688 = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID); 1689 /** 1690 * @stable ICU 2.4 1691 */ 1692 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A 1693 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID); 1694 /** 1695 * @stable ICU 2.4 1696 */ 1697 public static final UnicodeBlock COMBINING_HALF_MARKS 1698 = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID); 1699 /** 1700 * @stable ICU 2.4 1701 */ 1702 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS 1703 = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID); 1704 /** 1705 * @stable ICU 2.4 1706 */ 1707 public static final UnicodeBlock SMALL_FORM_VARIANTS 1708 = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID); 1709 /** 1710 * @stable ICU 2.4 1711 */ 1712 
public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B 1713 = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID); 1714 /** 1715 * @stable ICU 2.4 1716 */ 1717 public static final UnicodeBlock SPECIALS 1718 = new UnicodeBlock("SPECIALS", SPECIALS_ID); 1719 /** 1720 * @stable ICU 2.4 1721 */ 1722 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS 1723 = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID); 1724 /** 1725 * @stable ICU 2.4 1726 */ 1727 public static final UnicodeBlock OLD_ITALIC 1728 = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID); 1729 /** 1730 * @stable ICU 2.4 1731 */ 1732 public static final UnicodeBlock GOTHIC 1733 = new UnicodeBlock("GOTHIC", GOTHIC_ID); 1734 /** 1735 * @stable ICU 2.4 1736 */ 1737 public static final UnicodeBlock DESERET 1738 = new UnicodeBlock("DESERET", DESERET_ID); 1739 /** 1740 * @stable ICU 2.4 1741 */ 1742 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS 1743 = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID); 1744 /** 1745 * @stable ICU 2.4 1746 */ 1747 public static final UnicodeBlock MUSICAL_SYMBOLS 1748 = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID); 1749 /** 1750 * @stable ICU 2.4 1751 */ 1752 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS 1753 = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1754 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID); 1755 /** 1756 * @stable ICU 2.4 1757 */ 1758 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B 1759 = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1760 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID); 1761 /** 1762 * @stable ICU 2.4 1763 */ 1764 public static final UnicodeBlock 1765 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT 1766 = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1767 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID); 1768 /** 1769 * @stable ICU 2.4 1770 */ 1771 public static final UnicodeBlock TAGS 
1772 = new UnicodeBlock("TAGS", TAGS_ID); 1773 1774 // New blocks in Unicode 3.2 1775 1776 /** 1777 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1778 * @stable ICU 2.4 1779 */ 1780 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY 1781 = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); 1782 /** 1783 * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement". 1784 * @stable ICU 3.0 1785 */ 1786 public static final UnicodeBlock CYRILLIC_SUPPLEMENT 1787 = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID); 1788 /** 1789 * @stable ICU 2.4 1790 */ 1791 public static final UnicodeBlock TAGALOG 1792 = new UnicodeBlock("TAGALOG", TAGALOG_ID); 1793 /** 1794 * @stable ICU 2.4 1795 */ 1796 public static final UnicodeBlock HANUNOO 1797 = new UnicodeBlock("HANUNOO", HANUNOO_ID); 1798 /** 1799 * @stable ICU 2.4 1800 */ 1801 public static final UnicodeBlock BUHID 1802 = new UnicodeBlock("BUHID", BUHID_ID); 1803 /** 1804 * @stable ICU 2.4 1805 */ 1806 public static final UnicodeBlock TAGBANWA 1807 = new UnicodeBlock("TAGBANWA", TAGBANWA_ID); 1808 /** 1809 * @stable ICU 2.4 1810 */ 1811 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A 1812 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1813 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID); 1814 /** 1815 * @stable ICU 2.4 1816 */ 1817 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A 1818 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID); 1819 /** 1820 * @stable ICU 2.4 1821 */ 1822 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B 1823 = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID); 1824 /** 1825 * @stable ICU 2.4 1826 */ 1827 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B 1828 = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1829 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID); 1830 /** 1831 * @stable ICU 2.4 1832 */ 1833 
public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS 1834 = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1835 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID); 1836 /** 1837 * @stable ICU 2.4 1838 */ 1839 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS 1840 = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID); 1841 /** 1842 * @stable ICU 2.4 1843 */ 1844 public static final UnicodeBlock VARIATION_SELECTORS 1845 = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID); 1846 /** 1847 * @stable ICU 2.4 1848 */ 1849 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A 1850 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1851 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID); 1852 /** 1853 * @stable ICU 2.4 1854 */ 1855 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B 1856 = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1857 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID); 1858 1859 /** 1860 * @stable ICU 2.6 1861 */ 1862 public static final UnicodeBlock LIMBU 1863 = new UnicodeBlock("LIMBU", LIMBU_ID); 1864 /** 1865 * @stable ICU 2.6 1866 */ 1867 public static final UnicodeBlock TAI_LE 1868 = new UnicodeBlock("TAI_LE", TAI_LE_ID); 1869 /** 1870 * @stable ICU 2.6 1871 */ 1872 public static final UnicodeBlock KHMER_SYMBOLS 1873 = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID); 1874 1875 /** 1876 * @stable ICU 2.6 1877 */ 1878 public static final UnicodeBlock PHONETIC_EXTENSIONS 1879 = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID); 1880 1881 /** 1882 * @stable ICU 2.6 1883 */ 1884 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS 1885 = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1886 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID); 1887 /** 1888 * @stable ICU 2.6 1889 */ 1890 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS 1891 = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID); 1892 /** 1893 * @stable ICU 2.6 
1894 */ 1895 public static final UnicodeBlock LINEAR_B_SYLLABARY 1896 = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID); 1897 /** 1898 * @stable ICU 2.6 1899 */ 1900 public static final UnicodeBlock LINEAR_B_IDEOGRAMS 1901 = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID); 1902 /** 1903 * @stable ICU 2.6 1904 */ 1905 public static final UnicodeBlock AEGEAN_NUMBERS 1906 = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID); 1907 /** 1908 * @stable ICU 2.6 1909 */ 1910 public static final UnicodeBlock UGARITIC 1911 = new UnicodeBlock("UGARITIC", UGARITIC_ID); 1912 /** 1913 * @stable ICU 2.6 1914 */ 1915 public static final UnicodeBlock SHAVIAN 1916 = new UnicodeBlock("SHAVIAN", SHAVIAN_ID); 1917 /** 1918 * @stable ICU 2.6 1919 */ 1920 public static final UnicodeBlock OSMANYA 1921 = new UnicodeBlock("OSMANYA", OSMANYA_ID); 1922 /** 1923 * @stable ICU 2.6 1924 */ 1925 public static final UnicodeBlock CYPRIOT_SYLLABARY 1926 = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID); 1927 /** 1928 * @stable ICU 2.6 1929 */ 1930 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS 1931 = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID); 1932 1933 /** 1934 * @stable ICU 2.6 1935 */ 1936 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT 1937 = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID); 1938 1939 /* New blocks in Unicode 4.1 */ 1940 1941 /** 1942 * @stable ICU 3.4 1943 */ 1944 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 1945 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 1946 ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/ 1947 1948 /** 1949 * @stable ICU 3.4 1950 */ 1951 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 1952 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/ 1953 1954 /** 1955 * @stable ICU 3.4 1956 */ 1957 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1958 new 
UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/ 1959 1960 /** 1961 * @stable ICU 3.4 1962 */ 1963 public static final UnicodeBlock BUGINESE = 1964 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/ 1965 1966 /** 1967 * @stable ICU 3.4 1968 */ 1969 public static final UnicodeBlock CJK_STROKES = 1970 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/ 1971 1972 /** 1973 * @stable ICU 3.4 1974 */ 1975 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 1976 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 1977 COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/ 1978 1979 /** 1980 * @stable ICU 3.4 1981 */ 1982 public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/ 1983 1984 /** 1985 * @stable ICU 3.4 1986 */ 1987 public static final UnicodeBlock ETHIOPIC_EXTENDED = 1988 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/ 1989 1990 /** 1991 * @stable ICU 3.4 1992 */ 1993 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1994 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/ 1995 1996 /** 1997 * @stable ICU 3.4 1998 */ 1999 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2000 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/ 2001 2002 /** 2003 * @stable ICU 3.4 2004 */ 2005 public static final UnicodeBlock GLAGOLITIC = 2006 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/ 2007 2008 /** 2009 * @stable ICU 3.4 2010 */ 2011 public static final UnicodeBlock KHAROSHTHI = 2012 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/ 2013 2014 /** 2015 * @stable ICU 3.4 2016 */ 2017 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2018 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/ 2019 2020 /** 2021 * @stable ICU 3.4 2022 */ 2023 public static final UnicodeBlock NEW_TAI_LUE = 2024 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/ 
2025 2026 /** 2027 * @stable ICU 3.4 2028 */ 2029 public static final UnicodeBlock OLD_PERSIAN = 2030 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/ 2031 2032 /** 2033 * @stable ICU 3.4 2034 */ 2035 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 2036 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 2037 PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/ 2038 2039 /** 2040 * @stable ICU 3.4 2041 */ 2042 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2043 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/ 2044 2045 /** 2046 * @stable ICU 3.4 2047 */ 2048 public static final UnicodeBlock SYLOTI_NAGRI = 2049 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/ 2050 2051 /** 2052 * @stable ICU 3.4 2053 */ 2054 public static final UnicodeBlock TIFINAGH = 2055 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/ 2056 2057 /** 2058 * @stable ICU 3.4 2059 */ 2060 public static final UnicodeBlock VERTICAL_FORMS = 2061 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/ 2062 2063 /** 2064 * @stable ICU 3.6 2065 */ 2066 public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/ 2067 /** 2068 * @stable ICU 3.6 2069 */ 2070 public static final UnicodeBlock BALINESE = 2071 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/ 2072 /** 2073 * @stable ICU 3.6 2074 */ 2075 public static final UnicodeBlock LATIN_EXTENDED_C = 2076 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/ 2077 /** 2078 * @stable ICU 3.6 2079 */ 2080 public static final UnicodeBlock LATIN_EXTENDED_D = 2081 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/ 2082 /** 2083 * @stable ICU 3.6 2084 */ 2085 public static final UnicodeBlock PHAGS_PA = 2086 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/ 2087 /** 2088 * @stable ICU 3.6 2089 */ 2090 public static final UnicodeBlock PHOENICIAN = 2091 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); 
/*[10900]*/ 2092 /** 2093 * @stable ICU 3.6 2094 */ 2095 public static final UnicodeBlock CUNEIFORM = 2096 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/ 2097 /** 2098 * @stable ICU 3.6 2099 */ 2100 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2101 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2102 CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/ 2103 /** 2104 * @stable ICU 3.6 2105 */ 2106 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2107 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/ 2108 2109 /** 2110 * @stable ICU 4.0 2111 */ 2112 public static final UnicodeBlock SUNDANESE = 2113 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */ 2114 2115 /** 2116 * @stable ICU 4.0 2117 */ 2118 public static final UnicodeBlock LEPCHA = 2119 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */ 2120 2121 /** 2122 * @stable ICU 4.0 2123 */ 2124 public static final UnicodeBlock OL_CHIKI = 2125 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */ 2126 2127 /** 2128 * @stable ICU 4.0 2129 */ 2130 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2131 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */ 2132 2133 /** 2134 * @stable ICU 4.0 2135 */ 2136 public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */ 2137 2138 /** 2139 * @stable ICU 4.0 2140 */ 2141 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2142 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */ 2143 2144 /** 2145 * @stable ICU 4.0 2146 */ 2147 public static final UnicodeBlock SAURASHTRA = 2148 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */ 2149 2150 /** 2151 * @stable ICU 4.0 2152 */ 2153 public static final UnicodeBlock KAYAH_LI = 2154 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */ 2155 2156 /** 2157 * @stable ICU 4.0 2158 */ 2159 public static final UnicodeBlock REJANG = 2160 new UnicodeBlock("REJANG", 
REJANG_ID); /* [A930] */ 2161 2162 /** 2163 * @stable ICU 4.0 2164 */ 2165 public static final UnicodeBlock CHAM = 2166 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */ 2167 2168 /** 2169 * @stable ICU 4.0 2170 */ 2171 public static final UnicodeBlock ANCIENT_SYMBOLS = 2172 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */ 2173 2174 /** 2175 * @stable ICU 4.0 2176 */ 2177 public static final UnicodeBlock PHAISTOS_DISC = 2178 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */ 2179 2180 /** 2181 * @stable ICU 4.0 2182 */ 2183 public static final UnicodeBlock LYCIAN = 2184 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */ 2185 2186 /** 2187 * @stable ICU 4.0 2188 */ 2189 public static final UnicodeBlock CARIAN = 2190 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */ 2191 2192 /** 2193 * @stable ICU 4.0 2194 */ 2195 public static final UnicodeBlock LYDIAN = 2196 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */ 2197 2198 /** 2199 * @stable ICU 4.0 2200 */ 2201 public static final UnicodeBlock MAHJONG_TILES = 2202 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */ 2203 2204 /** 2205 * @stable ICU 4.0 2206 */ 2207 public static final UnicodeBlock DOMINO_TILES = 2208 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */ 2209 2210 /* New blocks in Unicode 5.2 */ 2211 2212 /** @stable ICU 4.4 */ 2213 public static final UnicodeBlock SAMARITAN = 2214 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/ 2215 /** @stable ICU 4.4 */ 2216 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 2217 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 2218 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/ 2219 /** @stable ICU 4.4 */ 2220 public static final UnicodeBlock TAI_THAM = 2221 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/ 2222 /** @stable ICU 4.4 */ 2223 public static final UnicodeBlock VEDIC_EXTENSIONS = 2224 new 
UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/ 2225 /** @stable ICU 4.4 */ 2226 public static final UnicodeBlock LISU = 2227 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/ 2228 /** @stable ICU 4.4 */ 2229 public static final UnicodeBlock BAMUM = 2230 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/ 2231 /** @stable ICU 4.4 */ 2232 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2233 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/ 2234 /** @stable ICU 4.4 */ 2235 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2236 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/ 2237 /** @stable ICU 4.4 */ 2238 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2239 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/ 2240 /** @stable ICU 4.4 */ 2241 public static final UnicodeBlock JAVANESE = 2242 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/ 2243 /** @stable ICU 4.4 */ 2244 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2245 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/ 2246 /** @stable ICU 4.4 */ 2247 public static final UnicodeBlock TAI_VIET = 2248 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/ 2249 /** @stable ICU 4.4 */ 2250 public static final UnicodeBlock MEETEI_MAYEK = 2251 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/ 2252 /** @stable ICU 4.4 */ 2253 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2254 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/ 2255 /** @stable ICU 4.4 */ 2256 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2257 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/ 2258 /** @stable ICU 4.4 */ 2259 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2260 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/ 2261 /** @stable ICU 4.4 */ 2262 public static final 
UnicodeBlock AVESTAN = 2263 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/ 2264 /** @stable ICU 4.4 */ 2265 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2266 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/ 2267 /** @stable ICU 4.4 */ 2268 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2269 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/ 2270 /** @stable ICU 4.4 */ 2271 public static final UnicodeBlock OLD_TURKIC = 2272 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/ 2273 /** @stable ICU 4.4 */ 2274 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2275 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/ 2276 /** @stable ICU 4.4 */ 2277 public static final UnicodeBlock KAITHI = 2278 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/ 2279 /** @stable ICU 4.4 */ 2280 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2281 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/ 2282 /** @stable ICU 4.4 */ 2283 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2284 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2285 ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/ 2286 /** @stable ICU 4.4 */ 2287 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2288 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2289 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/ 2290 /** @stable ICU 4.4 */ 2291 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2292 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2293 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/ 2294 2295 /* New blocks in Unicode 6.0 */ 2296 2297 /** @stable ICU 4.6 */ 2298 public static final UnicodeBlock MANDAIC = 2299 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/ 2300 /** @stable ICU 4.6 */ 2301 public static final UnicodeBlock BATAK = 2302 new UnicodeBlock("BATAK", BATAK_ID); 
/*[1BC0]*/ 2303 /** @stable ICU 4.6 */ 2304 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2305 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/ 2306 /** @stable ICU 4.6 */ 2307 public static final UnicodeBlock BRAHMI = 2308 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/ 2309 /** @stable ICU 4.6 */ 2310 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2311 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/ 2312 /** @stable ICU 4.6 */ 2313 public static final UnicodeBlock KANA_SUPPLEMENT = 2314 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/ 2315 /** @stable ICU 4.6 */ 2316 public static final UnicodeBlock PLAYING_CARDS = 2317 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/ 2318 /** @stable ICU 4.6 */ 2319 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2320 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2321 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/ 2322 /** @stable ICU 4.6 */ 2323 public static final UnicodeBlock EMOTICONS = 2324 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/ 2325 /** @stable ICU 4.6 */ 2326 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2327 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/ 2328 /** @stable ICU 4.6 */ 2329 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2330 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/ 2331 /** @stable ICU 4.6 */ 2332 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2333 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2334 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/ 2335 2336 /* New blocks in Unicode 6.1 */ 2337 2338 /** @stable ICU 49 */ 2339 public static final UnicodeBlock ARABIC_EXTENDED_A = 2340 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/ 2341 /** @stable ICU 49 */ 2342 public static final UnicodeBlock 
ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2343 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/ 2344 /** @stable ICU 49 */ 2345 public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/ 2346 /** @stable ICU 49 */ 2347 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2348 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/ 2349 /** @stable ICU 49 */ 2350 public static final UnicodeBlock MEROITIC_CURSIVE = 2351 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/ 2352 /** @stable ICU 49 */ 2353 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2354 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/ 2355 /** @stable ICU 49 */ 2356 public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/ 2357 /** @stable ICU 49 */ 2358 public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/ 2359 /** @stable ICU 49 */ 2360 public static final UnicodeBlock SORA_SOMPENG = 2361 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/ 2362 /** @stable ICU 49 */ 2363 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2364 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/ 2365 /** @stable ICU 49 */ 2366 public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/ 2367 2368 /* New blocks in Unicode 7.0 */ 2369 2370 /** @stable ICU 54 */ 2371 public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/ 2372 /** @stable ICU 54 */ 2373 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2374 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/ 2375 /** @stable ICU 54 */ 2376 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2377 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); 
/*[102E0]*/ 2378 /** @stable ICU 54 */ 2379 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2380 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/ 2381 /** @stable ICU 54 */ 2382 public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/ 2383 /** @stable ICU 54 */ 2384 public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/ 2385 /** @stable ICU 54 */ 2386 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2387 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/ 2388 /** @stable ICU 54 */ 2389 public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/ 2390 /** @stable ICU 54 */ 2391 public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/ 2392 /** @stable ICU 54 */ 2393 public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/ 2394 /** @stable ICU 54 */ 2395 public static final UnicodeBlock LATIN_EXTENDED_E = 2396 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/ 2397 /** @stable ICU 54 */ 2398 public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/ 2399 /** @stable ICU 54 */ 2400 public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/ 2401 /** @stable ICU 54 */ 2402 public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/ 2403 /** @stable ICU 54 */ 2404 public static final UnicodeBlock MENDE_KIKAKUI = 2405 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/ 2406 /** @stable ICU 54 */ 2407 public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/ 2408 /** @stable ICU 54 */ 2409 public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); 
/*[16A40]*/ 2410 /** @stable ICU 54 */ 2411 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2412 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/ 2413 /** @stable ICU 54 */ 2414 public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/ 2415 /** @stable ICU 54 */ 2416 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2417 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/ 2418 /** @stable ICU 54 */ 2419 public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/ 2420 /** @stable ICU 54 */ 2421 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2422 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/ 2423 /** @stable ICU 54 */ 2424 public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/ 2425 /** @stable ICU 54 */ 2426 public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/ 2427 /** @stable ICU 54 */ 2428 public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/ 2429 /** @stable ICU 54 */ 2430 public static final UnicodeBlock PSALTER_PAHLAVI = 2431 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/ 2432 /** @stable ICU 54 */ 2433 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2434 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/ 2435 /** @stable ICU 54 */ 2436 public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/ 2437 /** @stable ICU 54 */ 2438 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2439 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/ 2440 /** @stable ICU 54 */ 2441 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2442 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 
SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/ 2443 /** @stable ICU 54 */ 2444 public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/ 2445 /** @stable ICU 54 */ 2446 public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/ 2447 2448 /* New blocks in Unicode 8.0 */ 2449 2450 /** @stable ICU 56 */ 2451 public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/ 2452 /** @stable ICU 56 */ 2453 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 2454 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/ 2455 /** @stable ICU 56 */ 2456 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2457 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/ 2458 /** @stable ICU 56 */ 2459 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 2460 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 2461 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/ 2462 /** @stable ICU 56 */ 2463 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 2464 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/ 2465 /** @stable ICU 56 */ 2466 public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/ 2467 /** @stable ICU 56 */ 2468 public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/ 2469 /** @stable ICU 56 */ 2470 public static final UnicodeBlock OLD_HUNGARIAN = 2471 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/ 2472 /** @stable ICU 56 */ 2473 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 2474 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 2475 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/ 2476 /** @stable ICU 56 */ 2477 public static final UnicodeBlock SUTTON_SIGNWRITING = 2478 new UnicodeBlock("SUTTON_SIGNWRITING", 
SUTTON_SIGNWRITING_ID); /*[1D800]*/ 2479 2480 /* New blocks in Unicode 9.0 */ 2481 2482 /** @stable ICU 58 */ 2483 public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/ 2484 /** @stable ICU 58 */ 2485 public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/ 2486 /** @stable ICU 58 */ 2487 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 2488 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/ 2489 /** @stable ICU 58 */ 2490 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 2491 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/ 2492 /** @stable ICU 58 */ 2493 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 2494 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/ 2495 /** @stable ICU 58 */ 2496 public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/ 2497 /** @stable ICU 58 */ 2498 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 2499 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/ 2500 /** @stable ICU 58 */ 2501 public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/ 2502 /** @stable ICU 58 */ 2503 public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/ 2504 /** @stable ICU 58 */ 2505 public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/ 2506 /** @stable ICU 58 */ 2507 public static final UnicodeBlock TANGUT_COMPONENTS = 2508 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/ 2509 2510 // New blocks in Unicode 10.0 2511 2512 /** @stable ICU 60 */ 2513 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 2514 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/ 2515 /** 
@stable ICU 60 */ 2516 public static final UnicodeBlock KANA_EXTENDED_A = 2517 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/ 2518 /** @stable ICU 60 */ 2519 public static final UnicodeBlock MASARAM_GONDI = 2520 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/ 2521 /** @stable ICU 60 */ 2522 public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/ 2523 /** @stable ICU 60 */ 2524 public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/ 2525 /** @stable ICU 60 */ 2526 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 2527 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/ 2528 /** @stable ICU 60 */ 2529 public static final UnicodeBlock ZANABAZAR_SQUARE = 2530 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/ 2531 2532 // New blocks in Unicode 11.0 2533 2534 /** @stable ICU 62 */ 2535 public static final UnicodeBlock CHESS_SYMBOLS = 2536 new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/ 2537 /** @stable ICU 62 */ 2538 public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/ 2539 /** @stable ICU 62 */ 2540 public static final UnicodeBlock GEORGIAN_EXTENDED = 2541 new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/ 2542 /** @stable ICU 62 */ 2543 public static final UnicodeBlock GUNJALA_GONDI = 2544 new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/ 2545 /** @stable ICU 62 */ 2546 public static final UnicodeBlock HANIFI_ROHINGYA = 2547 new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/ 2548 /** @stable ICU 62 */ 2549 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 2550 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/ 2551 /** @stable ICU 62 */ 2552 public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/ 2553 /** @stable ICU 62 */ 2554 public 
static final UnicodeBlock MAYAN_NUMERALS = 2555 new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/ 2556 /** @stable ICU 62 */ 2557 public static final UnicodeBlock MEDEFAIDRIN = 2558 new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/ 2559 /** @stable ICU 62 */ 2560 public static final UnicodeBlock OLD_SOGDIAN = 2561 new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/ 2562 /** @stable ICU 62 */ 2563 public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/ 2564 2565 // New blocks in Unicode 12.0 2566 2567 /** @stable ICU 64 */ 2568 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 2569 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/ 2570 /** @stable ICU 64 */ 2571 public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/ 2572 /** @stable ICU 64 */ 2573 public static final UnicodeBlock NANDINAGARI = 2574 new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/ 2575 /** @stable ICU 64 */ 2576 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 2577 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/ 2578 /** @stable ICU 64 */ 2579 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 2580 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/ 2581 /** @stable ICU 64 */ 2582 public static final UnicodeBlock SMALL_KANA_EXTENSION = 2583 new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/ 2584 /** @stable ICU 64 */ 2585 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 2586 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/ 2587 /** @stable ICU 64 */ 2588 public static final UnicodeBlock TAMIL_SUPPLEMENT = 2589 new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/ 2590 /** @stable ICU 64 */ 2591 
public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/ 2592 2593 // New blocks in Unicode 13.0 2594 2595 /** @stable ICU 66 */ 2596 public static final UnicodeBlock CHORASMIAN = 2597 new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/ 2598 /** @stable ICU 66 */ 2599 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 2600 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G", 2601 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/ 2602 /** @stable ICU 66 */ 2603 public static final UnicodeBlock DIVES_AKURU = 2604 new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/ 2605 /** @stable ICU 66 */ 2606 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 2607 new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/ 2608 /** @stable ICU 66 */ 2609 public static final UnicodeBlock LISU_SUPPLEMENT = 2610 new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/ 2611 /** @stable ICU 66 */ 2612 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING = 2613 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/ 2614 /** @stable ICU 66 */ 2615 public static final UnicodeBlock TANGUT_SUPPLEMENT = 2616 new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/ 2617 /** @stable ICU 66 */ 2618 public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/ 2619 2620 /** 2621 * @stable ICU 2.4 2622 */ 2623 public static final UnicodeBlock INVALID_CODE 2624 = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID); 2625 2626 static { 2627 for (int blockId = 0; blockId < COUNT; ++blockId) { 2628 if (BLOCKS_[blockId] == null) { 2629 throw new java.lang.IllegalStateException( 2630 "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized"); 2631 } 2632 } 2633 } 2634 2635 // public methods -------------------------------------------------- 2636 2637 /** 2638 * {@icu} Returns the only instance of the 
UnicodeBlock with the argument ID. 2639 * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned. 2640 * @param id UnicodeBlock ID 2641 * @return the only instance of the UnicodeBlock with the argument ID 2642 * if it exists, otherwise a INVALID_CODE UnicodeBlock will be 2643 * returned. 2644 * @stable ICU 2.4 2645 */ getInstance(int id)2646 public static UnicodeBlock getInstance(int id) 2647 { 2648 if (id >= 0 && id < BLOCKS_.length) { 2649 return BLOCKS_[id]; 2650 } 2651 return INVALID_CODE; 2652 } 2653 2654 /** 2655 * Returns the Unicode allocation block that contains the code point, 2656 * or null if the code point is not a member of a defined block. 2657 * @param ch code point to be tested 2658 * @return the Unicode allocation block that contains the code point 2659 * @stable ICU 2.4 2660 */ of(int ch)2661 public static UnicodeBlock of(int ch) 2662 { 2663 if (ch > MAX_VALUE) { 2664 return INVALID_CODE; 2665 } 2666 2667 return UnicodeBlock.getInstance( 2668 UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK)); 2669 } 2670 2671 /** 2672 * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method. 2673 * Returns the Unicode block with the given name. {@icunote} Unlike 2674 * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches 2675 * against the official UCD name and the Java block name 2676 * (ignoring case). 
2677 * @param blockName the name of the block to match 2678 * @return the UnicodeBlock with that name 2679 * @throws IllegalArgumentException if the blockName could not be matched 2680 * @stable ICU 3.0 2681 */ forName(String blockName)2682 public static final UnicodeBlock forName(String blockName) { 2683 Map<String, UnicodeBlock> m = null; 2684 if (mref != null) { 2685 m = mref.get(); 2686 } 2687 if (m == null) { 2688 m = new HashMap<>(BLOCKS_.length); 2689 for (int i = 0; i < BLOCKS_.length; ++i) { 2690 UnicodeBlock b = BLOCKS_[i]; 2691 String name = trimBlockName( 2692 getPropertyValueName(UProperty.BLOCK, b.getID(), 2693 UProperty.NameChoice.LONG)); 2694 m.put(name, b); 2695 } 2696 mref = new SoftReference<>(m); 2697 } 2698 UnicodeBlock b = m.get(trimBlockName(blockName)); 2699 if (b == null) { 2700 throw new IllegalArgumentException(); 2701 } 2702 return b; 2703 } 2704 private static SoftReference<Map<String, UnicodeBlock>> mref; 2705 trimBlockName(String name)2706 private static String trimBlockName(String name) { 2707 String upper = name.toUpperCase(Locale.ENGLISH); 2708 StringBuilder result = new StringBuilder(upper.length()); 2709 for (int i = 0; i < upper.length(); i++) { 2710 char c = upper.charAt(i); 2711 if (c != ' ' && c != '_' && c != '-') { 2712 result.append(c); 2713 } 2714 } 2715 return result.toString(); 2716 } 2717 2718 /** 2719 * {icu} Returns the type ID of this Unicode block 2720 * @return integer type ID of this Unicode block 2721 * @stable ICU 2.4 2722 */ getID()2723 public int getID() 2724 { 2725 return m_id_; 2726 } 2727 2728 // private data members --------------------------------------------- 2729 2730 /** 2731 * Identification code for this UnicodeBlock 2732 */ 2733 private int m_id_; 2734 2735 // private constructor ---------------------------------------------- 2736 2737 /** 2738 * UnicodeBlock constructor 2739 * @param name name of this UnicodeBlock 2740 * @param id unique id of this UnicodeBlock 2741 * @exception 
NullPointerException if name is <code>null</code> 2742 */ UnicodeBlock(String name, int id)2743 private UnicodeBlock(String name, int id) 2744 { 2745 super(name); 2746 m_id_ = id; 2747 if (id >= 0) { 2748 BLOCKS_[id] = this; 2749 } 2750 } 2751 } 2752 2753 /** 2754 * East Asian Width constants. 2755 * @see UProperty#EAST_ASIAN_WIDTH 2756 * @see UCharacter#getIntPropertyValue 2757 * @stable ICU 2.4 2758 */ 2759 public static interface EastAsianWidth 2760 { 2761 /** 2762 * @stable ICU 2.4 2763 */ 2764 public static final int NEUTRAL = 0; 2765 /** 2766 * @stable ICU 2.4 2767 */ 2768 public static final int AMBIGUOUS = 1; 2769 /** 2770 * @stable ICU 2.4 2771 */ 2772 public static final int HALFWIDTH = 2; 2773 /** 2774 * @stable ICU 2.4 2775 */ 2776 public static final int FULLWIDTH = 3; 2777 /** 2778 * @stable ICU 2.4 2779 */ 2780 public static final int NARROW = 4; 2781 /** 2782 * @stable ICU 2.4 2783 */ 2784 public static final int WIDE = 5; 2785 /** 2786 * One more than the highest normal EastAsianWidth value. 2787 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH). 2788 * 2789 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2790 */ 2791 @Deprecated 2792 public static final int COUNT = 6; 2793 } 2794 2795 /** 2796 * Decomposition Type constants. 
2797 * @see UProperty#DECOMPOSITION_TYPE 2798 * @stable ICU 2.4 2799 */ 2800 public static interface DecompositionType 2801 { 2802 /** 2803 * @stable ICU 2.4 2804 */ 2805 public static final int NONE = 0; 2806 /** 2807 * @stable ICU 2.4 2808 */ 2809 public static final int CANONICAL = 1; 2810 /** 2811 * @stable ICU 2.4 2812 */ 2813 public static final int COMPAT = 2; 2814 /** 2815 * @stable ICU 2.4 2816 */ 2817 public static final int CIRCLE = 3; 2818 /** 2819 * @stable ICU 2.4 2820 */ 2821 public static final int FINAL = 4; 2822 /** 2823 * @stable ICU 2.4 2824 */ 2825 public static final int FONT = 5; 2826 /** 2827 * @stable ICU 2.4 2828 */ 2829 public static final int FRACTION = 6; 2830 /** 2831 * @stable ICU 2.4 2832 */ 2833 public static final int INITIAL = 7; 2834 /** 2835 * @stable ICU 2.4 2836 */ 2837 public static final int ISOLATED = 8; 2838 /** 2839 * @stable ICU 2.4 2840 */ 2841 public static final int MEDIAL = 9; 2842 /** 2843 * @stable ICU 2.4 2844 */ 2845 public static final int NARROW = 10; 2846 /** 2847 * @stable ICU 2.4 2848 */ 2849 public static final int NOBREAK = 11; 2850 /** 2851 * @stable ICU 2.4 2852 */ 2853 public static final int SMALL = 12; 2854 /** 2855 * @stable ICU 2.4 2856 */ 2857 public static final int SQUARE = 13; 2858 /** 2859 * @stable ICU 2.4 2860 */ 2861 public static final int SUB = 14; 2862 /** 2863 * @stable ICU 2.4 2864 */ 2865 public static final int SUPER = 15; 2866 /** 2867 * @stable ICU 2.4 2868 */ 2869 public static final int VERTICAL = 16; 2870 /** 2871 * @stable ICU 2.4 2872 */ 2873 public static final int WIDE = 17; 2874 /** 2875 * One more than the highest normal DecompositionType value. 2876 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE). 2877 * 2878 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2879 */ 2880 @Deprecated 2881 public static final int COUNT = 18; 2882 } 2883 2884 /** 2885 * Joining Type constants. 
2886 * @see UProperty#JOINING_TYPE 2887 * @stable ICU 2.4 2888 */ 2889 public static interface JoiningType 2890 { 2891 /** 2892 * @stable ICU 2.4 2893 */ 2894 public static final int NON_JOINING = 0; 2895 /** 2896 * @stable ICU 2.4 2897 */ 2898 public static final int JOIN_CAUSING = 1; 2899 /** 2900 * @stable ICU 2.4 2901 */ 2902 public static final int DUAL_JOINING = 2; 2903 /** 2904 * @stable ICU 2.4 2905 */ 2906 public static final int LEFT_JOINING = 3; 2907 /** 2908 * @stable ICU 2.4 2909 */ 2910 public static final int RIGHT_JOINING = 4; 2911 /** 2912 * @stable ICU 2.4 2913 */ 2914 public static final int TRANSPARENT = 5; 2915 /** 2916 * One more than the highest normal JoiningType value. 2917 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE). 2918 * 2919 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 2920 */ 2921 @Deprecated 2922 public static final int COUNT = 6; 2923 } 2924 2925 /** 2926 * Joining Group constants. 
2927 * @see UProperty#JOINING_GROUP 2928 * @stable ICU 2.4 2929 */ 2930 public static interface JoiningGroup 2931 { 2932 /** 2933 * @stable ICU 2.4 2934 */ 2935 public static final int NO_JOINING_GROUP = 0; 2936 /** 2937 * @stable ICU 2.4 2938 */ 2939 public static final int AIN = 1; 2940 /** 2941 * @stable ICU 2.4 2942 */ 2943 public static final int ALAPH = 2; 2944 /** 2945 * @stable ICU 2.4 2946 */ 2947 public static final int ALEF = 3; 2948 /** 2949 * @stable ICU 2.4 2950 */ 2951 public static final int BEH = 4; 2952 /** 2953 * @stable ICU 2.4 2954 */ 2955 public static final int BETH = 5; 2956 /** 2957 * @stable ICU 2.4 2958 */ 2959 public static final int DAL = 6; 2960 /** 2961 * @stable ICU 2.4 2962 */ 2963 public static final int DALATH_RISH = 7; 2964 /** 2965 * @stable ICU 2.4 2966 */ 2967 public static final int E = 8; 2968 /** 2969 * @stable ICU 2.4 2970 */ 2971 public static final int FEH = 9; 2972 /** 2973 * @stable ICU 2.4 2974 */ 2975 public static final int FINAL_SEMKATH = 10; 2976 /** 2977 * @stable ICU 2.4 2978 */ 2979 public static final int GAF = 11; 2980 /** 2981 * @stable ICU 2.4 2982 */ 2983 public static final int GAMAL = 12; 2984 /** 2985 * @stable ICU 2.4 2986 */ 2987 public static final int HAH = 13; 2988 /** @stable ICU 4.6 */ 2989 public static final int TEH_MARBUTA_GOAL = 14; 2990 /** 2991 * @stable ICU 2.4 2992 */ 2993 public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL; 2994 /** 2995 * @stable ICU 2.4 2996 */ 2997 public static final int HE = 15; 2998 /** 2999 * @stable ICU 2.4 3000 */ 3001 public static final int HEH = 16; 3002 /** 3003 * @stable ICU 2.4 3004 */ 3005 public static final int HEH_GOAL = 17; 3006 /** 3007 * @stable ICU 2.4 3008 */ 3009 public static final int HETH = 18; 3010 /** 3011 * @stable ICU 2.4 3012 */ 3013 public static final int KAF = 19; 3014 /** 3015 * @stable ICU 2.4 3016 */ 3017 public static final int KAPH = 20; 3018 /** 3019 * @stable ICU 2.4 3020 */ 3021 public static final int KNOTTED_HEH = 
21; 3022 /** 3023 * @stable ICU 2.4 3024 */ 3025 public static final int LAM = 22; 3026 /** 3027 * @stable ICU 2.4 3028 */ 3029 public static final int LAMADH = 23; 3030 /** 3031 * @stable ICU 2.4 3032 */ 3033 public static final int MEEM = 24; 3034 /** 3035 * @stable ICU 2.4 3036 */ 3037 public static final int MIM = 25; 3038 /** 3039 * @stable ICU 2.4 3040 */ 3041 public static final int NOON = 26; 3042 /** 3043 * @stable ICU 2.4 3044 */ 3045 public static final int NUN = 27; 3046 /** 3047 * @stable ICU 2.4 3048 */ 3049 public static final int PE = 28; 3050 /** 3051 * @stable ICU 2.4 3052 */ 3053 public static final int QAF = 29; 3054 /** 3055 * @stable ICU 2.4 3056 */ 3057 public static final int QAPH = 30; 3058 /** 3059 * @stable ICU 2.4 3060 */ 3061 public static final int REH = 31; 3062 /** 3063 * @stable ICU 2.4 3064 */ 3065 public static final int REVERSED_PE = 32; 3066 /** 3067 * @stable ICU 2.4 3068 */ 3069 public static final int SAD = 33; 3070 /** 3071 * @stable ICU 2.4 3072 */ 3073 public static final int SADHE = 34; 3074 /** 3075 * @stable ICU 2.4 3076 */ 3077 public static final int SEEN = 35; 3078 /** 3079 * @stable ICU 2.4 3080 */ 3081 public static final int SEMKATH = 36; 3082 /** 3083 * @stable ICU 2.4 3084 */ 3085 public static final int SHIN = 37; 3086 /** 3087 * @stable ICU 2.4 3088 */ 3089 public static final int SWASH_KAF = 38; 3090 /** 3091 * @stable ICU 2.4 3092 */ 3093 public static final int SYRIAC_WAW = 39; 3094 /** 3095 * @stable ICU 2.4 3096 */ 3097 public static final int TAH = 40; 3098 /** 3099 * @stable ICU 2.4 3100 */ 3101 public static final int TAW = 41; 3102 /** 3103 * @stable ICU 2.4 3104 */ 3105 public static final int TEH_MARBUTA = 42; 3106 /** 3107 * @stable ICU 2.4 3108 */ 3109 public static final int TETH = 43; 3110 /** 3111 * @stable ICU 2.4 3112 */ 3113 public static final int WAW = 44; 3114 /** 3115 * @stable ICU 2.4 3116 */ 3117 public static final int YEH = 45; 3118 /** 3119 * @stable ICU 2.4 3120 */ 3121 public 
static final int YEH_BARREE = 46; 3122 /** 3123 * @stable ICU 2.4 3124 */ 3125 public static final int YEH_WITH_TAIL = 47; 3126 /** 3127 * @stable ICU 2.4 3128 */ 3129 public static final int YUDH = 48; 3130 /** 3131 * @stable ICU 2.4 3132 */ 3133 public static final int YUDH_HE = 49; 3134 /** 3135 * @stable ICU 2.4 3136 */ 3137 public static final int ZAIN = 50; 3138 /** 3139 * @stable ICU 2.6 3140 */ 3141 public static final int FE = 51; 3142 /** 3143 * @stable ICU 2.6 3144 */ 3145 public static final int KHAPH = 52; 3146 /** 3147 * @stable ICU 2.6 3148 */ 3149 public static final int ZHAIN = 53; 3150 /** 3151 * @stable ICU 4.0 3152 */ 3153 public static final int BURUSHASKI_YEH_BARREE = 54; 3154 /** @stable ICU 4.4 */ 3155 public static final int FARSI_YEH = 55; 3156 /** @stable ICU 4.4 */ 3157 public static final int NYA = 56; 3158 /** @stable ICU 49 */ 3159 public static final int ROHINGYA_YEH = 57; 3160 3161 /** @stable ICU 54 */ 3162 public static final int MANICHAEAN_ALEPH = 58; 3163 /** @stable ICU 54 */ 3164 public static final int MANICHAEAN_AYIN = 59; 3165 /** @stable ICU 54 */ 3166 public static final int MANICHAEAN_BETH = 60; 3167 /** @stable ICU 54 */ 3168 public static final int MANICHAEAN_DALETH = 61; 3169 /** @stable ICU 54 */ 3170 public static final int MANICHAEAN_DHAMEDH = 62; 3171 /** @stable ICU 54 */ 3172 public static final int MANICHAEAN_FIVE = 63; 3173 /** @stable ICU 54 */ 3174 public static final int MANICHAEAN_GIMEL = 64; 3175 /** @stable ICU 54 */ 3176 public static final int MANICHAEAN_HETH = 65; 3177 /** @stable ICU 54 */ 3178 public static final int MANICHAEAN_HUNDRED = 66; 3179 /** @stable ICU 54 */ 3180 public static final int MANICHAEAN_KAPH = 67; 3181 /** @stable ICU 54 */ 3182 public static final int MANICHAEAN_LAMEDH = 68; 3183 /** @stable ICU 54 */ 3184 public static final int MANICHAEAN_MEM = 69; 3185 /** @stable ICU 54 */ 3186 public static final int MANICHAEAN_NUN = 70; 3187 /** @stable ICU 54 */ 3188 public static final 
int MANICHAEAN_ONE = 71; 3189 /** @stable ICU 54 */ 3190 public static final int MANICHAEAN_PE = 72; 3191 /** @stable ICU 54 */ 3192 public static final int MANICHAEAN_QOPH = 73; 3193 /** @stable ICU 54 */ 3194 public static final int MANICHAEAN_RESH = 74; 3195 /** @stable ICU 54 */ 3196 public static final int MANICHAEAN_SADHE = 75; 3197 /** @stable ICU 54 */ 3198 public static final int MANICHAEAN_SAMEKH = 76; 3199 /** @stable ICU 54 */ 3200 public static final int MANICHAEAN_TAW = 77; 3201 /** @stable ICU 54 */ 3202 public static final int MANICHAEAN_TEN = 78; 3203 /** @stable ICU 54 */ 3204 public static final int MANICHAEAN_TETH = 79; 3205 /** @stable ICU 54 */ 3206 public static final int MANICHAEAN_THAMEDH = 80; 3207 /** @stable ICU 54 */ 3208 public static final int MANICHAEAN_TWENTY = 81; 3209 /** @stable ICU 54 */ 3210 public static final int MANICHAEAN_WAW = 82; 3211 /** @stable ICU 54 */ 3212 public static final int MANICHAEAN_YODH = 83; 3213 /** @stable ICU 54 */ 3214 public static final int MANICHAEAN_ZAYIN = 84; 3215 /** @stable ICU 54 */ 3216 public static final int STRAIGHT_WAW = 85; 3217 3218 /** @stable ICU 58 */ 3219 public static final int AFRICAN_FEH = 86; 3220 /** @stable ICU 58 */ 3221 public static final int AFRICAN_NOON = 87; 3222 /** @stable ICU 58 */ 3223 public static final int AFRICAN_QAF = 88; 3224 3225 /** @stable ICU 60 */ 3226 public static final int MALAYALAM_BHA = 89; 3227 /** @stable ICU 60 */ 3228 public static final int MALAYALAM_JA = 90; 3229 /** @stable ICU 60 */ 3230 public static final int MALAYALAM_LLA = 91; 3231 /** @stable ICU 60 */ 3232 public static final int MALAYALAM_LLLA = 92; 3233 /** @stable ICU 60 */ 3234 public static final int MALAYALAM_NGA = 93; 3235 /** @stable ICU 60 */ 3236 public static final int MALAYALAM_NNA = 94; 3237 /** @stable ICU 60 */ 3238 public static final int MALAYALAM_NNNA = 95; 3239 /** @stable ICU 60 */ 3240 public static final int MALAYALAM_NYA = 96; 3241 /** @stable ICU 60 */ 3242 public 
static final int MALAYALAM_RA = 97; 3243 /** @stable ICU 60 */ 3244 public static final int MALAYALAM_SSA = 98; 3245 /** @stable ICU 60 */ 3246 public static final int MALAYALAM_TTA = 99; 3247 3248 /** @stable ICU 62 */ 3249 public static final int HANIFI_ROHINGYA_KINNA_YA = 100; 3250 /** @stable ICU 62 */ 3251 public static final int HANIFI_ROHINGYA_PA = 101; 3252 3253 /** 3254 * One more than the highest normal JoiningGroup value. 3255 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup). 3256 * 3257 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3258 */ 3259 @Deprecated 3260 public static final int COUNT = 102; 3261 } 3262 3263 /** 3264 * Grapheme Cluster Break constants. 3265 * @see UProperty#GRAPHEME_CLUSTER_BREAK 3266 * @stable ICU 3.4 3267 */ 3268 public static interface GraphemeClusterBreak { 3269 /** 3270 * @stable ICU 3.4 3271 */ 3272 public static final int OTHER = 0; 3273 /** 3274 * @stable ICU 3.4 3275 */ 3276 public static final int CONTROL = 1; 3277 /** 3278 * @stable ICU 3.4 3279 */ 3280 public static final int CR = 2; 3281 /** 3282 * @stable ICU 3.4 3283 */ 3284 public static final int EXTEND = 3; 3285 /** 3286 * @stable ICU 3.4 3287 */ 3288 public static final int L = 4; 3289 /** 3290 * @stable ICU 3.4 3291 */ 3292 public static final int LF = 5; 3293 /** 3294 * @stable ICU 3.4 3295 */ 3296 public static final int LV = 6; 3297 /** 3298 * @stable ICU 3.4 3299 */ 3300 public static final int LVT = 7; 3301 /** 3302 * @stable ICU 3.4 3303 */ 3304 public static final int T = 8; 3305 /** 3306 * @stable ICU 3.4 3307 */ 3308 public static final int V = 9; 3309 /** 3310 * @stable ICU 4.0 3311 */ 3312 public static final int SPACING_MARK = 10; 3313 /** 3314 * @stable ICU 4.0 3315 */ 3316 public static final int PREPEND = 11; 3317 /** @stable ICU 50 */ 3318 public static final int REGIONAL_INDICATOR = 12; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3319 /** @stable ICU 58 */ 3320 
public static final int E_BASE = 13; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3321 /** @stable ICU 58 */ 3322 public static final int E_BASE_GAZ = 14; /*[EBG]*/ 3323 /** @stable ICU 58 */ 3324 public static final int E_MODIFIER = 15; /*[EM]*/ 3325 /** @stable ICU 58 */ 3326 public static final int GLUE_AFTER_ZWJ = 16; /*[GAZ]*/ 3327 /** @stable ICU 58 */ 3328 public static final int ZWJ = 17; /*[ZWJ]*/ 3329 3330 /** 3331 * One more than the highest normal GraphemeClusterBreak value. 3332 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK). 3333 * 3334 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3335 */ 3336 @Deprecated 3337 public static final int COUNT = 18; 3338 } 3339 3340 /** 3341 * Word Break constants. 3342 * @see UProperty#WORD_BREAK 3343 * @stable ICU 3.4 3344 */ 3345 public static interface WordBreak { 3346 /** 3347 * @stable ICU 3.8 3348 */ 3349 public static final int OTHER = 0; 3350 /** 3351 * @stable ICU 3.8 3352 */ 3353 public static final int ALETTER = 1; 3354 /** 3355 * @stable ICU 3.8 3356 */ 3357 public static final int FORMAT = 2; 3358 /** 3359 * @stable ICU 3.8 3360 */ 3361 public static final int KATAKANA = 3; 3362 /** 3363 * @stable ICU 3.8 3364 */ 3365 public static final int MIDLETTER = 4; 3366 /** 3367 * @stable ICU 3.8 3368 */ 3369 public static final int MIDNUM = 5; 3370 /** 3371 * @stable ICU 3.8 3372 */ 3373 public static final int NUMERIC = 6; 3374 /** 3375 * @stable ICU 3.8 3376 */ 3377 public static final int EXTENDNUMLET = 7; 3378 /** 3379 * @stable ICU 4.0 3380 */ 3381 public static final int CR = 8; 3382 /** 3383 * @stable ICU 4.0 3384 */ 3385 public static final int EXTEND = 9; 3386 /** 3387 * @stable ICU 4.0 3388 */ 3389 public static final int LF = 10; 3390 /** 3391 * @stable ICU 4.0 3392 */ 3393 public static final int MIDNUMLET = 11; 3394 /** 3395 * @stable ICU 4.0 3396 */ 3397 public static final int NEWLINE = 12; 3398 
/** @stable ICU 50 */ 3399 public static final int REGIONAL_INDICATOR = 13; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3400 /** @stable ICU 52 */ 3401 public static final int HEBREW_LETTER = 14; /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */ 3402 /** @stable ICU 52 */ 3403 public static final int SINGLE_QUOTE = 15; /*[SQ]*/ 3404 /** @stable ICU 52 */ 3405 public static final int DOUBLE_QUOTE = 16; /*[DQ]*/ 3406 /** @stable ICU 58 */ 3407 public static final int E_BASE = 17; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3408 /** @stable ICU 58 */ 3409 public static final int E_BASE_GAZ = 18; /*[EBG]*/ 3410 /** @stable ICU 58 */ 3411 public static final int E_MODIFIER = 19; /*[EM]*/ 3412 /** @stable ICU 58 */ 3413 public static final int GLUE_AFTER_ZWJ = 20; /*[GAZ]*/ 3414 /** @stable ICU 58 */ 3415 public static final int ZWJ = 21; /*[ZWJ]*/ 3416 /** @stable ICU 62 */ 3417 public static final int WSEGSPACE = 22; /*[WSEGSPACE]*/ 3418 /** 3419 * One more than the highest normal WordBreak value. 3420 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK). 3421 * 3422 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3423 */ 3424 @Deprecated 3425 public static final int COUNT = 23; 3426 } 3427 3428 /** 3429 * Sentence Break constants. 
3430 * @see UProperty#SENTENCE_BREAK 3431 * @stable ICU 3.4 3432 */ 3433 public static interface SentenceBreak { 3434 /** 3435 * @stable ICU 3.8 3436 */ 3437 public static final int OTHER = 0; 3438 /** 3439 * @stable ICU 3.8 3440 */ 3441 public static final int ATERM = 1; 3442 /** 3443 * @stable ICU 3.8 3444 */ 3445 public static final int CLOSE = 2; 3446 /** 3447 * @stable ICU 3.8 3448 */ 3449 public static final int FORMAT = 3; 3450 /** 3451 * @stable ICU 3.8 3452 */ 3453 public static final int LOWER = 4; 3454 /** 3455 * @stable ICU 3.8 3456 */ 3457 public static final int NUMERIC = 5; 3458 /** 3459 * @stable ICU 3.8 3460 */ 3461 public static final int OLETTER = 6; 3462 /** 3463 * @stable ICU 3.8 3464 */ 3465 public static final int SEP = 7; 3466 /** 3467 * @stable ICU 3.8 3468 */ 3469 public static final int SP = 8; 3470 /** 3471 * @stable ICU 3.8 3472 */ 3473 public static final int STERM = 9; 3474 /** 3475 * @stable ICU 3.8 3476 */ 3477 public static final int UPPER = 10; 3478 /** 3479 * @stable ICU 4.0 3480 */ 3481 public static final int CR = 11; 3482 /** 3483 * @stable ICU 4.0 3484 */ 3485 public static final int EXTEND = 12; 3486 /** 3487 * @stable ICU 4.0 3488 */ 3489 public static final int LF = 13; 3490 /** 3491 * @stable ICU 4.0 3492 */ 3493 public static final int SCONTINUE = 14; 3494 /** 3495 * One more than the highest normal SentenceBreak value. 3496 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK). 3497 * 3498 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3499 */ 3500 @Deprecated 3501 public static final int COUNT = 15; 3502 } 3503 3504 /** 3505 * Line Break constants. 
3506 * @see UProperty#LINE_BREAK 3507 * @stable ICU 2.4 3508 */ 3509 public static interface LineBreak 3510 { 3511 /** 3512 * @stable ICU 2.4 3513 */ 3514 public static final int UNKNOWN = 0; 3515 /** 3516 * @stable ICU 2.4 3517 */ 3518 public static final int AMBIGUOUS = 1; 3519 /** 3520 * @stable ICU 2.4 3521 */ 3522 public static final int ALPHABETIC = 2; 3523 /** 3524 * @stable ICU 2.4 3525 */ 3526 public static final int BREAK_BOTH = 3; 3527 /** 3528 * @stable ICU 2.4 3529 */ 3530 public static final int BREAK_AFTER = 4; 3531 /** 3532 * @stable ICU 2.4 3533 */ 3534 public static final int BREAK_BEFORE = 5; 3535 /** 3536 * @stable ICU 2.4 3537 */ 3538 public static final int MANDATORY_BREAK = 6; 3539 /** 3540 * @stable ICU 2.4 3541 */ 3542 public static final int CONTINGENT_BREAK = 7; 3543 /** 3544 * @stable ICU 2.4 3545 */ 3546 public static final int CLOSE_PUNCTUATION = 8; 3547 /** 3548 * @stable ICU 2.4 3549 */ 3550 public static final int COMBINING_MARK = 9; 3551 /** 3552 * @stable ICU 2.4 3553 */ 3554 public static final int CARRIAGE_RETURN = 10; 3555 /** 3556 * @stable ICU 2.4 3557 */ 3558 public static final int EXCLAMATION = 11; 3559 /** 3560 * @stable ICU 2.4 3561 */ 3562 public static final int GLUE = 12; 3563 /** 3564 * @stable ICU 2.4 3565 */ 3566 public static final int HYPHEN = 13; 3567 /** 3568 * @stable ICU 2.4 3569 */ 3570 public static final int IDEOGRAPHIC = 14; 3571 /** 3572 * @see #INSEPARABLE 3573 * @stable ICU 2.4 3574 */ 3575 public static final int INSEPERABLE = 15; 3576 /** 3577 * Renamed from the misspelled "inseperable" in Unicode 4.0.1. 
3578 * @stable ICU 3.0 3579 */ 3580 public static final int INSEPARABLE = 15; 3581 /** 3582 * @stable ICU 2.4 3583 */ 3584 public static final int INFIX_NUMERIC = 16; 3585 /** 3586 * @stable ICU 2.4 3587 */ 3588 public static final int LINE_FEED = 17; 3589 /** 3590 * @stable ICU 2.4 3591 */ 3592 public static final int NONSTARTER = 18; 3593 /** 3594 * @stable ICU 2.4 3595 */ 3596 public static final int NUMERIC = 19; 3597 /** 3598 * @stable ICU 2.4 3599 */ 3600 public static final int OPEN_PUNCTUATION = 20; 3601 /** 3602 * @stable ICU 2.4 3603 */ 3604 public static final int POSTFIX_NUMERIC = 21; 3605 /** 3606 * @stable ICU 2.4 3607 */ 3608 public static final int PREFIX_NUMERIC = 22; 3609 /** 3610 * @stable ICU 2.4 3611 */ 3612 public static final int QUOTATION = 23; 3613 /** 3614 * @stable ICU 2.4 3615 */ 3616 public static final int COMPLEX_CONTEXT = 24; 3617 /** 3618 * @stable ICU 2.4 3619 */ 3620 public static final int SURROGATE = 25; 3621 /** 3622 * @stable ICU 2.4 3623 */ 3624 public static final int SPACE = 26; 3625 /** 3626 * @stable ICU 2.4 3627 */ 3628 public static final int BREAK_SYMBOLS = 27; 3629 /** 3630 * @stable ICU 2.4 3631 */ 3632 public static final int ZWSPACE = 28; 3633 /** 3634 * @stable ICU 2.6 3635 */ 3636 public static final int NEXT_LINE = 29; /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */ 3637 /** 3638 * @stable ICU 2.6 3639 */ 3640 public static final int WORD_JOINER = 30; /*[WJ]*/ 3641 /** 3642 * @stable ICU 3.4 3643 */ 3644 public static final int H2 = 31; /* from here on: new in Unicode 4.1/ICU 3.4 */ 3645 /** 3646 * @stable ICU 3.4 3647 */ 3648 public static final int H3 = 32; 3649 /** 3650 * @stable ICU 3.4 3651 */ 3652 public static final int JL = 33; 3653 /** 3654 * @stable ICU 3.4 3655 */ 3656 public static final int JT = 34; 3657 /** 3658 * @stable ICU 3.4 3659 */ 3660 public static final int JV = 35; 3661 /** @stable ICU 4.4 */ 3662 public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 
*/ 3663 /** @stable ICU 49 */ 3664 public static final int CONDITIONAL_JAPANESE_STARTER = 37; /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */ 3665 /** @stable ICU 49 */ 3666 public static final int HEBREW_LETTER = 38; /*[HL]*/ /* new in Unicode 6.1/ICU 49 */ 3667 /** @stable ICU 50 */ 3668 public static final int REGIONAL_INDICATOR = 39; /*[RI]*/ /* new in Unicode 6.2/ICU 50 */ 3669 /** @stable ICU 58 */ 3670 public static final int E_BASE = 40; /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */ 3671 /** @stable ICU 58 */ 3672 public static final int E_MODIFIER = 41; /*[EM]*/ 3673 /** @stable ICU 58 */ 3674 public static final int ZWJ = 42; /*[ZWJ]*/ 3675 /** 3676 * One more than the highest normal LineBreak value. 3677 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK). 3678 * 3679 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3680 */ 3681 @Deprecated 3682 public static final int COUNT = 43; 3683 } 3684 3685 /** 3686 * Numeric Type constants. 3687 * @see UProperty#NUMERIC_TYPE 3688 * @stable ICU 2.4 3689 */ 3690 public static interface NumericType 3691 { 3692 /** 3693 * @stable ICU 2.4 3694 */ 3695 public static final int NONE = 0; 3696 /** 3697 * @stable ICU 2.4 3698 */ 3699 public static final int DECIMAL = 1; 3700 /** 3701 * @stable ICU 2.4 3702 */ 3703 public static final int DIGIT = 2; 3704 /** 3705 * @stable ICU 2.4 3706 */ 3707 public static final int NUMERIC = 3; 3708 /** 3709 * One more than the highest normal NumericType value. 3710 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE). 3711 * 3712 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3713 */ 3714 @Deprecated 3715 public static final int COUNT = 4; 3716 } 3717 3718 /** 3719 * Hangul Syllable Type constants. 
3720 * 3721 * @see UProperty#HANGUL_SYLLABLE_TYPE 3722 * @stable ICU 2.6 3723 */ 3724 public static interface HangulSyllableType 3725 { 3726 /** 3727 * @stable ICU 2.6 3728 */ 3729 public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ 3730 /** 3731 * @stable ICU 2.6 3732 */ 3733 public static final int LEADING_JAMO = 1; /*[L]*/ 3734 /** 3735 * @stable ICU 2.6 3736 */ 3737 public static final int VOWEL_JAMO = 2; /*[V]*/ 3738 /** 3739 * @stable ICU 2.6 3740 */ 3741 public static final int TRAILING_JAMO = 3; /*[T]*/ 3742 /** 3743 * @stable ICU 2.6 3744 */ 3745 public static final int LV_SYLLABLE = 4; /*[LV]*/ 3746 /** 3747 * @stable ICU 2.6 3748 */ 3749 public static final int LVT_SYLLABLE = 5; /*[LVT]*/ 3750 /** 3751 * One more than the highest normal HangulSyllableType value. 3752 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE). 3753 * 3754 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 3755 */ 3756 @Deprecated 3757 public static final int COUNT = 6; 3758 } 3759 3760 /** 3761 * Bidi Paired Bracket Type constants. 3762 * 3763 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 3764 * @stable ICU 52 3765 */ 3766 public static interface BidiPairedBracketType { 3767 /** 3768 * Not a paired bracket. 3769 * @stable ICU 52 3770 */ 3771 public static final int NONE = 0; 3772 /** 3773 * Open paired bracket. 3774 * @stable ICU 52 3775 */ 3776 public static final int OPEN = 1; 3777 /** 3778 * Close paired bracket. 3779 * @stable ICU 52 3780 */ 3781 public static final int CLOSE = 2; 3782 /** 3783 * One more than the highest normal BidiPairedBracketType value. 3784 * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE). 3785 * 3786 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. 
3787 */ 3788 @Deprecated 3789 public static final int COUNT = 3; 3790 } 3791 3792 /** 3793 * Indic Positional Category constants. 3794 * 3795 * @see UProperty#INDIC_POSITIONAL_CATEGORY 3796 * @stable ICU 63 3797 */ 3798 public static interface IndicPositionalCategory { 3799 /** @stable ICU 63 */ 3800 public static final int NA = 0; 3801 /** @stable ICU 63 */ 3802 public static final int BOTTOM = 1; 3803 /** @stable ICU 63 */ 3804 public static final int BOTTOM_AND_LEFT = 2; 3805 /** @stable ICU 63 */ 3806 public static final int BOTTOM_AND_RIGHT = 3; 3807 /** @stable ICU 63 */ 3808 public static final int LEFT = 4; 3809 /** @stable ICU 63 */ 3810 public static final int LEFT_AND_RIGHT = 5; 3811 /** @stable ICU 63 */ 3812 public static final int OVERSTRUCK = 6; 3813 /** @stable ICU 63 */ 3814 public static final int RIGHT = 7; 3815 /** @stable ICU 63 */ 3816 public static final int TOP = 8; 3817 /** @stable ICU 63 */ 3818 public static final int TOP_AND_BOTTOM = 9; 3819 /** @stable ICU 63 */ 3820 public static final int TOP_AND_BOTTOM_AND_RIGHT = 10; 3821 /** @stable ICU 63 */ 3822 public static final int TOP_AND_LEFT = 11; 3823 /** @stable ICU 63 */ 3824 public static final int TOP_AND_LEFT_AND_RIGHT = 12; 3825 /** @stable ICU 63 */ 3826 public static final int TOP_AND_RIGHT = 13; 3827 /** @stable ICU 63 */ 3828 public static final int VISUAL_ORDER_LEFT = 14; 3829 /** @stable ICU 66 */ 3830 public static final int TOP_AND_BOTTOM_AND_LEFT = 15; 3831 } 3832 3833 /** 3834 * Indic Syllabic Category constants. 
3835 * 3836 * @see UProperty#INDIC_SYLLABIC_CATEGORY 3837 * @stable ICU 63 3838 */ 3839 public static interface IndicSyllabicCategory { 3840 /** @stable ICU 63 */ 3841 public static final int OTHER = 0; 3842 /** @stable ICU 63 */ 3843 public static final int AVAGRAHA = 1; 3844 /** @stable ICU 63 */ 3845 public static final int BINDU = 2; 3846 /** @stable ICU 63 */ 3847 public static final int BRAHMI_JOINING_NUMBER = 3; 3848 /** @stable ICU 63 */ 3849 public static final int CANTILLATION_MARK = 4; 3850 /** @stable ICU 63 */ 3851 public static final int CONSONANT = 5; 3852 /** @stable ICU 63 */ 3853 public static final int CONSONANT_DEAD = 6; 3854 /** @stable ICU 63 */ 3855 public static final int CONSONANT_FINAL = 7; 3856 /** @stable ICU 63 */ 3857 public static final int CONSONANT_HEAD_LETTER = 8; 3858 /** @stable ICU 63 */ 3859 public static final int CONSONANT_INITIAL_POSTFIXED = 9; 3860 /** @stable ICU 63 */ 3861 public static final int CONSONANT_KILLER = 10; 3862 /** @stable ICU 63 */ 3863 public static final int CONSONANT_MEDIAL = 11; 3864 /** @stable ICU 63 */ 3865 public static final int CONSONANT_PLACEHOLDER = 12; 3866 /** @stable ICU 63 */ 3867 public static final int CONSONANT_PRECEDING_REPHA = 13; 3868 /** @stable ICU 63 */ 3869 public static final int CONSONANT_PREFIXED = 14; 3870 /** @stable ICU 63 */ 3871 public static final int CONSONANT_SUBJOINED = 15; 3872 /** @stable ICU 63 */ 3873 public static final int CONSONANT_SUCCEEDING_REPHA = 16; 3874 /** @stable ICU 63 */ 3875 public static final int CONSONANT_WITH_STACKER = 17; 3876 /** @stable ICU 63 */ 3877 public static final int GEMINATION_MARK = 18; 3878 /** @stable ICU 63 */ 3879 public static final int INVISIBLE_STACKER = 19; 3880 /** @stable ICU 63 */ 3881 public static final int JOINER = 20; 3882 /** @stable ICU 63 */ 3883 public static final int MODIFYING_LETTER = 21; 3884 /** @stable ICU 63 */ 3885 public static final int NON_JOINER = 22; 3886 /** @stable ICU 63 */ 3887 public static final int 
NUKTA = 23; 3888 /** @stable ICU 63 */ 3889 public static final int NUMBER = 24; 3890 /** @stable ICU 63 */ 3891 public static final int NUMBER_JOINER = 25; 3892 /** @stable ICU 63 */ 3893 public static final int PURE_KILLER = 26; 3894 /** @stable ICU 63 */ 3895 public static final int REGISTER_SHIFTER = 27; 3896 /** @stable ICU 63 */ 3897 public static final int SYLLABLE_MODIFIER = 28; 3898 /** @stable ICU 63 */ 3899 public static final int TONE_LETTER = 29; 3900 /** @stable ICU 63 */ 3901 public static final int TONE_MARK = 30; 3902 /** @stable ICU 63 */ 3903 public static final int VIRAMA = 31; 3904 /** @stable ICU 63 */ 3905 public static final int VISARGA = 32; 3906 /** @stable ICU 63 */ 3907 public static final int VOWEL = 33; 3908 /** @stable ICU 63 */ 3909 public static final int VOWEL_DEPENDENT = 34; 3910 /** @stable ICU 63 */ 3911 public static final int VOWEL_INDEPENDENT = 35; 3912 } 3913 3914 /** 3915 * Vertical Orientation constants. 3916 * 3917 * @see UProperty#VERTICAL_ORIENTATION 3918 * @stable ICU 63 3919 */ 3920 public static interface VerticalOrientation { 3921 /** @stable ICU 63 */ 3922 public static final int ROTATED = 0; 3923 /** @stable ICU 63 */ 3924 public static final int TRANSFORMED_ROTATED = 1; 3925 /** @stable ICU 63 */ 3926 public static final int TRANSFORMED_UPRIGHT = 2; 3927 /** @stable ICU 63 */ 3928 public static final int UPRIGHT = 3; 3929 } 3930 3931 // public data members ----------------------------------------------- 3932 3933 /** 3934 * The lowest Unicode code point value, constant 0. 3935 * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}. 3936 * 3937 * @stable ICU 2.1 3938 */ 3939 public static final int MIN_VALUE = Character.MIN_CODE_POINT; 3940 3941 /** 3942 * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits). 3943 * Same as {@link Character#MAX_CODE_POINT}. 
3944 * 3945 * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE} 3946 * which is still a char with the value U+FFFF. 3947 * 3948 * @stable ICU 2.1 3949 */ 3950 public static final int MAX_VALUE = Character.MAX_CODE_POINT; 3951 3952 /** 3953 * The minimum value for Supplementary code points, constant U+10000. 3954 * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 3955 * 3956 * @stable ICU 2.1 3957 */ 3958 public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT; 3959 3960 /** 3961 * Unicode value used when translating into Unicode encoding form and there 3962 * is no existing character. 3963 * @stable ICU 2.1 3964 */ 3965 public static final int REPLACEMENT_CHAR = '\uFFFD'; 3966 3967 /** 3968 * Special value that is returned by getUnicodeNumericValue(int) when no 3969 * numeric value is defined for a code point. 3970 * @stable ICU 2.4 3971 * @see #getUnicodeNumericValue 3972 */ 3973 public static final double NO_NUMERIC_VALUE = -123456789; 3974 3975 /** 3976 * Compatibility constant for Java Character's MIN_RADIX. 3977 * @stable ICU 3.4 3978 */ 3979 public static final int MIN_RADIX = java.lang.Character.MIN_RADIX; 3980 3981 /** 3982 * Compatibility constant for Java Character's MAX_RADIX. 3983 * @stable ICU 3.4 3984 */ 3985 public static final int MAX_RADIX = java.lang.Character.MAX_RADIX; 3986 3987 /** 3988 * Do not lowercase non-initial parts of words when titlecasing. 3989 * Option bit for titlecasing APIs that take an options bit set. 3990 * 3991 * By default, titlecasing will titlecase the first cased character 3992 * of a word and lowercase all other characters. 3993 * With this option, the other characters will not be modified. 
3994 * 3995 * @see #toTitleCase 3996 * @stable ICU 3.8 3997 */ 3998 public static final int TITLECASE_NO_LOWERCASE = 0x100; 3999 4000 /** 4001 * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; 4002 * titlecase exactly the characters at breaks from the iterator. 4003 * Option bit for titlecasing APIs that take an options bit set. 4004 * 4005 * By default, titlecasing will take each break iterator index, 4006 * adjust it by looking for the next cased character, and titlecase that one. 4007 * Other characters are lowercased. 4008 * 4009 * This follows Unicode 4 & 5 section 3.13 Default Case Operations: 4010 * 4011 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex 4012 * #29, "Text Boundaries." Between each pair of word boundaries, find the first 4013 * cased character F. If F exists, map F to default_title(F); then map each 4014 * subsequent character C to default_lower(C). 4015 * 4016 * @see #toTitleCase 4017 * @see #TITLECASE_NO_LOWERCASE 4018 * @stable ICU 3.8 4019 */ 4020 public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200; 4021 4022 // public methods ---------------------------------------------------- 4023 4024 /** 4025 * Returnss the numeric value of a decimal digit code point. 4026 * <br>This method observes the semantics of 4027 * <code>java.lang.Character.digit()</code>. Note that this 4028 * will return positive values for code points for which isDigit 4029 * returns false, just like java.lang.Character. 4030 * <br><em>Semantic Change:</em> In release 1.3.1 and 4031 * prior, this did not treat the European letters as having a 4032 * digit value, and also treated numeric letters and other numbers as 4033 * digits. 4034 * This has been changed to conform to the java semantics. 4035 * <br>A code point is a valid digit if and only if: 4036 * <ul> 4037 * <li>ch is a decimal digit or one of the european letters, and 4038 * <li>the value of ch is less than the specified radix. 
4039 * </ul> 4040 * @param ch the code point to query 4041 * @param radix the radix 4042 * @return the numeric value represented by the code point in the 4043 * specified radix, or -1 if the code point is not a decimal digit 4044 * or if its value is too large for the radix 4045 * @stable ICU 2.1 4046 */ digit(int ch, int radix)4047 public static int digit(int ch, int radix) 4048 { 4049 if (2 <= radix && radix <= 36) { 4050 int value = digit(ch); 4051 if (value < 0) { 4052 // ch is not a decimal digit, try latin letters 4053 value = UCharacterProperty.getEuropeanDigit(ch); 4054 } 4055 return (value < radix) ? value : -1; 4056 } else { 4057 return -1; // invalid radix 4058 } 4059 } 4060 4061 /** 4062 * Returnss the numeric value of a decimal digit code point. 4063 * <br>This is a convenience overload of <code>digit(int, int)</code> 4064 * that provides a decimal radix. 4065 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this 4066 * treated numeric letters and other numbers as digits. This has 4067 * been changed to conform to the java semantics. 4068 * @param ch the code point to query 4069 * @return the numeric value represented by the code point, 4070 * or -1 if the code point is not a decimal digit or if its 4071 * value is too large for a decimal radix 4072 * @stable ICU 2.1 4073 */ digit(int ch)4074 public static int digit(int ch) 4075 { 4076 return UCharacterProperty.INSTANCE.digit(ch); 4077 } 4078 4079 /** 4080 * Returns the numeric value of the code point as a nonnegative 4081 * integer. 4082 * <br>If the code point does not have a numeric value, then -1 is returned. 4083 * <br> 4084 * If the code point has a numeric value that cannot be represented as a 4085 * nonnegative integer (for example, a fractional value), then -2 is 4086 * returned. 
4087 * @param ch the code point to query 4088 * @return the numeric value of the code point, or -1 if it has no numeric 4089 * value, or -2 if it has a numeric value that cannot be represented as a 4090 * nonnegative integer 4091 * @stable ICU 2.1 4092 */ getNumericValue(int ch)4093 public static int getNumericValue(int ch) 4094 { 4095 return UCharacterProperty.INSTANCE.getNumericValue(ch); 4096 } 4097 4098 /** 4099 * {@icu} Returns the numeric value for a Unicode code point as defined in the 4100 * Unicode Character Database. 4101 * <p>A "double" return type is necessary because some numeric values are 4102 * fractions, negative, or too large for int. 4103 * <p>For characters without any numeric values in the Unicode Character 4104 * Database, this function will return NO_NUMERIC_VALUE. 4105 * Note: This is different from the Unicode Standard which specifies NaN as the default value. 4106 * <p><em>API Change:</em> In release 2.2 and prior, this API has a 4107 * return type int and returns -1 when the argument ch does not have a 4108 * corresponding numeric value. This has been changed to synch with ICU4C 4109 * 4110 * This corresponds to the ICU4C function u_getNumericValue. 4111 * @param ch Code point to get the numeric value for. 4112 * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. 4113 * @stable ICU 2.4 4114 */ getUnicodeNumericValue(int ch)4115 public static double getUnicodeNumericValue(int ch) 4116 { 4117 return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch); 4118 } 4119 4120 /** 4121 * Compatibility override of Java deprecated method. This 4122 * method will always remain deprecated. 4123 * Same as java.lang.Character.isSpace(). 4124 * @param ch the code point 4125 * @return true if the code point is a space character as 4126 * defined by java.lang.Character.isSpace. 
4127 * @deprecated ICU 3.4 (Java) 4128 */ 4129 @Deprecated isSpace(int ch)4130 public static boolean isSpace(int ch) { 4131 return ch <= 0x20 && 4132 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d); 4133 } 4134 4135 /** 4136 * Returns a value indicating a code point's Unicode category. 4137 * Up-to-date Unicode implementation of java.lang.Character.getType() 4138 * except for the above mentioned code points that had their category 4139 * changed.<br> 4140 * Return results are constants from the interface 4141 * <a href=UCharacterCategory.html>UCharacterCategory</a><br> 4142 * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with 4143 * those returned by java.lang.Character.getType. UCharacterCategory values 4144 * match the ones used in ICU4C, while java.lang.Character type 4145 * values, though similar, skip the value 17. 4146 * @param ch code point whose type is to be determined 4147 * @return category which is a value of UCharacterCategory 4148 * @stable ICU 2.1 4149 */ getType(int ch)4150 public static int getType(int ch) 4151 { 4152 return UCharacterProperty.INSTANCE.getType(ch); 4153 } 4154 4155 /** 4156 * Determines if a code point has a defined meaning in the up-to-date 4157 * Unicode standard. 4158 * E.g. supplementary code points though allocated space are not defined in 4159 * Unicode yet.<br> 4160 * Up-to-date Unicode implementation of java.lang.Character.isDefined() 4161 * @param ch code point to be determined if it is defined in the most 4162 * current version of Unicode 4163 * @return true if this code point is defined in unicode 4164 * @stable ICU 2.1 4165 */ isDefined(int ch)4166 public static boolean isDefined(int ch) 4167 { 4168 return getType(ch) != 0; 4169 } 4170 4171 /** 4172 * Determines if a code point is a Java digit. 4173 * <br>This method observes the semantics of 4174 * <code>java.lang.Character.isDigit()</code>. It returns true for decimal 4175 * digits only. 
4176 * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated 4177 * numeric letters and other numbers as digits. 4178 * This has been changed to conform to the java semantics. 4179 * @param ch code point to query 4180 * @return true if this code point is a digit 4181 * @stable ICU 2.1 4182 */ isDigit(int ch)4183 public static boolean isDigit(int ch) 4184 { 4185 return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER; 4186 } 4187 4188 /** 4189 * Determines if the specified code point is an ISO control character. 4190 * A code point is considered to be an ISO control character if it is in 4191 * the range \u0000 through \u001F or in the range \u007F through 4192 * \u009F.<br> 4193 * Up-to-date Unicode implementation of java.lang.Character.isISOControl() 4194 * @param ch code point to determine if it is an ISO control character 4195 * @return true if code point is a ISO control character 4196 * @stable ICU 2.1 4197 */ isISOControl(int ch)4198 public static boolean isISOControl(int ch) 4199 { 4200 return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ && 4201 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_)); 4202 } 4203 4204 /** 4205 * Determines if the specified code point is a letter. 4206 * Up-to-date Unicode implementation of java.lang.Character.isLetter() 4207 * @param ch code point to determine if it is a letter 4208 * @return true if code point is a letter 4209 * @stable ICU 2.1 4210 */ isLetter(int ch)4211 public static boolean isLetter(int ch) 4212 { 4213 // if props == 0, it will just fall through and return false 4214 return ((1 << getType(ch)) 4215 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4216 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4217 | (1 << UCharacterCategory.TITLECASE_LETTER) 4218 | (1 << UCharacterCategory.MODIFIER_LETTER) 4219 | (1 << UCharacterCategory.OTHER_LETTER))) != 0; 4220 } 4221 4222 /** 4223 * Determines if the specified code point is a letter or digit. 
4224 * {@icunote} This method, unlike java.lang.Character does not regard the ascii 4225 * characters 'A' - 'Z' and 'a' - 'z' as digits. 4226 * @param ch code point to determine if it is a letter or a digit 4227 * @return true if code point is a letter or a digit 4228 * @stable ICU 2.1 4229 */ isLetterOrDigit(int ch)4230 public static boolean isLetterOrDigit(int ch) 4231 { 4232 return ((1 << getType(ch)) 4233 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4234 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4235 | (1 << UCharacterCategory.TITLECASE_LETTER) 4236 | (1 << UCharacterCategory.MODIFIER_LETTER) 4237 | (1 << UCharacterCategory.OTHER_LETTER) 4238 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0; 4239 } 4240 4241 /** 4242 * Compatibility override of Java deprecated method. This 4243 * method will always remain deprecated. Delegates to 4244 * java.lang.Character.isJavaIdentifierStart. 4245 * @param cp the code point 4246 * @return true if the code point can start a java identifier. 4247 * @deprecated ICU 3.4 (Java) 4248 */ 4249 @Deprecated isJavaLetter(int cp)4250 public static boolean isJavaLetter(int cp) { 4251 return isJavaIdentifierStart(cp); 4252 } 4253 4254 /** 4255 * Compatibility override of Java deprecated method. This 4256 * method will always remain deprecated. Delegates to 4257 * java.lang.Character.isJavaIdentifierPart. 4258 * @param cp the code point 4259 * @return true if the code point can continue a java identifier. 4260 * @deprecated ICU 3.4 (Java) 4261 */ 4262 @Deprecated isJavaLetterOrDigit(int cp)4263 public static boolean isJavaLetterOrDigit(int cp) { 4264 return isJavaIdentifierPart(cp); 4265 } 4266 4267 /** 4268 * Compatibility override of Java method, delegates to 4269 * java.lang.Character.isJavaIdentifierStart. 4270 * @param cp the code point 4271 * @return true if the code point can start a java identifier. 
4272 * @stable ICU 3.4 4273 */ isJavaIdentifierStart(int cp)4274 public static boolean isJavaIdentifierStart(int cp) { 4275 // note, downcast to char for jdk 1.4 compatibility 4276 return java.lang.Character.isJavaIdentifierStart((char)cp); 4277 } 4278 4279 /** 4280 * Compatibility override of Java method, delegates to 4281 * java.lang.Character.isJavaIdentifierPart. 4282 * @param cp the code point 4283 * @return true if the code point can continue a java identifier. 4284 * @stable ICU 3.4 4285 */ isJavaIdentifierPart(int cp)4286 public static boolean isJavaIdentifierPart(int cp) { 4287 // note, downcast to char for jdk 1.4 compatibility 4288 return java.lang.Character.isJavaIdentifierPart((char)cp); 4289 } 4290 4291 /** 4292 * Determines if the specified code point is a lowercase character. 4293 * UnicodeData only contains case mappings for code points where they are 4294 * one-to-one mappings; it also omits information about context-sensitive 4295 * case mappings.<br> For more information about Unicode case mapping 4296 * please refer to the 4297 * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report 4298 * #21</a>.<br> 4299 * Up-to-date Unicode implementation of java.lang.Character.isLowerCase() 4300 * @param ch code point to determine if it is in lowercase 4301 * @return true if code point is a lowercase character 4302 * @stable ICU 2.1 4303 */ isLowerCase(int ch)4304 public static boolean isLowerCase(int ch) 4305 { 4306 // if props == 0, it will just fall through and return false 4307 return getType(ch) == UCharacterCategory.LOWERCASE_LETTER; 4308 } 4309 4310 /** 4311 * Determines if the specified code point is a white space character. 4312 * A code point is considered to be an whitespace character if and only 4313 * if it satisfies one of the following criteria: 4314 * <ul> 4315 * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not 4316 * also a non-breaking space (\u00A0 or \u2007 or \u202F). 
4317 * <li> It is \u0009, HORIZONTAL TABULATION. 4318 * <li> It is \u000A, LINE FEED. 4319 * <li> It is \u000B, VERTICAL TABULATION. 4320 * <li> It is \u000C, FORM FEED. 4321 * <li> It is \u000D, CARRIAGE RETURN. 4322 * <li> It is \u001C, FILE SEPARATOR. 4323 * <li> It is \u001D, GROUP SEPARATOR. 4324 * <li> It is \u001E, RECORD SEPARATOR. 4325 * <li> It is \u001F, UNIT SEPARATOR. 4326 * </ul> 4327 * 4328 * This API tries to sync with the semantics of Java's 4329 * java.lang.Character.isWhitespace(), but it may not return 4330 * the exact same results because of the Unicode version 4331 * difference. 4332 * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs) 4333 * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false. 4334 * See http://www.unicode.org/versions/Unicode4.0.1/ 4335 * @param ch code point to determine if it is a white space 4336 * @return true if the specified code point is a white space character 4337 * @stable ICU 2.1 4338 */ isWhitespace(int ch)4339 public static boolean isWhitespace(int ch) 4340 { 4341 // exclude no-break spaces 4342 // if props == 0, it will just fall through and return false 4343 return ((1 << getType(ch)) & 4344 ((1 << UCharacterCategory.SPACE_SEPARATOR) 4345 | (1 << UCharacterCategory.LINE_SEPARATOR) 4346 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0 4347 && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_) 4348 // TAB VT LF FF CR FS GS RS US NL are all control characters 4349 // that are white spaces. 4350 || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f); 4351 } 4352 4353 /** 4354 * Determines if the specified code point is a Unicode specified space 4355 * character, i.e. if code point is in the category Zs, Zl and Zp. 4356 * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar(). 
4357 * @param ch code point to determine if it is a space 4358 * @return true if the specified code point is a space character 4359 * @stable ICU 2.1 4360 */ isSpaceChar(int ch)4361 public static boolean isSpaceChar(int ch) 4362 { 4363 // if props == 0, it will just fall through and return false 4364 return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR) 4365 | (1 << UCharacterCategory.LINE_SEPARATOR) 4366 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) 4367 != 0; 4368 } 4369 4370 /** 4371 * Determines if the specified code point is a titlecase character. 4372 * UnicodeData only contains case mappings for code points where they are 4373 * one-to-one mappings; it also omits information about context-sensitive 4374 * case mappings.<br> 4375 * For more information about Unicode case mapping please refer to the 4376 * <a href=http://www.unicode.org/unicode/reports/tr21/> 4377 * Technical report #21</a>.<br> 4378 * Up-to-date Unicode implementation of java.lang.Character.isTitleCase(). 4379 * @param ch code point to determine if it is in title case 4380 * @return true if the specified code point is a titlecase character 4381 * @stable ICU 2.1 4382 */ isTitleCase(int ch)4383 public static boolean isTitleCase(int ch) 4384 { 4385 // if props == 0, it will just fall through and return false 4386 return getType(ch) == UCharacterCategory.TITLECASE_LETTER; 4387 } 4388 4389 /** 4390 * Determines if the specified code point may be any part of a Unicode 4391 * identifier other than the starting character. 
4392 * A code point may be part of a Unicode identifier if and only if it is 4393 * one of the following: 4394 * <ul> 4395 * <li> Lu Uppercase letter 4396 * <li> Ll Lowercase letter 4397 * <li> Lt Titlecase letter 4398 * <li> Lm Modifier letter 4399 * <li> Lo Other letter 4400 * <li> Nl Letter number 4401 * <li> Pc Connecting punctuation character 4402 * <li> Nd decimal number 4403 * <li> Mc Spacing combining mark 4404 * <li> Mn Non-spacing mark 4405 * <li> Cf formatting code 4406 * </ul> 4407 * Up-to-date Unicode implementation of 4408 * java.lang.Character.isUnicodeIdentifierPart().<br> 4409 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 4410 * @param ch code point to determine if is can be part of a Unicode 4411 * identifier 4412 * @return true if code point is any character belonging a unicode 4413 * identifier suffix after the first character 4414 * @stable ICU 2.1 4415 */ isUnicodeIdentifierPart(int ch)4416 public static boolean isUnicodeIdentifierPart(int ch) 4417 { 4418 // if props == 0, it will just fall through and return false 4419 // cat == format 4420 return ((1 << getType(ch)) 4421 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4422 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4423 | (1 << UCharacterCategory.TITLECASE_LETTER) 4424 | (1 << UCharacterCategory.MODIFIER_LETTER) 4425 | (1 << UCharacterCategory.OTHER_LETTER) 4426 | (1 << UCharacterCategory.LETTER_NUMBER) 4427 | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION) 4428 | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER) 4429 | (1 << UCharacterCategory.COMBINING_SPACING_MARK) 4430 | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0 4431 || isIdentifierIgnorable(ch); 4432 } 4433 4434 /** 4435 * Determines if the specified code point is permissible as the first 4436 * character in a Unicode identifier. 
4437 * A code point may start a Unicode identifier if it is of type either 4438 * <ul> 4439 * <li> Lu Uppercase letter 4440 * <li> Ll Lowercase letter 4441 * <li> Lt Titlecase letter 4442 * <li> Lm Modifier letter 4443 * <li> Lo Other letter 4444 * <li> Nl Letter number 4445 * </ul> 4446 * Up-to-date Unicode implementation of 4447 * java.lang.Character.isUnicodeIdentifierStart().<br> 4448 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 4449 * @param ch code point to determine if it can start a Unicode identifier 4450 * @return true if code point is the first character belonging a unicode 4451 * identifier 4452 * @stable ICU 2.1 4453 */ isUnicodeIdentifierStart(int ch)4454 public static boolean isUnicodeIdentifierStart(int ch) 4455 { 4456 /*int cat = getType(ch);*/ 4457 // if props == 0, it will just fall through and return false 4458 return ((1 << getType(ch)) 4459 & ((1 << UCharacterCategory.UPPERCASE_LETTER) 4460 | (1 << UCharacterCategory.LOWERCASE_LETTER) 4461 | (1 << UCharacterCategory.TITLECASE_LETTER) 4462 | (1 << UCharacterCategory.MODIFIER_LETTER) 4463 | (1 << UCharacterCategory.OTHER_LETTER) 4464 | (1 << UCharacterCategory.LETTER_NUMBER))) != 0; 4465 } 4466 4467 /** 4468 * Determines if the specified code point should be regarded as an 4469 * ignorable character in a Java identifier. 4470 * A character is Java-identifier-ignorable if it has the general category 4471 * Cf Formatting Control, or it is a non-Java-whitespace ISO control: 4472 * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br> 4473 * Up-to-date Unicode implementation of 4474 * java.lang.Character.isIdentifierIgnorable().<br> 4475 * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>. 4476 * <p>Note that Unicode just recommends to ignore Cf (format controls). 4477 * @param ch code point to be determined if it can be ignored in a Unicode 4478 * identifier. 
4479 * @return true if the code point is ignorable 4480 * @stable ICU 2.1 4481 */ isIdentifierIgnorable(int ch)4482 public static boolean isIdentifierIgnorable(int ch) 4483 { 4484 // see java.lang.Character.isIdentifierIgnorable() on range of 4485 // ignorable characters. 4486 if (ch <= 0x9f) { 4487 return isISOControl(ch) 4488 && !((ch >= 0x9 && ch <= 0xd) 4489 || (ch >= 0x1c && ch <= 0x1f)); 4490 } 4491 return getType(ch) == UCharacterCategory.FORMAT; 4492 } 4493 4494 /** 4495 * Determines if the specified code point is an uppercase character. 4496 * UnicodeData only contains case mappings for code point where they are 4497 * one-to-one mappings; it also omits information about context-sensitive 4498 * case mappings.<br> 4499 * For language specific case conversion behavior, use 4500 * toUpperCase(locale, str). <br> 4501 * For example, the case conversion for dot-less i and dotted I in Turkish, 4502 * or for final sigma in Greek. 4503 * For more information about Unicode case mapping please refer to the 4504 * <a href=http://www.unicode.org/unicode/reports/tr21/> 4505 * Technical report #21</a>.<br> 4506 * Up-to-date Unicode implementation of java.lang.Character.isUpperCase(). 4507 * @param ch code point to determine if it is in uppercase 4508 * @return true if the code point is an uppercase character 4509 * @stable ICU 2.1 4510 */ isUpperCase(int ch)4511 public static boolean isUpperCase(int ch) 4512 { 4513 // if props == 0, it will just fall through and return false 4514 return getType(ch) == UCharacterCategory.UPPERCASE_LETTER; 4515 } 4516 4517 /** 4518 * The given code point is mapped to its lowercase equivalent; if the code 4519 * point has no lowercase equivalent, the code point itself is returned. 4520 * Up-to-date Unicode implementation of java.lang.Character.toLowerCase() 4521 * 4522 * <p>This function only returns the simple, single-code point case mapping. 
4523 * Full case mappings should be used whenever possible because they produce 4524 * better results by working on whole strings. 4525 * They take into account the string context and the language and can map 4526 * to a result string with a different length as appropriate. 4527 * Full case mappings are applied by the case mapping functions 4528 * that take String parameters rather than code points (int). 4529 * See also the User Guide chapter on C/POSIX migration: 4530 * http://www.icu-project.org/userguide/posix.html#case_mappings 4531 * 4532 * @param ch code point whose lowercase equivalent is to be retrieved 4533 * @return the lowercase equivalent code point 4534 * @stable ICU 2.1 4535 */ toLowerCase(int ch)4536 public static int toLowerCase(int ch) { 4537 return UCaseProps.INSTANCE.tolower(ch); 4538 } 4539 4540 /** 4541 * Converts argument code point and returns a String object representing 4542 * the code point's value in UTF-16 format. 4543 * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones. 4544 * 4545 * <p>Up-to-date Unicode implementation of java.lang.Character.toString(). 4546 * 4547 * @param ch code point 4548 * @return string representation of the code point, null if code point is not 4549 * defined in unicode 4550 * @stable ICU 2.1 4551 */ toString(int ch)4552 public static String toString(int ch) 4553 { 4554 if (ch < MIN_VALUE || ch > MAX_VALUE) { 4555 return null; 4556 } 4557 4558 if (ch < SUPPLEMENTARY_MIN_VALUE) { 4559 return String.valueOf((char)ch); 4560 } 4561 4562 return new String(Character.toChars(ch)); 4563 } 4564 4565 /** 4566 * Converts the code point argument to titlecase. 4567 * If no titlecase is available, the uppercase is returned. If no uppercase 4568 * is available, the code point itself is returned. 4569 * Up-to-date Unicode implementation of java.lang.Character.toTitleCase() 4570 * 4571 * <p>This function only returns the simple, single-code point case mapping. 
4572 * Full case mappings should be used whenever possible because they produce 4573 * better results by working on whole strings. 4574 * They take into account the string context and the language and can map 4575 * to a result string with a different length as appropriate. 4576 * Full case mappings are applied by the case mapping functions 4577 * that take String parameters rather than code points (int). 4578 * See also the User Guide chapter on C/POSIX migration: 4579 * http://www.icu-project.org/userguide/posix.html#case_mappings 4580 * 4581 * @param ch code point whose title case is to be retrieved 4582 * @return titlecase code point 4583 * @stable ICU 2.1 4584 */ toTitleCase(int ch)4585 public static int toTitleCase(int ch) { 4586 return UCaseProps.INSTANCE.totitle(ch); 4587 } 4588 4589 /** 4590 * Converts the character argument to uppercase. 4591 * If no uppercase is available, the character itself is returned. 4592 * Up-to-date Unicode implementation of java.lang.Character.toUpperCase() 4593 * 4594 * <p>This function only returns the simple, single-code point case mapping. 4595 * Full case mappings should be used whenever possible because they produce 4596 * better results by working on whole strings. 4597 * They take into account the string context and the language and can map 4598 * to a result string with a different length as appropriate. 4599 * Full case mappings are applied by the case mapping functions 4600 * that take String parameters rather than code points (int). 
4601 * See also the User Guide chapter on C/POSIX migration: 4602 * http://www.icu-project.org/userguide/posix.html#case_mappings 4603 * 4604 * @param ch code point whose uppercase is to be retrieved 4605 * @return uppercase code point 4606 * @stable ICU 2.1 4607 */ toUpperCase(int ch)4608 public static int toUpperCase(int ch) { 4609 return UCaseProps.INSTANCE.toupper(ch); 4610 } 4611 4612 // extra methods not in java.lang.Character -------------------------- 4613 4614 /** 4615 * {@icu} Determines if the code point is a supplementary character. 4616 * A code point is a supplementary character if and only if it is greater 4617 * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> 4618 * @param ch code point to be determined if it is in the supplementary 4619 * plane 4620 * @return true if code point is a supplementary character 4621 * @stable ICU 2.1 4622 */ isSupplementary(int ch)4623 public static boolean isSupplementary(int ch) 4624 { 4625 return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE && 4626 ch <= UCharacter.MAX_VALUE; 4627 } 4628 4629 /** 4630 * {@icu} Determines if the code point is in the BMP plane. 4631 * @param ch code point to be determined if it is not a supplementary 4632 * character 4633 * @return true if code point is not a supplementary character 4634 * @stable ICU 2.1 4635 */ isBMP(int ch)4636 public static boolean isBMP(int ch) 4637 { 4638 return (ch >= 0 && ch <= LAST_CHAR_MASK_); 4639 } 4640 4641 /** 4642 * {@icu} Determines whether the specified code point is a printable character 4643 * according to the Unicode standard. 
4644 * @param ch code point to be determined if it is printable 4645 * @return true if the code point is a printable character 4646 * @stable ICU 2.1 4647 */ isPrintable(int ch)4648 public static boolean isPrintable(int ch) 4649 { 4650 int cat = getType(ch); 4651 // if props == 0, it will just fall through and return false 4652 return (cat != UCharacterCategory.UNASSIGNED && 4653 cat != UCharacterCategory.CONTROL && 4654 cat != UCharacterCategory.FORMAT && 4655 cat != UCharacterCategory.PRIVATE_USE && 4656 cat != UCharacterCategory.SURROGATE && 4657 cat != UCharacterCategory.GENERAL_OTHER_TYPES); 4658 } 4659 4660 /** 4661 * {@icu} Determines whether the specified code point is of base form. 4662 * A code point of base form does not graphically combine with preceding 4663 * characters, and is neither a control nor a format character. 4664 * @param ch code point to be determined if it is of base form 4665 * @return true if the code point is of base form 4666 * @stable ICU 2.1 4667 */ isBaseForm(int ch)4668 public static boolean isBaseForm(int ch) 4669 { 4670 int cat = getType(ch); 4671 // if props == 0, it will just fall through and return false 4672 return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER || 4673 cat == UCharacterCategory.OTHER_NUMBER || 4674 cat == UCharacterCategory.LETTER_NUMBER || 4675 cat == UCharacterCategory.UPPERCASE_LETTER || 4676 cat == UCharacterCategory.LOWERCASE_LETTER || 4677 cat == UCharacterCategory.TITLECASE_LETTER || 4678 cat == UCharacterCategory.MODIFIER_LETTER || 4679 cat == UCharacterCategory.OTHER_LETTER || 4680 cat == UCharacterCategory.NON_SPACING_MARK || 4681 cat == UCharacterCategory.ENCLOSING_MARK || 4682 cat == UCharacterCategory.COMBINING_SPACING_MARK; 4683 } 4684 4685 /** 4686 * {@icu} Returns the Bidirection property of a code point. 
4687 * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional 4688 * property.<br> 4689 * Result returned belongs to the interface 4690 * <a href=UCharacterDirection.html>UCharacterDirection</a> 4691 * @param ch the code point to be determined its direction 4692 * @return direction constant from UCharacterDirection. 4693 * @stable ICU 2.1 4694 */ getDirection(int ch)4695 public static int getDirection(int ch) 4696 { 4697 return UBiDiProps.INSTANCE.getClass(ch); 4698 } 4699 4700 /** 4701 * Determines whether the code point has the "mirrored" property. 4702 * This property is set for characters that are commonly used in 4703 * Right-To-Left contexts and need to be displayed with a "mirrored" 4704 * glyph. 4705 * @param ch code point whose mirror is to be determined 4706 * @return true if the code point has the "mirrored" property 4707 * @stable ICU 2.1 4708 */ isMirrored(int ch)4709 public static boolean isMirrored(int ch) 4710 { 4711 return UBiDiProps.INSTANCE.isMirrored(ch); 4712 } 4713 4714 /** 4715 * {@icu} Maps the specified code point to a "mirror-image" code point. 4716 * For code points with the "mirrored" property, implementations sometimes 4717 * need a "poor man's" mapping to another code point such that the default 4718 * glyph may serve as the mirror-image of the default glyph of the 4719 * specified code point.<br> 4720 * This is useful for text conversion to and from codepages with visual 4721 * order, and for displays without glyph selection capabilities. 4722 * @param ch code point whose mirror is to be retrieved 4723 * @return another code point that may serve as a mirror-image substitute, 4724 * or ch itself if there is no such mapping or ch does not have the 4725 * "mirrored" property 4726 * @stable ICU 2.1 4727 */ getMirror(int ch)4728 public static int getMirror(int ch) 4729 { 4730 return UBiDiProps.INSTANCE.getMirror(ch); 4731 } 4732 4733 /** 4734 * {@icu} Maps the specified character to its paired bracket character. 
4735 * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int). 4736 * Otherwise c itself is returned. 4737 * See http://www.unicode.org/reports/tr9/ 4738 * 4739 * @param c the code point to be mapped 4740 * @return the paired bracket code point, 4741 * or c itself if there is no such mapping 4742 * (Bidi_Paired_Bracket_Type=None) 4743 * 4744 * @see UProperty#BIDI_PAIRED_BRACKET 4745 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE 4746 * @see #getMirror(int) 4747 * @stable ICU 52 4748 */ getBidiPairedBracket(int c)4749 public static int getBidiPairedBracket(int c) { 4750 return UBiDiProps.INSTANCE.getPairedBracket(c); 4751 } 4752 4753 /** 4754 * {@icu} Returns the combining class of the argument codepoint 4755 * @param ch code point whose combining is to be retrieved 4756 * @return the combining class of the codepoint 4757 * @stable ICU 2.1 4758 */ getCombiningClass(int ch)4759 public static int getCombiningClass(int ch) 4760 { 4761 return Normalizer2.getNFDInstance().getCombiningClass(ch); 4762 } 4763 4764 /** 4765 * {@icu} A code point is illegal if and only if 4766 * <ul> 4767 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4768 * <li> A surrogate value, 0xD800 to 0xDFFF 4769 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4770 * </ul> 4771 * Note: legal does not mean that it is assigned in this version of Unicode. 4772 * @param ch code point to determine if it is a legal code point by itself 4773 * @return true if and only if legal. 4774 * @stable ICU 2.1 4775 */ isLegal(int ch)4776 public static boolean isLegal(int ch) 4777 { 4778 if (ch < MIN_VALUE) { 4779 return false; 4780 } 4781 if (ch < Character.MIN_SURROGATE) { 4782 return true; 4783 } 4784 if (ch <= Character.MAX_SURROGATE) { 4785 return false; 4786 } 4787 if (UCharacterUtility.isNonCharacter(ch)) { 4788 return false; 4789 } 4790 return (ch <= MAX_VALUE); 4791 } 4792 4793 /** 4794 * {@icu} A string is legal iff all its code points are legal. 
4795 * A code point is illegal if and only if 4796 * <ul> 4797 * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE 4798 * <li> A surrogate value, 0xD800 to 0xDFFF 4799 * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE 4800 * </ul> 4801 * Note: legal does not mean that it is assigned in this version of Unicode. 4802 * @param str containing code points to examin 4803 * @return true if and only if legal. 4804 * @stable ICU 2.1 4805 */ isLegal(String str)4806 public static boolean isLegal(String str) 4807 { 4808 int size = str.length(); 4809 int codepoint; 4810 for (int i = 0; i < size; i += Character.charCount(codepoint)) 4811 { 4812 codepoint = str.codePointAt(i); 4813 if (!isLegal(codepoint)) { 4814 return false; 4815 } 4816 } 4817 return true; 4818 } 4819 4820 /** 4821 * {@icu} Returns the version of Unicode data used. 4822 * @return the unicode version number used 4823 * @stable ICU 2.1 4824 */ getUnicodeVersion()4825 public static VersionInfo getUnicodeVersion() 4826 { 4827 return UCharacterProperty.INSTANCE.m_unicodeVersion_; 4828 } 4829 4830 /** 4831 * {@icu} Returns the most current Unicode name of the argument code point, or 4832 * null if the character is unassigned or outside the range 4833 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4834 * <br> 4835 * Note calling any methods related to code point names, e.g. get*Name*() 4836 * incurs a one-time initialisation cost to construct the name tables. 
4837 * @param ch the code point for which to get the name 4838 * @return most current Unicode name 4839 * @stable ICU 2.1 4840 */ getName(int ch)4841 public static String getName(int ch) 4842 { 4843 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME); 4844 } 4845 4846 /** 4847 * {@icu} Returns the names for each of the characters in a string 4848 * @param s string to format 4849 * @param separator string to go between names 4850 * @return string of names 4851 * @stable ICU 3.8 4852 */ getName(String s, String separator)4853 public static String getName(String s, String separator) { 4854 if (s.length() == 1) { // handle common case 4855 return getName(s.charAt(0)); 4856 } 4857 int cp; 4858 StringBuilder sb = new StringBuilder(); 4859 for (int i = 0; i < s.length(); i += Character.charCount(cp)) { 4860 cp = s.codePointAt(i); 4861 if (i != 0) sb.append(separator); 4862 sb.append(UCharacter.getName(cp)); 4863 } 4864 return sb.toString(); 4865 } 4866 4867 /** 4868 * {@icu} Returns null. 4869 * Used to return the Unicode_1_Name property value which was of little practical value. 4870 * @param ch the code point for which to get the name 4871 * @return null 4872 * @deprecated ICU 49 4873 */ 4874 @Deprecated getName1_0(int ch)4875 public static String getName1_0(int ch) 4876 { 4877 return null; 4878 } 4879 4880 /** 4881 * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and 4882 * getName1_0(int), this method will return a name even for codepoints that 4883 * are not assigned a name in UnicodeData.txt. 4884 * 4885 * <p>The names are returned in the following order. 4886 * <ul> 4887 * <li> Most current Unicode name if there is any 4888 * <li> Unicode 1.0 name if there is any 4889 * <li> Extended name in the form of 4890 * "<codepoint_type-codepoint_hex_digits>". E.g., <noncharacter-fffe> 4891 * </ul> 4892 * Note calling any methods related to code point names, e.g. 
get*Name*() 4893 * incurs a one-time initialisation cost to construct the name tables. 4894 * @param ch the code point for which to get the name 4895 * @return a name for the argument codepoint 4896 * @stable ICU 2.6 4897 */ getExtendedName(int ch)4898 public static String getExtendedName(int ch) { 4899 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME); 4900 } 4901 4902 /** 4903 * {@icu} Returns the corrected name from NameAliases.txt if there is one. 4904 * Returns null if the character is unassigned or outside the range 4905 * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name. 4906 * <br> 4907 * Note calling any methods related to code point names, e.g. get*Name*() 4908 * incurs a one-time initialisation cost to construct the name tables. 4909 * @param ch the code point for which to get the name alias 4910 * @return Unicode name alias, or null 4911 * @stable ICU 4.4 4912 */ getNameAlias(int ch)4913 public static String getNameAlias(int ch) 4914 { 4915 return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS); 4916 } 4917 4918 /** 4919 * {@icu} Returns null. 4920 * Used to return the ISO 10646 comment for a character. 4921 * The Unicode ISO_Comment property is deprecated and has no values. 4922 * 4923 * @param ch The code point for which to get the ISO comment. 4924 * It must be the case that {@code 0 <= ch <= 0x10ffff}. 4925 * @return null 4926 * @deprecated ICU 49 4927 */ 4928 @Deprecated getISOComment(int ch)4929 public static String getISOComment(int ch) 4930 { 4931 return null; 4932 } 4933 4934 /** 4935 * {@icu} <p>Finds a Unicode code point by its most current Unicode name and 4936 * return its code point value. All Unicode names are in uppercase. 4937 * Note calling any methods related to code point names, e.g. get*Name*() 4938 * incurs a one-time initialisation cost to construct the name tables. 
4939 * @param name most current Unicode character name whose code point is to 4940 * be returned 4941 * @return code point or -1 if name is not found 4942 * @stable ICU 2.1 4943 */ getCharFromName(String name)4944 public static int getCharFromName(String name){ 4945 return UCharacterName.INSTANCE.getCharFromName( 4946 UCharacterNameChoice.UNICODE_CHAR_NAME, name); 4947 } 4948 4949 /** 4950 * {@icu} Returns -1. 4951 * <p>Used to find a Unicode character by its version 1.0 Unicode name and return 4952 * its code point value. 4953 * @param name Unicode 1.0 code point name whose code point is to be 4954 * returned 4955 * @return -1 4956 * @deprecated ICU 49 4957 * @see #getName1_0(int) 4958 */ 4959 @Deprecated getCharFromName1_0(String name)4960 public static int getCharFromName1_0(String name){ 4961 return -1; 4962 } 4963 4964 /** 4965 * {@icu} <p>Find a Unicode character by either its name and return its code 4966 * point value. All Unicode names are in uppercase. 4967 * Extended names are all lowercase except for numbers and are contained 4968 * within angle brackets. 4969 * The names are searched in the following order 4970 * <ul> 4971 * <li> Most current Unicode name if there is any 4972 * <li> Unicode 1.0 name if there is any 4973 * <li> Extended name in the form of 4974 * "<codepoint_type-codepoint_hex_digits>". E.g. <noncharacter-FFFE> 4975 * </ul> 4976 * Note calling any methods related to code point names, e.g. get*Name*() 4977 * incurs a one-time initialisation cost to construct the name tables. 4978 * @param name codepoint name 4979 * @return code point associated with the name or -1 if the name is not 4980 * found. 
4981 * @stable ICU 2.6 4982 */ getCharFromExtendedName(String name)4983 public static int getCharFromExtendedName(String name){ 4984 return UCharacterName.INSTANCE.getCharFromName( 4985 UCharacterNameChoice.EXTENDED_CHAR_NAME, name); 4986 } 4987 4988 /** 4989 * {@icu} <p>Find a Unicode character by its corrected name alias and return 4990 * its code point value. All Unicode names are in uppercase. 4991 * Note calling any methods related to code point names, e.g. get*Name*() 4992 * incurs a one-time initialisation cost to construct the name tables. 4993 * @param name Unicode name alias whose code point is to be returned 4994 * @return code point or -1 if name is not found 4995 * @stable ICU 4.4 4996 */ getCharFromNameAlias(String name)4997 public static int getCharFromNameAlias(String name){ 4998 return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name); 4999 } 5000 5001 /** 5002 * {@icu} Return the Unicode name for a given property, as given in the 5003 * Unicode database file PropertyAliases.txt. Most properties 5004 * have more than one name. The nameChoice determines which one 5005 * is returned. 5006 * 5007 * In addition, this function maps the property 5008 * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" / 5009 * "General_Category_Mask". These names are not in 5010 * PropertyAliases.txt. 5011 * 5012 * @param property UProperty selector. 5013 * 5014 * @param nameChoice UProperty.NameChoice selector for which name 5015 * to get. All properties have a long name. Most have a short 5016 * name, but some do not. Unicode allows for additional names; if 5017 * present these will be returned by UProperty.NameChoice.LONG + i, 5018 * where i=1, 2,... 5019 * 5020 * @return a name, or null if Unicode explicitly defines no name 5021 * ("n/a") for a given property/nameChoice. If a given nameChoice 5022 * throws an exception, then all larger values of nameChoice will 5023 * throw an exception. 
If null is returned for a given 5024 * nameChoice, then other nameChoice values may return non-null 5025 * results. 5026 * 5027 * @exception IllegalArgumentException thrown if property or 5028 * nameChoice are invalid. 5029 * 5030 * @see UProperty 5031 * @see UProperty.NameChoice 5032 * @stable ICU 2.4 5033 */ getPropertyName(int property, int nameChoice)5034 public static String getPropertyName(int property, 5035 int nameChoice) { 5036 return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice); 5037 } 5038 5039 /** 5040 * {@icu} Return the UProperty selector for a given property name, as 5041 * specified in the Unicode database file PropertyAliases.txt. 5042 * Short, long, and any other variants are recognized. 5043 * 5044 * In addition, this function maps the synthetic names "gcm" / 5045 * "General_Category_Mask" to the property 5046 * UProperty.GENERAL_CATEGORY_MASK. These names are not in 5047 * PropertyAliases.txt. 5048 * 5049 * @param propertyAlias the property name to be matched. The name 5050 * is compared using "loose matching" as described in 5051 * PropertyAliases.txt. 5052 * 5053 * @return a UProperty enum. 5054 * 5055 * @exception IllegalArgumentException thrown if propertyAlias 5056 * is not recognized. 5057 * 5058 * @see UProperty 5059 * @stable ICU 2.4 5060 */ getPropertyEnum(CharSequence propertyAlias)5061 public static int getPropertyEnum(CharSequence propertyAlias) { 5062 int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias); 5063 if (propEnum == UProperty.UNDEFINED) { 5064 throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias); 5065 } 5066 return propEnum; 5067 } 5068 5069 /** 5070 * {@icu} Return the Unicode name for a given property value, as given in 5071 * the Unicode database file PropertyValueAliases.txt. Most 5072 * values have more than one name. The nameChoice determines 5073 * which one is returned. 
*
     * Note: Some of the names in PropertyValueAliases.txt can only be
     * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
     * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
     * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
     * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
     *
     * @param property UProperty selector constant.
     * UProperty.INT_START <= property < UProperty.INT_LIMIT or
     * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or
     * UProperty.MASK_START <= property < UProperty.MASK_LIMIT.
     * If out of range, null is returned.
     *
     * @param value selector for a value for the given property.  In
     * general, valid values range from 0 up to some maximum.  There
     * are a few exceptions: (1.) UProperty.BLOCK values begin at the
     * non-zero value BASIC_LATIN.getID().  (2.)
     * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
     * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
     * are mask values produced by left-shifting 1 by
     * UCharacter.getType().  This allows grouped categories such as
     * [:L:] to be represented.  Mask values are non-contiguous.
     *
     * @param nameChoice UProperty.NameChoice selector for which name
     * to get.  All values have a long name.  Most have a short name,
     * but some do not.  Unicode allows for additional names; if
     * present these will be returned by UProperty.NameChoice.LONG + i,
     * where i=1, 2,...
     *
     * @return a name, or null if Unicode explicitly defines no name
     * ("n/a") for a given property/value/nameChoice.  If a given
     * nameChoice throws an exception, then all larger values of
     * nameChoice will throw an exception.  If null is returned for a
     * given nameChoice, then other nameChoice values may return
     * non-null results.
5109 * 5110 * @exception IllegalArgumentException thrown if property, value, 5111 * or nameChoice are invalid. 5112 * 5113 * @see UProperty 5114 * @see UProperty.NameChoice 5115 * @stable ICU 2.4 5116 */ getPropertyValueName(int property, int value, int nameChoice)5117 public static String getPropertyValueName(int property, 5118 int value, 5119 int nameChoice) 5120 { 5121 if ((property == UProperty.CANONICAL_COMBINING_CLASS 5122 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS 5123 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS) 5124 && value >= UCharacter.getIntPropertyMinValue( 5125 UProperty.CANONICAL_COMBINING_CLASS) 5126 && value <= UCharacter.getIntPropertyMaxValue( 5127 UProperty.CANONICAL_COMBINING_CLASS) 5128 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) { 5129 // this is hard coded for the valid cc 5130 // because PropertyValueAliases.txt does not contain all of them 5131 try { 5132 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, 5133 nameChoice); 5134 } 5135 catch (IllegalArgumentException e) { 5136 return null; 5137 } 5138 } 5139 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice); 5140 } 5141 5142 /** 5143 * {@icu} Return the property value integer for a given value name, as 5144 * specified in the Unicode database file PropertyValueAliases.txt. 5145 * Short, long, and any other variants are recognized. 5146 * 5147 * Note: Some of the names in PropertyValueAliases.txt will only be 5148 * recognized with UProperty.GENERAL_CATEGORY_MASK, not 5149 * UProperty.GENERAL_CATEGORY. These include: "C" / "Other", "L" / 5150 * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P" 5151 * / "Punctuation", "S" / "Symbol", and "Z" / "Separator". 5152 * 5153 * @param property UProperty selector constant. 
5154 * UProperty.INT_START <= property < UProperty.INT_LIMIT or 5155 * UProperty.BINARY_START <= property < UProperty.BINARY_LIMIT or 5156 * UProperty.MASK_START < = property < UProperty.MASK_LIMIT. 5157 * Only these properties can be enumerated. 5158 * 5159 * @param valueAlias the value name to be matched. The name is 5160 * compared using "loose matching" as described in 5161 * PropertyValueAliases.txt. 5162 * 5163 * @return a value integer. Note: UProperty.GENERAL_CATEGORY 5164 * values are mask values produced by left-shifting 1 by 5165 * UCharacter.getType(). This allows grouped categories such as 5166 * [:L:] to be represented. 5167 * 5168 * @see UProperty 5169 * @throws IllegalArgumentException if property is not a valid UProperty 5170 * selector or valueAlias is not a value of this property 5171 * @stable ICU 2.4 5172 */ getPropertyValueEnum(int property, CharSequence valueAlias)5173 public static int getPropertyValueEnum(int property, CharSequence valueAlias) { 5174 int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias); 5175 if (propEnum == UProperty.UNDEFINED) { 5176 throw new IllegalIcuArgumentException("Invalid name: " + valueAlias); 5177 } 5178 return propEnum; 5179 } 5180 5181 /** 5182 * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED. 5183 * @param property Same as {@link #getPropertyValueEnum(int, CharSequence)} 5184 * @param valueAlias Same as {@link #getPropertyValueEnum(int, CharSequence)} 5185 * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value. 5186 * @internal 5187 * @deprecated This API is ICU internal only. 
5188 */ 5189 @Deprecated getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)5190 public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) { 5191 return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias); 5192 } 5193 5194 5195 /** 5196 * {@icu} Returns a code point corresponding to the two surrogate code units. 5197 * 5198 * @param lead the lead char 5199 * @param trail the trail char 5200 * @return code point if surrogate characters are valid. 5201 * @exception IllegalArgumentException thrown when the code units do 5202 * not form a valid code point 5203 * @stable ICU 2.1 5204 */ getCodePoint(char lead, char trail)5205 public static int getCodePoint(char lead, char trail) 5206 { 5207 if (Character.isSurrogatePair(lead, trail)) { 5208 return Character.toCodePoint(lead, trail); 5209 } 5210 throw new IllegalArgumentException("Illegal surrogate characters"); 5211 } 5212 5213 /** 5214 * {@icu} Returns the code point corresponding to the BMP code point. 5215 * 5216 * @param char16 the BMP code point 5217 * @return code point if argument is a valid character. 5218 * @exception IllegalArgumentException thrown when char16 is not a valid 5219 * code point 5220 * @stable ICU 2.1 5221 */ getCodePoint(char char16)5222 public static int getCodePoint(char char16) 5223 { 5224 if (UCharacter.isLegal(char16)) { 5225 return char16; 5226 } 5227 throw new IllegalArgumentException("Illegal codepoint"); 5228 } 5229 5230 /** 5231 * Returns the uppercase version of the argument string. 5232 * Casing is dependent on the default locale and context-sensitive. 5233 * @param str source string to be performed on 5234 * @return uppercase version of the argument string 5235 * @stable ICU 2.1 5236 */ toUpperCase(String str)5237 public static String toUpperCase(String str) 5238 { 5239 return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str); 5240 } 5241 5242 /** 5243 * Returns the lowercase version of the argument string. 
5244 * Casing is dependent on the default locale and context-sensitive 5245 * @param str source string to be performed on 5246 * @return lowercase version of the argument string 5247 * @stable ICU 2.1 5248 */ toLowerCase(String str)5249 public static String toLowerCase(String str) 5250 { 5251 return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str); 5252 } 5253 5254 /** 5255 * <p>Returns the titlecase version of the argument string. 5256 * <p>Position for titlecasing is determined by the argument break 5257 * iterator, hence the user can customize his break iterator for 5258 * a specialized titlecasing. In this case only the forward iteration 5259 * needs to be implemented. 5260 * If the break iterator passed in is null, the default Unicode algorithm 5261 * will be used to determine the titlecase positions. 5262 * 5263 * <p>Only positions returned by the break iterator will be title cased, 5264 * character in between the positions will all be in lower case. 5265 * <p>Casing is dependent on the default locale and context-sensitive 5266 * @param str source string to be performed on 5267 * @param breakiter break iterator to determine the positions in which 5268 * the character should be title cased. 
5269 * @return titlecase version of the argument string 5270 * @stable ICU 2.6 5271 */ toTitleCase(String str, BreakIterator breakiter)5272 public static String toTitleCase(String str, BreakIterator breakiter) 5273 { 5274 return toTitleCase(Locale.getDefault(), str, breakiter, 0); 5275 } 5276 getDefaultCaseLocale()5277 private static int getDefaultCaseLocale() { 5278 return UCaseProps.getCaseLocale(Locale.getDefault()); 5279 } 5280 getCaseLocale(Locale locale)5281 private static int getCaseLocale(Locale locale) { 5282 if (locale == null) { 5283 locale = Locale.getDefault(); 5284 } 5285 return UCaseProps.getCaseLocale(locale); 5286 } 5287 getCaseLocale(ULocale locale)5288 private static int getCaseLocale(ULocale locale) { 5289 if (locale == null) { 5290 locale = ULocale.getDefault(); 5291 } 5292 return UCaseProps.getCaseLocale(locale); 5293 } 5294 5295 /** 5296 * Returns the uppercase version of the argument string. 5297 * Casing is dependent on the argument locale and context-sensitive. 5298 * @param locale which string is to be converted in 5299 * @param str source string to be performed on 5300 * @return uppercase version of the argument string 5301 * @stable ICU 2.1 5302 */ toUpperCase(Locale locale, String str)5303 public static String toUpperCase(Locale locale, String str) 5304 { 5305 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5306 } 5307 5308 /** 5309 * Returns the uppercase version of the argument string. 5310 * Casing is dependent on the argument locale and context-sensitive. 5311 * @param locale which string is to be converted in 5312 * @param str source string to be performed on 5313 * @return uppercase version of the argument string 5314 * @stable ICU 3.2 5315 */ toUpperCase(ULocale locale, String str)5316 public static String toUpperCase(ULocale locale, String str) { 5317 return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); 5318 } 5319 5320 /** 5321 * Returns the lowercase version of the argument string. 
5322 * Casing is dependent on the argument locale and context-sensitive 5323 * @param locale which string is to be converted in 5324 * @param str source string to be performed on 5325 * @return lowercase version of the argument string 5326 * @stable ICU 2.1 5327 */ toLowerCase(Locale locale, String str)5328 public static String toLowerCase(Locale locale, String str) 5329 { 5330 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5331 } 5332 5333 /** 5334 * Returns the lowercase version of the argument string. 5335 * Casing is dependent on the argument locale and context-sensitive 5336 * @param locale which string is to be converted in 5337 * @param str source string to be performed on 5338 * @return lowercase version of the argument string 5339 * @stable ICU 3.2 5340 */ toLowerCase(ULocale locale, String str)5341 public static String toLowerCase(ULocale locale, String str) { 5342 return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); 5343 } 5344 5345 /** 5346 * <p>Returns the titlecase version of the argument string. 5347 * <p>Position for titlecasing is determined by the argument break 5348 * iterator, hence the user can customize his break iterator for 5349 * a specialized titlecasing. In this case only the forward iteration 5350 * needs to be implemented. 5351 * If the break iterator passed in is null, the default Unicode algorithm 5352 * will be used to determine the titlecase positions. 5353 * 5354 * <p>Only positions returned by the break iterator will be title cased, 5355 * character in between the positions will all be in lower case. 5356 * <p>Casing is dependent on the argument locale and context-sensitive 5357 * @param locale which string is to be converted in 5358 * @param str source string to be performed on 5359 * @param breakiter break iterator to determine the positions in which 5360 * the character should be title cased. 
5361 * @return titlecase version of the argument string 5362 * @stable ICU 2.6 5363 */ toTitleCase(Locale locale, String str, BreakIterator breakiter)5364 public static String toTitleCase(Locale locale, String str, 5365 BreakIterator breakiter) 5366 { 5367 return toTitleCase(locale, str, breakiter, 0); 5368 } 5369 5370 /** 5371 * <p>Returns the titlecase version of the argument string. 5372 * <p>Position for titlecasing is determined by the argument break 5373 * iterator, hence the user can customize his break iterator for 5374 * a specialized titlecasing. In this case only the forward iteration 5375 * needs to be implemented. 5376 * If the break iterator passed in is null, the default Unicode algorithm 5377 * will be used to determine the titlecase positions. 5378 * 5379 * <p>Only positions returned by the break iterator will be title cased, 5380 * character in between the positions will all be in lower case. 5381 * <p>Casing is dependent on the argument locale and context-sensitive 5382 * @param locale which string is to be converted in 5383 * @param str source string to be performed on 5384 * @param titleIter break iterator to determine the positions in which 5385 * the character should be title cased. 5386 * @return titlecase version of the argument string 5387 * @stable ICU 3.2 5388 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter)5389 public static String toTitleCase(ULocale locale, String str, 5390 BreakIterator titleIter) { 5391 return toTitleCase(locale, str, titleIter, 0); 5392 } 5393 5394 /** 5395 * <p>Returns the titlecase version of the argument string. 5396 * <p>Position for titlecasing is determined by the argument break 5397 * iterator, hence the user can customize his break iterator for 5398 * a specialized titlecasing. In this case only the forward iteration 5399 * needs to be implemented. 5400 * If the break iterator passed in is null, the default Unicode algorithm 5401 * will be used to determine the titlecase positions. 
5402 * 5403 * <p>Only positions returned by the break iterator will be title cased, 5404 * character in between the positions will all be in lower case. 5405 * <p>Casing is dependent on the argument locale and context-sensitive 5406 * @param locale which string is to be converted in 5407 * @param str source string to be performed on 5408 * @param titleIter break iterator to determine the positions in which 5409 * the character should be title cased. 5410 * @param options bit set to modify the titlecasing operation 5411 * @return titlecase version of the argument string 5412 * @stable ICU 3.8 5413 * @see #TITLECASE_NO_LOWERCASE 5414 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5415 */ toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5416 public static String toTitleCase(ULocale locale, String str, 5417 BreakIterator titleIter, int options) { 5418 if (titleIter == null && locale == null) { 5419 locale = ULocale.getDefault(); 5420 } 5421 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5422 titleIter.setText(str); 5423 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5424 } 5425 5426 /** 5427 * {@icu} <p>Returns the titlecase version of the argument string. 5428 * <p>Position for titlecasing is determined by the argument break 5429 * iterator, hence the user can customize his break iterator for 5430 * a specialized titlecasing. In this case only the forward iteration 5431 * needs to be implemented. 5432 * If the break iterator passed in is null, the default Unicode algorithm 5433 * will be used to determine the titlecase positions. 5434 * 5435 * <p>Only positions returned by the break iterator will be title cased, 5436 * character in between the positions will all be in lower case. 
5437 * <p>Casing is dependent on the argument locale and context-sensitive 5438 * @param locale which string is to be converted in 5439 * @param str source string to be performed on 5440 * @param titleIter break iterator to determine the positions in which 5441 * the character should be title cased. 5442 * @param options bit set to modify the titlecasing operation 5443 * @return titlecase version of the argument string 5444 * @see #TITLECASE_NO_LOWERCASE 5445 * @see #TITLECASE_NO_BREAK_ADJUSTMENT 5446 * @stable ICU 54 5447 */ toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5448 public static String toTitleCase(Locale locale, String str, 5449 BreakIterator titleIter, 5450 int options) { 5451 if (titleIter == null && locale == null) { 5452 locale = Locale.getDefault(); 5453 } 5454 titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); 5455 titleIter.setText(str); 5456 return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); 5457 } 5458 5459 /** 5460 * {@icu} The given character is mapped to its case folding equivalent according 5461 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5462 * folding equivalent, the character itself is returned. 5463 * 5464 * <p>This function only returns the simple, single-code point case mapping. 5465 * Full case mappings should be used whenever possible because they produce 5466 * better results by working on whole strings. 5467 * They can map to a result string with a different length as appropriate. 5468 * Full case mappings are applied by the case mapping functions 5469 * that take String parameters rather than code points (int). 
5470 * See also the User Guide chapter on C/POSIX migration: 5471 * http://www.icu-project.org/userguide/posix.html#case_mappings 5472 * 5473 * @param ch the character to be converted 5474 * @param defaultmapping Indicates whether the default mappings defined in 5475 * CaseFolding.txt are to be used, otherwise the 5476 * mappings for dotted I and dotless i marked with 5477 * 'T' in CaseFolding.txt are included. 5478 * @return the case folding equivalent of the character, if 5479 * any; otherwise the character itself. 5480 * @see #foldCase(String, boolean) 5481 * @stable ICU 2.1 5482 */ foldCase(int ch, boolean defaultmapping)5483 public static int foldCase(int ch, boolean defaultmapping) { 5484 return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5485 } 5486 5487 /** 5488 * {@icu} The given string is mapped to its case folding equivalent according to 5489 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5490 * folding equivalent, the character itself is returned. 5491 * "Full", multiple-code point case folding mappings are returned here. 5492 * For "simple" single-code point mappings use the API 5493 * foldCase(int ch, boolean defaultmapping). 5494 * @param str the String to be converted 5495 * @param defaultmapping Indicates whether the default mappings defined in 5496 * CaseFolding.txt are to be used, otherwise the 5497 * mappings for dotted I and dotless i marked with 5498 * 'T' in CaseFolding.txt are included. 5499 * @return the case folding equivalent of the character, if 5500 * any; otherwise the character itself. 5501 * @see #foldCase(int, boolean) 5502 * @stable ICU 2.1 5503 */ foldCase(String str, boolean defaultmapping)5504 public static String foldCase(String str, boolean defaultmapping) { 5505 return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I); 5506 } 5507 5508 /** 5509 * {@icu} Option value for case folding: use default mappings defined in 5510 * CaseFolding.txt. 
5511 * @stable ICU 2.6 5512 */ 5513 public static final int FOLD_CASE_DEFAULT = 0x0000; 5514 /** 5515 * {@icu} Option value for case folding: 5516 * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I 5517 * and dotless i appropriately for Turkic languages (tr, az). 5518 * 5519 * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that 5520 * are to be included for default mappings and 5521 * excluded for the Turkic-specific mappings. 5522 * 5523 * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that 5524 * are to be excluded for default mappings and 5525 * included for the Turkic-specific mappings. 5526 * 5527 * @stable ICU 2.6 5528 */ 5529 public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001; 5530 5531 /** 5532 * {@icu} The given character is mapped to its case folding equivalent according 5533 * to UnicodeData.txt and CaseFolding.txt; if the character has no case 5534 * folding equivalent, the character itself is returned. 5535 * 5536 * <p>This function only returns the simple, single-code point case mapping. 5537 * Full case mappings should be used whenever possible because they produce 5538 * better results by working on whole strings. 5539 * They can map to a result string with a different length as appropriate. 5540 * Full case mappings are applied by the case mapping functions 5541 * that take String parameters rather than code points (int). 5542 * See also the User Guide chapter on C/POSIX migration: 5543 * http://www.icu-project.org/userguide/posix.html#case_mappings 5544 * 5545 * @param ch the character to be converted 5546 * @param options A bit set for special processing. Currently the recognised options 5547 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5548 * @return the case folding equivalent of the character, if any; otherwise the 5549 * character itself. 
5550 * @see #foldCase(String, boolean) 5551 * @stable ICU 2.6 5552 */ foldCase(int ch, int options)5553 public static int foldCase(int ch, int options) { 5554 return UCaseProps.INSTANCE.fold(ch, options); 5555 } 5556 5557 /** 5558 * {@icu} The given string is mapped to its case folding equivalent according to 5559 * UnicodeData.txt and CaseFolding.txt; if any character has no case 5560 * folding equivalent, the character itself is returned. 5561 * "Full", multiple-code point case folding mappings are returned here. 5562 * For "simple" single-code point mappings use the API 5563 * foldCase(int ch, boolean defaultmapping). 5564 * @param str the String to be converted 5565 * @param options A bit set for special processing. Currently the recognised options 5566 * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT 5567 * @return the case folding equivalent of the character, if any; otherwise the 5568 * character itself. 5569 * @see #foldCase(int, boolean) 5570 * @stable ICU 2.6 5571 */ foldCase(String str, int options)5572 public static final String foldCase(String str, int options) { 5573 return CaseMapImpl.fold(options, str); 5574 } 5575 5576 /** 5577 * {@icu} Returns the numeric value of a Han character. 5578 * 5579 * <p>This returns the value of Han 'numeric' code points, 5580 * including those for zero, ten, hundred, thousand, ten thousand, 5581 * and hundred million. 5582 * This includes both the standard and 'checkwriting' 5583 * characters, the 'big circle' zero character, and the standard 5584 * zero character. 5585 * 5586 * <p>Note: The Unicode Standard has numeric values for more 5587 * Han characters recognized by this method 5588 * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt), 5589 * and a {@link com.ibm.icu.text.NumberFormat} can be used with 5590 * a Chinese {@link com.ibm.icu.text.NumberingSystem}. 5591 * 5592 * @param ch code point to query 5593 * @return value if it is a Han 'numeric character,' otherwise return -1. 
5594 * @stable ICU 2.4 5595 */ getHanNumericValue(int ch)5596 public static int getHanNumericValue(int ch) 5597 { 5598 switch(ch) 5599 { 5600 case IDEOGRAPHIC_NUMBER_ZERO_ : 5601 case CJK_IDEOGRAPH_COMPLEX_ZERO_ : 5602 return 0; // Han Zero 5603 case CJK_IDEOGRAPH_FIRST_ : 5604 case CJK_IDEOGRAPH_COMPLEX_ONE_ : 5605 return 1; // Han One 5606 case CJK_IDEOGRAPH_SECOND_ : 5607 case CJK_IDEOGRAPH_COMPLEX_TWO_ : 5608 return 2; // Han Two 5609 case CJK_IDEOGRAPH_THIRD_ : 5610 case CJK_IDEOGRAPH_COMPLEX_THREE_ : 5611 return 3; // Han Three 5612 case CJK_IDEOGRAPH_FOURTH_ : 5613 case CJK_IDEOGRAPH_COMPLEX_FOUR_ : 5614 return 4; // Han Four 5615 case CJK_IDEOGRAPH_FIFTH_ : 5616 case CJK_IDEOGRAPH_COMPLEX_FIVE_ : 5617 return 5; // Han Five 5618 case CJK_IDEOGRAPH_SIXTH_ : 5619 case CJK_IDEOGRAPH_COMPLEX_SIX_ : 5620 return 6; // Han Six 5621 case CJK_IDEOGRAPH_SEVENTH_ : 5622 case CJK_IDEOGRAPH_COMPLEX_SEVEN_ : 5623 return 7; // Han Seven 5624 case CJK_IDEOGRAPH_EIGHTH_ : 5625 case CJK_IDEOGRAPH_COMPLEX_EIGHT_ : 5626 return 8; // Han Eight 5627 case CJK_IDEOGRAPH_NINETH_ : 5628 case CJK_IDEOGRAPH_COMPLEX_NINE_ : 5629 return 9; // Han Nine 5630 case CJK_IDEOGRAPH_TEN_ : 5631 case CJK_IDEOGRAPH_COMPLEX_TEN_ : 5632 return 10; 5633 case CJK_IDEOGRAPH_HUNDRED_ : 5634 case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ : 5635 return 100; 5636 case CJK_IDEOGRAPH_THOUSAND_ : 5637 case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ : 5638 return 1000; 5639 case CJK_IDEOGRAPH_TEN_THOUSAND_ : 5640 return 10000; 5641 case CJK_IDEOGRAPH_HUNDRED_MILLION_ : 5642 return 100000000; 5643 } 5644 return -1; // no value 5645 } 5646 5647 /** 5648 * {@icu} <p>Returns an iterator for character types, iterating over codepoints. 
5649 * <p>Example of use:<br> 5650 * <pre> 5651 * RangeValueIterator iterator = UCharacter.getTypeIterator(); 5652 * RangeValueIterator.Element element = new RangeValueIterator.Element(); 5653 * while (iterator.next(element)) { 5654 * System.out.println("Codepoint \\u" + 5655 * Integer.toHexString(element.start) + 5656 * " to codepoint \\u" + 5657 * Integer.toHexString(element.limit - 1) + 5658 * " has the character type " + 5659 * element.value); 5660 * } 5661 * </pre> 5662 * @return an iterator 5663 * @stable ICU 2.6 5664 */ getTypeIterator()5665 public static RangeValueIterator getTypeIterator() 5666 { 5667 return new UCharacterTypeIterator(); 5668 } 5669 5670 private static final class UCharacterTypeIterator implements RangeValueIterator { UCharacterTypeIterator()5671 UCharacterTypeIterator() { 5672 reset(); 5673 } 5674 5675 // implements RangeValueIterator 5676 @Override next(Element element)5677 public boolean next(Element element) { 5678 if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) { 5679 element.start=range.startCodePoint; 5680 element.limit=range.endCodePoint+1; 5681 element.value=range.value; 5682 return true; 5683 } else { 5684 return false; 5685 } 5686 } 5687 5688 // implements RangeValueIterator 5689 @Override reset()5690 public void reset() { 5691 trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE); 5692 } 5693 5694 private Iterator<Trie2.Range> trieIterator; 5695 private Trie2.Range range; 5696 5697 private static final class MaskType implements Trie2.ValueMapper { 5698 // Extracts the general category ("character type") from the trie value. 5699 @Override map(int value)5700 public int map(int value) { 5701 return value & UCharacterProperty.TYPE_MASK; 5702 } 5703 } 5704 private static final MaskType MASK_TYPE=new MaskType(); 5705 } 5706 5707 /** 5708 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 
5709 * <p>This API only gets the iterator for the modern, most up-to-date 5710 * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or 5711 * for extended names use getExtendedNameIterator(). 5712 * <p>Example of use:<br> 5713 * <pre> 5714 * ValueIterator iterator = UCharacter.getNameIterator(); 5715 * ValueIterator.Element element = new ValueIterator.Element(); 5716 * while (iterator.next(element)) { 5717 * System.out.println("Codepoint \\u" + 5718 * Integer.toHexString(element.codepoint) + 5719 * " has the name " + (String)element.value); 5720 * } 5721 * </pre> 5722 * <p>The maximal range which the name iterator iterates is from 5723 * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE. 5724 * @return an iterator 5725 * @stable ICU 2.6 5726 */ getNameIterator()5727 public static ValueIterator getNameIterator(){ 5728 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5729 UCharacterNameChoice.UNICODE_CHAR_NAME); 5730 } 5731 5732 /** 5733 * {@icu} Returns an empty iterator. 5734 * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints. 5735 * @return an empty iterator 5736 * @deprecated ICU 49 5737 * @see #getName1_0(int) 5738 */ 5739 @Deprecated getName1_0Iterator()5740 public static ValueIterator getName1_0Iterator(){ 5741 return new DummyValueIterator(); 5742 } 5743 5744 private static final class DummyValueIterator implements ValueIterator { 5745 @Override next(Element element)5746 public boolean next(Element element) { return false; } 5747 @Override reset()5748 public void reset() {} 5749 @Override setRange(int start, int limit)5750 public void setRange(int start, int limit) {} 5751 } 5752 5753 /** 5754 * {@icu} <p>Returns an iterator for character names, iterating over codepoints. 5755 * <p>This API only gets the iterator for the extended names. 5756 * For modern, most up-to-date Unicode names use getNameIterator() or 5757 * for older 1.0 Unicode names use get1_0NameIterator(). 
5758 * <p>Example of use:<br> 5759 * <pre> 5760 * ValueIterator iterator = UCharacter.getExtendedNameIterator(); 5761 * ValueIterator.Element element = new ValueIterator.Element(); 5762 * while (iterator.next(element)) { 5763 * System.out.println("Codepoint \\u" + 5764 * Integer.toHexString(element.codepoint) + 5765 * " has the name " + (String)element.value); 5766 * } 5767 * </pre> 5768 * <p>The maximal range which the name iterator iterates is from 5769 * @return an iterator 5770 * @stable ICU 2.6 5771 */ getExtendedNameIterator()5772 public static ValueIterator getExtendedNameIterator(){ 5773 return new UCharacterNameIterator(UCharacterName.INSTANCE, 5774 UCharacterNameChoice.EXTENDED_CHAR_NAME); 5775 } 5776 5777 /** 5778 * {@icu} Returns the "age" of the code point. 5779 * <p>The "age" is the Unicode version when the code point was first 5780 * designated (as a non-character or for Private Use) or assigned a 5781 * character. 5782 * <p>This can be useful to avoid emitting code points to receiving 5783 * processes that do not accept newer characters. 5784 * <p>The data is from the UCD file DerivedAge.txt. 5785 * @param ch The code point. 5786 * @return the Unicode version number 5787 * @stable ICU 2.6 5788 */ getAge(int ch)5789 public static VersionInfo getAge(int ch) 5790 { 5791 if (ch < MIN_VALUE || ch > MAX_VALUE) { 5792 throw new IllegalArgumentException("Codepoint out of bounds"); 5793 } 5794 return UCharacterProperty.INSTANCE.getAge(ch); 5795 } 5796 5797 /** 5798 * {@icu} Check a binary Unicode property for a code point. 5799 * <p>Unicode, especially in version 3.2, defines many more properties 5800 * than the original set in UnicodeData.txt. 5801 * <p>This API is intended to reflect Unicode properties as defined in 5802 * the Unicode Character Database (UCD) and Unicode Technical Reports 5803 * (UTR). 5804 * <p>For details about the properties see 5805 * <a href=http://www.unicode.org/>http://www.unicode.org/</a>. 
5806 * <p>For names of Unicode properties see the UCD file 5807 * PropertyAliases.txt. 5808 * <p>This API does not check the validity of the codepoint. 5809 * <p>Important: If ICU is built with UCD files from Unicode versions 5810 * below 3.2, then properties marked with "new" are not or 5811 * not fully available. 5812 * @param ch code point to test. 5813 * @param property selector constant from com.ibm.icu.lang.UProperty, 5814 * identifies which binary property to check. 5815 * @return true or false according to the binary Unicode property value 5816 * for ch. Also false if property is out of bounds or if the 5817 * Unicode version does not have data for the property at all, or 5818 * not for this code point. 5819 * @see com.ibm.icu.lang.UProperty 5820 * @see CharacterProperties#getBinaryPropertySet(int) 5821 * @stable ICU 2.6 5822 */ hasBinaryProperty(int ch, int property)5823 public static boolean hasBinaryProperty(int ch, int property) 5824 { 5825 return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property); 5826 } 5827 5828 /** 5829 * {@icu} <p>Check if a code point has the Alphabetic Unicode property. 5830 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC). 5831 * <p>Different from UCharacter.isLetter(ch)! 5832 * @stable ICU 2.6 5833 * @param ch codepoint to be tested 5834 */ isUAlphabetic(int ch)5835 public static boolean isUAlphabetic(int ch) 5836 { 5837 return hasBinaryProperty(ch, UProperty.ALPHABETIC); 5838 } 5839 5840 /** 5841 * {@icu} <p>Check if a code point has the Lowercase Unicode property. 5842 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE). 5843 * <p>This is different from UCharacter.isLowerCase(ch)! 5844 * @param ch codepoint to be tested 5845 * @stable ICU 2.6 5846 */ isULowercase(int ch)5847 public static boolean isULowercase(int ch) 5848 { 5849 return hasBinaryProperty(ch, UProperty.LOWERCASE); 5850 } 5851 5852 /** 5853 * {@icu} <p>Check if a code point has the Uppercase Unicode property. 
5854 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE). 5855 * <p>This is different from UCharacter.isUpperCase(ch)! 5856 * @param ch codepoint to be tested 5857 * @stable ICU 2.6 5858 */ isUUppercase(int ch)5859 public static boolean isUUppercase(int ch) 5860 { 5861 return hasBinaryProperty(ch, UProperty.UPPERCASE); 5862 } 5863 5864 /** 5865 * {@icu} <p>Check if a code point has the White_Space Unicode property. 5866 * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE). 5867 * <p>This is different from both UCharacter.isSpace(ch) and 5868 * UCharacter.isWhitespace(ch)! 5869 * @param ch codepoint to be tested 5870 * @stable ICU 2.6 5871 */ isUWhiteSpace(int ch)5872 public static boolean isUWhiteSpace(int ch) 5873 { 5874 return hasBinaryProperty(ch, UProperty.WHITE_SPACE); 5875 } 5876 5877 /** 5878 * {@icu} Returns the property value for a Unicode property type of a code point. 5879 * Also returns binary and mask property values. 5880 * <p>Unicode, especially in version 3.2, defines many more properties than 5881 * the original set in UnicodeData.txt. 5882 * <p>The properties APIs are intended to reflect Unicode properties as 5883 * defined in the Unicode Character Database (UCD) and Unicode Technical 5884 * Reports (UTR). For details about the properties see 5885 * http://www.unicode.org/. 5886 * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. 5887 * 5888 * <pre> 5889 * Sample usage: 5890 * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); 5891 * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); 5892 * boolean b = (ideo == 1) ? true : false; 5893 * </pre> 5894 * @param ch code point to test. 5895 * @param type UProperty selector constant, identifies which binary 5896 * property to check. 
Must be 5897 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5898 * UProperty.INT_START <= type < UProperty.INT_LIMIT or 5899 * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. 5900 * @return numeric value that is directly the property value or, 5901 * for enumerated properties, corresponds to the numeric value of 5902 * the enumerated constant of the respective property value type 5903 * ({@link ECharacterCategory}, {@link ECharacterDirection}, 5904 * {@link DecompositionType}, etc.). 5905 * Returns 0 or 1 (for false / true) for binary Unicode properties. 5906 * Returns a bit-mask for mask properties. 5907 * Returns 0 if 'type' is out of bounds or if the Unicode version 5908 * does not have data for the property at all, or not for this code 5909 * point. 5910 * @see UProperty 5911 * @see #hasBinaryProperty 5912 * @see #getIntPropertyMinValue 5913 * @see #getIntPropertyMaxValue 5914 * @see CharacterProperties#getIntPropertyMap(int) 5915 * @see #getUnicodeVersion 5916 * @stable ICU 2.4 5917 */ getIntPropertyValue(int ch, int type)5918 public static int getIntPropertyValue(int ch, int type) 5919 { 5920 return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type); 5921 } 5922 /** 5923 * {@icu} Returns a string version of the property value. 5924 * @param propertyEnum The property enum value. 5925 * @param codepoint The codepoint value. 5926 * @param nameChoice The choice of the name. 5927 * @return value as string 5928 * @internal 5929 * @deprecated This API is ICU internal only. 
5930 */ 5931 @Deprecated 5932 ///CLOVER:OFF getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5933 public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) { 5934 if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) || 5935 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) { 5936 return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum), 5937 nameChoice); 5938 } 5939 if (propertyEnum == UProperty.NUMERIC_VALUE) { 5940 return String.valueOf(getUnicodeNumericValue(codepoint)); 5941 } 5942 // otherwise must be string property 5943 switch (propertyEnum) { 5944 case UProperty.AGE: return getAge(codepoint).toString(); 5945 case UProperty.ISO_COMMENT: return getISOComment(codepoint); 5946 case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint)); 5947 case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true)); 5948 case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5949 case UProperty.NAME: return getName(codepoint); 5950 case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true)); 5951 case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint)); 5952 case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5953 case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5954 case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint)); 5955 case UProperty.UNICODE_1_NAME: return getName1_0(codepoint); 5956 case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint)); 5957 } 5958 throw new IllegalArgumentException("Illegal Property Enum"); 5959 } 5960 ///CLOVER:ON 5961 5962 /** 5963 * {@icu} Returns the minimum value for an integer/binary Unicode property type. 
5964 * Can be used together with UCharacter.getIntPropertyMaxValue(int) 5965 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 5966 * @param type UProperty selector constant, identifies which binary 5967 * property to check. Must be 5968 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 5969 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 5970 * @return Minimum value returned by UCharacter.getIntPropertyValue(int) 5971 * for a Unicode property. 0 if the property 5972 * selector 'type' is out of range. 5973 * @see UProperty 5974 * @see #hasBinaryProperty 5975 * @see #getUnicodeVersion 5976 * @see #getIntPropertyMaxValue 5977 * @see #getIntPropertyValue 5978 * @stable ICU 2.4 5979 */ getIntPropertyMinValue(int type)5980 public static int getIntPropertyMinValue(int type){ 5981 5982 return 0; // undefined; and: all other properties have a minimum value of 0 5983 } 5984 5985 5986 /** 5987 * {@icu} Returns the maximum value for an integer/binary Unicode property. 5988 * Can be used together with UCharacter.getIntPropertyMinValue(int) 5989 * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar. 5990 * Examples for min/max values (for Unicode 3.2): 5991 * <ul> 5992 * <li> UProperty.BIDI_CLASS: 0/18 5993 * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL) 5994 * <li> UProperty.SCRIPT: 0/45 (UScript.COMMON/UScript.TAGBANWA) 5995 * <li> UProperty.IDEOGRAPHIC: 0/1 (false/true) 5996 * </ul> 5997 * For undefined UProperty constant values, min/max values will be 0/-1. 5998 * @param type UProperty selector constant, identifies which binary 5999 * property to check. Must be 6000 * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or 6001 * UProperty.INT_START <= type < UProperty.INT_LIMIT. 6002 * @return Maximum value returned by u_getIntPropertyValue for a Unicode 6003 * property. <= 0 if the property selector 'type' is out of range. 
6004 * @see UProperty 6005 * @see #hasBinaryProperty 6006 * @see #getUnicodeVersion 6007 * @see #getIntPropertyMaxValue 6008 * @see #getIntPropertyValue 6009 * @stable ICU 2.4 6010 */ getIntPropertyMaxValue(int type)6011 public static int getIntPropertyMaxValue(int type) 6012 { 6013 return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type); 6014 } 6015 6016 /** 6017 * Provide the java.lang.Character forDigit API, for convenience. 6018 * @stable ICU 3.0 6019 */ forDigit(int digit, int radix)6020 public static char forDigit(int digit, int radix) { 6021 return java.lang.Character.forDigit(digit, radix); 6022 } 6023 6024 // JDK 1.5 API coverage 6025 6026 /** 6027 * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}. 6028 * 6029 * @stable ICU 3.0 6030 */ 6031 public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE; 6032 6033 /** 6034 * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}. 6035 * 6036 * @stable ICU 3.0 6037 */ 6038 public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE; 6039 6040 /** 6041 * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}. 6042 * 6043 * @stable ICU 3.0 6044 */ 6045 public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE; 6046 6047 /** 6048 * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}. 6049 * 6050 * @stable ICU 3.0 6051 */ 6052 public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE; 6053 6054 /** 6055 * Constant U+D800, same as {@link Character#MIN_SURROGATE}. 6056 * 6057 * @stable ICU 3.0 6058 */ 6059 public static final char MIN_SURROGATE = Character.MIN_SURROGATE; 6060 6061 /** 6062 * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}. 6063 * 6064 * @stable ICU 3.0 6065 */ 6066 public static final char MAX_SURROGATE = Character.MAX_SURROGATE; 6067 6068 /** 6069 * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}. 
6070 * 6071 * @stable ICU 3.0 6072 */ 6073 public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT; 6074 6075 /** 6076 * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}. 6077 * 6078 * @stable ICU 3.0 6079 */ 6080 public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT; 6081 6082 /** 6083 * Constant U+0000, same as {@link Character#MIN_CODE_POINT}. 6084 * 6085 * @stable ICU 3.0 6086 */ 6087 public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT; 6088 6089 /** 6090 * Equivalent to {@link Character#isValidCodePoint}. 6091 * 6092 * @param cp the code point to check 6093 * @return true if cp is a valid code point 6094 * @stable ICU 3.0 6095 */ isValidCodePoint(int cp)6096 public static final boolean isValidCodePoint(int cp) { 6097 return cp >= 0 && cp <= MAX_CODE_POINT; 6098 } 6099 6100 /** 6101 * Same as {@link Character#isSupplementaryCodePoint}. 6102 * 6103 * @param cp the code point to check 6104 * @return true if cp is a supplementary code point 6105 * @stable ICU 3.0 6106 */ isSupplementaryCodePoint(int cp)6107 public static final boolean isSupplementaryCodePoint(int cp) { 6108 return Character.isSupplementaryCodePoint(cp); 6109 } 6110 6111 /** 6112 * Same as {@link Character#isHighSurrogate}. 6113 * 6114 * @param ch the char to check 6115 * @return true if ch is a high (lead) surrogate 6116 * @stable ICU 3.0 6117 */ isHighSurrogate(char ch)6118 public static boolean isHighSurrogate(char ch) { 6119 return Character.isHighSurrogate(ch); 6120 } 6121 6122 /** 6123 * Same as {@link Character#isLowSurrogate}. 6124 * 6125 * @param ch the char to check 6126 * @return true if ch is a low (trail) surrogate 6127 * @stable ICU 3.0 6128 */ isLowSurrogate(char ch)6129 public static boolean isLowSurrogate(char ch) { 6130 return Character.isLowSurrogate(ch); 6131 } 6132 6133 /** 6134 * Same as {@link Character#isSurrogatePair}. 
6135 * 6136 * @param high the high (lead) char 6137 * @param low the low (trail) char 6138 * @return true if high, low form a surrogate pair 6139 * @stable ICU 3.0 6140 */ isSurrogatePair(char high, char low)6141 public static final boolean isSurrogatePair(char high, char low) { 6142 return Character.isSurrogatePair(high, low); 6143 } 6144 6145 /** 6146 * Same as {@link Character#charCount}. 6147 * Returns the number of chars needed to represent the code point (1 or 2). 6148 * This does not check the code point for validity. 6149 * 6150 * @param cp the code point to check 6151 * @return the number of chars needed to represent the code point 6152 * @stable ICU 3.0 6153 */ charCount(int cp)6154 public static int charCount(int cp) { 6155 return Character.charCount(cp); 6156 } 6157 6158 /** 6159 * Same as {@link Character#toCodePoint}. 6160 * Returns the code point represented by the two surrogate code units. 6161 * This does not check the surrogate pair for validity. 6162 * 6163 * @param high the high (lead) surrogate 6164 * @param low the low (trail) surrogate 6165 * @return the code point formed by the surrogate pair 6166 * @stable ICU 3.0 6167 */ toCodePoint(char high, char low)6168 public static final int toCodePoint(char high, char low) { 6169 return Character.toCodePoint(high, low); 6170 } 6171 6172 /** 6173 * Same as {@link Character#codePointAt(CharSequence, int)}. 6174 * Returns the code point at index. 6175 * This examines only the characters at index and index+1. 
6176 * 6177 * @param seq the characters to check 6178 * @param index the index of the first or only char forming the code point 6179 * @return the code point at the index 6180 * @stable ICU 3.0 6181 */ codePointAt(CharSequence seq, int index)6182 public static final int codePointAt(CharSequence seq, int index) { 6183 char c1 = seq.charAt(index++); 6184 if (isHighSurrogate(c1)) { 6185 if (index < seq.length()) { 6186 char c2 = seq.charAt(index); 6187 if (isLowSurrogate(c2)) { 6188 return toCodePoint(c1, c2); 6189 } 6190 } 6191 } 6192 return c1; 6193 } 6194 6195 /** 6196 * Same as {@link Character#codePointAt(char[], int)}. 6197 * Returns the code point at index. 6198 * This examines only the characters at index and index+1. 6199 * 6200 * @param text the characters to check 6201 * @param index the index of the first or only char forming the code point 6202 * @return the code point at the index 6203 * @stable ICU 3.0 6204 */ codePointAt(char[] text, int index)6205 public static final int codePointAt(char[] text, int index) { 6206 char c1 = text[index++]; 6207 if (isHighSurrogate(c1)) { 6208 if (index < text.length) { 6209 char c2 = text[index]; 6210 if (isLowSurrogate(c2)) { 6211 return toCodePoint(c1, c2); 6212 } 6213 } 6214 } 6215 return c1; 6216 } 6217 6218 /** 6219 * Same as {@link Character#codePointAt(char[], int, int)}. 6220 * Returns the code point at index. 6221 * This examines only the characters at index and index+1. 
6222 * 6223 * @param text the characters to check 6224 * @param index the index of the first or only char forming the code point 6225 * @param limit the limit of the valid text 6226 * @return the code point at the index 6227 * @stable ICU 3.0 6228 */ codePointAt(char[] text, int index, int limit)6229 public static final int codePointAt(char[] text, int index, int limit) { 6230 if (index >= limit || limit > text.length) { 6231 throw new IndexOutOfBoundsException(); 6232 } 6233 char c1 = text[index++]; 6234 if (isHighSurrogate(c1)) { 6235 if (index < limit) { 6236 char c2 = text[index]; 6237 if (isLowSurrogate(c2)) { 6238 return toCodePoint(c1, c2); 6239 } 6240 } 6241 } 6242 return c1; 6243 } 6244 6245 /** 6246 * Same as {@link Character#codePointBefore(CharSequence, int)}. 6247 * Return the code point before index. 6248 * This examines only the characters at index-1 and index-2. 6249 * 6250 * @param seq the characters to check 6251 * @param index the index after the last or only char forming the code point 6252 * @return the code point before the index 6253 * @stable ICU 3.0 6254 */ codePointBefore(CharSequence seq, int index)6255 public static final int codePointBefore(CharSequence seq, int index) { 6256 char c2 = seq.charAt(--index); 6257 if (isLowSurrogate(c2)) { 6258 if (index > 0) { 6259 char c1 = seq.charAt(--index); 6260 if (isHighSurrogate(c1)) { 6261 return toCodePoint(c1, c2); 6262 } 6263 } 6264 } 6265 return c2; 6266 } 6267 6268 /** 6269 * Same as {@link Character#codePointBefore(char[], int)}. 6270 * Returns the code point before index. 6271 * This examines only the characters at index-1 and index-2. 
6272 * 6273 * @param text the characters to check 6274 * @param index the index after the last or only char forming the code point 6275 * @return the code point before the index 6276 * @stable ICU 3.0 6277 */ codePointBefore(char[] text, int index)6278 public static final int codePointBefore(char[] text, int index) { 6279 char c2 = text[--index]; 6280 if (isLowSurrogate(c2)) { 6281 if (index > 0) { 6282 char c1 = text[--index]; 6283 if (isHighSurrogate(c1)) { 6284 return toCodePoint(c1, c2); 6285 } 6286 } 6287 } 6288 return c2; 6289 } 6290 6291 /** 6292 * Same as {@link Character#codePointBefore(char[], int, int)}. 6293 * Return the code point before index. 6294 * This examines only the characters at index-1 and index-2. 6295 * 6296 * @param text the characters to check 6297 * @param index the index after the last or only char forming the code point 6298 * @param limit the start of the valid text 6299 * @return the code point before the index 6300 * @stable ICU 3.0 6301 */ codePointBefore(char[] text, int index, int limit)6302 public static final int codePointBefore(char[] text, int index, int limit) { 6303 if (index <= limit || limit < 0) { 6304 throw new IndexOutOfBoundsException(); 6305 } 6306 char c2 = text[--index]; 6307 if (isLowSurrogate(c2)) { 6308 if (index > limit) { 6309 char c1 = text[--index]; 6310 if (isHighSurrogate(c1)) { 6311 return toCodePoint(c1, c2); 6312 } 6313 } 6314 } 6315 return c2; 6316 } 6317 6318 /** 6319 * Same as {@link Character#toChars(int, char[], int)}. 6320 * Writes the chars representing the 6321 * code point into the destination at the given index. 
6322 * 6323 * @param cp the code point to convert 6324 * @param dst the destination array into which to put the char(s) representing the code point 6325 * @param dstIndex the index at which to put the first (or only) char 6326 * @return the count of the number of chars written (1 or 2) 6327 * @throws IllegalArgumentException if cp is not a valid code point 6328 * @stable ICU 3.0 6329 */ toChars(int cp, char[] dst, int dstIndex)6330 public static final int toChars(int cp, char[] dst, int dstIndex) { 6331 return Character.toChars(cp, dst, dstIndex); 6332 } 6333 6334 /** 6335 * Same as {@link Character#toChars(int)}. 6336 * Returns a char array representing the code point. 6337 * 6338 * @param cp the code point to convert 6339 * @return an array containing the char(s) representing the code point 6340 * @throws IllegalArgumentException if cp is not a valid code point 6341 * @stable ICU 3.0 6342 */ toChars(int cp)6343 public static final char[] toChars(int cp) { 6344 return Character.toChars(cp); 6345 } 6346 6347 /** 6348 * Equivalent to the {@link Character#getDirectionality(char)} method, for 6349 * convenience. Returns a byte representing the directionality of the 6350 * character. 6351 * 6352 * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns 6353 * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters. 6354 * 6355 * {@icunote} The return value must be tested using the constants defined in {@link 6356 * UCharacterDirection} and its interface {@link 6357 * UCharacterEnums.ECharacterDirection} since the values are different from the ones 6358 * defined by <code>java.lang.Character</code>. 
6359 * @param cp the code point to check 6360 * @return the directionality of the code point 6361 * @see #getDirection 6362 * @stable ICU 3.0 6363 */ getDirectionality(int cp)6364 public static byte getDirectionality(int cp) 6365 { 6366 return (byte)getDirection(cp); 6367 } 6368 6369 /** 6370 * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)} 6371 * method, for convenience. Counts the number of code points in the range 6372 * of text. 6373 * @param text the characters to check 6374 * @param start the start of the range 6375 * @param limit the limit of the range 6376 * @return the number of code points in the range 6377 * @stable ICU 3.0 6378 */ codePointCount(CharSequence text, int start, int limit)6379 public static int codePointCount(CharSequence text, int start, int limit) { 6380 if (start < 0 || limit < start || limit > text.length()) { 6381 throw new IndexOutOfBoundsException("start (" + start + 6382 ") or limit (" + limit + 6383 ") invalid or out of range 0, " + text.length()); 6384 } 6385 6386 int len = limit - start; 6387 while (limit > start) { 6388 char ch = text.charAt(--limit); 6389 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6390 ch = text.charAt(--limit); 6391 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6392 --len; 6393 break; 6394 } 6395 } 6396 } 6397 return len; 6398 } 6399 6400 /** 6401 * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for 6402 * convenience. Counts the number of code points in the range of text. 
6403 * @param text the characters to check 6404 * @param start the start of the range 6405 * @param limit the limit of the range 6406 * @return the number of code points in the range 6407 * @stable ICU 3.0 6408 */ codePointCount(char[] text, int start, int limit)6409 public static int codePointCount(char[] text, int start, int limit) { 6410 if (start < 0 || limit < start || limit > text.length) { 6411 throw new IndexOutOfBoundsException("start (" + start + 6412 ") or limit (" + limit + 6413 ") invalid or out of range 0, " + text.length); 6414 } 6415 6416 int len = limit - start; 6417 while (limit > start) { 6418 char ch = text[--limit]; 6419 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) { 6420 ch = text[--limit]; 6421 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) { 6422 --len; 6423 break; 6424 } 6425 } 6426 } 6427 return len; 6428 } 6429 6430 /** 6431 * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)} 6432 * method, for convenience. Adjusts the char index by a code point offset. 
6433 * @param text the characters to check 6434 * @param index the index to adjust 6435 * @param codePointOffset the number of code points by which to offset the index 6436 * @return the adjusted index 6437 * @stable ICU 3.0 6438 */ offsetByCodePoints(CharSequence text, int index, int codePointOffset)6439 public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) { 6440 if (index < 0 || index > text.length()) { 6441 throw new IndexOutOfBoundsException("index ( " + index + 6442 ") out of range 0, " + text.length()); 6443 } 6444 6445 if (codePointOffset < 0) { 6446 while (++codePointOffset <= 0) { 6447 char ch = text.charAt(--index); 6448 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) { 6449 ch = text.charAt(--index); 6450 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6451 if (++codePointOffset > 0) { 6452 return index+1; 6453 } 6454 } 6455 } 6456 } 6457 } else { 6458 int limit = text.length(); 6459 while (--codePointOffset >= 0) { 6460 char ch = text.charAt(index++); 6461 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6462 ch = text.charAt(index++); 6463 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6464 if (--codePointOffset < 0) { 6465 return index-1; 6466 } 6467 } 6468 } 6469 } 6470 } 6471 6472 return index; 6473 } 6474 6475 /** 6476 * Equivalent to the 6477 * {@link Character#offsetByCodePoints(char[], int, int, int, int)} 6478 * method, for convenience. Adjusts the char index by a code point offset. 
6479 * @param text the characters to check 6480 * @param start the start of the range to check 6481 * @param count the length of the range to check 6482 * @param index the index to adjust 6483 * @param codePointOffset the number of code points by which to offset the index 6484 * @return the adjusted index 6485 * @stable ICU 3.0 6486 */ offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6487 public static int offsetByCodePoints(char[] text, int start, int count, int index, 6488 int codePointOffset) { 6489 int limit = start + count; 6490 if (start < 0 || limit < start || limit > text.length || index < start || index > limit) { 6491 throw new IndexOutOfBoundsException("index ( " + index + 6492 ") out of range " + start + 6493 ", " + limit + 6494 " in array 0, " + text.length); 6495 } 6496 6497 if (codePointOffset < 0) { 6498 while (++codePointOffset <= 0) { 6499 char ch = text[--index]; 6500 if (index < start) { 6501 throw new IndexOutOfBoundsException("index ( " + index + 6502 ") < start (" + start + 6503 ")"); 6504 } 6505 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) { 6506 ch = text[--index]; 6507 if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) { 6508 if (++codePointOffset > 0) { 6509 return index+1; 6510 } 6511 } 6512 } 6513 } 6514 } else { 6515 while (--codePointOffset >= 0) { 6516 char ch = text[index++]; 6517 if (index > limit) { 6518 throw new IndexOutOfBoundsException("index ( " + index + 6519 ") > limit (" + limit + 6520 ")"); 6521 } 6522 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) { 6523 ch = text[index++]; 6524 if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) { 6525 if (--codePointOffset < 0) { 6526 return index-1; 6527 } 6528 } 6529 } 6530 } 6531 } 6532 6533 return index; 6534 } 6535 6536 // private variables ------------------------------------------------- 6537 6538 /** 6539 * To get the last character out from a data type 6540 */ 6541 private 
static final int LAST_CHAR_MASK_ = 0xFFFF;

    //    /**
    //     * To get the last byte out from a data type
    //     */
    //    private static final int LAST_BYTE_MASK_ = 0xFF;
    //
    //    /**
    //     * Shift 16 bits
    //     */
    //    private static final int SHIFT_16_ = 16;
    //
    //    /**
    //     * Shift 24 bits
    //     */
    //    private static final int SHIFT_24_ = 24;
    //
    //    /**
    //     * Decimal radix
    //     */
    //    private static final int DECIMAL_RADIX_ = 10;

    /**
     * No break space code point
     */
    private static final int NO_BREAK_SPACE_ = 0xA0;

    /**
     * Figure space code point
     */
    private static final int FIGURE_SPACE_ = 0x2007;

    /**
     * Narrow no break space code point
     */
    private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

    /**
     * Ideographic number zero code point
     */
    private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;

    /**
     * CJK Ideograph, First code point
     */
    private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;

    /**
     * CJK Ideograph, Second code point
     */
    private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;

    /**
     * CJK Ideograph, Third code point
     */
    private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;

    /**
     * CJK Ideograph, Fourth code point
     */
    private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;

    /**
     * CJK Ideograph, Fifth code point
     */
    private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;

    /**
     * CJK Ideograph, Sixth code point
     */
    private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;

    /**
     * CJK Ideograph, Seventh code point
     */
    private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;

    /**
     * CJK Ideograph, Eighth code point
     */
    private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;

    /**
     * CJK Ideograph, Ninth code point
     */
    private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

    /**
     * Application Program command code point
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator code point
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete code point
     */
    private static final int DELETE_ = 0x007F;

    /**
     * Han digit characters
     */
    private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
    private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
    private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
    private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
    private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
    private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
    private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
    private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
    private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
    private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
    private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
    private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
    private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
    private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
    private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
    private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
    // The ten-thousand ideograph is U+842C. The previous value 0x824c had its
    // middle hex digits transposed and named an unrelated, non-numeric ideograph.
    private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x842c;
    private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;

    // private constructor -----------------------------------------------
    ///CLOVER:OFF
    /**
     * Private constructor to prevent instantiation
     */
    private UCharacter()
    {
    }
    ///CLOVER:ON
}