1 /* 2 * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import dalvik.annotation.optimization.FastNative; 29 // Android-removed: CDS is not used on Android. 
30 // import jdk.internal.misc.CDS; 31 import jdk.internal.vm.annotation.IntrinsicCandidate; 32 import jdk.internal.vm.annotation.Stable; 33 34 import java.util.Arrays; 35 import java.util.HashMap; 36 import java.util.Locale; 37 import java.util.Map; 38 import java.util.Objects; 39 import java.util.Optional; 40 41 import java.lang.constant.Constable; 42 import java.lang.constant.DynamicConstantDesc; 43 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST; 44 import static java.lang.constant.ConstantDescs.CD_char; 45 import static java.lang.constant.ConstantDescs.DEFAULT_NAME; 46 47 import android.icu.lang.UProperty; 48 49 import libcore.icu.ICU; 50 51 // Android-changed: Remove reference to a specific unicode standard version 52 /** 53 * The {@code Character} class wraps a value of the primitive 54 * type {@code char} in an object. An object of class 55 * {@code Character} contains a single field whose type is 56 * {@code char}. 57 * <p> 58 * In addition, this class provides several methods for determining 59 * a character's category (lowercase letter, digit, etc.) and for converting 60 * characters from uppercase to lowercase and vice versa. 61 * <p> 62 * Character information is based on the Unicode Standard 63 * <p> 64 * The methods and data of class {@code Character} are defined by 65 * the information in the <i>UnicodeData</i> file that is part of the 66 * Unicode Character Database maintained by the Unicode 67 * Consortium. This file specifies various properties including name 68 * and general category for every defined Unicode code point or 69 * character range. 
70 * <p> 71 * The file and its description are available from the Unicode Consortium at: 72 * <ul> 73 * <li><a href="http://www.unicode.org">http://www.unicode.org</a> 74 * </ul> 75 * 76 * <h2><a id="conformance">Unicode Conformance</a></h2> 77 * <p> 78 * The fields and methods of class {@code Character} are defined in terms 79 * of character information from the Unicode Standard, specifically the 80 * <i>UnicodeData</i> file that is part of the Unicode Character Database. 81 * This file specifies properties including name and category for every 82 * assigned Unicode code point or character range. The file is available 83 * from the Unicode Consortium at 84 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 85 * <p> 86 * Character information is based on the Unicode Standard, version 15.0. 87 * <p> 88 * The Java platform has supported different versions of the Unicode 89 * Standard over time. Upgrades to newer versions of the Unicode Standard 90 * occurred in the following Java releases, each indicating the new version: 91 * <table class="striped"> 92 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption> 93 * <thead> 94 * <tr><th scope="col">Java release</th> 95 * <th scope="col">Unicode version</th></tr> 96 * </thead> 97 * <tbody> 98 * <tr><th scope="row" style="text-align:left">Java SE 20</th> 99 * <td>Unicode 15.0</td></tr> 100 * <tr><th scope="row" style="text-align:left">Java SE 19</th> 101 * <td>Unicode 14.0</td></tr> 102 * <tr><th scope="row" style="text-align:left">Java SE 15</th> 103 * <td>Unicode 13.0</td></tr> 104 * <tr><th scope="row" style="text-align:left">Java SE 13</th> 105 * <td>Unicode 12.1</td></tr> 106 * <tr><th scope="row" style="text-align:left">Java SE 12</th> 107 * <td>Unicode 11.0</td></tr> 108 * <tr><th scope="row" style="text-align:left">Java SE 11</th> 109 * <td>Unicode 10.0</td></tr> 110 * <tr><th scope="row" style="text-align:left">Java SE 9</th> 111 * <td>Unicode 8.0</td></tr> 112 * 
<tr><th scope="row" style="text-align:left">Java SE 8</th> 113 * <td>Unicode 6.2</td></tr> 114 * <tr><th scope="row" style="text-align:left">Java SE 7</th> 115 * <td>Unicode 6.0</td></tr> 116 * <tr><th scope="row" style="text-align:left">Java SE 5.0</th> 117 * <td>Unicode 4.0</td></tr> 118 * <tr><th scope="row" style="text-align:left">Java SE 1.4</th> 119 * <td>Unicode 3.0</td></tr> 120 * <tr><th scope="row" style="text-align:left">JDK 1.1</th> 121 * <td>Unicode 2.0</td></tr> 122 * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th> 123 * <td>Unicode 1.1.5</td></tr> 124 * </tbody> 125 * </table> 126 * Variations from these base Unicode versions, such as recognized appendixes, 127 * are documented elsewhere. 128 * <h2><a id="unicode">Unicode Character Representations</a></h2> 129 * 130 * <p>The {@code char} data type (and therefore the value that a 131 * {@code Character} object encapsulates) are based on the 132 * original Unicode specification, which defined characters as 133 * fixed-width 16-bit entities. The Unicode Standard has since been 134 * changed to allow for characters whose representation requires more 135 * than 16 bits. The range of legal <em>code point</em>s is now 136 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 137 * (Refer to the <a 138 * href="http://www.unicode.org/reports/tr27/#notation"><i> 139 * definition</i></a> of the U+<i>n</i> notation in the Unicode 140 * Standard.) 141 * 142 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 143 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 144 * <a id="supplementary">Characters</a> whose code points are greater 145 * than U+FFFF are called <em>supplementary character</em>s. The Java 146 * platform uses the UTF-16 representation in {@code char} arrays and 147 * in the {@code String} and {@code StringBuffer} classes. 
In 148 * this representation, supplementary characters are represented as a pair 149 * of {@code char} values, the first from the <em>high-surrogates</em> 150 * range, (\uD800-\uDBFF), the second from the 151 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 152 * 153 * <p>A {@code char} value, therefore, represents Basic 154 * Multilingual Plane (BMP) code points, including the surrogate 155 * code points, or code units of the UTF-16 encoding. An 156 * {@code int} value represents all Unicode code points, 157 * including supplementary code points. The lower (least significant) 158 * 21 bits of {@code int} are used to represent Unicode code 159 * points and the upper (most significant) 11 bits must be zero. 160 * Unless otherwise specified, the behavior with respect to 161 * supplementary characters and surrogate {@code char} values is 162 * as follows: 163 * 164 * <ul> 165 * <li>The methods that only accept a {@code char} value cannot support 166 * supplementary characters. They treat {@code char} values from the 167 * surrogate ranges as undefined characters. For example, 168 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 169 * this specific value if followed by any low-surrogate value in a string 170 * would represent a letter. 171 * 172 * <li>The methods that accept an {@code int} value support all 173 * Unicode characters, including supplementary characters. For 174 * example, {@code Character.isLetter(0x2F81A)} returns 175 * {@code true} because the code point value represents a letter 176 * (a CJK ideograph). 177 * </ul> 178 * 179 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 180 * used for character values in the range between U+0000 and U+10FFFF, 181 * and <em>Unicode code unit</em> is used for 16-bit 182 * {@code char} values that are code units of the <em>UTF-16</em> 183 * encoding. 
For more information on Unicode terminology, refer to the 184 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 185 * 186 * <!-- Android-removed: paragraph on ValueBased 187 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a> 188 * class; programmers should treat instances that are 189 * {@linkplain #equals(Object) equal} as interchangeable and should not 190 * use instances for synchronization, or unpredictable behavior may 191 * occur. For example, in a future release, synchronization may fail. 192 * --> 193 * 194 * @spec https://www.unicode.org/reports/tr27 Unicode 3.1.0 195 * @author Lee Boynton 196 * @author Guy Steele 197 * @author Akira Tanaka 198 * @author Martin Buchholz 199 * @author Ulf Zibis 200 * @since 1.0 201 */ 202 @jdk.internal.ValueBased 203 public final 204 class Character implements java.io.Serializable, Comparable<Character>, Constable { 205 /** 206 * The minimum radix available for conversion to and from strings. 207 * The constant value of this field is the smallest value permitted 208 * for the radix argument in radix-conversion methods such as the 209 * {@code digit} method, the {@code forDigit} method, and the 210 * {@code toString} method of class {@code Integer}. 211 * 212 * @see Character#digit(char, int) 213 * @see Character#forDigit(int, int) 214 * @see Integer#toString(int, int) 215 * @see Integer#valueOf(String) 216 */ 217 public static final int MIN_RADIX = 2; 218 219 /** 220 * The maximum radix available for conversion to and from strings. 221 * The constant value of this field is the largest value permitted 222 * for the radix argument in radix-conversion methods such as the 223 * {@code digit} method, the {@code forDigit} method, and the 224 * {@code toString} method of class {@code Integer}. 
225 * 226 * @see Character#digit(char, int) 227 * @see Character#forDigit(int, int) 228 * @see Integer#toString(int, int) 229 * @see Integer#valueOf(String) 230 */ 231 public static final int MAX_RADIX = 36; 232 233 /** 234 * The constant value of this field is the smallest value of type 235 * {@code char}, {@code '\u005Cu0000'}. 236 * 237 * @since 1.0.2 238 */ 239 public static final char MIN_VALUE = '\u0000'; 240 241 /** 242 * The constant value of this field is the largest value of type 243 * {@code char}, {@code '\u005CuFFFF'}. 244 * 245 * @since 1.0.2 246 */ 247 public static final char MAX_VALUE = '\uFFFF'; 248 249 /** 250 * The {@code Class} instance representing the primitive type 251 * {@code char}. 252 * 253 * @since 1.1 254 */ 255 @SuppressWarnings("unchecked") 256 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char"); 257 258 /* 259 * Normative general types 260 */ 261 262 /* 263 * General character types 264 */ 265 266 /** 267 * General category "Cn" in the Unicode specification. 268 * @since 1.1 269 */ 270 public static final byte UNASSIGNED = 0; 271 272 /** 273 * General category "Lu" in the Unicode specification. 274 * @since 1.1 275 */ 276 public static final byte UPPERCASE_LETTER = 1; 277 278 /** 279 * General category "Ll" in the Unicode specification. 280 * @since 1.1 281 */ 282 public static final byte LOWERCASE_LETTER = 2; 283 284 /** 285 * General category "Lt" in the Unicode specification. 286 * @since 1.1 287 */ 288 public static final byte TITLECASE_LETTER = 3; 289 290 /** 291 * General category "Lm" in the Unicode specification. 292 * @since 1.1 293 */ 294 public static final byte MODIFIER_LETTER = 4; 295 296 /** 297 * General category "Lo" in the Unicode specification. 298 * @since 1.1 299 */ 300 public static final byte OTHER_LETTER = 5; 301 302 /** 303 * General category "Mn" in the Unicode specification. 
304 * @since 1.1 305 */ 306 public static final byte NON_SPACING_MARK = 6; 307 308 /** 309 * General category "Me" in the Unicode specification. 310 * @since 1.1 311 */ 312 public static final byte ENCLOSING_MARK = 7; 313 314 /** 315 * General category "Mc" in the Unicode specification. 316 * @since 1.1 317 */ 318 public static final byte COMBINING_SPACING_MARK = 8; 319 320 /** 321 * General category "Nd" in the Unicode specification. 322 * @since 1.1 323 */ 324 public static final byte DECIMAL_DIGIT_NUMBER = 9; 325 326 /** 327 * General category "Nl" in the Unicode specification. 328 * @since 1.1 329 */ 330 public static final byte LETTER_NUMBER = 10; 331 332 /** 333 * General category "No" in the Unicode specification. 334 * @since 1.1 335 */ 336 public static final byte OTHER_NUMBER = 11; 337 338 /** 339 * General category "Zs" in the Unicode specification. 340 * @since 1.1 341 */ 342 public static final byte SPACE_SEPARATOR = 12; 343 344 /** 345 * General category "Zl" in the Unicode specification. 346 * @since 1.1 347 */ 348 public static final byte LINE_SEPARATOR = 13; 349 350 /** 351 * General category "Zp" in the Unicode specification. 352 * @since 1.1 353 */ 354 public static final byte PARAGRAPH_SEPARATOR = 14; 355 356 /** 357 * General category "Cc" in the Unicode specification. 358 * @since 1.1 359 */ 360 public static final byte CONTROL = 15; 361 362 /** 363 * General category "Cf" in the Unicode specification. 364 * @since 1.1 365 */ 366 public static final byte FORMAT = 16; 367 368 /** 369 * General category "Co" in the Unicode specification. 370 * @since 1.1 371 */ 372 public static final byte PRIVATE_USE = 18; 373 374 /** 375 * General category "Cs" in the Unicode specification. 376 * @since 1.1 377 */ 378 public static final byte SURROGATE = 19; 379 380 /** 381 * General category "Pd" in the Unicode specification. 
382 * @since 1.1 383 */ 384 public static final byte DASH_PUNCTUATION = 20; 385 386 /** 387 * General category "Ps" in the Unicode specification. 388 * @since 1.1 389 */ 390 public static final byte START_PUNCTUATION = 21; 391 392 /** 393 * General category "Pe" in the Unicode specification. 394 * @since 1.1 395 */ 396 public static final byte END_PUNCTUATION = 22; 397 398 /** 399 * General category "Pc" in the Unicode specification. 400 * @since 1.1 401 */ 402 public static final byte CONNECTOR_PUNCTUATION = 23; 403 404 /** 405 * General category "Po" in the Unicode specification. 406 * @since 1.1 407 */ 408 public static final byte OTHER_PUNCTUATION = 24; 409 410 /** 411 * General category "Sm" in the Unicode specification. 412 * @since 1.1 413 */ 414 public static final byte MATH_SYMBOL = 25; 415 416 /** 417 * General category "Sc" in the Unicode specification. 418 * @since 1.1 419 */ 420 public static final byte CURRENCY_SYMBOL = 26; 421 422 /** 423 * General category "Sk" in the Unicode specification. 424 * @since 1.1 425 */ 426 public static final byte MODIFIER_SYMBOL = 27; 427 428 /** 429 * General category "So" in the Unicode specification. 430 * @since 1.1 431 */ 432 public static final byte OTHER_SYMBOL = 28; 433 434 /** 435 * General category "Pi" in the Unicode specification. 436 * @since 1.4 437 */ 438 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 439 440 /** 441 * General category "Pf" in the Unicode specification. 442 * @since 1.4 443 */ 444 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 445 446 /** 447 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 448 */ 449 static final int ERROR = 0xFFFFFFFF; 450 451 452 /** 453 * Undefined bidirectional character type. Undefined {@code char} 454 * values have undefined directionality in the Unicode specification. 
455 * @since 1.4 456 */ 457 public static final byte DIRECTIONALITY_UNDEFINED = -1; 458 459 /** 460 * Strong bidirectional character type "L" in the Unicode specification. 461 * @since 1.4 462 */ 463 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 464 465 /** 466 * Strong bidirectional character type "R" in the Unicode specification. 467 * @since 1.4 468 */ 469 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 470 471 /** 472 * Strong bidirectional character type "AL" in the Unicode specification. 473 * @since 1.4 474 */ 475 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 476 477 /** 478 * Weak bidirectional character type "EN" in the Unicode specification. 479 * @since 1.4 480 */ 481 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 482 483 /** 484 * Weak bidirectional character type "ES" in the Unicode specification. 485 * @since 1.4 486 */ 487 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 488 489 /** 490 * Weak bidirectional character type "ET" in the Unicode specification. 491 * @since 1.4 492 */ 493 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 494 495 /** 496 * Weak bidirectional character type "AN" in the Unicode specification. 497 * @since 1.4 498 */ 499 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 500 501 /** 502 * Weak bidirectional character type "CS" in the Unicode specification. 503 * @since 1.4 504 */ 505 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 506 507 /** 508 * Weak bidirectional character type "NSM" in the Unicode specification. 509 * @since 1.4 510 */ 511 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 512 513 /** 514 * Weak bidirectional character type "BN" in the Unicode specification. 515 * @since 1.4 516 */ 517 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 518 519 /** 520 * Neutral bidirectional character type "B" in the Unicode specification. 
521 * @since 1.4 522 */ 523 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 524 525 /** 526 * Neutral bidirectional character type "S" in the Unicode specification. 527 * @since 1.4 528 */ 529 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 530 531 /** 532 * Neutral bidirectional character type "WS" in the Unicode specification. 533 * @since 1.4 534 */ 535 public static final byte DIRECTIONALITY_WHITESPACE = 12; 536 537 /** 538 * Neutral bidirectional character type "ON" in the Unicode specification. 539 * @since 1.4 540 */ 541 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 542 543 /** 544 * Strong bidirectional character type "LRE" in the Unicode specification. 545 * @since 1.4 546 */ 547 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 548 549 /** 550 * Strong bidirectional character type "LRO" in the Unicode specification. 551 * @since 1.4 552 */ 553 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 554 555 /** 556 * Strong bidirectional character type "RLE" in the Unicode specification. 557 * @since 1.4 558 */ 559 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 560 561 /** 562 * Strong bidirectional character type "RLO" in the Unicode specification. 563 * @since 1.4 564 */ 565 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 566 567 /** 568 * Weak bidirectional character type "PDF" in the Unicode specification. 569 * @since 1.4 570 */ 571 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 572 573 /** 574 * Weak bidirectional character type "LRI" in the Unicode specification. 575 * @since 9 576 */ 577 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 578 579 /** 580 * Weak bidirectional character type "RLI" in the Unicode specification. 
581 * @since 9 582 */ 583 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 584 585 /** 586 * Weak bidirectional character type "FSI" in the Unicode specification. 587 * @since 9 588 */ 589 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 590 591 /** 592 * Weak bidirectional character type "PDI" in the Unicode specification. 593 * @since 9 594 */ 595 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 596 597 /** 598 * The minimum value of a 599 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 600 * Unicode high-surrogate code unit</a> 601 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 602 * A high-surrogate is also known as a <i>leading-surrogate</i>. 603 * 604 * @since 1.5 605 */ 606 public static final char MIN_HIGH_SURROGATE = '\uD800'; 607 608 /** 609 * The maximum value of a 610 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 611 * Unicode high-surrogate code unit</a> 612 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 613 * A high-surrogate is also known as a <i>leading-surrogate</i>. 614 * 615 * @since 1.5 616 */ 617 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 618 619 /** 620 * The minimum value of a 621 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 622 * Unicode low-surrogate code unit</a> 623 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 624 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 625 * 626 * @since 1.5 627 */ 628 public static final char MIN_LOW_SURROGATE = '\uDC00'; 629 630 /** 631 * The maximum value of a 632 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 633 * Unicode low-surrogate code unit</a> 634 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 635 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 
636 * 637 * @since 1.5 638 */ 639 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 640 641 /** 642 * The minimum value of a Unicode surrogate code unit in the 643 * UTF-16 encoding, constant {@code '\u005CuD800'}. 644 * 645 * @since 1.5 646 */ 647 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 648 649 /** 650 * The maximum value of a Unicode surrogate code unit in the 651 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 652 * 653 * @since 1.5 654 */ 655 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 656 657 /** 658 * The minimum value of a 659 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 660 * Unicode supplementary code point</a>, constant {@code U+10000}. 661 * 662 * @since 1.5 663 */ 664 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 665 666 /** 667 * The minimum value of a 668 * <a href="http://www.unicode.org/glossary/#code_point"> 669 * Unicode code point</a>, constant {@code U+0000}. 670 * 671 * @since 1.5 672 */ 673 public static final int MIN_CODE_POINT = 0x000000; 674 675 /** 676 * The maximum value of a 677 * <a href="http://www.unicode.org/glossary/#code_point"> 678 * Unicode code point</a>, constant {@code U+10FFFF}. 679 * 680 * @since 1.5 681 */ 682 public static final int MAX_CODE_POINT = 0X10FFFF; 683 684 // BEGIN Android-added: Use ICU. 685 // The indices in byte[] DIRECTIONALITY are based on icu4c's u_charDirection(), 686 // accessed via getDirectionalityImpl(), implemented in Character.cpp. 
687 private static final byte[] DIRECTIONALITY = new byte[] { 688 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 689 DIRECTIONALITY_EUROPEAN_NUMBER, 690 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 691 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 692 DIRECTIONALITY_ARABIC_NUMBER, 693 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 694 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 695 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 696 DIRECTIONALITY_OTHER_NEUTRALS, 697 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 698 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 699 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 700 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 701 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 702 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 703 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 704 // END Android-added: Use ICU. 705 706 /** 707 * Returns an {@link Optional} containing the nominal descriptor for this 708 * instance. 709 * 710 * @return an {@link Optional} describing the {@linkplain Character} instance 711 * @since 15 712 * @hide 713 */ 714 @Override describeConstable()715 public Optional<DynamicConstantDesc<Character>> describeConstable() { 716 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value)); 717 } 718 719 /** 720 * Instances of this class represent particular subsets of the Unicode 721 * character set. The only family of subsets defined in the 722 * {@code Character} class is {@link Character.UnicodeBlock}. 723 * Other portions of the Java API may define other subsets for their 724 * own purposes. 725 * 726 * @since 1.2 727 */ 728 public static class Subset { 729 730 private String name; 731 732 /** 733 * Constructs a new {@code Subset} instance. 
734 * 735 * @param name The name of this subset 736 * @throws NullPointerException if name is {@code null} 737 */ Subset(String name)738 protected Subset(String name) { 739 if (name == null) { 740 throw new NullPointerException("name"); 741 } 742 this.name = name; 743 } 744 745 /** 746 * Compares two {@code Subset} objects for equality. 747 * This method returns {@code true} if and only if 748 * {@code this} and the argument refer to the same 749 * object; since this method is {@code final}, this 750 * guarantee holds for all subclasses. 751 */ equals(Object obj)752 public final boolean equals(Object obj) { 753 return (this == obj); 754 } 755 756 /** 757 * Returns the standard hash code as defined by the 758 * {@link Object#hashCode} method. This method 759 * is {@code final} in order to ensure that the 760 * {@code equals} and {@code hashCode} methods will 761 * be consistent in all subclasses. 762 */ hashCode()763 public final int hashCode() { 764 return super.hashCode(); 765 } 766 767 /** 768 * Returns the name of this subset. 769 */ toString()770 public final String toString() { 771 return name; 772 } 773 } 774 775 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 776 // for the latest specification of Unicode Blocks. 777 778 /** 779 * A family of character subsets representing the character blocks in the 780 * Unicode specification. Character blocks generally define characters 781 * used for a specific script or purpose. A character is contained by 782 * at most one Unicode block. 783 * 784 * @since 1.2 785 */ 786 public static final class UnicodeBlock extends Subset { 787 /** 788 * NUM_ENTITIES should match the total number of UnicodeBlocks. 789 * It should be adjusted whenever the Unicode Character Database 790 * is upgraded. 791 */ 792 private static final int NUM_ENTITIES = 756; 793 private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES); 794 795 /** 796 * Creates a UnicodeBlock with the given identifier name. 
797 * This name must be the same as the block identifier. 798 */ UnicodeBlock(String idName)799 private UnicodeBlock(String idName) { 800 super(idName); 801 map.put(idName, this); 802 } 803 804 // BEGIN Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229 805 // Add a (String, boolean) constructor for use by SURROGATES_AREA. UnicodeBlock(String idName, boolean isMap)806 private UnicodeBlock(String idName, boolean isMap) { 807 super(idName); 808 if (isMap) { 809 map.put(idName, this); 810 } 811 } 812 // END Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229 813 814 /** 815 * Creates a UnicodeBlock with the given identifier name and 816 * alias name. 817 */ UnicodeBlock(String idName, String alias)818 private UnicodeBlock(String idName, String alias) { 819 this(idName); 820 map.put(alias, this); 821 } 822 823 /** 824 * Creates a UnicodeBlock with the given identifier name and 825 * alias names. 826 */ UnicodeBlock(String idName, String... aliases)827 private UnicodeBlock(String idName, String... aliases) { 828 this(idName); 829 for (String alias : aliases) 830 map.put(alias, this); 831 } 832 833 /** 834 * Constant for the "Basic Latin" Unicode character block. 835 * @since 1.2 836 */ 837 public static final UnicodeBlock BASIC_LATIN = 838 new UnicodeBlock("BASIC_LATIN", 839 "BASIC LATIN", 840 "BASICLATIN"); 841 842 /** 843 * Constant for the "Latin-1 Supplement" Unicode character block. 844 * @since 1.2 845 */ 846 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 847 new UnicodeBlock("LATIN_1_SUPPLEMENT", 848 "LATIN-1 SUPPLEMENT", 849 "LATIN-1SUPPLEMENT"); 850 851 /** 852 * Constant for the "Latin Extended-A" Unicode character block. 853 * @since 1.2 854 */ 855 public static final UnicodeBlock LATIN_EXTENDED_A = 856 new UnicodeBlock("LATIN_EXTENDED_A", 857 "LATIN EXTENDED-A", 858 "LATINEXTENDED-A"); 859 860 /** 861 * Constant for the "Latin Extended-B" Unicode character block. 
862 * @since 1.2 863 */ 864 public static final UnicodeBlock LATIN_EXTENDED_B = 865 new UnicodeBlock("LATIN_EXTENDED_B", 866 "LATIN EXTENDED-B", 867 "LATINEXTENDED-B"); 868 869 /** 870 * Constant for the "IPA Extensions" Unicode character block. 871 * @since 1.2 872 */ 873 public static final UnicodeBlock IPA_EXTENSIONS = 874 new UnicodeBlock("IPA_EXTENSIONS", 875 "IPA EXTENSIONS", 876 "IPAEXTENSIONS"); 877 878 /** 879 * Constant for the "Spacing Modifier Letters" Unicode character block. 880 * @since 1.2 881 */ 882 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 883 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 884 "SPACING MODIFIER LETTERS", 885 "SPACINGMODIFIERLETTERS"); 886 887 /** 888 * Constant for the "Combining Diacritical Marks" Unicode character block. 889 * @since 1.2 890 */ 891 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 892 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 893 "COMBINING DIACRITICAL MARKS", 894 "COMBININGDIACRITICALMARKS"); 895 896 /** 897 * Constant for the "Greek and Coptic" Unicode character block. 898 * <p> 899 * This block was previously known as the "Greek" block. 900 * 901 * @since 1.2 902 */ 903 public static final UnicodeBlock GREEK = 904 new UnicodeBlock("GREEK", 905 "GREEK AND COPTIC", 906 "GREEKANDCOPTIC"); 907 908 /** 909 * Constant for the "Cyrillic" Unicode character block. 910 * @since 1.2 911 */ 912 public static final UnicodeBlock CYRILLIC = 913 new UnicodeBlock("CYRILLIC"); 914 915 /** 916 * Constant for the "Armenian" Unicode character block. 917 * @since 1.2 918 */ 919 public static final UnicodeBlock ARMENIAN = 920 new UnicodeBlock("ARMENIAN"); 921 922 /** 923 * Constant for the "Hebrew" Unicode character block. 924 * @since 1.2 925 */ 926 public static final UnicodeBlock HEBREW = 927 new UnicodeBlock("HEBREW"); 928 929 /** 930 * Constant for the "Arabic" Unicode character block. 
931 * @since 1.2 932 */ 933 public static final UnicodeBlock ARABIC = 934 new UnicodeBlock("ARABIC"); 935 936 /** 937 * Constant for the "Devanagari" Unicode character block. 938 * @since 1.2 939 */ 940 public static final UnicodeBlock DEVANAGARI = 941 new UnicodeBlock("DEVANAGARI"); 942 943 /** 944 * Constant for the "Bengali" Unicode character block. 945 * @since 1.2 946 */ 947 public static final UnicodeBlock BENGALI = 948 new UnicodeBlock("BENGALI"); 949 950 /** 951 * Constant for the "Gurmukhi" Unicode character block. 952 * @since 1.2 953 */ 954 public static final UnicodeBlock GURMUKHI = 955 new UnicodeBlock("GURMUKHI"); 956 957 /** 958 * Constant for the "Gujarati" Unicode character block. 959 * @since 1.2 960 */ 961 public static final UnicodeBlock GUJARATI = 962 new UnicodeBlock("GUJARATI"); 963 964 /** 965 * Constant for the "Oriya" Unicode character block. 966 * @since 1.2 967 */ 968 public static final UnicodeBlock ORIYA = 969 new UnicodeBlock("ORIYA"); 970 971 /** 972 * Constant for the "Tamil" Unicode character block. 973 * @since 1.2 974 */ 975 public static final UnicodeBlock TAMIL = 976 new UnicodeBlock("TAMIL"); 977 978 /** 979 * Constant for the "Telugu" Unicode character block. 980 * @since 1.2 981 */ 982 public static final UnicodeBlock TELUGU = 983 new UnicodeBlock("TELUGU"); 984 985 /** 986 * Constant for the "Kannada" Unicode character block. 987 * @since 1.2 988 */ 989 public static final UnicodeBlock KANNADA = 990 new UnicodeBlock("KANNADA"); 991 992 /** 993 * Constant for the "Malayalam" Unicode character block. 994 * @since 1.2 995 */ 996 public static final UnicodeBlock MALAYALAM = 997 new UnicodeBlock("MALAYALAM"); 998 999 /** 1000 * Constant for the "Thai" Unicode character block. 1001 * @since 1.2 1002 */ 1003 public static final UnicodeBlock THAI = 1004 new UnicodeBlock("THAI"); 1005 1006 /** 1007 * Constant for the "Lao" Unicode character block. 
1008 * @since 1.2 1009 */ 1010 public static final UnicodeBlock LAO = 1011 new UnicodeBlock("LAO"); 1012 1013 /** 1014 * Constant for the "Tibetan" Unicode character block. 1015 * @since 1.2 1016 */ 1017 public static final UnicodeBlock TIBETAN = 1018 new UnicodeBlock("TIBETAN"); 1019 1020 /** 1021 * Constant for the "Georgian" Unicode character block. 1022 * @since 1.2 1023 */ 1024 public static final UnicodeBlock GEORGIAN = 1025 new UnicodeBlock("GEORGIAN"); 1026 1027 /** 1028 * Constant for the "Hangul Jamo" Unicode character block. 1029 * @since 1.2 1030 */ 1031 public static final UnicodeBlock HANGUL_JAMO = 1032 new UnicodeBlock("HANGUL_JAMO", 1033 "HANGUL JAMO", 1034 "HANGULJAMO"); 1035 1036 /** 1037 * Constant for the "Latin Extended Additional" Unicode character block. 1038 * @since 1.2 1039 */ 1040 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 1041 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 1042 "LATIN EXTENDED ADDITIONAL", 1043 "LATINEXTENDEDADDITIONAL"); 1044 1045 /** 1046 * Constant for the "Greek Extended" Unicode character block. 1047 * @since 1.2 1048 */ 1049 public static final UnicodeBlock GREEK_EXTENDED = 1050 new UnicodeBlock("GREEK_EXTENDED", 1051 "GREEK EXTENDED", 1052 "GREEKEXTENDED"); 1053 1054 /** 1055 * Constant for the "General Punctuation" Unicode character block. 1056 * @since 1.2 1057 */ 1058 public static final UnicodeBlock GENERAL_PUNCTUATION = 1059 new UnicodeBlock("GENERAL_PUNCTUATION", 1060 "GENERAL PUNCTUATION", 1061 "GENERALPUNCTUATION"); 1062 1063 /** 1064 * Constant for the "Superscripts and Subscripts" Unicode character 1065 * block. 1066 * @since 1.2 1067 */ 1068 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 1069 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 1070 "SUPERSCRIPTS AND SUBSCRIPTS", 1071 "SUPERSCRIPTSANDSUBSCRIPTS"); 1072 1073 /** 1074 * Constant for the "Currency Symbols" Unicode character block. 
1075 * @since 1.2 1076 */ 1077 public static final UnicodeBlock CURRENCY_SYMBOLS = 1078 new UnicodeBlock("CURRENCY_SYMBOLS", 1079 "CURRENCY SYMBOLS", 1080 "CURRENCYSYMBOLS"); 1081 1082 /** 1083 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 1084 * character block. 1085 * <p> 1086 * This block was previously known as "Combining Marks for Symbols". 1087 * @since 1.2 1088 */ 1089 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 1090 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 1091 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 1092 "COMBININGDIACRITICALMARKSFORSYMBOLS", 1093 "COMBINING MARKS FOR SYMBOLS", 1094 "COMBININGMARKSFORSYMBOLS"); 1095 1096 /** 1097 * Constant for the "Letterlike Symbols" Unicode character block. 1098 * @since 1.2 1099 */ 1100 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 1101 new UnicodeBlock("LETTERLIKE_SYMBOLS", 1102 "LETTERLIKE SYMBOLS", 1103 "LETTERLIKESYMBOLS"); 1104 1105 /** 1106 * Constant for the "Number Forms" Unicode character block. 1107 * @since 1.2 1108 */ 1109 public static final UnicodeBlock NUMBER_FORMS = 1110 new UnicodeBlock("NUMBER_FORMS", 1111 "NUMBER FORMS", 1112 "NUMBERFORMS"); 1113 1114 /** 1115 * Constant for the "Arrows" Unicode character block. 1116 * @since 1.2 1117 */ 1118 public static final UnicodeBlock ARROWS = 1119 new UnicodeBlock("ARROWS"); 1120 1121 /** 1122 * Constant for the "Mathematical Operators" Unicode character block. 1123 * @since 1.2 1124 */ 1125 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1126 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1127 "MATHEMATICAL OPERATORS", 1128 "MATHEMATICALOPERATORS"); 1129 1130 /** 1131 * Constant for the "Miscellaneous Technical" Unicode character block. 
1132 * @since 1.2 1133 */ 1134 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1135 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1136 "MISCELLANEOUS TECHNICAL", 1137 "MISCELLANEOUSTECHNICAL"); 1138 1139 /** 1140 * Constant for the "Control Pictures" Unicode character block. 1141 * @since 1.2 1142 */ 1143 public static final UnicodeBlock CONTROL_PICTURES = 1144 new UnicodeBlock("CONTROL_PICTURES", 1145 "CONTROL PICTURES", 1146 "CONTROLPICTURES"); 1147 1148 /** 1149 * Constant for the "Optical Character Recognition" Unicode character block. 1150 * @since 1.2 1151 */ 1152 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1153 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1154 "OPTICAL CHARACTER RECOGNITION", 1155 "OPTICALCHARACTERRECOGNITION"); 1156 1157 /** 1158 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1159 * @since 1.2 1160 */ 1161 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1162 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1163 "ENCLOSED ALPHANUMERICS", 1164 "ENCLOSEDALPHANUMERICS"); 1165 1166 /** 1167 * Constant for the "Box Drawing" Unicode character block. 1168 * @since 1.2 1169 */ 1170 public static final UnicodeBlock BOX_DRAWING = 1171 new UnicodeBlock("BOX_DRAWING", 1172 "BOX DRAWING", 1173 "BOXDRAWING"); 1174 1175 /** 1176 * Constant for the "Block Elements" Unicode character block. 1177 * @since 1.2 1178 */ 1179 public static final UnicodeBlock BLOCK_ELEMENTS = 1180 new UnicodeBlock("BLOCK_ELEMENTS", 1181 "BLOCK ELEMENTS", 1182 "BLOCKELEMENTS"); 1183 1184 /** 1185 * Constant for the "Geometric Shapes" Unicode character block. 1186 * @since 1.2 1187 */ 1188 public static final UnicodeBlock GEOMETRIC_SHAPES = 1189 new UnicodeBlock("GEOMETRIC_SHAPES", 1190 "GEOMETRIC SHAPES", 1191 "GEOMETRICSHAPES"); 1192 1193 /** 1194 * Constant for the "Miscellaneous Symbols" Unicode character block. 
1195 * @since 1.2 1196 */ 1197 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1198 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1199 "MISCELLANEOUS SYMBOLS", 1200 "MISCELLANEOUSSYMBOLS"); 1201 1202 /** 1203 * Constant for the "Dingbats" Unicode character block. 1204 * @since 1.2 1205 */ 1206 public static final UnicodeBlock DINGBATS = 1207 new UnicodeBlock("DINGBATS"); 1208 1209 /** 1210 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1211 * @since 1.2 1212 */ 1213 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1214 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1215 "CJK SYMBOLS AND PUNCTUATION", 1216 "CJKSYMBOLSANDPUNCTUATION"); 1217 1218 /** 1219 * Constant for the "Hiragana" Unicode character block. 1220 * @since 1.2 1221 */ 1222 public static final UnicodeBlock HIRAGANA = 1223 new UnicodeBlock("HIRAGANA"); 1224 1225 /** 1226 * Constant for the "Katakana" Unicode character block. 1227 * @since 1.2 1228 */ 1229 public static final UnicodeBlock KATAKANA = 1230 new UnicodeBlock("KATAKANA"); 1231 1232 /** 1233 * Constant for the "Bopomofo" Unicode character block. 1234 * @since 1.2 1235 */ 1236 public static final UnicodeBlock BOPOMOFO = 1237 new UnicodeBlock("BOPOMOFO"); 1238 1239 /** 1240 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1241 * @since 1.2 1242 */ 1243 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1244 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1245 "HANGUL COMPATIBILITY JAMO", 1246 "HANGULCOMPATIBILITYJAMO"); 1247 1248 /** 1249 * Constant for the "Kanbun" Unicode character block. 1250 * @since 1.2 1251 */ 1252 public static final UnicodeBlock KANBUN = 1253 new UnicodeBlock("KANBUN"); 1254 1255 /** 1256 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 
1257 * @since 1.2 1258 */ 1259 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1260 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1261 "ENCLOSED CJK LETTERS AND MONTHS", 1262 "ENCLOSEDCJKLETTERSANDMONTHS"); 1263 1264 /** 1265 * Constant for the "CJK Compatibility" Unicode character block. 1266 * @since 1.2 1267 */ 1268 public static final UnicodeBlock CJK_COMPATIBILITY = 1269 new UnicodeBlock("CJK_COMPATIBILITY", 1270 "CJK COMPATIBILITY", 1271 "CJKCOMPATIBILITY"); 1272 1273 /** 1274 * Constant for the "CJK Unified Ideographs" Unicode character block. 1275 * @since 1.2 1276 */ 1277 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1278 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1279 "CJK UNIFIED IDEOGRAPHS", 1280 "CJKUNIFIEDIDEOGRAPHS"); 1281 1282 /** 1283 * Constant for the "Hangul Syllables" Unicode character block. 1284 * @since 1.2 1285 */ 1286 public static final UnicodeBlock HANGUL_SYLLABLES = 1287 new UnicodeBlock("HANGUL_SYLLABLES", 1288 "HANGUL SYLLABLES", 1289 "HANGULSYLLABLES"); 1290 1291 /** 1292 * Constant for the "Private Use Area" Unicode character block. 1293 * @since 1.2 1294 */ 1295 public static final UnicodeBlock PRIVATE_USE_AREA = 1296 new UnicodeBlock("PRIVATE_USE_AREA", 1297 "PRIVATE USE AREA", 1298 "PRIVATEUSEAREA"); 1299 1300 /** 1301 * Constant for the "CJK Compatibility Ideographs" Unicode character 1302 * block. 1303 * @since 1.2 1304 */ 1305 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1306 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1307 "CJK COMPATIBILITY IDEOGRAPHS", 1308 "CJKCOMPATIBILITYIDEOGRAPHS"); 1309 1310 /** 1311 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 
1312 * @since 1.2 1313 */ 1314 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1315 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1316 "ALPHABETIC PRESENTATION FORMS", 1317 "ALPHABETICPRESENTATIONFORMS"); 1318 1319 /** 1320 * Constant for the "Arabic Presentation Forms-A" Unicode character 1321 * block. 1322 * @since 1.2 1323 */ 1324 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1325 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1326 "ARABIC PRESENTATION FORMS-A", 1327 "ARABICPRESENTATIONFORMS-A"); 1328 1329 /** 1330 * Constant for the "Combining Half Marks" Unicode character block. 1331 * @since 1.2 1332 */ 1333 public static final UnicodeBlock COMBINING_HALF_MARKS = 1334 new UnicodeBlock("COMBINING_HALF_MARKS", 1335 "COMBINING HALF MARKS", 1336 "COMBININGHALFMARKS"); 1337 1338 /** 1339 * Constant for the "CJK Compatibility Forms" Unicode character block. 1340 * @since 1.2 1341 */ 1342 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1343 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1344 "CJK COMPATIBILITY FORMS", 1345 "CJKCOMPATIBILITYFORMS"); 1346 1347 /** 1348 * Constant for the "Small Form Variants" Unicode character block. 1349 * @since 1.2 1350 */ 1351 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1352 new UnicodeBlock("SMALL_FORM_VARIANTS", 1353 "SMALL FORM VARIANTS", 1354 "SMALLFORMVARIANTS"); 1355 1356 /** 1357 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1358 * @since 1.2 1359 */ 1360 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1361 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1362 "ARABIC PRESENTATION FORMS-B", 1363 "ARABICPRESENTATIONFORMS-B"); 1364 1365 /** 1366 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1367 * block. 
1368 * @since 1.2 1369 */ 1370 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1371 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1372 "HALFWIDTH AND FULLWIDTH FORMS", 1373 "HALFWIDTHANDFULLWIDTHFORMS"); 1374 1375 /** 1376 * Constant for the "Specials" Unicode character block. 1377 * @since 1.2 1378 */ 1379 public static final UnicodeBlock SPECIALS = 1380 new UnicodeBlock("SPECIALS"); 1381 1382 /** 1383 * @deprecated 1384 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1385 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1386 * These constants match the block definitions of the Unicode Standard. 1387 * The {@link #of(char)} and {@link #of(int)} methods return the 1388 * standard constants. 1389 */ 1390 @Deprecated(since="1.5") 1391 public static final UnicodeBlock SURROGATES_AREA = 1392 // Android-changed: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229 1393 // new UnicodeBlock("SURROGATES_AREA"); 1394 new UnicodeBlock("SURROGATES_AREA", false); 1395 1396 /** 1397 * Constant for the "Syriac" Unicode character block. 1398 * @since 1.4 1399 */ 1400 public static final UnicodeBlock SYRIAC = 1401 new UnicodeBlock("SYRIAC"); 1402 1403 /** 1404 * Constant for the "Thaana" Unicode character block. 1405 * @since 1.4 1406 */ 1407 public static final UnicodeBlock THAANA = 1408 new UnicodeBlock("THAANA"); 1409 1410 /** 1411 * Constant for the "Sinhala" Unicode character block. 1412 * @since 1.4 1413 */ 1414 public static final UnicodeBlock SINHALA = 1415 new UnicodeBlock("SINHALA"); 1416 1417 /** 1418 * Constant for the "Myanmar" Unicode character block. 1419 * @since 1.4 1420 */ 1421 public static final UnicodeBlock MYANMAR = 1422 new UnicodeBlock("MYANMAR"); 1423 1424 /** 1425 * Constant for the "Ethiopic" Unicode character block. 
1426 * @since 1.4 1427 */ 1428 public static final UnicodeBlock ETHIOPIC = 1429 new UnicodeBlock("ETHIOPIC"); 1430 1431 /** 1432 * Constant for the "Cherokee" Unicode character block. 1433 * @since 1.4 1434 */ 1435 public static final UnicodeBlock CHEROKEE = 1436 new UnicodeBlock("CHEROKEE"); 1437 1438 /** 1439 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1440 * @since 1.4 1441 */ 1442 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1443 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1444 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1445 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1446 1447 /** 1448 * Constant for the "Ogham" Unicode character block. 1449 * @since 1.4 1450 */ 1451 public static final UnicodeBlock OGHAM = 1452 new UnicodeBlock("OGHAM"); 1453 1454 /** 1455 * Constant for the "Runic" Unicode character block. 1456 * @since 1.4 1457 */ 1458 public static final UnicodeBlock RUNIC = 1459 new UnicodeBlock("RUNIC"); 1460 1461 /** 1462 * Constant for the "Khmer" Unicode character block. 1463 * @since 1.4 1464 */ 1465 public static final UnicodeBlock KHMER = 1466 new UnicodeBlock("KHMER"); 1467 1468 /** 1469 * Constant for the "Mongolian" Unicode character block. 1470 * @since 1.4 1471 */ 1472 public static final UnicodeBlock MONGOLIAN = 1473 new UnicodeBlock("MONGOLIAN"); 1474 1475 /** 1476 * Constant for the "Braille Patterns" Unicode character block. 1477 * @since 1.4 1478 */ 1479 public static final UnicodeBlock BRAILLE_PATTERNS = 1480 new UnicodeBlock("BRAILLE_PATTERNS", 1481 "BRAILLE PATTERNS", 1482 "BRAILLEPATTERNS"); 1483 1484 /** 1485 * Constant for the "CJK Radicals Supplement" Unicode character block. 1486 * @since 1.4 1487 */ 1488 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1489 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1490 "CJK RADICALS SUPPLEMENT", 1491 "CJKRADICALSSUPPLEMENT"); 1492 1493 /** 1494 * Constant for the "Kangxi Radicals" Unicode character block. 
1495 * @since 1.4 1496 */ 1497 public static final UnicodeBlock KANGXI_RADICALS = 1498 new UnicodeBlock("KANGXI_RADICALS", 1499 "KANGXI RADICALS", 1500 "KANGXIRADICALS"); 1501 1502 /** 1503 * Constant for the "Ideographic Description Characters" Unicode character block. 1504 * @since 1.4 1505 */ 1506 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1507 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1508 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1509 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1510 1511 /** 1512 * Constant for the "Bopomofo Extended" Unicode character block. 1513 * @since 1.4 1514 */ 1515 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1516 new UnicodeBlock("BOPOMOFO_EXTENDED", 1517 "BOPOMOFO EXTENDED", 1518 "BOPOMOFOEXTENDED"); 1519 1520 /** 1521 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1522 * @since 1.4 1523 */ 1524 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1525 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1526 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1527 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1528 1529 /** 1530 * Constant for the "Yi Syllables" Unicode character block. 1531 * @since 1.4 1532 */ 1533 public static final UnicodeBlock YI_SYLLABLES = 1534 new UnicodeBlock("YI_SYLLABLES", 1535 "YI SYLLABLES", 1536 "YISYLLABLES"); 1537 1538 /** 1539 * Constant for the "Yi Radicals" Unicode character block. 1540 * @since 1.4 1541 */ 1542 public static final UnicodeBlock YI_RADICALS = 1543 new UnicodeBlock("YI_RADICALS", 1544 "YI RADICALS", 1545 "YIRADICALS"); 1546 1547 /** 1548 * Constant for the "Cyrillic Supplement" Unicode character block. 1549 * This block was previously known as the "Cyrillic Supplementary" block. 
1550 * @since 1.5 1551 */ 1552 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1553 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1554 "CYRILLIC SUPPLEMENTARY", 1555 "CYRILLICSUPPLEMENTARY", 1556 "CYRILLIC SUPPLEMENT", 1557 "CYRILLICSUPPLEMENT"); 1558 1559 /** 1560 * Constant for the "Tagalog" Unicode character block. 1561 * @since 1.5 1562 */ 1563 public static final UnicodeBlock TAGALOG = 1564 new UnicodeBlock("TAGALOG"); 1565 1566 /** 1567 * Constant for the "Hanunoo" Unicode character block. 1568 * @since 1.5 1569 */ 1570 public static final UnicodeBlock HANUNOO = 1571 new UnicodeBlock("HANUNOO"); 1572 1573 /** 1574 * Constant for the "Buhid" Unicode character block. 1575 * @since 1.5 1576 */ 1577 public static final UnicodeBlock BUHID = 1578 new UnicodeBlock("BUHID"); 1579 1580 /** 1581 * Constant for the "Tagbanwa" Unicode character block. 1582 * @since 1.5 1583 */ 1584 public static final UnicodeBlock TAGBANWA = 1585 new UnicodeBlock("TAGBANWA"); 1586 1587 /** 1588 * Constant for the "Limbu" Unicode character block. 1589 * @since 1.5 1590 */ 1591 public static final UnicodeBlock LIMBU = 1592 new UnicodeBlock("LIMBU"); 1593 1594 /** 1595 * Constant for the "Tai Le" Unicode character block. 1596 * @since 1.5 1597 */ 1598 public static final UnicodeBlock TAI_LE = 1599 new UnicodeBlock("TAI_LE", 1600 "TAI LE", 1601 "TAILE"); 1602 1603 /** 1604 * Constant for the "Khmer Symbols" Unicode character block. 1605 * @since 1.5 1606 */ 1607 public static final UnicodeBlock KHMER_SYMBOLS = 1608 new UnicodeBlock("KHMER_SYMBOLS", 1609 "KHMER SYMBOLS", 1610 "KHMERSYMBOLS"); 1611 1612 /** 1613 * Constant for the "Phonetic Extensions" Unicode character block. 1614 * @since 1.5 1615 */ 1616 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1617 new UnicodeBlock("PHONETIC_EXTENSIONS", 1618 "PHONETIC EXTENSIONS", 1619 "PHONETICEXTENSIONS"); 1620 1621 /** 1622 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 
1623 * @since 1.5 1624 */ 1625 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1626 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1627 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1628 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1629 1630 /** 1631 * Constant for the "Supplemental Arrows-A" Unicode character block. 1632 * @since 1.5 1633 */ 1634 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1635 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1636 "SUPPLEMENTAL ARROWS-A", 1637 "SUPPLEMENTALARROWS-A"); 1638 1639 /** 1640 * Constant for the "Supplemental Arrows-B" Unicode character block. 1641 * @since 1.5 1642 */ 1643 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1644 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1645 "SUPPLEMENTAL ARROWS-B", 1646 "SUPPLEMENTALARROWS-B"); 1647 1648 /** 1649 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1650 * character block. 1651 * @since 1.5 1652 */ 1653 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1654 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1655 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1656 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1657 1658 /** 1659 * Constant for the "Supplemental Mathematical Operators" Unicode 1660 * character block. 1661 * @since 1.5 1662 */ 1663 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1664 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1665 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1666 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1667 1668 /** 1669 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1670 * block. 1671 * @since 1.5 1672 */ 1673 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1674 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1675 "MISCELLANEOUS SYMBOLS AND ARROWS", 1676 "MISCELLANEOUSSYMBOLSANDARROWS"); 1677 1678 /** 1679 * Constant for the "Katakana Phonetic Extensions" Unicode character 1680 * block. 
1681 * @since 1.5 1682 */ 1683 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1684 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1685 "KATAKANA PHONETIC EXTENSIONS", 1686 "KATAKANAPHONETICEXTENSIONS"); 1687 1688 /** 1689 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1690 * @since 1.5 1691 */ 1692 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1693 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1694 "YIJING HEXAGRAM SYMBOLS", 1695 "YIJINGHEXAGRAMSYMBOLS"); 1696 1697 /** 1698 * Constant for the "Variation Selectors" Unicode character block. 1699 * @since 1.5 1700 */ 1701 public static final UnicodeBlock VARIATION_SELECTORS = 1702 new UnicodeBlock("VARIATION_SELECTORS", 1703 "VARIATION SELECTORS", 1704 "VARIATIONSELECTORS"); 1705 1706 /** 1707 * Constant for the "Linear B Syllabary" Unicode character block. 1708 * @since 1.5 1709 */ 1710 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1711 new UnicodeBlock("LINEAR_B_SYLLABARY", 1712 "LINEAR B SYLLABARY", 1713 "LINEARBSYLLABARY"); 1714 1715 /** 1716 * Constant for the "Linear B Ideograms" Unicode character block. 1717 * @since 1.5 1718 */ 1719 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1720 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1721 "LINEAR B IDEOGRAMS", 1722 "LINEARBIDEOGRAMS"); 1723 1724 /** 1725 * Constant for the "Aegean Numbers" Unicode character block. 1726 * @since 1.5 1727 */ 1728 public static final UnicodeBlock AEGEAN_NUMBERS = 1729 new UnicodeBlock("AEGEAN_NUMBERS", 1730 "AEGEAN NUMBERS", 1731 "AEGEANNUMBERS"); 1732 1733 /** 1734 * Constant for the "Old Italic" Unicode character block. 1735 * @since 1.5 1736 */ 1737 public static final UnicodeBlock OLD_ITALIC = 1738 new UnicodeBlock("OLD_ITALIC", 1739 "OLD ITALIC", 1740 "OLDITALIC"); 1741 1742 /** 1743 * Constant for the "Gothic" Unicode character block. 
1744 * @since 1.5 1745 */ 1746 public static final UnicodeBlock GOTHIC = 1747 new UnicodeBlock("GOTHIC"); 1748 1749 /** 1750 * Constant for the "Ugaritic" Unicode character block. 1751 * @since 1.5 1752 */ 1753 public static final UnicodeBlock UGARITIC = 1754 new UnicodeBlock("UGARITIC"); 1755 1756 /** 1757 * Constant for the "Deseret" Unicode character block. 1758 * @since 1.5 1759 */ 1760 public static final UnicodeBlock DESERET = 1761 new UnicodeBlock("DESERET"); 1762 1763 /** 1764 * Constant for the "Shavian" Unicode character block. 1765 * @since 1.5 1766 */ 1767 public static final UnicodeBlock SHAVIAN = 1768 new UnicodeBlock("SHAVIAN"); 1769 1770 /** 1771 * Constant for the "Osmanya" Unicode character block. 1772 * @since 1.5 1773 */ 1774 public static final UnicodeBlock OSMANYA = 1775 new UnicodeBlock("OSMANYA"); 1776 1777 /** 1778 * Constant for the "Cypriot Syllabary" Unicode character block. 1779 * @since 1.5 1780 */ 1781 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1782 new UnicodeBlock("CYPRIOT_SYLLABARY", 1783 "CYPRIOT SYLLABARY", 1784 "CYPRIOTSYLLABARY"); 1785 1786 /** 1787 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1788 * @since 1.5 1789 */ 1790 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1791 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1792 "BYZANTINE MUSICAL SYMBOLS", 1793 "BYZANTINEMUSICALSYMBOLS"); 1794 1795 /** 1796 * Constant for the "Musical Symbols" Unicode character block. 1797 * @since 1.5 1798 */ 1799 public static final UnicodeBlock MUSICAL_SYMBOLS = 1800 new UnicodeBlock("MUSICAL_SYMBOLS", 1801 "MUSICAL SYMBOLS", 1802 "MUSICALSYMBOLS"); 1803 1804 /** 1805 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 
1806 * @since 1.5 1807 */ 1808 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1809 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1810 "TAI XUAN JING SYMBOLS", 1811 "TAIXUANJINGSYMBOLS"); 1812 1813 /** 1814 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1815 * character block. 1816 * @since 1.5 1817 */ 1818 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1819 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1820 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1821 "MATHEMATICALALPHANUMERICSYMBOLS"); 1822 1823 /** 1824 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1825 * character block. 1826 * @since 1.5 1827 */ 1828 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1829 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1830 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1831 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1832 1833 /** 1834 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1835 * @since 1.5 1836 */ 1837 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1838 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1839 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1840 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1841 1842 /** 1843 * Constant for the "Tags" Unicode character block. 1844 * @since 1.5 1845 */ 1846 public static final UnicodeBlock TAGS = 1847 new UnicodeBlock("TAGS"); 1848 1849 /** 1850 * Constant for the "Variation Selectors Supplement" Unicode character 1851 * block. 1852 * @since 1.5 1853 */ 1854 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1855 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1856 "VARIATION SELECTORS SUPPLEMENT", 1857 "VARIATIONSELECTORSSUPPLEMENT"); 1858 1859 /** 1860 * Constant for the "Supplementary Private Use Area-A" Unicode character 1861 * block. 
1862 * @since 1.5 1863 */ 1864 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1865 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1866 "SUPPLEMENTARY PRIVATE USE AREA-A", 1867 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1868 1869 /** 1870 * Constant for the "Supplementary Private Use Area-B" Unicode character 1871 * block. 1872 * @since 1.5 1873 */ 1874 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1875 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1876 "SUPPLEMENTARY PRIVATE USE AREA-B", 1877 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1878 1879 /** 1880 * Constant for the "High Surrogates" Unicode character block. 1881 * This block represents codepoint values in the high surrogate 1882 * range: U+D800 through U+DB7F 1883 * 1884 * @since 1.5 1885 */ 1886 public static final UnicodeBlock HIGH_SURROGATES = 1887 new UnicodeBlock("HIGH_SURROGATES", 1888 "HIGH SURROGATES", 1889 "HIGHSURROGATES"); 1890 1891 /** 1892 * Constant for the "High Private Use Surrogates" Unicode character 1893 * block. 1894 * This block represents codepoint values in the private use high 1895 * surrogate range: U+DB80 through U+DBFF 1896 * 1897 * @since 1.5 1898 */ 1899 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1900 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1901 "HIGH PRIVATE USE SURROGATES", 1902 "HIGHPRIVATEUSESURROGATES"); 1903 1904 /** 1905 * Constant for the "Low Surrogates" Unicode character block. 1906 * This block represents codepoint values in the low surrogate 1907 * range: U+DC00 through U+DFFF 1908 * 1909 * @since 1.5 1910 */ 1911 public static final UnicodeBlock LOW_SURROGATES = 1912 new UnicodeBlock("LOW_SURROGATES", 1913 "LOW SURROGATES", 1914 "LOWSURROGATES"); 1915 1916 /** 1917 * Constant for the "Arabic Supplement" Unicode character block. 
1918 * @since 1.7 1919 */ 1920 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1921 new UnicodeBlock("ARABIC_SUPPLEMENT", 1922 "ARABIC SUPPLEMENT", 1923 "ARABICSUPPLEMENT"); 1924 1925 /** 1926 * Constant for the "NKo" Unicode character block. 1927 * @since 1.7 1928 */ 1929 public static final UnicodeBlock NKO = 1930 new UnicodeBlock("NKO"); 1931 1932 /** 1933 * Constant for the "Samaritan" Unicode character block. 1934 * @since 1.7 1935 */ 1936 public static final UnicodeBlock SAMARITAN = 1937 new UnicodeBlock("SAMARITAN"); 1938 1939 /** 1940 * Constant for the "Mandaic" Unicode character block. 1941 * @since 1.7 1942 */ 1943 public static final UnicodeBlock MANDAIC = 1944 new UnicodeBlock("MANDAIC"); 1945 1946 /** 1947 * Constant for the "Ethiopic Supplement" Unicode character block. 1948 * @since 1.7 1949 */ 1950 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1951 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1952 "ETHIOPIC SUPPLEMENT", 1953 "ETHIOPICSUPPLEMENT"); 1954 1955 /** 1956 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1957 * Unicode character block. 1958 * @since 1.7 1959 */ 1960 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1961 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1962 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1963 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1964 1965 /** 1966 * Constant for the "New Tai Lue" Unicode character block. 1967 * @since 1.7 1968 */ 1969 public static final UnicodeBlock NEW_TAI_LUE = 1970 new UnicodeBlock("NEW_TAI_LUE", 1971 "NEW TAI LUE", 1972 "NEWTAILUE"); 1973 1974 /** 1975 * Constant for the "Buginese" Unicode character block. 1976 * @since 1.7 1977 */ 1978 public static final UnicodeBlock BUGINESE = 1979 new UnicodeBlock("BUGINESE"); 1980 1981 /** 1982 * Constant for the "Tai Tham" Unicode character block. 
1983 * @since 1.7 1984 */ 1985 public static final UnicodeBlock TAI_THAM = 1986 new UnicodeBlock("TAI_THAM", 1987 "TAI THAM", 1988 "TAITHAM"); 1989 1990 /** 1991 * Constant for the "Balinese" Unicode character block. 1992 * @since 1.7 1993 */ 1994 public static final UnicodeBlock BALINESE = 1995 new UnicodeBlock("BALINESE"); 1996 1997 /** 1998 * Constant for the "Sundanese" Unicode character block. 1999 * @since 1.7 2000 */ 2001 public static final UnicodeBlock SUNDANESE = 2002 new UnicodeBlock("SUNDANESE"); 2003 2004 /** 2005 * Constant for the "Batak" Unicode character block. 2006 * @since 1.7 2007 */ 2008 public static final UnicodeBlock BATAK = 2009 new UnicodeBlock("BATAK"); 2010 2011 /** 2012 * Constant for the "Lepcha" Unicode character block. 2013 * @since 1.7 2014 */ 2015 public static final UnicodeBlock LEPCHA = 2016 new UnicodeBlock("LEPCHA"); 2017 2018 /** 2019 * Constant for the "Ol Chiki" Unicode character block. 2020 * @since 1.7 2021 */ 2022 public static final UnicodeBlock OL_CHIKI = 2023 new UnicodeBlock("OL_CHIKI", 2024 "OL CHIKI", 2025 "OLCHIKI"); 2026 2027 /** 2028 * Constant for the "Vedic Extensions" Unicode character block. 2029 * @since 1.7 2030 */ 2031 public static final UnicodeBlock VEDIC_EXTENSIONS = 2032 new UnicodeBlock("VEDIC_EXTENSIONS", 2033 "VEDIC EXTENSIONS", 2034 "VEDICEXTENSIONS"); 2035 2036 /** 2037 * Constant for the "Phonetic Extensions Supplement" Unicode character 2038 * block. 2039 * @since 1.7 2040 */ 2041 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 2042 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 2043 "PHONETIC EXTENSIONS SUPPLEMENT", 2044 "PHONETICEXTENSIONSSUPPLEMENT"); 2045 2046 /** 2047 * Constant for the "Combining Diacritical Marks Supplement" Unicode 2048 * character block. 
2049 * @since 1.7 2050 */ 2051 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 2052 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 2053 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 2054 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 2055 2056 /** 2057 * Constant for the "Glagolitic" Unicode character block. 2058 * @since 1.7 2059 */ 2060 public static final UnicodeBlock GLAGOLITIC = 2061 new UnicodeBlock("GLAGOLITIC"); 2062 2063 /** 2064 * Constant for the "Latin Extended-C" Unicode character block. 2065 * @since 1.7 2066 */ 2067 public static final UnicodeBlock LATIN_EXTENDED_C = 2068 new UnicodeBlock("LATIN_EXTENDED_C", 2069 "LATIN EXTENDED-C", 2070 "LATINEXTENDED-C"); 2071 2072 /** 2073 * Constant for the "Coptic" Unicode character block. 2074 * @since 1.7 2075 */ 2076 public static final UnicodeBlock COPTIC = 2077 new UnicodeBlock("COPTIC"); 2078 2079 /** 2080 * Constant for the "Georgian Supplement" Unicode character block. 2081 * @since 1.7 2082 */ 2083 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2084 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 2085 "GEORGIAN SUPPLEMENT", 2086 "GEORGIANSUPPLEMENT"); 2087 2088 /** 2089 * Constant for the "Tifinagh" Unicode character block. 2090 * @since 1.7 2091 */ 2092 public static final UnicodeBlock TIFINAGH = 2093 new UnicodeBlock("TIFINAGH"); 2094 2095 /** 2096 * Constant for the "Ethiopic Extended" Unicode character block. 2097 * @since 1.7 2098 */ 2099 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2100 new UnicodeBlock("ETHIOPIC_EXTENDED", 2101 "ETHIOPIC EXTENDED", 2102 "ETHIOPICEXTENDED"); 2103 2104 /** 2105 * Constant for the "Cyrillic Extended-A" Unicode character block. 2106 * @since 1.7 2107 */ 2108 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2109 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2110 "CYRILLIC EXTENDED-A", 2111 "CYRILLICEXTENDED-A"); 2112 2113 /** 2114 * Constant for the "Supplemental Punctuation" Unicode character block. 
2115 * @since 1.7 2116 */ 2117 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2118 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2119 "SUPPLEMENTAL PUNCTUATION", 2120 "SUPPLEMENTALPUNCTUATION"); 2121 2122 /** 2123 * Constant for the "CJK Strokes" Unicode character block. 2124 * @since 1.7 2125 */ 2126 public static final UnicodeBlock CJK_STROKES = 2127 new UnicodeBlock("CJK_STROKES", 2128 "CJK STROKES", 2129 "CJKSTROKES"); 2130 2131 /** 2132 * Constant for the "Lisu" Unicode character block. 2133 * @since 1.7 2134 */ 2135 public static final UnicodeBlock LISU = 2136 new UnicodeBlock("LISU"); 2137 2138 /** 2139 * Constant for the "Vai" Unicode character block. 2140 * @since 1.7 2141 */ 2142 public static final UnicodeBlock VAI = 2143 new UnicodeBlock("VAI"); 2144 2145 /** 2146 * Constant for the "Cyrillic Extended-B" Unicode character block. 2147 * @since 1.7 2148 */ 2149 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2150 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2151 "CYRILLIC EXTENDED-B", 2152 "CYRILLICEXTENDED-B"); 2153 2154 /** 2155 * Constant for the "Bamum" Unicode character block. 2156 * @since 1.7 2157 */ 2158 public static final UnicodeBlock BAMUM = 2159 new UnicodeBlock("BAMUM"); 2160 2161 /** 2162 * Constant for the "Modifier Tone Letters" Unicode character block. 2163 * @since 1.7 2164 */ 2165 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2166 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2167 "MODIFIER TONE LETTERS", 2168 "MODIFIERTONELETTERS"); 2169 2170 /** 2171 * Constant for the "Latin Extended-D" Unicode character block. 2172 * @since 1.7 2173 */ 2174 public static final UnicodeBlock LATIN_EXTENDED_D = 2175 new UnicodeBlock("LATIN_EXTENDED_D", 2176 "LATIN EXTENDED-D", 2177 "LATINEXTENDED-D"); 2178 2179 /** 2180 * Constant for the "Syloti Nagri" Unicode character block. 
2181 * @since 1.7 2182 */ 2183 public static final UnicodeBlock SYLOTI_NAGRI = 2184 new UnicodeBlock("SYLOTI_NAGRI", 2185 "SYLOTI NAGRI", 2186 "SYLOTINAGRI"); 2187 2188 /** 2189 * Constant for the "Common Indic Number Forms" Unicode character block. 2190 * @since 1.7 2191 */ 2192 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2193 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2194 "COMMON INDIC NUMBER FORMS", 2195 "COMMONINDICNUMBERFORMS"); 2196 2197 /** 2198 * Constant for the "Phags-pa" Unicode character block. 2199 * @since 1.7 2200 */ 2201 public static final UnicodeBlock PHAGS_PA = 2202 new UnicodeBlock("PHAGS_PA", 2203 "PHAGS-PA"); 2204 2205 /** 2206 * Constant for the "Saurashtra" Unicode character block. 2207 * @since 1.7 2208 */ 2209 public static final UnicodeBlock SAURASHTRA = 2210 new UnicodeBlock("SAURASHTRA"); 2211 2212 /** 2213 * Constant for the "Devanagari Extended" Unicode character block. 2214 * @since 1.7 2215 */ 2216 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2217 new UnicodeBlock("DEVANAGARI_EXTENDED", 2218 "DEVANAGARI EXTENDED", 2219 "DEVANAGARIEXTENDED"); 2220 2221 /** 2222 * Constant for the "Kayah Li" Unicode character block. 2223 * @since 1.7 2224 */ 2225 public static final UnicodeBlock KAYAH_LI = 2226 new UnicodeBlock("KAYAH_LI", 2227 "KAYAH LI", 2228 "KAYAHLI"); 2229 2230 /** 2231 * Constant for the "Rejang" Unicode character block. 2232 * @since 1.7 2233 */ 2234 public static final UnicodeBlock REJANG = 2235 new UnicodeBlock("REJANG"); 2236 2237 /** 2238 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2239 * @since 1.7 2240 */ 2241 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2242 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2243 "HANGUL JAMO EXTENDED-A", 2244 "HANGULJAMOEXTENDED-A"); 2245 2246 /** 2247 * Constant for the "Javanese" Unicode character block. 
2248 * @since 1.7 2249 */ 2250 public static final UnicodeBlock JAVANESE = 2251 new UnicodeBlock("JAVANESE"); 2252 2253 /** 2254 * Constant for the "Cham" Unicode character block. 2255 * @since 1.7 2256 */ 2257 public static final UnicodeBlock CHAM = 2258 new UnicodeBlock("CHAM"); 2259 2260 /** 2261 * Constant for the "Myanmar Extended-A" Unicode character block. 2262 * @since 1.7 2263 */ 2264 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2265 new UnicodeBlock("MYANMAR_EXTENDED_A", 2266 "MYANMAR EXTENDED-A", 2267 "MYANMAREXTENDED-A"); 2268 2269 /** 2270 * Constant for the "Tai Viet" Unicode character block. 2271 * @since 1.7 2272 */ 2273 public static final UnicodeBlock TAI_VIET = 2274 new UnicodeBlock("TAI_VIET", 2275 "TAI VIET", 2276 "TAIVIET"); 2277 2278 /** 2279 * Constant for the "Ethiopic Extended-A" Unicode character block. 2280 * @since 1.7 2281 */ 2282 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2283 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2284 "ETHIOPIC EXTENDED-A", 2285 "ETHIOPICEXTENDED-A"); 2286 2287 /** 2288 * Constant for the "Meetei Mayek" Unicode character block. 2289 * @since 1.7 2290 */ 2291 public static final UnicodeBlock MEETEI_MAYEK = 2292 new UnicodeBlock("MEETEI_MAYEK", 2293 "MEETEI MAYEK", 2294 "MEETEIMAYEK"); 2295 2296 /** 2297 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2298 * @since 1.7 2299 */ 2300 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2301 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2302 "HANGUL JAMO EXTENDED-B", 2303 "HANGULJAMOEXTENDED-B"); 2304 2305 /** 2306 * Constant for the "Vertical Forms" Unicode character block. 2307 * @since 1.7 2308 */ 2309 public static final UnicodeBlock VERTICAL_FORMS = 2310 new UnicodeBlock("VERTICAL_FORMS", 2311 "VERTICAL FORMS", 2312 "VERTICALFORMS"); 2313 2314 /** 2315 * Constant for the "Ancient Greek Numbers" Unicode character block. 
2316 * @since 1.7 2317 */ 2318 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2319 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2320 "ANCIENT GREEK NUMBERS", 2321 "ANCIENTGREEKNUMBERS"); 2322 2323 /** 2324 * Constant for the "Ancient Symbols" Unicode character block. 2325 * @since 1.7 2326 */ 2327 public static final UnicodeBlock ANCIENT_SYMBOLS = 2328 new UnicodeBlock("ANCIENT_SYMBOLS", 2329 "ANCIENT SYMBOLS", 2330 "ANCIENTSYMBOLS"); 2331 2332 /** 2333 * Constant for the "Phaistos Disc" Unicode character block. 2334 * @since 1.7 2335 */ 2336 public static final UnicodeBlock PHAISTOS_DISC = 2337 new UnicodeBlock("PHAISTOS_DISC", 2338 "PHAISTOS DISC", 2339 "PHAISTOSDISC"); 2340 2341 /** 2342 * Constant for the "Lycian" Unicode character block. 2343 * @since 1.7 2344 */ 2345 public static final UnicodeBlock LYCIAN = 2346 new UnicodeBlock("LYCIAN"); 2347 2348 /** 2349 * Constant for the "Carian" Unicode character block. 2350 * @since 1.7 2351 */ 2352 public static final UnicodeBlock CARIAN = 2353 new UnicodeBlock("CARIAN"); 2354 2355 /** 2356 * Constant for the "Old Persian" Unicode character block. 2357 * @since 1.7 2358 */ 2359 public static final UnicodeBlock OLD_PERSIAN = 2360 new UnicodeBlock("OLD_PERSIAN", 2361 "OLD PERSIAN", 2362 "OLDPERSIAN"); 2363 2364 /** 2365 * Constant for the "Imperial Aramaic" Unicode character block. 2366 * @since 1.7 2367 */ 2368 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2369 new UnicodeBlock("IMPERIAL_ARAMAIC", 2370 "IMPERIAL ARAMAIC", 2371 "IMPERIALARAMAIC"); 2372 2373 /** 2374 * Constant for the "Phoenician" Unicode character block. 2375 * @since 1.7 2376 */ 2377 public static final UnicodeBlock PHOENICIAN = 2378 new UnicodeBlock("PHOENICIAN"); 2379 2380 /** 2381 * Constant for the "Lydian" Unicode character block. 2382 * @since 1.7 2383 */ 2384 public static final UnicodeBlock LYDIAN = 2385 new UnicodeBlock("LYDIAN"); 2386 2387 /** 2388 * Constant for the "Kharoshthi" Unicode character block. 
2389 * @since 1.7 2390 */ 2391 public static final UnicodeBlock KHAROSHTHI = 2392 new UnicodeBlock("KHAROSHTHI"); 2393 2394 /** 2395 * Constant for the "Old South Arabian" Unicode character block. 2396 * @since 1.7 2397 */ 2398 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2399 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2400 "OLD SOUTH ARABIAN", 2401 "OLDSOUTHARABIAN"); 2402 2403 /** 2404 * Constant for the "Avestan" Unicode character block. 2405 * @since 1.7 2406 */ 2407 public static final UnicodeBlock AVESTAN = 2408 new UnicodeBlock("AVESTAN"); 2409 2410 /** 2411 * Constant for the "Inscriptional Parthian" Unicode character block. 2412 * @since 1.7 2413 */ 2414 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2415 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2416 "INSCRIPTIONAL PARTHIAN", 2417 "INSCRIPTIONALPARTHIAN"); 2418 2419 /** 2420 * Constant for the "Inscriptional Pahlavi" Unicode character block. 2421 * @since 1.7 2422 */ 2423 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2424 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2425 "INSCRIPTIONAL PAHLAVI", 2426 "INSCRIPTIONALPAHLAVI"); 2427 2428 /** 2429 * Constant for the "Old Turkic" Unicode character block. 2430 * @since 1.7 2431 */ 2432 public static final UnicodeBlock OLD_TURKIC = 2433 new UnicodeBlock("OLD_TURKIC", 2434 "OLD TURKIC", 2435 "OLDTURKIC"); 2436 2437 /** 2438 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2439 * @since 1.7 2440 */ 2441 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2442 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2443 "RUMI NUMERAL SYMBOLS", 2444 "RUMINUMERALSYMBOLS"); 2445 2446 /** 2447 * Constant for the "Brahmi" Unicode character block. 2448 * @since 1.7 2449 */ 2450 public static final UnicodeBlock BRAHMI = 2451 new UnicodeBlock("BRAHMI"); 2452 2453 /** 2454 * Constant for the "Kaithi" Unicode character block. 
2455 * @since 1.7 2456 */ 2457 public static final UnicodeBlock KAITHI = 2458 new UnicodeBlock("KAITHI"); 2459 2460 /** 2461 * Constant for the "Cuneiform" Unicode character block. 2462 * @since 1.7 2463 */ 2464 public static final UnicodeBlock CUNEIFORM = 2465 new UnicodeBlock("CUNEIFORM"); 2466 2467 /** 2468 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2469 * character block. 2470 * @since 1.7 2471 */ 2472 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2473 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2474 "CUNEIFORM NUMBERS AND PUNCTUATION", 2475 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2476 2477 /** 2478 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2479 * @since 1.7 2480 */ 2481 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2482 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2483 "EGYPTIAN HIEROGLYPHS", 2484 "EGYPTIANHIEROGLYPHS"); 2485 2486 /** 2487 * Constant for the "Bamum Supplement" Unicode character block. 2488 * @since 1.7 2489 */ 2490 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2491 new UnicodeBlock("BAMUM_SUPPLEMENT", 2492 "BAMUM SUPPLEMENT", 2493 "BAMUMSUPPLEMENT"); 2494 2495 /** 2496 * Constant for the "Kana Supplement" Unicode character block. 2497 * @since 1.7 2498 */ 2499 public static final UnicodeBlock KANA_SUPPLEMENT = 2500 new UnicodeBlock("KANA_SUPPLEMENT", 2501 "KANA SUPPLEMENT", 2502 "KANASUPPLEMENT"); 2503 2504 /** 2505 * Constant for the "Ancient Greek Musical Notation" Unicode character 2506 * block. 2507 * @since 1.7 2508 */ 2509 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2510 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2511 "ANCIENT GREEK MUSICAL NOTATION", 2512 "ANCIENTGREEKMUSICALNOTATION"); 2513 2514 /** 2515 * Constant for the "Counting Rod Numerals" Unicode character block. 
2516 * @since 1.7 2517 */ 2518 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2519 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2520 "COUNTING ROD NUMERALS", 2521 "COUNTINGRODNUMERALS"); 2522 2523 /** 2524 * Constant for the "Mahjong Tiles" Unicode character block. 2525 * @since 1.7 2526 */ 2527 public static final UnicodeBlock MAHJONG_TILES = 2528 new UnicodeBlock("MAHJONG_TILES", 2529 "MAHJONG TILES", 2530 "MAHJONGTILES"); 2531 2532 /** 2533 * Constant for the "Domino Tiles" Unicode character block. 2534 * @since 1.7 2535 */ 2536 public static final UnicodeBlock DOMINO_TILES = 2537 new UnicodeBlock("DOMINO_TILES", 2538 "DOMINO TILES", 2539 "DOMINOTILES"); 2540 2541 /** 2542 * Constant for the "Playing Cards" Unicode character block. 2543 * @since 1.7 2544 */ 2545 public static final UnicodeBlock PLAYING_CARDS = 2546 new UnicodeBlock("PLAYING_CARDS", 2547 "PLAYING CARDS", 2548 "PLAYINGCARDS"); 2549 2550 /** 2551 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2552 * block. 2553 * @since 1.7 2554 */ 2555 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2556 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2557 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2558 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2559 2560 /** 2561 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2562 * block. 2563 * @since 1.7 2564 */ 2565 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2566 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2567 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2568 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2569 2570 /** 2571 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2572 * character block. 
2573 * @since 1.7 2574 */ 2575 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2576 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2577 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2578 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2579 2580 /** 2581 * Constant for the "Emoticons" Unicode character block. 2582 * @since 1.7 2583 */ 2584 public static final UnicodeBlock EMOTICONS = 2585 new UnicodeBlock("EMOTICONS"); 2586 2587 /** 2588 * Constant for the "Transport And Map Symbols" Unicode character block. 2589 * @since 1.7 2590 */ 2591 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2592 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2593 "TRANSPORT AND MAP SYMBOLS", 2594 "TRANSPORTANDMAPSYMBOLS"); 2595 2596 /** 2597 * Constant for the "Alchemical Symbols" Unicode character block. 2598 * @since 1.7 2599 */ 2600 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2601 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2602 "ALCHEMICAL SYMBOLS", 2603 "ALCHEMICALSYMBOLS"); 2604 2605 /** 2606 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2607 * character block. 2608 * @since 1.7 2609 */ 2610 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2611 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2612 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2613 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2614 2615 /** 2616 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2617 * character block. 2618 * @since 1.7 2619 */ 2620 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2621 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2622 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2623 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2624 2625 /** 2626 * Constant for the "Arabic Extended-A" Unicode character block. 
2627 * @since 1.8 2628 */ 2629 public static final UnicodeBlock ARABIC_EXTENDED_A = 2630 new UnicodeBlock("ARABIC_EXTENDED_A", 2631 "ARABIC EXTENDED-A", 2632 "ARABICEXTENDED-A"); 2633 2634 /** 2635 * Constant for the "Sundanese Supplement" Unicode character block. 2636 * @since 1.8 2637 */ 2638 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2639 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2640 "SUNDANESE SUPPLEMENT", 2641 "SUNDANESESUPPLEMENT"); 2642 2643 /** 2644 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2645 * @since 1.8 2646 */ 2647 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2648 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2649 "MEETEI MAYEK EXTENSIONS", 2650 "MEETEIMAYEKEXTENSIONS"); 2651 2652 /** 2653 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2654 * @since 1.8 2655 */ 2656 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2657 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2658 "MEROITIC HIEROGLYPHS", 2659 "MEROITICHIEROGLYPHS"); 2660 2661 /** 2662 * Constant for the "Meroitic Cursive" Unicode character block. 2663 * @since 1.8 2664 */ 2665 public static final UnicodeBlock MEROITIC_CURSIVE = 2666 new UnicodeBlock("MEROITIC_CURSIVE", 2667 "MEROITIC CURSIVE", 2668 "MEROITICCURSIVE"); 2669 2670 /** 2671 * Constant for the "Sora Sompeng" Unicode character block. 2672 * @since 1.8 2673 */ 2674 public static final UnicodeBlock SORA_SOMPENG = 2675 new UnicodeBlock("SORA_SOMPENG", 2676 "SORA SOMPENG", 2677 "SORASOMPENG"); 2678 2679 /** 2680 * Constant for the "Chakma" Unicode character block. 2681 * @since 1.8 2682 */ 2683 public static final UnicodeBlock CHAKMA = 2684 new UnicodeBlock("CHAKMA"); 2685 2686 /** 2687 * Constant for the "Sharada" Unicode character block. 2688 * @since 1.8 2689 */ 2690 public static final UnicodeBlock SHARADA = 2691 new UnicodeBlock("SHARADA"); 2692 2693 /** 2694 * Constant for the "Takri" Unicode character block. 
2695 * @since 1.8 2696 */ 2697 public static final UnicodeBlock TAKRI = 2698 new UnicodeBlock("TAKRI"); 2699 2700 /** 2701 * Constant for the "Miao" Unicode character block. 2702 * @since 1.8 2703 */ 2704 public static final UnicodeBlock MIAO = 2705 new UnicodeBlock("MIAO"); 2706 2707 /** 2708 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2709 * character block. 2710 * @since 1.8 2711 */ 2712 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2713 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2714 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2715 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2716 2717 /** 2718 * Constant for the "Combining Diacritical Marks Extended" Unicode 2719 * character block. 2720 * @since 9 2721 */ 2722 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2723 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2724 "COMBINING DIACRITICAL MARKS EXTENDED", 2725 "COMBININGDIACRITICALMARKSEXTENDED"); 2726 2727 /** 2728 * Constant for the "Myanmar Extended-B" Unicode character block. 2729 * @since 9 2730 */ 2731 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2732 new UnicodeBlock("MYANMAR_EXTENDED_B", 2733 "MYANMAR EXTENDED-B", 2734 "MYANMAREXTENDED-B"); 2735 2736 /** 2737 * Constant for the "Latin Extended-E" Unicode character block. 2738 * @since 9 2739 */ 2740 public static final UnicodeBlock LATIN_EXTENDED_E = 2741 new UnicodeBlock("LATIN_EXTENDED_E", 2742 "LATIN EXTENDED-E", 2743 "LATINEXTENDED-E"); 2744 2745 /** 2746 * Constant for the "Coptic Epact Numbers" Unicode character block. 2747 * @since 9 2748 */ 2749 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2750 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2751 "COPTIC EPACT NUMBERS", 2752 "COPTICEPACTNUMBERS"); 2753 2754 /** 2755 * Constant for the "Old Permic" Unicode character block. 
2756 * @since 9 2757 */ 2758 public static final UnicodeBlock OLD_PERMIC = 2759 new UnicodeBlock("OLD_PERMIC", 2760 "OLD PERMIC", 2761 "OLDPERMIC"); 2762 2763 /** 2764 * Constant for the "Elbasan" Unicode character block. 2765 * @since 9 2766 */ 2767 public static final UnicodeBlock ELBASAN = 2768 new UnicodeBlock("ELBASAN"); 2769 2770 /** 2771 * Constant for the "Caucasian Albanian" Unicode character block. 2772 * @since 9 2773 */ 2774 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2775 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2776 "CAUCASIAN ALBANIAN", 2777 "CAUCASIANALBANIAN"); 2778 2779 /** 2780 * Constant for the "Linear A" Unicode character block. 2781 * @since 9 2782 */ 2783 public static final UnicodeBlock LINEAR_A = 2784 new UnicodeBlock("LINEAR_A", 2785 "LINEAR A", 2786 "LINEARA"); 2787 2788 /** 2789 * Constant for the "Palmyrene" Unicode character block. 2790 * @since 9 2791 */ 2792 public static final UnicodeBlock PALMYRENE = 2793 new UnicodeBlock("PALMYRENE"); 2794 2795 /** 2796 * Constant for the "Nabataean" Unicode character block. 2797 * @since 9 2798 */ 2799 public static final UnicodeBlock NABATAEAN = 2800 new UnicodeBlock("NABATAEAN"); 2801 2802 /** 2803 * Constant for the "Old North Arabian" Unicode character block. 2804 * @since 9 2805 */ 2806 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2807 new UnicodeBlock("OLD_NORTH_ARABIAN", 2808 "OLD NORTH ARABIAN", 2809 "OLDNORTHARABIAN"); 2810 2811 /** 2812 * Constant for the "Manichaean" Unicode character block. 2813 * @since 9 2814 */ 2815 public static final UnicodeBlock MANICHAEAN = 2816 new UnicodeBlock("MANICHAEAN"); 2817 2818 /** 2819 * Constant for the "Psalter Pahlavi" Unicode character block. 2820 * @since 9 2821 */ 2822 public static final UnicodeBlock PSALTER_PAHLAVI = 2823 new UnicodeBlock("PSALTER_PAHLAVI", 2824 "PSALTER PAHLAVI", 2825 "PSALTERPAHLAVI"); 2826 2827 /** 2828 * Constant for the "Mahajani" Unicode character block. 
2829 * @since 9 2830 */ 2831 public static final UnicodeBlock MAHAJANI = 2832 new UnicodeBlock("MAHAJANI"); 2833 2834 /** 2835 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2836 * @since 9 2837 */ 2838 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2839 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2840 "SINHALA ARCHAIC NUMBERS", 2841 "SINHALAARCHAICNUMBERS"); 2842 2843 /** 2844 * Constant for the "Khojki" Unicode character block. 2845 * @since 9 2846 */ 2847 public static final UnicodeBlock KHOJKI = 2848 new UnicodeBlock("KHOJKI"); 2849 2850 /** 2851 * Constant for the "Khudawadi" Unicode character block. 2852 * @since 9 2853 */ 2854 public static final UnicodeBlock KHUDAWADI = 2855 new UnicodeBlock("KHUDAWADI"); 2856 2857 /** 2858 * Constant for the "Grantha" Unicode character block. 2859 * @since 9 2860 */ 2861 public static final UnicodeBlock GRANTHA = 2862 new UnicodeBlock("GRANTHA"); 2863 2864 /** 2865 * Constant for the "Tirhuta" Unicode character block. 2866 * @since 9 2867 */ 2868 public static final UnicodeBlock TIRHUTA = 2869 new UnicodeBlock("TIRHUTA"); 2870 2871 /** 2872 * Constant for the "Siddham" Unicode character block. 2873 * @since 9 2874 */ 2875 public static final UnicodeBlock SIDDHAM = 2876 new UnicodeBlock("SIDDHAM"); 2877 2878 /** 2879 * Constant for the "Modi" Unicode character block. 2880 * @since 9 2881 */ 2882 public static final UnicodeBlock MODI = 2883 new UnicodeBlock("MODI"); 2884 2885 /** 2886 * Constant for the "Warang Citi" Unicode character block. 2887 * @since 9 2888 */ 2889 public static final UnicodeBlock WARANG_CITI = 2890 new UnicodeBlock("WARANG_CITI", 2891 "WARANG CITI", 2892 "WARANGCITI"); 2893 2894 /** 2895 * Constant for the "Pau Cin Hau" Unicode character block. 2896 * @since 9 2897 */ 2898 public static final UnicodeBlock PAU_CIN_HAU = 2899 new UnicodeBlock("PAU_CIN_HAU", 2900 "PAU CIN HAU", 2901 "PAUCINHAU"); 2902 2903 /** 2904 * Constant for the "Mro" Unicode character block. 
2905 * @since 9 2906 */ 2907 public static final UnicodeBlock MRO = 2908 new UnicodeBlock("MRO"); 2909 2910 /** 2911 * Constant for the "Bassa Vah" Unicode character block. 2912 * @since 9 2913 */ 2914 public static final UnicodeBlock BASSA_VAH = 2915 new UnicodeBlock("BASSA_VAH", 2916 "BASSA VAH", 2917 "BASSAVAH"); 2918 2919 /** 2920 * Constant for the "Pahawh Hmong" Unicode character block. 2921 * @since 9 2922 */ 2923 public static final UnicodeBlock PAHAWH_HMONG = 2924 new UnicodeBlock("PAHAWH_HMONG", 2925 "PAHAWH HMONG", 2926 "PAHAWHHMONG"); 2927 2928 /** 2929 * Constant for the "Duployan" Unicode character block. 2930 * @since 9 2931 */ 2932 public static final UnicodeBlock DUPLOYAN = 2933 new UnicodeBlock("DUPLOYAN"); 2934 2935 /** 2936 * Constant for the "Shorthand Format Controls" Unicode character block. 2937 * @since 9 2938 */ 2939 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2940 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2941 "SHORTHAND FORMAT CONTROLS", 2942 "SHORTHANDFORMATCONTROLS"); 2943 2944 /** 2945 * Constant for the "Mende Kikakui" Unicode character block. 2946 * @since 9 2947 */ 2948 public static final UnicodeBlock MENDE_KIKAKUI = 2949 new UnicodeBlock("MENDE_KIKAKUI", 2950 "MENDE KIKAKUI", 2951 "MENDEKIKAKUI"); 2952 2953 /** 2954 * Constant for the "Ornamental Dingbats" Unicode character block. 2955 * @since 9 2956 */ 2957 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2958 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2959 "ORNAMENTAL DINGBATS", 2960 "ORNAMENTALDINGBATS"); 2961 2962 /** 2963 * Constant for the "Geometric Shapes Extended" Unicode character block. 2964 * @since 9 2965 */ 2966 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2967 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2968 "GEOMETRIC SHAPES EXTENDED", 2969 "GEOMETRICSHAPESEXTENDED"); 2970 2971 /** 2972 * Constant for the "Supplemental Arrows-C" Unicode character block. 
2973 * @since 9 2974 */ 2975 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2976 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2977 "SUPPLEMENTAL ARROWS-C", 2978 "SUPPLEMENTALARROWS-C"); 2979 2980 /** 2981 * Constant for the "Cherokee Supplement" Unicode character block. 2982 * @since 9 2983 */ 2984 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2985 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2986 "CHEROKEE SUPPLEMENT", 2987 "CHEROKEESUPPLEMENT"); 2988 2989 /** 2990 * Constant for the "Hatran" Unicode character block. 2991 * @since 9 2992 */ 2993 public static final UnicodeBlock HATRAN = 2994 new UnicodeBlock("HATRAN"); 2995 2996 /** 2997 * Constant for the "Old Hungarian" Unicode character block. 2998 * @since 9 2999 */ 3000 public static final UnicodeBlock OLD_HUNGARIAN = 3001 new UnicodeBlock("OLD_HUNGARIAN", 3002 "OLD HUNGARIAN", 3003 "OLDHUNGARIAN"); 3004 3005 /** 3006 * Constant for the "Multani" Unicode character block. 3007 * @since 9 3008 */ 3009 public static final UnicodeBlock MULTANI = 3010 new UnicodeBlock("MULTANI"); 3011 3012 /** 3013 * Constant for the "Ahom" Unicode character block. 3014 * @since 9 3015 */ 3016 public static final UnicodeBlock AHOM = 3017 new UnicodeBlock("AHOM"); 3018 3019 /** 3020 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 3021 * @since 9 3022 */ 3023 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 3024 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 3025 "EARLY DYNASTIC CUNEIFORM", 3026 "EARLYDYNASTICCUNEIFORM"); 3027 3028 /** 3029 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 3030 * @since 9 3031 */ 3032 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 3033 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 3034 "ANATOLIAN HIEROGLYPHS", 3035 "ANATOLIANHIEROGLYPHS"); 3036 3037 /** 3038 * Constant for the "Sutton SignWriting" Unicode character block. 
3039 * @since 9 3040 */ 3041 public static final UnicodeBlock SUTTON_SIGNWRITING = 3042 new UnicodeBlock("SUTTON_SIGNWRITING", 3043 "SUTTON SIGNWRITING", 3044 "SUTTONSIGNWRITING"); 3045 3046 /** 3047 * Constant for the "Supplemental Symbols and Pictographs" Unicode 3048 * character block. 3049 * @since 9 3050 */ 3051 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 3052 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 3053 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 3054 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 3055 3056 /** 3057 * Constant for the "CJK Unified Ideographs Extension E" Unicode 3058 * character block. 3059 * @since 9 3060 */ 3061 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 3062 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 3063 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 3064 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 3065 3066 /** 3067 * Constant for the "Syriac Supplement" Unicode 3068 * character block. 3069 * @since 11 3070 */ 3071 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 3072 new UnicodeBlock("SYRIAC_SUPPLEMENT", 3073 "SYRIAC SUPPLEMENT", 3074 "SYRIACSUPPLEMENT"); 3075 3076 /** 3077 * Constant for the "Cyrillic Extended-C" Unicode 3078 * character block. 3079 * @since 11 3080 */ 3081 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 3082 new UnicodeBlock("CYRILLIC_EXTENDED_C", 3083 "CYRILLIC EXTENDED-C", 3084 "CYRILLICEXTENDED-C"); 3085 3086 /** 3087 * Constant for the "Osage" Unicode 3088 * character block. 3089 * @since 11 3090 */ 3091 public static final UnicodeBlock OSAGE = 3092 new UnicodeBlock("OSAGE"); 3093 3094 /** 3095 * Constant for the "Newa" Unicode 3096 * character block. 3097 * @since 11 3098 */ 3099 public static final UnicodeBlock NEWA = 3100 new UnicodeBlock("NEWA"); 3101 3102 /** 3103 * Constant for the "Mongolian Supplement" Unicode 3104 * character block. 
* @since 11
         */
        public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
            new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
                             "MONGOLIAN SUPPLEMENT",
                             "MONGOLIANSUPPLEMENT");

        /**
         * Constant for the "Marchen" Unicode
         * character block ({@code U+11C70..U+11CBF}).
         * @since 11
         */
        public static final UnicodeBlock MARCHEN =
            new UnicodeBlock("MARCHEN");

        /**
         * Constant for the "Ideographic Symbols and Punctuation" Unicode
         * character block ({@code U+16FE0..U+16FFF}).
         * @since 11
         */
        public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
            new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
                             "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
                             "IDEOGRAPHICSYMBOLSANDPUNCTUATION");

        /**
         * Constant for the "Tangut" Unicode
         * character block ({@code U+17000..U+187FF}).
         * @since 11
         */
        public static final UnicodeBlock TANGUT =
            new UnicodeBlock("TANGUT");

        /**
         * Constant for the "Tangut Components" Unicode
         * character block ({@code U+18800..U+18AFF}).
         * @since 11
         */
        public static final UnicodeBlock TANGUT_COMPONENTS =
            new UnicodeBlock("TANGUT_COMPONENTS",
                             "TANGUT COMPONENTS",
                             "TANGUTCOMPONENTS");

        /**
         * Constant for the "Kana Extended-A" Unicode
         * character block ({@code U+1B100..U+1B12F}).
         * @since 11
         */
        public static final UnicodeBlock KANA_EXTENDED_A =
            new UnicodeBlock("KANA_EXTENDED_A",
                             "KANA EXTENDED-A",
                             "KANAEXTENDED-A");

        /**
         * Constant for the "Glagolitic Supplement" Unicode
         * character block ({@code U+1E000..U+1E02F}).
         * @since 11
         */
        public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
            new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
                             "GLAGOLITIC SUPPLEMENT",
                             "GLAGOLITICSUPPLEMENT");

        /**
         * Constant for the "Adlam" Unicode
         * character block ({@code U+1E900..U+1E95F}).
         * @since 11
         */
        public static final UnicodeBlock ADLAM =
            new UnicodeBlock("ADLAM");

        /**
         * Constant for the "Masaram Gondi" Unicode
         * character block ({@code U+11D00..U+11D5F}).
         * @since 11
         */
        public static final UnicodeBlock MASARAM_GONDI =
            new UnicodeBlock("MASARAM_GONDI",
                             "MASARAM GONDI",
                             "MASARAMGONDI");

        /**
         * Constant for the "Zanabazar Square" Unicode
         * character block ({@code U+11A00..U+11A4F}).
         * @since 11
         */
        public static final UnicodeBlock ZANABAZAR_SQUARE =
            new UnicodeBlock("ZANABAZAR_SQUARE",
                             "ZANABAZAR SQUARE",
                             "ZANABAZARSQUARE");

        /**
         * Constant for the "Nushu" Unicode
         * character block ({@code U+1B170..U+1B2FF}).
         * @since 11
         */
        public static final UnicodeBlock NUSHU =
            new UnicodeBlock("NUSHU");

        /**
         * Constant for the "Soyombo" Unicode
         * character block ({@code U+11A50..U+11AAF}).
         * @since 11
         */
        public static final UnicodeBlock SOYOMBO =
            new UnicodeBlock("SOYOMBO");

        /**
         * Constant for the "Bhaiksuki" Unicode
         * character block ({@code U+11C00..U+11C6F}).
         * @since 11
         */
        public static final UnicodeBlock BHAIKSUKI =
            new UnicodeBlock("BHAIKSUKI");

        /**
         * Constant for the "CJK Unified Ideographs Extension F" Unicode
         * character block ({@code U+2CEB0..U+2EBEF}).
         * @since 11
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION F",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");

        /**
         * Constant for the "Georgian Extended" Unicode
         * character block ({@code U+1C90..U+1CBF}).
         * @since 12
         */
        public static final UnicodeBlock GEORGIAN_EXTENDED =
            new UnicodeBlock("GEORGIAN_EXTENDED",
                             "GEORGIAN EXTENDED",
                             "GEORGIANEXTENDED");

        /**
         * Constant for the "Hanifi Rohingya" Unicode
         * character block ({@code U+10D00..U+10D3F}).
         * @since 12
         */
        public static final UnicodeBlock HANIFI_ROHINGYA =
            new UnicodeBlock("HANIFI_ROHINGYA",
                             "HANIFI ROHINGYA",
                             "HANIFIROHINGYA");

        /**
         * Constant for the "Old Sogdian" Unicode
         * character block ({@code U+10F00..U+10F2F}).
         * @since 12
         */
        public static final UnicodeBlock OLD_SOGDIAN =
            new UnicodeBlock("OLD_SOGDIAN",
                             "OLD SOGDIAN",
                             "OLDSOGDIAN");

        /**
         * Constant for the "Sogdian" Unicode
         * character block ({@code U+10F30..U+10F6F}).
         * @since 12
         */
        public static final UnicodeBlock SOGDIAN =
            new UnicodeBlock("SOGDIAN");

        /**
         * Constant for the "Dogra" Unicode
         * character block ({@code U+11800..U+1184F}).
         * @since 12
         */
        public static final UnicodeBlock DOGRA =
            new UnicodeBlock("DOGRA");

        /**
         * Constant for the "Gunjala Gondi" Unicode
         * character block ({@code U+11D60..U+11DAF}).
         * @since 12
         */
        public static final UnicodeBlock GUNJALA_GONDI =
            new UnicodeBlock("GUNJALA_GONDI",
                             "GUNJALA GONDI",
                             "GUNJALAGONDI");

        /**
         * Constant for the "Makasar" Unicode
         * character block ({@code U+11EE0..U+11EFF}).
         * @since 12
         */
        public static final UnicodeBlock MAKASAR =
            new UnicodeBlock("MAKASAR");

        /**
         * Constant for the "Medefaidrin" Unicode
         * character block ({@code U+16E40..U+16E9F}).
         * @since 12
         */
        public static final UnicodeBlock MEDEFAIDRIN =
            new UnicodeBlock("MEDEFAIDRIN");

        /**
         * Constant for the "Mayan Numerals" Unicode
         * character block ({@code U+1D2E0..U+1D2FF}).
         * @since 12
         */
        public static final UnicodeBlock MAYAN_NUMERALS =
            new UnicodeBlock("MAYAN_NUMERALS",
                             "MAYAN NUMERALS",
                             "MAYANNUMERALS");

        /**
         * Constant for the "Indic Siyaq Numbers" Unicode
         * character block ({@code U+1EC70..U+1ECBF}).
         * @since 12
         */
        public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
            new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
                             "INDIC SIYAQ NUMBERS",
                             "INDICSIYAQNUMBERS");

        /**
         * Constant for the "Chess Symbols" Unicode
         * character block ({@code U+1FA00..U+1FA6F}).
         * @since 12
         */
        public static final UnicodeBlock CHESS_SYMBOLS =
            new UnicodeBlock("CHESS_SYMBOLS",
                             "CHESS SYMBOLS",
                             "CHESSSYMBOLS");

        /**
         * Constant for the "Elymaic" Unicode
         * character block ({@code U+10FE0..U+10FFF}).
         * @since 13
         */
        public static final UnicodeBlock ELYMAIC =
            new UnicodeBlock("ELYMAIC");

        /**
         * Constant for the "Nandinagari" Unicode
         * character block ({@code U+119A0..U+119FF}).
         * @since 13
         */
        public static final UnicodeBlock NANDINAGARI =
            new UnicodeBlock("NANDINAGARI");

        /**
         * Constant for the "Tamil Supplement" Unicode
         * character block ({@code U+11FC0..U+11FFF}).
         * @since 13
         */
        public static final UnicodeBlock TAMIL_SUPPLEMENT =
            new UnicodeBlock("TAMIL_SUPPLEMENT",
                             "TAMIL SUPPLEMENT",
                             "TAMILSUPPLEMENT");

        /**
         * Constant for the "Egyptian Hieroglyph Format Controls" Unicode
         * character block ({@code U+13430..U+1345F}).
         * @since 13
         */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
            new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
                             "EGYPTIAN HIEROGLYPH FORMAT CONTROLS",
                             "EGYPTIANHIEROGLYPHFORMATCONTROLS");

        /**
         * Constant for the "Small Kana Extension" Unicode
         * character block ({@code U+1B130..U+1B16F}).
         * @since 13
         */
        public static final UnicodeBlock SMALL_KANA_EXTENSION =
            new UnicodeBlock("SMALL_KANA_EXTENSION",
                             "SMALL KANA EXTENSION",
                             "SMALLKANAEXTENSION");

        /**
         * Constant for the "Nyiakeng Puachue Hmong" Unicode
         * character block ({@code U+1E100..U+1E14F}).
         * @since 13
         */
        public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
            new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
                             "NYIAKENG PUACHUE HMONG",
                             "NYIAKENGPUACHUEHMONG");

        /**
         * Constant for the "Wancho" Unicode
         * character block ({@code U+1E2C0..U+1E2FF}).
         * @since 13
         */
        public static final UnicodeBlock WANCHO =
            new UnicodeBlock("WANCHO");

        /**
         * Constant for the "Ottoman Siyaq Numbers" Unicode
         * character block ({@code U+1ED00..U+1ED4F}).
         * @since 13
         */
        public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
            new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
                             "OTTOMAN SIYAQ NUMBERS",
                             "OTTOMANSIYAQNUMBERS");

        /**
         * Constant for the "Symbols and Pictographs Extended-A" Unicode
         * character block ({@code U+1FA70..U+1FAFF}).
         * @since 13
         */
        public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
            new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
                             "SYMBOLS AND PICTOGRAPHS EXTENDED-A",
                             "SYMBOLSANDPICTOGRAPHSEXTENDED-A");

        /**
         * Constant for the "Yezidi" Unicode
         * character block ({@code U+10E80..U+10EBF}).
         * @since 15
         */
        public static final UnicodeBlock YEZIDI =
            new UnicodeBlock("YEZIDI");

        /**
         * Constant for the "Chorasmian" Unicode
         * character block ({@code U+10FB0..U+10FDF}).
         * @since 15
         */
        public static final UnicodeBlock CHORASMIAN =
            new UnicodeBlock("CHORASMIAN");

        /**
         * Constant for the "Dives Akuru" Unicode
         * character block ({@code U+11900..U+1195F}).
         * @since 15
         */
        public static final UnicodeBlock DIVES_AKURU =
            new UnicodeBlock("DIVES_AKURU",
                             "DIVES AKURU",
                             "DIVESAKURU");

        /**
         * Constant for the "Lisu Supplement" Unicode
         * character block ({@code U+11FB0..U+11FBF}).
         * @since 15
         */
        public static final UnicodeBlock LISU_SUPPLEMENT =
            new UnicodeBlock("LISU_SUPPLEMENT",
                             "LISU SUPPLEMENT",
                             "LISUSUPPLEMENT");

        /**
         * Constant for the "Khitan Small Script" Unicode
         * character block ({@code U+18B00..U+18CFF}).
         * @since 15
         */
        public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
            new UnicodeBlock("KHITAN_SMALL_SCRIPT",
                             "KHITAN SMALL SCRIPT",
                             "KHITANSMALLSCRIPT");

        /**
         * Constant for the "Tangut Supplement" Unicode
         * character block ({@code U+18D00..U+18D7F}).
         * @since 15
         */
        public static final UnicodeBlock TANGUT_SUPPLEMENT =
            new UnicodeBlock("TANGUT_SUPPLEMENT",
                             "TANGUT SUPPLEMENT",
                             "TANGUTSUPPLEMENT");

        /**
         * Constant for the "Symbols for Legacy Computing" Unicode
         * character block ({@code U+1FB00..U+1FBFF}).
         * @since 15
         */
        public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
            new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
                             "SYMBOLS FOR LEGACY COMPUTING",
                             "SYMBOLSFORLEGACYCOMPUTING");

        /**
         * Constant for the "CJK Unified Ideographs Extension G" Unicode
         * character block ({@code U+30000..U+3134F}).
         * @since 15
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION G",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONG");

        /**
         * Constant for the "Arabic Extended-B" Unicode
         * character block ({@code U+0870..U+089F}).
         * @since 19
         */
        public static final UnicodeBlock ARABIC_EXTENDED_B =
            new UnicodeBlock("ARABIC_EXTENDED_B",
                             "ARABIC EXTENDED-B",
                             "ARABICEXTENDED-B");

        /**
         * Constant for the "Vithkuqi" Unicode
         * character block ({@code U+10570..U+105BF}).
         * @since 19
         */
        public static final UnicodeBlock VITHKUQI =
            new UnicodeBlock("VITHKUQI");

        /**
         * Constant for the "Latin Extended-F" Unicode
         * character block ({@code U+10780..U+107BF}).
         * @since 19
         */
        public static final UnicodeBlock LATIN_EXTENDED_F =
            new UnicodeBlock("LATIN_EXTENDED_F",
                             "LATIN EXTENDED-F",
                             "LATINEXTENDED-F");

        /**
         * Constant for the "Old Uyghur" Unicode
         * character block ({@code U+10F70..U+10FAF}).
         * @since 19
         */
        public static final UnicodeBlock OLD_UYGHUR =
            new UnicodeBlock("OLD_UYGHUR",
                             "OLD UYGHUR",
                             "OLDUYGHUR");

        /**
         * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode
         * character block ({@code U+11AB0..U+11ABF}).
         * @since 19
         */
        public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
            new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
                             "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A",
                             "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A");

        /**
         * Constant for the "Cypro-Minoan" Unicode
         * character block ({@code U+12F90..U+12FFF}).
         * @since 19
         */
        public static final UnicodeBlock CYPRO_MINOAN =
            new UnicodeBlock("CYPRO_MINOAN",
                             "CYPRO-MINOAN",
                             "CYPRO-MINOAN");

        /**
         * Constant for the "Tangsa" Unicode
         * character block ({@code U+16A70..U+16ACF}).
         * @since 19
         */
        public static final UnicodeBlock TANGSA =
            new UnicodeBlock("TANGSA");

        /**
         * Constant for the "Kana Extended-B" Unicode
         * character block ({@code U+1AFF0..U+1AFFF}).
         * @since 19
         */
        public static final UnicodeBlock KANA_EXTENDED_B =
            new UnicodeBlock("KANA_EXTENDED_B",
                             "KANA EXTENDED-B",
                             "KANAEXTENDED-B");

        /**
         * Constant for the "Znamenny Musical Notation" Unicode
         * character block ({@code U+1CF00..U+1CFCF}).
         * @since 19
         */
        public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
            new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
                             "ZNAMENNY MUSICAL NOTATION",
                             "ZNAMENNYMUSICALNOTATION");

        /**
         * Constant for the "Latin Extended-G" Unicode
         * character block ({@code U+1DF00..U+1DFFF}).
         * @since 19
         */
        public static final UnicodeBlock LATIN_EXTENDED_G =
            new UnicodeBlock("LATIN_EXTENDED_G",
                             "LATIN EXTENDED-G",
                             "LATINEXTENDED-G");

        /**
         * Constant for the "Toto" Unicode
         * character block ({@code U+1E290..U+1E2BF}).
         * @since 19
         */
        public static final UnicodeBlock TOTO =
            new UnicodeBlock("TOTO");

        /**
         * Constant for the "Ethiopic Extended-B" Unicode
         * character block ({@code U+1E7E0..U+1E7FF}).
         * @since 19
         */
        public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
            new UnicodeBlock("ETHIOPIC_EXTENDED_B",
                             "ETHIOPIC EXTENDED-B",
                             "ETHIOPICEXTENDED-B");

        /**
         * Constant for the "Arabic Extended-C" Unicode
         * character block ({@code U+10EC0..U+10EFF}).
         * @since 20
         */
        public static final UnicodeBlock ARABIC_EXTENDED_C =
            new UnicodeBlock("ARABIC_EXTENDED_C",
                             "ARABIC EXTENDED-C",
                             "ARABICEXTENDED-C");

        /**
         * Constant for the "Devanagari Extended-A" Unicode
         * character block ({@code U+11B00..U+11B5F}).
         * @since 20
         */
        public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
            new UnicodeBlock("DEVANAGARI_EXTENDED_A",
                             "DEVANAGARI EXTENDED-A",
                             "DEVANAGARIEXTENDED-A");

        /**
         * Constant for the "Kawi" Unicode
         * character block ({@code U+11F00..U+11F5F}).
         * @since 20
         */
        public static final UnicodeBlock KAWI =
            new UnicodeBlock("KAWI");

        /**
         * Constant for the "Kaktovik Numerals" Unicode
         * character block ({@code U+1D2C0..U+1D2DF}).
         * @since 20
         */
        public static final UnicodeBlock KAKTOVIK_NUMERALS =
            new UnicodeBlock("KAKTOVIK_NUMERALS",
                             "KAKTOVIK NUMERALS",
                             "KAKTOVIKNUMERALS");

        /**
         * Constant for the "Cyrillic Extended-D" Unicode
         * character block ({@code U+1E030..U+1E08F}).
         * @since 20
         */
        public static final UnicodeBlock CYRILLIC_EXTENDED_D =
            new UnicodeBlock("CYRILLIC_EXTENDED_D",
                             "CYRILLIC EXTENDED-D",
                             "CYRILLICEXTENDED-D");

        /**
         * Constant for the "Nag Mundari" Unicode
         * character block ({@code U+1E4D0..U+1E4FF}).
         * @since 20
         */
        public static final UnicodeBlock NAG_MUNDARI =
            new UnicodeBlock("NAG_MUNDARI",
                             "NAG MUNDARI",
                             "NAGMUNDARI");

        /**
         * Constant for the "CJK Unified Ideographs Extension H" Unicode
         * character block ({@code U+31350..U+323AF}).
* @since 20
         */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
            new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
                             "CJK UNIFIED IDEOGRAPHS EXTENSION H",
                             "CJKUNIFIEDIDEOGRAPHSEXTENSIONH");

        /**
         * Start code point of each consecutive range, listed in ascending
         * order. The trailing comment on every entry gives the range and the
         * name of the Unicode block occupying it, or "unassigned" for a gap
         * between blocks. Entries line up by index with {@code blocks}, so the
         * two arrays must be edited together.
         *
         * NOTE(review): the code that consumes this table is outside this
         * section; presumably {@code of(int)} searches it -- confirm there.
         */
        private static final int[] blockStarts = {
            0x0000,   // 0000..007F; Basic Latin
            0x0080,   // 0080..00FF; Latin-1 Supplement
            0x0100,   // 0100..017F; Latin Extended-A
            0x0180,   // 0180..024F; Latin Extended-B
            0x0250,   // 0250..02AF; IPA Extensions
            0x02B0,   // 02B0..02FF; Spacing Modifier Letters
            0x0300,   // 0300..036F; Combining Diacritical Marks
            0x0370,   // 0370..03FF; Greek and Coptic
            0x0400,   // 0400..04FF; Cyrillic
            0x0500,   // 0500..052F; Cyrillic Supplement
            0x0530,   // 0530..058F; Armenian
            0x0590,   // 0590..05FF; Hebrew
            0x0600,   // 0600..06FF; Arabic
            0x0700,   // 0700..074F; Syriac
            0x0750,   // 0750..077F; Arabic Supplement
            0x0780,   // 0780..07BF; Thaana
            0x07C0,   // 07C0..07FF; NKo
            0x0800,   // 0800..083F; Samaritan
            0x0840,   // 0840..085F; Mandaic
            0x0860,   // 0860..086F; Syriac Supplement
            0x0870,   // 0870..089F; Arabic Extended-B
            0x08A0,   // 08A0..08FF; Arabic Extended-A
            0x0900,   // 0900..097F; Devanagari
            0x0980,   // 0980..09FF; Bengali
            0x0A00,   // 0A00..0A7F; Gurmukhi
            0x0A80,   // 0A80..0AFF; Gujarati
            0x0B00,   // 0B00..0B7F; Oriya
            0x0B80,   // 0B80..0BFF; Tamil
            0x0C00,   // 0C00..0C7F; Telugu
            0x0C80,   // 0C80..0CFF; Kannada
            0x0D00,   // 0D00..0D7F; Malayalam
            0x0D80,   // 0D80..0DFF; Sinhala
            0x0E00,   // 0E00..0E7F; Thai
            0x0E80,   // 0E80..0EFF; Lao
            0x0F00,   // 0F00..0FFF; Tibetan
            0x1000,   // 1000..109F; Myanmar
            0x10A0,   // 10A0..10FF; Georgian
            0x1100,   // 1100..11FF; Hangul Jamo
            0x1200,   // 1200..137F; Ethiopic
            0x1380,   // 1380..139F; Ethiopic Supplement
            0x13A0,   // 13A0..13FF; Cherokee
            0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
            0x1680,   // 1680..169F; Ogham
            0x16A0,   // 16A0..16FF; Runic
            0x1700,   // 1700..171F; Tagalog
            0x1720,   // 1720..173F; Hanunoo
            0x1740,   // 1740..175F; Buhid
            0x1760,   // 1760..177F; Tagbanwa
            0x1780,   // 1780..17FF; Khmer
            0x1800,   // 1800..18AF; Mongolian
            0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
            0x1900,   // 1900..194F; Limbu
            0x1950,   // 1950..197F; Tai Le
            0x1980,   // 1980..19DF; New Tai Lue
            0x19E0,   // 19E0..19FF; Khmer Symbols
            0x1A00,   // 1A00..1A1F; Buginese
            0x1A20,   // 1A20..1AAF; Tai Tham
            0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
            0x1B00,   // 1B00..1B7F; Balinese
            0x1B80,   // 1B80..1BBF; Sundanese
            0x1BC0,   // 1BC0..1BFF; Batak
            0x1C00,   // 1C00..1C4F; Lepcha
            0x1C50,   // 1C50..1C7F; Ol Chiki
            0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
            0x1C90,   // 1C90..1CBF; Georgian Extended
            0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
            0x1CD0,   // 1CD0..1CFF; Vedic Extensions
            0x1D00,   // 1D00..1D7F; Phonetic Extensions
            0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
            0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
            0x1E00,   // 1E00..1EFF; Latin Extended Additional
            0x1F00,   // 1F00..1FFF; Greek Extended
            0x2000,   // 2000..206F; General Punctuation
            0x2070,   // 2070..209F; Superscripts and Subscripts
            0x20A0,   // 20A0..20CF; Currency Symbols
            0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
            0x2100,   // 2100..214F; Letterlike Symbols
            0x2150,   // 2150..218F; Number Forms
            0x2190,   // 2190..21FF; Arrows
            0x2200,   // 2200..22FF; Mathematical Operators
            0x2300,   // 2300..23FF; Miscellaneous Technical
            0x2400,   // 2400..243F; Control Pictures
            0x2440,   // 2440..245F; Optical Character Recognition
            0x2460,   // 2460..24FF; Enclosed Alphanumerics
            0x2500,   // 2500..257F; Box Drawing
            0x2580,   // 2580..259F; Block Elements
            0x25A0,   // 25A0..25FF; Geometric Shapes
            0x2600,   // 2600..26FF; Miscellaneous Symbols
            0x2700,   // 2700..27BF; Dingbats
            0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
            0x27F0,   // 27F0..27FF; Supplemental Arrows-A
            0x2800,   // 2800..28FF; Braille Patterns
            0x2900,   // 2900..297F; Supplemental Arrows-B
            0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
            0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
            0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
            0x2C00,   // 2C00..2C5F; Glagolitic
            0x2C60,   // 2C60..2C7F; Latin Extended-C
            0x2C80,   // 2C80..2CFF; Coptic
            0x2D00,   // 2D00..2D2F; Georgian Supplement
            0x2D30,   // 2D30..2D7F; Tifinagh
            0x2D80,   // 2D80..2DDF; Ethiopic Extended
            0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
            0x2E00,   // 2E00..2E7F; Supplemental Punctuation
            0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
            0x2F00,   // 2F00..2FDF; Kangxi Radicals
            0x2FE0,   //             unassigned
            0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
            0x3000,   // 3000..303F; CJK Symbols and Punctuation
            0x3040,   // 3040..309F; Hiragana
            0x30A0,   // 30A0..30FF; Katakana
            0x3100,   // 3100..312F; Bopomofo
            0x3130,   // 3130..318F; Hangul Compatibility Jamo
            0x3190,   // 3190..319F; Kanbun
            0x31A0,   // 31A0..31BF; Bopomofo Extended
            0x31C0,   // 31C0..31EF; CJK Strokes
            0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
            0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
            0x3300,   // 3300..33FF; CJK Compatibility
            0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
            0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
            0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
            0xA000,   // A000..A48F; Yi Syllables
            0xA490,   // A490..A4CF; Yi Radicals
            0xA4D0,   // A4D0..A4FF; Lisu
            0xA500,   // A500..A63F; Vai
            0xA640,   // A640..A69F; Cyrillic Extended-B
            0xA6A0,   // A6A0..A6FF; Bamum
            0xA700,   // A700..A71F; Modifier Tone Letters
            0xA720,   // A720..A7FF; Latin Extended-D
            0xA800,   // A800..A82F; Syloti Nagri
            0xA830,   // A830..A83F; Common Indic Number Forms
            0xA840,   // A840..A87F; Phags-pa
            0xA880,   // A880..A8DF; Saurashtra
            0xA8E0,   // A8E0..A8FF; Devanagari Extended
            0xA900,   // A900..A92F; Kayah Li
            0xA930,   // A930..A95F; Rejang
            0xA960,   // A960..A97F; Hangul Jamo Extended-A
            0xA980,   // A980..A9DF; Javanese
            0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
            0xAA00,   // AA00..AA5F; Cham
            0xAA60,   // AA60..AA7F; Myanmar Extended-A
            0xAA80,   // AA80..AADF; Tai Viet
            0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
            0xAB00,   // AB00..AB2F; Ethiopic Extended-A
            0xAB30,   // AB30..AB6F; Latin Extended-E
            0xAB70,   // AB70..ABBF; Cherokee Supplement
            0xABC0,   // ABC0..ABFF; Meetei Mayek
            0xAC00,   // AC00..D7AF; Hangul Syllables
            0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
            0xD800,   // D800..DB7F; High Surrogates
            0xDB80,   // DB80..DBFF; High Private Use Surrogates
            0xDC00,   // DC00..DFFF; Low Surrogates
            0xE000,   // E000..F8FF; Private Use Area
            0xF900,   // F900..FAFF; CJK Compatibility Ideographs
            0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
            0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
            0xFE00,   // FE00..FE0F; Variation Selectors
            0xFE10,   // FE10..FE1F; Vertical Forms
            0xFE20,   // FE20..FE2F; Combining Half Marks
            0xFE30,   // FE30..FE4F; CJK Compatibility Forms
            0xFE50,   // FE50..FE6F; Small Form Variants
            0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
            0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
            0xFFF0,   // FFF0..FFFF; Specials
            0x10000,  // 10000..1007F; Linear B Syllabary
            0x10080,  // 10080..100FF; Linear B Ideograms
            0x10100,  // 10100..1013F; Aegean Numbers
            0x10140,  // 10140..1018F; Ancient Greek Numbers
            0x10190,  // 10190..101CF; Ancient Symbols
            0x101D0,  // 101D0..101FF; Phaistos Disc
            0x10200,  //               unassigned
            0x10280,  // 10280..1029F; Lycian
            0x102A0,  // 102A0..102DF; Carian
            0x102E0,  // 102E0..102FF; Coptic Epact Numbers
            0x10300,  // 10300..1032F; Old Italic
            0x10330,  // 10330..1034F; Gothic
            0x10350,  // 10350..1037F; Old Permic
            0x10380,  // 10380..1039F; Ugaritic
            0x103A0,  // 103A0..103DF; Old Persian
            0x103E0,  //               unassigned
            0x10400,  // 10400..1044F; Deseret
            0x10450,  // 10450..1047F; Shavian
            0x10480,  // 10480..104AF; Osmanya
            0x104B0,  // 104B0..104FF; Osage
            0x10500,  // 10500..1052F; Elbasan
            0x10530,  // 10530..1056F; Caucasian Albanian
            0x10570,  // 10570..105BF; Vithkuqi
            0x105C0,  //               unassigned
            0x10600,  // 10600..1077F; Linear A
            0x10780,  // 10780..107BF; Latin Extended-F
            0x107C0,  //               unassigned
            0x10800,  // 10800..1083F; Cypriot Syllabary
            0x10840,  // 10840..1085F; Imperial Aramaic
            0x10860,  // 10860..1087F; Palmyrene
            0x10880,  // 10880..108AF; Nabataean
            0x108B0,  //               unassigned
            0x108E0,  // 108E0..108FF; Hatran
            0x10900,  // 10900..1091F; Phoenician
            0x10920,  // 10920..1093F; Lydian
            0x10940,  //               unassigned
            0x10980,  // 10980..1099F; Meroitic Hieroglyphs
            0x109A0,  // 109A0..109FF; Meroitic Cursive
            0x10A00,  // 10A00..10A5F; Kharoshthi
            0x10A60,  // 10A60..10A7F; Old South Arabian
            0x10A80,  // 10A80..10A9F; Old North Arabian
            0x10AA0,  //               unassigned
            0x10AC0,  // 10AC0..10AFF; Manichaean
            0x10B00,  // 10B00..10B3F; Avestan
            0x10B40,  // 10B40..10B5F; Inscriptional Parthian
            0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
            0x10B80,  // 10B80..10BAF; Psalter Pahlavi
            0x10BB0,  //               unassigned
            0x10C00,  // 10C00..10C4F; Old Turkic
            0x10C50,  //               unassigned
            0x10C80,  // 10C80..10CFF; Old Hungarian
            0x10D00,  // 10D00..10D3F; Hanifi Rohingya
            0x10D40,  //               unassigned
            0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
            0x10E80,  // 10E80..10EBF; Yezidi
            0x10EC0,  // 10EC0..10EFF; Arabic Extended-C
            0x10F00,  // 10F00..10F2F; Old Sogdian
            0x10F30,  // 10F30..10F6F; Sogdian
            0x10F70,  // 10F70..10FAF; Old Uyghur
            0x10FB0,  // 10FB0..10FDF; Chorasmian
            0x10FE0,  // 10FE0..10FFF; Elymaic
            0x11000,  // 11000..1107F; Brahmi
            0x11080,  // 11080..110CF; Kaithi
            0x110D0,  // 110D0..110FF; Sora Sompeng
            0x11100,  // 11100..1114F; Chakma
            0x11150,  // 11150..1117F; Mahajani
            0x11180,  // 11180..111DF; Sharada
            0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
            0x11200,  // 11200..1124F; Khojki
            0x11250,  //               unassigned
            0x11280,  // 11280..112AF; Multani
            0x112B0,  // 112B0..112FF; Khudawadi
            0x11300,  // 11300..1137F; Grantha
            0x11380,  //               unassigned
            0x11400,  // 11400..1147F; Newa
            0x11480,  // 11480..114DF; Tirhuta
            0x114E0,  //               unassigned
            0x11580,  // 11580..115FF; Siddham
            0x11600,  // 11600..1165F; Modi
            0x11660,  // 11660..1167F; Mongolian Supplement
            0x11680,  // 11680..116CF; Takri
            0x116D0,  //               unassigned
            0x11700,  // 11700..1174F; Ahom
            0x11750,  //               unassigned
            0x11800,  // 11800..1184F; Dogra
            0x11850,  //               unassigned
            0x118A0,  // 118A0..118FF; Warang Citi
            0x11900,  // 11900..1195F; Dives Akuru
            0x11960,  //               unassigned
            0x119A0,  // 119A0..119FF; Nandinagari
            0x11A00,  // 11A00..11A4F; Zanabazar Square
            0x11A50,  // 11A50..11AAF; Soyombo
            0x11AB0,  // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
            0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
            0x11B00,  // 11B00..11B5F; Devanagari Extended-A
            0x11B60,  //               unassigned
            0x11C00,  // 11C00..11C6F; Bhaiksuki
            0x11C70,  // 11C70..11CBF; Marchen
            0x11CC0,  //               unassigned
            0x11D00,  // 11D00..11D5F; Masaram Gondi
            0x11D60,  // 11D60..11DAF; Gunjala Gondi
            0x11DB0,  //               unassigned
            0x11EE0,  // 11EE0..11EFF; Makasar
            0x11F00,  // 11F00..11F5F; Kawi
            0x11F60,  //               unassigned
            0x11FB0,  // 11FB0..11FBF; Lisu Supplement
            0x11FC0,  // 11FC0..11FFF; Tamil Supplement
            0x12000,  // 12000..123FF; Cuneiform
            0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
            0x12480,  // 12480..1254F; Early Dynastic Cuneiform
            0x12550,  //               unassigned
            0x12F90,  // 12F90..12FFF; Cypro-Minoan
            0x13000,  // 13000..1342F; Egyptian Hieroglyphs
            0x13430,  // 13430..1345F; Egyptian Hieroglyph Format Controls
            0x13460,  //               unassigned
            0x14400,  // 14400..1467F; Anatolian Hieroglyphs
            0x14680,  //               unassigned
            0x16800,  // 16800..16A3F; Bamum Supplement
            0x16A40,  // 16A40..16A6F; Mro
            0x16A70,  // 16A70..16ACF; Tangsa
            0x16AD0,  // 16AD0..16AFF; Bassa Vah
            0x16B00,  // 16B00..16B8F; Pahawh Hmong
            0x16B90,  //               unassigned
            0x16E40,  // 16E40..16E9F; Medefaidrin
            0x16EA0,  //               unassigned
            0x16F00,  // 16F00..16F9F; Miao
            0x16FA0,  //               unassigned
            0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
            0x17000,  // 17000..187FF; Tangut
            0x18800,  // 18800..18AFF; Tangut Components
            0x18B00,  // 18B00..18CFF; Khitan Small Script
            0x18D00,  // 18D00..18D7F; Tangut Supplement
            0x18D80,  //               unassigned
            0x1AFF0,  // 1AFF0..1AFFF; Kana Extended-B
            0x1B000,  // 1B000..1B0FF; Kana Supplement
            0x1B100,  // 1B100..1B12F; Kana Extended-A
            0x1B130,  // 1B130..1B16F; Small Kana Extension
            0x1B170,  // 1B170..1B2FF; Nushu
            0x1B300,  //               unassigned
            0x1BC00,  // 1BC00..1BC9F; Duployan
            0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
            0x1BCB0,  //               unassigned
            0x1CF00,  // 1CF00..1CFCF; Znamenny Musical Notation
            0x1CFD0,  //               unassigned
            0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
            0x1D100,  // 1D100..1D1FF; Musical Symbols
            0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
            0x1D250,  //               unassigned
            0x1D2C0,  // 1D2C0..1D2DF; Kaktovik Numerals
            0x1D2E0,  // 1D2E0..1D2FF; Mayan Numerals
            0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
            0x1D360,  // 1D360..1D37F; Counting Rod Numerals
            0x1D380,  //               unassigned
            0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
            0x1D800,  // 1D800..1DAAF; Sutton SignWriting
            0x1DAB0,  //               unassigned
            0x1DF00,  // 1DF00..1DFFF; Latin Extended-G
            0x1E000,  // 1E000..1E02F; Glagolitic Supplement
            0x1E030,  // 1E030..1E08F; Cyrillic Extended-D
            0x1E090,  //               unassigned
            0x1E100,  // 1E100..1E14F; Nyiakeng Puachue Hmong
            0x1E150,  //               unassigned
            0x1E290,  // 1E290..1E2BF; Toto
            0x1E2C0,  // 1E2C0..1E2FF; Wancho
            0x1E300,  //               unassigned
            0x1E4D0,  // 1E4D0..1E4FF; Nag Mundari
            0x1E500,  //               unassigned
            0x1E7E0,  // 1E7E0..1E7FF; Ethiopic Extended-B
            0x1E800,  // 1E800..1E8DF; Mende Kikakui
            0x1E8E0,  //               unassigned
            0x1E900,  // 1E900..1E95F; Adlam
            0x1E960,  //               unassigned
            0x1EC70,  // 1EC70..1ECBF; Indic Siyaq Numbers
            0x1ECC0,  //               unassigned
            0x1ED00,  // 1ED00..1ED4F; Ottoman Siyaq Numbers
            0x1ED50,  //               unassigned
            0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
            0x1EF00,  //               unassigned
            0x1F000,  // 1F000..1F02F; Mahjong Tiles
            0x1F030,  // 1F030..1F09F; Domino Tiles
            0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
            0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
            0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
            0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
            0x1F600,  // 1F600..1F64F; Emoticons
            0x1F650,  // 1F650..1F67F; Ornamental Dingbats
            0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
            0x1F700,  // 1F700..1F77F; Alchemical Symbols
            0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
            0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
            0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
            0x1FA00,  // 1FA00..1FA6F; Chess Symbols
            0x1FA70,  // 1FA70..1FAFF; Symbols and Pictographs Extended-A
            0x1FB00,  // 1FB00..1FBFF; Symbols for Legacy Computing
            0x1FC00,  //               unassigned
            0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
0x2A6E0,  //               unassigned
            0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
            0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
            0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
            0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
            0x2EBF0,  //               unassigned
            0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
            0x2FA20,  //               unassigned
            0x30000,  // 30000..3134F; CJK Unified Ideographs Extension G
            0x31350,  // 31350..323AF; CJK Unified Ideographs Extension H
            0x323B0,  //               unassigned
            0xE0000,  // E0000..E007F; Tags
            0xE0080,  //               unassigned
            0xE0100,  // E0100..E01EF; Variation Selectors Supplement
            0xE01F0,  //               unassigned
            0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
            0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
        };

        /**
         * The {@code UnicodeBlock} for each range in {@code blockStarts}, stored
         * at the same index. A {@code null} entry stands for a range that
         * {@code blockStarts} labels "unassigned". The two arrays must stay the
         * same length and in the same order.
         *
         * A few constants carry older names than the block names written in
         * the {@code blockStarts} comments; those entries are annotated below.
         */
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,                          // listed as "Greek and Coptic" in blockStarts
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,         // listed as "Cyrillic Supplement" in blockStarts
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            SYRIAC_SUPPLEMENT,
            ARABIC_EXTENDED_B,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            COMBINING_DIACRITICAL_MARKS_EXTENDED,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            CYRILLIC_EXTENDED_C,
            GEORGIAN_EXTENDED,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,    // listed as "Combining Diacritical Marks for Symbols" in blockStarts
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            MYANMAR_EXTENDED_B,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            LATIN_EXTENDED_E,
            CHEROKEE_SUPPLEMENT,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            COPTIC_EPACT_NUMBERS,
            OLD_ITALIC,
            GOTHIC,
            OLD_PERMIC,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            OSAGE,
            ELBASAN,
            CAUCASIAN_ALBANIAN,
            VITHKUQI,
            null,
            LINEAR_A,
            LATIN_EXTENDED_F,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            PALMYRENE,
            NABATAEAN,
            null,
            HATRAN,
            PHOENICIAN,
            LYDIAN,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            OLD_NORTH_ARABIAN,
            null,
            MANICHAEAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            PSALTER_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            OLD_HUNGARIAN,
            HANIFI_ROHINGYA,
            null,
            RUMI_NUMERAL_SYMBOLS,
            YEZIDI,
            ARABIC_EXTENDED_C,
            OLD_SOGDIAN,
            SOGDIAN,
            OLD_UYGHUR,
            CHORASMIAN,
            ELYMAIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            MAHAJANI,
            SHARADA,
            SINHALA_ARCHAIC_NUMBERS,
            KHOJKI,
            null,
            MULTANI,
            KHUDAWADI,
            GRANTHA,
            null,
            NEWA,
            TIRHUTA,
            null,
            SIDDHAM,
            MODI,
            MONGOLIAN_SUPPLEMENT,
            TAKRI,
            null,
            AHOM,
            null,
            DOGRA,
            null,
            WARANG_CITI,
            DIVES_AKURU,
            null,
            NANDINAGARI,
            ZANABAZAR_SQUARE,
            SOYOMBO,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A,
            PAU_CIN_HAU,
            DEVANAGARI_EXTENDED_A,
            null,
            BHAIKSUKI,
            MARCHEN,
            null,
            MASARAM_GONDI,
            GUNJALA_GONDI,
            null,
            MAKASAR,
            KAWI,
            null,
            LISU_SUPPLEMENT,
            TAMIL_SUPPLEMENT,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            EARLY_DYNASTIC_CUNEIFORM,
            null,
            CYPRO_MINOAN,
            EGYPTIAN_HIEROGLYPHS,
            EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
            null,
            ANATOLIAN_HIEROGLYPHS,
            null,
            BAMUM_SUPPLEMENT,
            MRO,
            TANGSA,
            BASSA_VAH,
            PAHAWH_HMONG,
            null,
            MEDEFAIDRIN,
            null,
            MIAO,
            null,
            IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
            TANGUT,
            TANGUT_COMPONENTS,
            KHITAN_SMALL_SCRIPT,
            TANGUT_SUPPLEMENT,
            null,
            KANA_EXTENDED_B,
            KANA_SUPPLEMENT,
            KANA_EXTENDED_A,
            SMALL_KANA_EXTENSION,
            NUSHU,
            null,
            DUPLOYAN,
            SHORTHAND_FORMAT_CONTROLS,
            null,
            ZNAMENNY_MUSICAL_NOTATION,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            KAKTOVIK_NUMERALS,
            MAYAN_NUMERALS,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            SUTTON_SIGNWRITING,
            null,
            LATIN_EXTENDED_G,
            GLAGOLITIC_SUPPLEMENT,
            CYRILLIC_EXTENDED_D,
            null,
            NYIAKENG_PUACHUE_HMONG,
            null,
            TOTO,
            WANCHO,
            null,
            NAG_MUNDARI,
            null,
            ETHIOPIC_EXTENDED_B,
            MENDE_KIKAKUI,
            null,
            ADLAM,
            null,
            INDIC_SIYAQ_NUMBERS,
            null,
            OTTOMAN_SIYAQ_NUMBERS,
            null,
            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 4401 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 4402 EMOTICONS, 4403 ORNAMENTAL_DINGBATS, 4404 TRANSPORT_AND_MAP_SYMBOLS, 4405 ALCHEMICAL_SYMBOLS, 4406 GEOMETRIC_SHAPES_EXTENDED, 4407 SUPPLEMENTAL_ARROWS_C, 4408 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, 4409 CHESS_SYMBOLS, 4410 SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A, 4411 SYMBOLS_FOR_LEGACY_COMPUTING, 4412 null, 4413 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 4414 null, 4415 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 4416 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 4417 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, 4418 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F, 4419 null, 4420 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 4421 null, 4422 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G, 4423 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H, 4424 null, 4425 TAGS, 4426 null, 4427 VARIATION_SELECTORS_SUPPLEMENT, 4428 null, 4429 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 4430 SUPPLEMENTARY_PRIVATE_USE_AREA_B, 4431 }; 4432 4433 4434 /** 4435 * Returns the object representing the Unicode block containing the 4436 * given character, or {@code null} if the character is not a 4437 * member of a defined block. 4438 * 4439 * <p><b>Note:</b> This method cannot handle 4440 * <a href="Character.html#supplementary"> supplementary 4441 * characters</a>. To support all Unicode characters, including 4442 * supplementary characters, use the {@link #of(int)} method. 4443 * 4444 * @param c The character in question 4445 * @return The {@code UnicodeBlock} instance representing the 4446 * Unicode block of which this character is a member, or 4447 * {@code null} if the character is not a member of any 4448 * Unicode block 4449 */ of(char c)4450 public static UnicodeBlock of(char c) { 4451 return of((int)c); 4452 } 4453 4454 /** 4455 * Returns the object representing the Unicode block 4456 * containing the given character (Unicode code point), or 4457 * {@code null} if the character is not a member of a 4458 * defined block. 
4459 * 4460 * @param codePoint the character (Unicode code point) in question. 4461 * @return The {@code UnicodeBlock} instance representing the 4462 * Unicode block of which this character is a member, or 4463 * {@code null} if the character is not a member of any 4464 * Unicode block 4465 * @throws IllegalArgumentException if the specified 4466 * {@code codePoint} is an invalid Unicode code point. 4467 * @see Character#isValidCodePoint(int) 4468 * @since 1.5 4469 */ of(int codePoint)4470 public static UnicodeBlock of(int codePoint) { 4471 if (!isValidCodePoint(codePoint)) { 4472 throw new IllegalArgumentException( 4473 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4474 } 4475 4476 int top, bottom, current; 4477 bottom = 0; 4478 top = blockStarts.length; 4479 current = top/2; 4480 4481 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4482 while (top - bottom > 1) { 4483 if (codePoint >= blockStarts[current]) { 4484 bottom = current; 4485 } else { 4486 top = current; 4487 } 4488 current = (top + bottom) / 2; 4489 } 4490 return blocks[current]; 4491 } 4492 4493 /** 4494 * Returns the UnicodeBlock with the given name. Block 4495 * names are determined by The Unicode Standard. The file 4496 * {@code Blocks.txt} defines blocks for a particular 4497 * version of the standard. The {@link Character} class specifies 4498 * the version of the standard that it supports. 4499 * <p> 4500 * This method accepts block names in the following forms: 4501 * <ol> 4502 * <li> Canonical block names as defined by the Unicode Standard. 4503 * For example, the standard defines a "Basic Latin" block. Therefore, this 4504 * method accepts "Basic Latin" as a valid block name. The documentation of 4505 * each UnicodeBlock provides the canonical name. 4506 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4507 * is a valid block name for the "Basic Latin" block. 
4508 * <li>The text representation of each constant UnicodeBlock identifier. 4509 * For example, this method will return the {@link #BASIC_LATIN} block if 4510 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4511 * hyphens in the canonical name with underscores. 4512 * </ol> 4513 * Finally, character case is ignored for all of the valid block name forms. 4514 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 4515 * The en_US locale's case mapping rules are used to provide case-insensitive 4516 * string comparisons for block name validation. 4517 * <p> 4518 * If the Unicode Standard changes block names, both the previous and 4519 * current names will be accepted. 4520 * 4521 * @param blockName A {@code UnicodeBlock} name. 4522 * @return The {@code UnicodeBlock} instance identified 4523 * by {@code blockName} 4524 * @throws IllegalArgumentException if {@code blockName} is an 4525 * invalid name 4526 * @throws NullPointerException if {@code blockName} is null 4527 * @since 1.5 4528 */ forName(String blockName)4529 public static final UnicodeBlock forName(String blockName) { 4530 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4531 if (block == null) { 4532 throw new IllegalArgumentException("Not a valid block name: " 4533 + blockName); 4534 } 4535 return block; 4536 } 4537 } 4538 4539 4540 /** 4541 * A family of character subsets representing the character scripts 4542 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4543 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4544 * character is assigned to a single Unicode script, either a specific 4545 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4546 * one of the following three special values, 4547 * {@link Character.UnicodeScript#INHERITED Inherited}, 4548 * {@link Character.UnicodeScript#COMMON Common} or 4549 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 
4550 * 4551 * @spec https://www.unicode.org/reports/tr24 Unicode Script Property 4552 * @since 1.7 4553 */ 4554 public static enum UnicodeScript { 4555 /** 4556 * Unicode script "Common". 4557 */ 4558 COMMON, 4559 4560 /** 4561 * Unicode script "Latin". 4562 */ 4563 LATIN, 4564 4565 /** 4566 * Unicode script "Greek". 4567 */ 4568 GREEK, 4569 4570 /** 4571 * Unicode script "Cyrillic". 4572 */ 4573 CYRILLIC, 4574 4575 /** 4576 * Unicode script "Armenian". 4577 */ 4578 ARMENIAN, 4579 4580 /** 4581 * Unicode script "Hebrew". 4582 */ 4583 HEBREW, 4584 4585 /** 4586 * Unicode script "Arabic". 4587 */ 4588 ARABIC, 4589 4590 /** 4591 * Unicode script "Syriac". 4592 */ 4593 SYRIAC, 4594 4595 /** 4596 * Unicode script "Thaana". 4597 */ 4598 THAANA, 4599 4600 /** 4601 * Unicode script "Devanagari". 4602 */ 4603 DEVANAGARI, 4604 4605 /** 4606 * Unicode script "Bengali". 4607 */ 4608 BENGALI, 4609 4610 /** 4611 * Unicode script "Gurmukhi". 4612 */ 4613 GURMUKHI, 4614 4615 /** 4616 * Unicode script "Gujarati". 4617 */ 4618 GUJARATI, 4619 4620 /** 4621 * Unicode script "Oriya". 4622 */ 4623 ORIYA, 4624 4625 /** 4626 * Unicode script "Tamil". 4627 */ 4628 TAMIL, 4629 4630 /** 4631 * Unicode script "Telugu". 4632 */ 4633 TELUGU, 4634 4635 /** 4636 * Unicode script "Kannada". 4637 */ 4638 KANNADA, 4639 4640 /** 4641 * Unicode script "Malayalam". 4642 */ 4643 MALAYALAM, 4644 4645 /** 4646 * Unicode script "Sinhala". 4647 */ 4648 SINHALA, 4649 4650 /** 4651 * Unicode script "Thai". 4652 */ 4653 THAI, 4654 4655 /** 4656 * Unicode script "Lao". 4657 */ 4658 LAO, 4659 4660 /** 4661 * Unicode script "Tibetan". 4662 */ 4663 TIBETAN, 4664 4665 /** 4666 * Unicode script "Myanmar". 4667 */ 4668 MYANMAR, 4669 4670 /** 4671 * Unicode script "Georgian". 4672 */ 4673 GEORGIAN, 4674 4675 /** 4676 * Unicode script "Hangul". 4677 */ 4678 HANGUL, 4679 4680 /** 4681 * Unicode script "Ethiopic". 4682 */ 4683 ETHIOPIC, 4684 4685 /** 4686 * Unicode script "Cherokee". 
4687 */ 4688 CHEROKEE, 4689 4690 /** 4691 * Unicode script "Canadian_Aboriginal". 4692 */ 4693 CANADIAN_ABORIGINAL, 4694 4695 /** 4696 * Unicode script "Ogham". 4697 */ 4698 OGHAM, 4699 4700 /** 4701 * Unicode script "Runic". 4702 */ 4703 RUNIC, 4704 4705 /** 4706 * Unicode script "Khmer". 4707 */ 4708 KHMER, 4709 4710 /** 4711 * Unicode script "Mongolian". 4712 */ 4713 MONGOLIAN, 4714 4715 /** 4716 * Unicode script "Hiragana". 4717 */ 4718 HIRAGANA, 4719 4720 /** 4721 * Unicode script "Katakana". 4722 */ 4723 KATAKANA, 4724 4725 /** 4726 * Unicode script "Bopomofo". 4727 */ 4728 BOPOMOFO, 4729 4730 /** 4731 * Unicode script "Han". 4732 */ 4733 HAN, 4734 4735 /** 4736 * Unicode script "Yi". 4737 */ 4738 YI, 4739 4740 /** 4741 * Unicode script "Old_Italic". 4742 */ 4743 OLD_ITALIC, 4744 4745 /** 4746 * Unicode script "Gothic". 4747 */ 4748 GOTHIC, 4749 4750 /** 4751 * Unicode script "Deseret". 4752 */ 4753 DESERET, 4754 4755 /** 4756 * Unicode script "Inherited". 4757 */ 4758 INHERITED, 4759 4760 /** 4761 * Unicode script "Tagalog". 4762 */ 4763 TAGALOG, 4764 4765 /** 4766 * Unicode script "Hanunoo". 4767 */ 4768 HANUNOO, 4769 4770 /** 4771 * Unicode script "Buhid". 4772 */ 4773 BUHID, 4774 4775 /** 4776 * Unicode script "Tagbanwa". 4777 */ 4778 TAGBANWA, 4779 4780 /** 4781 * Unicode script "Limbu". 4782 */ 4783 LIMBU, 4784 4785 /** 4786 * Unicode script "Tai_Le". 4787 */ 4788 TAI_LE, 4789 4790 /** 4791 * Unicode script "Linear_B". 4792 */ 4793 LINEAR_B, 4794 4795 /** 4796 * Unicode script "Ugaritic". 4797 */ 4798 UGARITIC, 4799 4800 /** 4801 * Unicode script "Shavian". 4802 */ 4803 SHAVIAN, 4804 4805 /** 4806 * Unicode script "Osmanya". 4807 */ 4808 OSMANYA, 4809 4810 /** 4811 * Unicode script "Cypriot". 4812 */ 4813 CYPRIOT, 4814 4815 /** 4816 * Unicode script "Braille". 4817 */ 4818 BRAILLE, 4819 4820 /** 4821 * Unicode script "Buginese". 4822 */ 4823 BUGINESE, 4824 4825 /** 4826 * Unicode script "Coptic". 
4827 */ 4828 COPTIC, 4829 4830 /** 4831 * Unicode script "New_Tai_Lue". 4832 */ 4833 NEW_TAI_LUE, 4834 4835 /** 4836 * Unicode script "Glagolitic". 4837 */ 4838 GLAGOLITIC, 4839 4840 /** 4841 * Unicode script "Tifinagh". 4842 */ 4843 TIFINAGH, 4844 4845 /** 4846 * Unicode script "Syloti_Nagri". 4847 */ 4848 SYLOTI_NAGRI, 4849 4850 /** 4851 * Unicode script "Old_Persian". 4852 */ 4853 OLD_PERSIAN, 4854 4855 /** 4856 * Unicode script "Kharoshthi". 4857 */ 4858 KHAROSHTHI, 4859 4860 /** 4861 * Unicode script "Balinese". 4862 */ 4863 BALINESE, 4864 4865 /** 4866 * Unicode script "Cuneiform". 4867 */ 4868 CUNEIFORM, 4869 4870 /** 4871 * Unicode script "Phoenician". 4872 */ 4873 PHOENICIAN, 4874 4875 /** 4876 * Unicode script "Phags_Pa". 4877 */ 4878 PHAGS_PA, 4879 4880 /** 4881 * Unicode script "Nko". 4882 */ 4883 NKO, 4884 4885 /** 4886 * Unicode script "Sundanese". 4887 */ 4888 SUNDANESE, 4889 4890 /** 4891 * Unicode script "Batak". 4892 */ 4893 BATAK, 4894 4895 /** 4896 * Unicode script "Lepcha". 4897 */ 4898 LEPCHA, 4899 4900 /** 4901 * Unicode script "Ol_Chiki". 4902 */ 4903 OL_CHIKI, 4904 4905 /** 4906 * Unicode script "Vai". 4907 */ 4908 VAI, 4909 4910 /** 4911 * Unicode script "Saurashtra". 4912 */ 4913 SAURASHTRA, 4914 4915 /** 4916 * Unicode script "Kayah_Li". 4917 */ 4918 KAYAH_LI, 4919 4920 /** 4921 * Unicode script "Rejang". 4922 */ 4923 REJANG, 4924 4925 /** 4926 * Unicode script "Lycian". 4927 */ 4928 LYCIAN, 4929 4930 /** 4931 * Unicode script "Carian". 4932 */ 4933 CARIAN, 4934 4935 /** 4936 * Unicode script "Lydian". 4937 */ 4938 LYDIAN, 4939 4940 /** 4941 * Unicode script "Cham". 4942 */ 4943 CHAM, 4944 4945 /** 4946 * Unicode script "Tai_Tham". 4947 */ 4948 TAI_THAM, 4949 4950 /** 4951 * Unicode script "Tai_Viet". 4952 */ 4953 TAI_VIET, 4954 4955 /** 4956 * Unicode script "Avestan". 4957 */ 4958 AVESTAN, 4959 4960 /** 4961 * Unicode script "Egyptian_Hieroglyphs". 4962 */ 4963 EGYPTIAN_HIEROGLYPHS, 4964 4965 /** 4966 * Unicode script "Samaritan". 
4967 */ 4968 SAMARITAN, 4969 4970 /** 4971 * Unicode script "Mandaic". 4972 */ 4973 MANDAIC, 4974 4975 /** 4976 * Unicode script "Lisu". 4977 */ 4978 LISU, 4979 4980 /** 4981 * Unicode script "Bamum". 4982 */ 4983 BAMUM, 4984 4985 /** 4986 * Unicode script "Javanese". 4987 */ 4988 JAVANESE, 4989 4990 /** 4991 * Unicode script "Meetei_Mayek". 4992 */ 4993 MEETEI_MAYEK, 4994 4995 /** 4996 * Unicode script "Imperial_Aramaic". 4997 */ 4998 IMPERIAL_ARAMAIC, 4999 5000 /** 5001 * Unicode script "Old_South_Arabian". 5002 */ 5003 OLD_SOUTH_ARABIAN, 5004 5005 /** 5006 * Unicode script "Inscriptional_Parthian". 5007 */ 5008 INSCRIPTIONAL_PARTHIAN, 5009 5010 /** 5011 * Unicode script "Inscriptional_Pahlavi". 5012 */ 5013 INSCRIPTIONAL_PAHLAVI, 5014 5015 /** 5016 * Unicode script "Old_Turkic". 5017 */ 5018 OLD_TURKIC, 5019 5020 /** 5021 * Unicode script "Brahmi". 5022 */ 5023 BRAHMI, 5024 5025 /** 5026 * Unicode script "Kaithi". 5027 */ 5028 KAITHI, 5029 5030 /** 5031 * Unicode script "Meroitic Hieroglyphs". 5032 * @since 1.8 5033 */ 5034 MEROITIC_HIEROGLYPHS, 5035 5036 /** 5037 * Unicode script "Meroitic Cursive". 5038 * @since 1.8 5039 */ 5040 MEROITIC_CURSIVE, 5041 5042 /** 5043 * Unicode script "Sora Sompeng". 5044 * @since 1.8 5045 */ 5046 SORA_SOMPENG, 5047 5048 /** 5049 * Unicode script "Chakma". 5050 * @since 1.8 5051 */ 5052 CHAKMA, 5053 5054 /** 5055 * Unicode script "Sharada". 5056 * @since 1.8 5057 */ 5058 SHARADA, 5059 5060 /** 5061 * Unicode script "Takri". 5062 * @since 1.8 5063 */ 5064 TAKRI, 5065 5066 /** 5067 * Unicode script "Miao". 5068 * @since 1.8 5069 */ 5070 MIAO, 5071 5072 /** 5073 * Unicode script "Caucasian Albanian". 5074 * @since 9 5075 */ 5076 CAUCASIAN_ALBANIAN, 5077 5078 /** 5079 * Unicode script "Bassa Vah". 5080 * @since 9 5081 */ 5082 BASSA_VAH, 5083 5084 /** 5085 * Unicode script "Duployan". 5086 * @since 9 5087 */ 5088 DUPLOYAN, 5089 5090 /** 5091 * Unicode script "Elbasan". 
5092 * @since 9 5093 */ 5094 ELBASAN, 5095 5096 /** 5097 * Unicode script "Grantha". 5098 * @since 9 5099 */ 5100 GRANTHA, 5101 5102 /** 5103 * Unicode script "Pahawh Hmong". 5104 * @since 9 5105 */ 5106 PAHAWH_HMONG, 5107 5108 /** 5109 * Unicode script "Khojki". 5110 * @since 9 5111 */ 5112 KHOJKI, 5113 5114 /** 5115 * Unicode script "Linear A". 5116 * @since 9 5117 */ 5118 LINEAR_A, 5119 5120 /** 5121 * Unicode script "Mahajani". 5122 * @since 9 5123 */ 5124 MAHAJANI, 5125 5126 /** 5127 * Unicode script "Manichaean". 5128 * @since 9 5129 */ 5130 MANICHAEAN, 5131 5132 /** 5133 * Unicode script "Mende Kikakui". 5134 * @since 9 5135 */ 5136 MENDE_KIKAKUI, 5137 5138 /** 5139 * Unicode script "Modi". 5140 * @since 9 5141 */ 5142 MODI, 5143 5144 /** 5145 * Unicode script "Mro". 5146 * @since 9 5147 */ 5148 MRO, 5149 5150 /** 5151 * Unicode script "Old North Arabian". 5152 * @since 9 5153 */ 5154 OLD_NORTH_ARABIAN, 5155 5156 /** 5157 * Unicode script "Nabataean". 5158 * @since 9 5159 */ 5160 NABATAEAN, 5161 5162 /** 5163 * Unicode script "Palmyrene". 5164 * @since 9 5165 */ 5166 PALMYRENE, 5167 5168 /** 5169 * Unicode script "Pau Cin Hau". 5170 * @since 9 5171 */ 5172 PAU_CIN_HAU, 5173 5174 /** 5175 * Unicode script "Old Permic". 5176 * @since 9 5177 */ 5178 OLD_PERMIC, 5179 5180 /** 5181 * Unicode script "Psalter Pahlavi". 5182 * @since 9 5183 */ 5184 PSALTER_PAHLAVI, 5185 5186 /** 5187 * Unicode script "Siddham". 5188 * @since 9 5189 */ 5190 SIDDHAM, 5191 5192 /** 5193 * Unicode script "Khudawadi". 5194 * @since 9 5195 */ 5196 KHUDAWADI, 5197 5198 /** 5199 * Unicode script "Tirhuta". 5200 * @since 9 5201 */ 5202 TIRHUTA, 5203 5204 /** 5205 * Unicode script "Warang Citi". 5206 * @since 9 5207 */ 5208 WARANG_CITI, 5209 5210 /** 5211 * Unicode script "Ahom". 5212 * @since 9 5213 */ 5214 AHOM, 5215 5216 /** 5217 * Unicode script "Anatolian Hieroglyphs". 5218 * @since 9 5219 */ 5220 ANATOLIAN_HIEROGLYPHS, 5221 5222 /** 5223 * Unicode script "Hatran". 
5224 * @since 9 5225 */ 5226 HATRAN, 5227 5228 /** 5229 * Unicode script "Multani". 5230 * @since 9 5231 */ 5232 MULTANI, 5233 5234 /** 5235 * Unicode script "Old Hungarian". 5236 * @since 9 5237 */ 5238 OLD_HUNGARIAN, 5239 5240 /** 5241 * Unicode script "SignWriting". 5242 * @since 9 5243 */ 5244 SIGNWRITING, 5245 5246 /** 5247 * Unicode script "Adlam". 5248 * @since 11 5249 */ 5250 ADLAM, 5251 5252 /** 5253 * Unicode script "Bhaiksuki". 5254 * @since 11 5255 */ 5256 BHAIKSUKI, 5257 5258 /** 5259 * Unicode script "Marchen". 5260 * @since 11 5261 */ 5262 MARCHEN, 5263 5264 /** 5265 * Unicode script "Newa". 5266 * @since 11 5267 */ 5268 NEWA, 5269 5270 /** 5271 * Unicode script "Osage". 5272 * @since 11 5273 */ 5274 OSAGE, 5275 5276 /** 5277 * Unicode script "Tangut". 5278 * @since 11 5279 */ 5280 TANGUT, 5281 5282 /** 5283 * Unicode script "Masaram Gondi". 5284 * @since 11 5285 */ 5286 MASARAM_GONDI, 5287 5288 /** 5289 * Unicode script "Nushu". 5290 * @since 11 5291 */ 5292 NUSHU, 5293 5294 /** 5295 * Unicode script "Soyombo". 5296 * @since 11 5297 */ 5298 SOYOMBO, 5299 5300 /** 5301 * Unicode script "Zanabazar Square". 5302 * @since 11 5303 */ 5304 ZANABAZAR_SQUARE, 5305 5306 /** 5307 * Unicode script "Hanifi Rohingya". 5308 * @since 12 5309 */ 5310 HANIFI_ROHINGYA, 5311 5312 /** 5313 * Unicode script "Old Sogdian". 5314 * @since 12 5315 */ 5316 OLD_SOGDIAN, 5317 5318 /** 5319 * Unicode script "Sogdian". 5320 * @since 12 5321 */ 5322 SOGDIAN, 5323 5324 /** 5325 * Unicode script "Dogra". 5326 * @since 12 5327 */ 5328 DOGRA, 5329 5330 /** 5331 * Unicode script "Gunjala Gondi". 5332 * @since 12 5333 */ 5334 GUNJALA_GONDI, 5335 5336 /** 5337 * Unicode script "Makasar". 5338 * @since 12 5339 */ 5340 MAKASAR, 5341 5342 /** 5343 * Unicode script "Medefaidrin". 5344 * @since 12 5345 */ 5346 MEDEFAIDRIN, 5347 5348 /** 5349 * Unicode script "Elymaic". 5350 * @since 13 5351 */ 5352 ELYMAIC, 5353 5354 /** 5355 * Unicode script "Nandinagari". 
5356 * @since 13 5357 */ 5358 NANDINAGARI, 5359 5360 /** 5361 * Unicode script "Nyiakeng Puachue Hmong". 5362 * @since 13 5363 */ 5364 NYIAKENG_PUACHUE_HMONG, 5365 5366 /** 5367 * Unicode script "Wancho". 5368 * @since 13 5369 */ 5370 WANCHO, 5371 5372 /** 5373 * Unicode script "Yezidi". 5374 * @since 15 5375 */ 5376 YEZIDI, 5377 5378 /** 5379 * Unicode script "Chorasmian". 5380 * @since 15 5381 */ 5382 CHORASMIAN, 5383 5384 /** 5385 * Unicode script "Dives Akuru". 5386 * @since 15 5387 */ 5388 DIVES_AKURU, 5389 5390 /** 5391 * Unicode script "Khitan Small Script". 5392 * @since 15 5393 */ 5394 KHITAN_SMALL_SCRIPT, 5395 5396 /** 5397 * Unicode script "Vithkuqi". 5398 * @since 19 5399 */ 5400 VITHKUQI, 5401 5402 /** 5403 * Unicode script "Old Uyghur". 5404 * @since 19 5405 */ 5406 OLD_UYGHUR, 5407 5408 /** 5409 * Unicode script "Cypro Minoan". 5410 * @since 19 5411 */ 5412 CYPRO_MINOAN, 5413 5414 /** 5415 * Unicode script "Tangsa". 5416 * @since 19 5417 */ 5418 TANGSA, 5419 5420 /** 5421 * Unicode script "Toto". 5422 * @since 19 5423 */ 5424 TOTO, 5425 5426 /** 5427 * Unicode script "Kawi". 5428 * @since 20 5429 */ 5430 KAWI, 5431 5432 /** 5433 * Unicode script "Nag Mundari". 5434 * @since 20 5435 */ 5436 NAG_MUNDARI, 5437 5438 /** 5439 * Unicode script "Unknown". 5440 */ 5441 UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map. 
5442 5443 private static final int[] scriptStarts = { 5444 0x0000, // 0000..0040; COMMON 5445 0x0041, // 0041..005A; LATIN 5446 0x005B, // 005B..0060; COMMON 5447 0x0061, // 0061..007A; LATIN 5448 0x007B, // 007B..00A9; COMMON 5449 0x00AA, // 00AA ; LATIN 5450 0x00AB, // 00AB..00B9; COMMON 5451 0x00BA, // 00BA ; LATIN 5452 0x00BB, // 00BB..00BF; COMMON 5453 0x00C0, // 00C0..00D6; LATIN 5454 0x00D7, // 00D7 ; COMMON 5455 0x00D8, // 00D8..00F6; LATIN 5456 0x00F7, // 00F7 ; COMMON 5457 0x00F8, // 00F8..02B8; LATIN 5458 0x02B9, // 02B9..02DF; COMMON 5459 0x02E0, // 02E0..02E4; LATIN 5460 0x02E5, // 02E5..02E9; COMMON 5461 0x02EA, // 02EA..02EB; BOPOMOFO 5462 0x02EC, // 02EC..02FF; COMMON 5463 0x0300, // 0300..036F; INHERITED 5464 0x0370, // 0370..0373; GREEK 5465 0x0374, // 0374 ; COMMON 5466 0x0375, // 0375..0377; GREEK 5467 0x0378, // 0378..0379; UNKNOWN 5468 0x037A, // 037A..037D; GREEK 5469 0x037E, // 037E ; COMMON 5470 0x037F, // 037F ; GREEK 5471 0x0380, // 0380..0383; UNKNOWN 5472 0x0384, // 0384 ; GREEK 5473 0x0385, // 0385 ; COMMON 5474 0x0386, // 0386 ; GREEK 5475 0x0387, // 0387 ; COMMON 5476 0x0388, // 0388..038A; GREEK 5477 0x038B, // 038B ; UNKNOWN 5478 0x038C, // 038C ; GREEK 5479 0x038D, // 038D ; UNKNOWN 5480 0x038E, // 038E..03A1; GREEK 5481 0x03A2, // 03A2 ; UNKNOWN 5482 0x03A3, // 03A3..03E1; GREEK 5483 0x03E2, // 03E2..03EF; COPTIC 5484 0x03F0, // 03F0..03FF; GREEK 5485 0x0400, // 0400..0484; CYRILLIC 5486 0x0485, // 0485..0486; INHERITED 5487 0x0487, // 0487..052F; CYRILLIC 5488 0x0530, // 0530 ; UNKNOWN 5489 0x0531, // 0531..0556; ARMENIAN 5490 0x0557, // 0557..0558; UNKNOWN 5491 0x0559, // 0559..058A; ARMENIAN 5492 0x058B, // 058B..058C; UNKNOWN 5493 0x058D, // 058D..058F; ARMENIAN 5494 0x0590, // 0590 ; UNKNOWN 5495 0x0591, // 0591..05C7; HEBREW 5496 0x05C8, // 05C8..05CF; UNKNOWN 5497 0x05D0, // 05D0..05EA; HEBREW 5498 0x05EB, // 05EB..05EE; UNKNOWN 5499 0x05EF, // 05EF..05F4; HEBREW 5500 0x05F5, // 05F5..05FF; UNKNOWN 5501 0x0600, // 
0600..0604; ARABIC 5502 0x0605, // 0605 ; COMMON 5503 0x0606, // 0606..060B; ARABIC 5504 0x060C, // 060C ; COMMON 5505 0x060D, // 060D..061A; ARABIC 5506 0x061B, // 061B ; COMMON 5507 0x061C, // 061C..061E; ARABIC 5508 0x061F, // 061F ; COMMON 5509 0x0620, // 0620..063F; ARABIC 5510 0x0640, // 0640 ; COMMON 5511 0x0641, // 0641..064A; ARABIC 5512 0x064B, // 064B..0655; INHERITED 5513 0x0656, // 0656..066F; ARABIC 5514 0x0670, // 0670 ; INHERITED 5515 0x0671, // 0671..06DC; ARABIC 5516 0x06DD, // 06DD ; COMMON 5517 0x06DE, // 06DE..06FF; ARABIC 5518 0x0700, // 0700..070D; SYRIAC 5519 0x070E, // 070E ; UNKNOWN 5520 0x070F, // 070F..074A; SYRIAC 5521 0x074B, // 074B..074C; UNKNOWN 5522 0x074D, // 074D..074F; SYRIAC 5523 0x0750, // 0750..077F; ARABIC 5524 0x0780, // 0780..07B1; THAANA 5525 0x07B2, // 07B2..07BF; UNKNOWN 5526 0x07C0, // 07C0..07FA; NKO 5527 0x07FB, // 07FB..07FC; UNKNOWN 5528 0x07FD, // 07FD..07FF; NKO 5529 0x0800, // 0800..082D; SAMARITAN 5530 0x082E, // 082E..082F; UNKNOWN 5531 0x0830, // 0830..083E; SAMARITAN 5532 0x083F, // 083F ; UNKNOWN 5533 0x0840, // 0840..085B; MANDAIC 5534 0x085C, // 085C..085D; UNKNOWN 5535 0x085E, // 085E ; MANDAIC 5536 0x085F, // 085F ; UNKNOWN 5537 0x0860, // 0860..086A; SYRIAC 5538 0x086B, // 086B..086F; UNKNOWN 5539 0x0870, // 0870..088E; ARABIC 5540 0x088F, // 088F ; UNKNOWN 5541 0x0890, // 0890..0891; ARABIC 5542 0x0892, // 0892..0897; UNKNOWN 5543 0x0898, // 0898..08E1; ARABIC 5544 0x08E2, // 08E2 ; COMMON 5545 0x08E3, // 08E3..08FF; ARABIC 5546 0x0900, // 0900..0950; DEVANAGARI 5547 0x0951, // 0951..0954; INHERITED 5548 0x0955, // 0955..0963; DEVANAGARI 5549 0x0964, // 0964..0965; COMMON 5550 0x0966, // 0966..097F; DEVANAGARI 5551 0x0980, // 0980..0983; BENGALI 5552 0x0984, // 0984 ; UNKNOWN 5553 0x0985, // 0985..098C; BENGALI 5554 0x098D, // 098D..098E; UNKNOWN 5555 0x098F, // 098F..0990; BENGALI 5556 0x0991, // 0991..0992; UNKNOWN 5557 0x0993, // 0993..09A8; BENGALI 5558 0x09A9, // 09A9 ; UNKNOWN 5559 0x09AA, // 
09AA..09B0; BENGALI 5560 0x09B1, // 09B1 ; UNKNOWN 5561 0x09B2, // 09B2 ; BENGALI 5562 0x09B3, // 09B3..09B5; UNKNOWN 5563 0x09B6, // 09B6..09B9; BENGALI 5564 0x09BA, // 09BA..09BB; UNKNOWN 5565 0x09BC, // 09BC..09C4; BENGALI 5566 0x09C5, // 09C5..09C6; UNKNOWN 5567 0x09C7, // 09C7..09C8; BENGALI 5568 0x09C9, // 09C9..09CA; UNKNOWN 5569 0x09CB, // 09CB..09CE; BENGALI 5570 0x09CF, // 09CF..09D6; UNKNOWN 5571 0x09D7, // 09D7 ; BENGALI 5572 0x09D8, // 09D8..09DB; UNKNOWN 5573 0x09DC, // 09DC..09DD; BENGALI 5574 0x09DE, // 09DE ; UNKNOWN 5575 0x09DF, // 09DF..09E3; BENGALI 5576 0x09E4, // 09E4..09E5; UNKNOWN 5577 0x09E6, // 09E6..09FE; BENGALI 5578 0x09FF, // 09FF..0A00; UNKNOWN 5579 0x0A01, // 0A01..0A03; GURMUKHI 5580 0x0A04, // 0A04 ; UNKNOWN 5581 0x0A05, // 0A05..0A0A; GURMUKHI 5582 0x0A0B, // 0A0B..0A0E; UNKNOWN 5583 0x0A0F, // 0A0F..0A10; GURMUKHI 5584 0x0A11, // 0A11..0A12; UNKNOWN 5585 0x0A13, // 0A13..0A28; GURMUKHI 5586 0x0A29, // 0A29 ; UNKNOWN 5587 0x0A2A, // 0A2A..0A30; GURMUKHI 5588 0x0A31, // 0A31 ; UNKNOWN 5589 0x0A32, // 0A32..0A33; GURMUKHI 5590 0x0A34, // 0A34 ; UNKNOWN 5591 0x0A35, // 0A35..0A36; GURMUKHI 5592 0x0A37, // 0A37 ; UNKNOWN 5593 0x0A38, // 0A38..0A39; GURMUKHI 5594 0x0A3A, // 0A3A..0A3B; UNKNOWN 5595 0x0A3C, // 0A3C ; GURMUKHI 5596 0x0A3D, // 0A3D ; UNKNOWN 5597 0x0A3E, // 0A3E..0A42; GURMUKHI 5598 0x0A43, // 0A43..0A46; UNKNOWN 5599 0x0A47, // 0A47..0A48; GURMUKHI 5600 0x0A49, // 0A49..0A4A; UNKNOWN 5601 0x0A4B, // 0A4B..0A4D; GURMUKHI 5602 0x0A4E, // 0A4E..0A50; UNKNOWN 5603 0x0A51, // 0A51 ; GURMUKHI 5604 0x0A52, // 0A52..0A58; UNKNOWN 5605 0x0A59, // 0A59..0A5C; GURMUKHI 5606 0x0A5D, // 0A5D ; UNKNOWN 5607 0x0A5E, // 0A5E ; GURMUKHI 5608 0x0A5F, // 0A5F..0A65; UNKNOWN 5609 0x0A66, // 0A66..0A76; GURMUKHI 5610 0x0A77, // 0A77..0A80; UNKNOWN 5611 0x0A81, // 0A81..0A83; GUJARATI 5612 0x0A84, // 0A84 ; UNKNOWN 5613 0x0A85, // 0A85..0A8D; GUJARATI 5614 0x0A8E, // 0A8E ; UNKNOWN 5615 0x0A8F, // 0A8F..0A91; GUJARATI 5616 0x0A92, // 0A92 ; 
UNKNOWN 5617 0x0A93, // 0A93..0AA8; GUJARATI 5618 0x0AA9, // 0AA9 ; UNKNOWN 5619 0x0AAA, // 0AAA..0AB0; GUJARATI 5620 0x0AB1, // 0AB1 ; UNKNOWN 5621 0x0AB2, // 0AB2..0AB3; GUJARATI 5622 0x0AB4, // 0AB4 ; UNKNOWN 5623 0x0AB5, // 0AB5..0AB9; GUJARATI 5624 0x0ABA, // 0ABA..0ABB; UNKNOWN 5625 0x0ABC, // 0ABC..0AC5; GUJARATI 5626 0x0AC6, // 0AC6 ; UNKNOWN 5627 0x0AC7, // 0AC7..0AC9; GUJARATI 5628 0x0ACA, // 0ACA ; UNKNOWN 5629 0x0ACB, // 0ACB..0ACD; GUJARATI 5630 0x0ACE, // 0ACE..0ACF; UNKNOWN 5631 0x0AD0, // 0AD0 ; GUJARATI 5632 0x0AD1, // 0AD1..0ADF; UNKNOWN 5633 0x0AE0, // 0AE0..0AE3; GUJARATI 5634 0x0AE4, // 0AE4..0AE5; UNKNOWN 5635 0x0AE6, // 0AE6..0AF1; GUJARATI 5636 0x0AF2, // 0AF2..0AF8; UNKNOWN 5637 0x0AF9, // 0AF9..0AFF; GUJARATI 5638 0x0B00, // 0B00 ; UNKNOWN 5639 0x0B01, // 0B01..0B03; ORIYA 5640 0x0B04, // 0B04 ; UNKNOWN 5641 0x0B05, // 0B05..0B0C; ORIYA 5642 0x0B0D, // 0B0D..0B0E; UNKNOWN 5643 0x0B0F, // 0B0F..0B10; ORIYA 5644 0x0B11, // 0B11..0B12; UNKNOWN 5645 0x0B13, // 0B13..0B28; ORIYA 5646 0x0B29, // 0B29 ; UNKNOWN 5647 0x0B2A, // 0B2A..0B30; ORIYA 5648 0x0B31, // 0B31 ; UNKNOWN 5649 0x0B32, // 0B32..0B33; ORIYA 5650 0x0B34, // 0B34 ; UNKNOWN 5651 0x0B35, // 0B35..0B39; ORIYA 5652 0x0B3A, // 0B3A..0B3B; UNKNOWN 5653 0x0B3C, // 0B3C..0B44; ORIYA 5654 0x0B45, // 0B45..0B46; UNKNOWN 5655 0x0B47, // 0B47..0B48; ORIYA 5656 0x0B49, // 0B49..0B4A; UNKNOWN 5657 0x0B4B, // 0B4B..0B4D; ORIYA 5658 0x0B4E, // 0B4E..0B54; UNKNOWN 5659 0x0B55, // 0B55..0B57; ORIYA 5660 0x0B58, // 0B58..0B5B; UNKNOWN 5661 0x0B5C, // 0B5C..0B5D; ORIYA 5662 0x0B5E, // 0B5E ; UNKNOWN 5663 0x0B5F, // 0B5F..0B63; ORIYA 5664 0x0B64, // 0B64..0B65; UNKNOWN 5665 0x0B66, // 0B66..0B77; ORIYA 5666 0x0B78, // 0B78..0B81; UNKNOWN 5667 0x0B82, // 0B82..0B83; TAMIL 5668 0x0B84, // 0B84 ; UNKNOWN 5669 0x0B85, // 0B85..0B8A; TAMIL 5670 0x0B8B, // 0B8B..0B8D; UNKNOWN 5671 0x0B8E, // 0B8E..0B90; TAMIL 5672 0x0B91, // 0B91 ; UNKNOWN 5673 0x0B92, // 0B92..0B95; TAMIL 5674 0x0B96, // 0B96..0B98; 
UNKNOWN 5675 0x0B99, // 0B99..0B9A; TAMIL 5676 0x0B9B, // 0B9B ; UNKNOWN 5677 0x0B9C, // 0B9C ; TAMIL 5678 0x0B9D, // 0B9D ; UNKNOWN 5679 0x0B9E, // 0B9E..0B9F; TAMIL 5680 0x0BA0, // 0BA0..0BA2; UNKNOWN 5681 0x0BA3, // 0BA3..0BA4; TAMIL 5682 0x0BA5, // 0BA5..0BA7; UNKNOWN 5683 0x0BA8, // 0BA8..0BAA; TAMIL 5684 0x0BAB, // 0BAB..0BAD; UNKNOWN 5685 0x0BAE, // 0BAE..0BB9; TAMIL 5686 0x0BBA, // 0BBA..0BBD; UNKNOWN 5687 0x0BBE, // 0BBE..0BC2; TAMIL 5688 0x0BC3, // 0BC3..0BC5; UNKNOWN 5689 0x0BC6, // 0BC6..0BC8; TAMIL 5690 0x0BC9, // 0BC9 ; UNKNOWN 5691 0x0BCA, // 0BCA..0BCD; TAMIL 5692 0x0BCE, // 0BCE..0BCF; UNKNOWN 5693 0x0BD0, // 0BD0 ; TAMIL 5694 0x0BD1, // 0BD1..0BD6; UNKNOWN 5695 0x0BD7, // 0BD7 ; TAMIL 5696 0x0BD8, // 0BD8..0BE5; UNKNOWN 5697 0x0BE6, // 0BE6..0BFA; TAMIL 5698 0x0BFB, // 0BFB..0BFF; UNKNOWN 5699 0x0C00, // 0C00..0C0C; TELUGU 5700 0x0C0D, // 0C0D ; UNKNOWN 5701 0x0C0E, // 0C0E..0C10; TELUGU 5702 0x0C11, // 0C11 ; UNKNOWN 5703 0x0C12, // 0C12..0C28; TELUGU 5704 0x0C29, // 0C29 ; UNKNOWN 5705 0x0C2A, // 0C2A..0C39; TELUGU 5706 0x0C3A, // 0C3A..0C3B; UNKNOWN 5707 0x0C3C, // 0C3C..0C44; TELUGU 5708 0x0C45, // 0C45 ; UNKNOWN 5709 0x0C46, // 0C46..0C48; TELUGU 5710 0x0C49, // 0C49 ; UNKNOWN 5711 0x0C4A, // 0C4A..0C4D; TELUGU 5712 0x0C4E, // 0C4E..0C54; UNKNOWN 5713 0x0C55, // 0C55..0C56; TELUGU 5714 0x0C57, // 0C57 ; UNKNOWN 5715 0x0C58, // 0C58..0C5A; TELUGU 5716 0x0C5B, // 0C5B..0C5C; UNKNOWN 5717 0x0C5D, // 0C5D ; TELUGU 5718 0x0C5E, // 0C5E..0C5F; UNKNOWN 5719 0x0C60, // 0C60..0C63; TELUGU 5720 0x0C64, // 0C64..0C65; UNKNOWN 5721 0x0C66, // 0C66..0C6F; TELUGU 5722 0x0C70, // 0C70..0C76; UNKNOWN 5723 0x0C77, // 0C77..0C7F; TELUGU 5724 0x0C80, // 0C80..0C8C; KANNADA 5725 0x0C8D, // 0C8D ; UNKNOWN 5726 0x0C8E, // 0C8E..0C90; KANNADA 5727 0x0C91, // 0C91 ; UNKNOWN 5728 0x0C92, // 0C92..0CA8; KANNADA 5729 0x0CA9, // 0CA9 ; UNKNOWN 5730 0x0CAA, // 0CAA..0CB3; KANNADA 5731 0x0CB4, // 0CB4 ; UNKNOWN 5732 0x0CB5, // 0CB5..0CB9; KANNADA 5733 0x0CBA, // 
0CBA..0CBB; UNKNOWN 5734 0x0CBC, // 0CBC..0CC4; KANNADA 5735 0x0CC5, // 0CC5 ; UNKNOWN 5736 0x0CC6, // 0CC6..0CC8; KANNADA 5737 0x0CC9, // 0CC9 ; UNKNOWN 5738 0x0CCA, // 0CCA..0CCD; KANNADA 5739 0x0CCE, // 0CCE..0CD4; UNKNOWN 5740 0x0CD5, // 0CD5..0CD6; KANNADA 5741 0x0CD7, // 0CD7..0CDC; UNKNOWN 5742 0x0CDD, // 0CDD..0CDE; KANNADA 5743 0x0CDF, // 0CDF ; UNKNOWN 5744 0x0CE0, // 0CE0..0CE3; KANNADA 5745 0x0CE4, // 0CE4..0CE5; UNKNOWN 5746 0x0CE6, // 0CE6..0CEF; KANNADA 5747 0x0CF0, // 0CF0 ; UNKNOWN 5748 0x0CF1, // 0CF1..0CF3; KANNADA 5749 0x0CF4, // 0CF4..0CFF; UNKNOWN 5750 0x0D00, // 0D00..0D0C; MALAYALAM 5751 0x0D0D, // 0D0D ; UNKNOWN 5752 0x0D0E, // 0D0E..0D10; MALAYALAM 5753 0x0D11, // 0D11 ; UNKNOWN 5754 0x0D12, // 0D12..0D44; MALAYALAM 5755 0x0D45, // 0D45 ; UNKNOWN 5756 0x0D46, // 0D46..0D48; MALAYALAM 5757 0x0D49, // 0D49 ; UNKNOWN 5758 0x0D4A, // 0D4A..0D4F; MALAYALAM 5759 0x0D50, // 0D50..0D53; UNKNOWN 5760 0x0D54, // 0D54..0D63; MALAYALAM 5761 0x0D64, // 0D64..0D65; UNKNOWN 5762 0x0D66, // 0D66..0D7F; MALAYALAM 5763 0x0D80, // 0D80 ; UNKNOWN 5764 0x0D81, // 0D81..0D83; SINHALA 5765 0x0D84, // 0D84 ; UNKNOWN 5766 0x0D85, // 0D85..0D96; SINHALA 5767 0x0D97, // 0D97..0D99; UNKNOWN 5768 0x0D9A, // 0D9A..0DB1; SINHALA 5769 0x0DB2, // 0DB2 ; UNKNOWN 5770 0x0DB3, // 0DB3..0DBB; SINHALA 5771 0x0DBC, // 0DBC ; UNKNOWN 5772 0x0DBD, // 0DBD ; SINHALA 5773 0x0DBE, // 0DBE..0DBF; UNKNOWN 5774 0x0DC0, // 0DC0..0DC6; SINHALA 5775 0x0DC7, // 0DC7..0DC9; UNKNOWN 5776 0x0DCA, // 0DCA ; SINHALA 5777 0x0DCB, // 0DCB..0DCE; UNKNOWN 5778 0x0DCF, // 0DCF..0DD4; SINHALA 5779 0x0DD5, // 0DD5 ; UNKNOWN 5780 0x0DD6, // 0DD6 ; SINHALA 5781 0x0DD7, // 0DD7 ; UNKNOWN 5782 0x0DD8, // 0DD8..0DDF; SINHALA 5783 0x0DE0, // 0DE0..0DE5; UNKNOWN 5784 0x0DE6, // 0DE6..0DEF; SINHALA 5785 0x0DF0, // 0DF0..0DF1; UNKNOWN 5786 0x0DF2, // 0DF2..0DF4; SINHALA 5787 0x0DF5, // 0DF5..0E00; UNKNOWN 5788 0x0E01, // 0E01..0E3A; THAI 5789 0x0E3B, // 0E3B..0E3E; UNKNOWN 5790 0x0E3F, // 0E3F ; COMMON 5791 
0x0E40, // 0E40..0E5B; THAI 5792 0x0E5C, // 0E5C..0E80; UNKNOWN 5793 0x0E81, // 0E81..0E82; LAO 5794 0x0E83, // 0E83 ; UNKNOWN 5795 0x0E84, // 0E84 ; LAO 5796 0x0E85, // 0E85 ; UNKNOWN 5797 0x0E86, // 0E86..0E8A; LAO 5798 0x0E8B, // 0E8B ; UNKNOWN 5799 0x0E8C, // 0E8C..0EA3; LAO 5800 0x0EA4, // 0EA4 ; UNKNOWN 5801 0x0EA5, // 0EA5 ; LAO 5802 0x0EA6, // 0EA6 ; UNKNOWN 5803 0x0EA7, // 0EA7..0EBD; LAO 5804 0x0EBE, // 0EBE..0EBF; UNKNOWN 5805 0x0EC0, // 0EC0..0EC4; LAO 5806 0x0EC5, // 0EC5 ; UNKNOWN 5807 0x0EC6, // 0EC6 ; LAO 5808 0x0EC7, // 0EC7 ; UNKNOWN 5809 0x0EC8, // 0EC8..0ECE; LAO 5810 0x0ECF, // 0ECF ; UNKNOWN 5811 0x0ED0, // 0ED0..0ED9; LAO 5812 0x0EDA, // 0EDA..0EDB; UNKNOWN 5813 0x0EDC, // 0EDC..0EDF; LAO 5814 0x0EE0, // 0EE0..0EFF; UNKNOWN 5815 0x0F00, // 0F00..0F47; TIBETAN 5816 0x0F48, // 0F48 ; UNKNOWN 5817 0x0F49, // 0F49..0F6C; TIBETAN 5818 0x0F6D, // 0F6D..0F70; UNKNOWN 5819 0x0F71, // 0F71..0F97; TIBETAN 5820 0x0F98, // 0F98 ; UNKNOWN 5821 0x0F99, // 0F99..0FBC; TIBETAN 5822 0x0FBD, // 0FBD ; UNKNOWN 5823 0x0FBE, // 0FBE..0FCC; TIBETAN 5824 0x0FCD, // 0FCD ; UNKNOWN 5825 0x0FCE, // 0FCE..0FD4; TIBETAN 5826 0x0FD5, // 0FD5..0FD8; COMMON 5827 0x0FD9, // 0FD9..0FDA; TIBETAN 5828 0x0FDB, // 0FDB..0FFF; UNKNOWN 5829 0x1000, // 1000..109F; MYANMAR 5830 0x10A0, // 10A0..10C5; GEORGIAN 5831 0x10C6, // 10C6 ; UNKNOWN 5832 0x10C7, // 10C7 ; GEORGIAN 5833 0x10C8, // 10C8..10CC; UNKNOWN 5834 0x10CD, // 10CD ; GEORGIAN 5835 0x10CE, // 10CE..10CF; UNKNOWN 5836 0x10D0, // 10D0..10FA; GEORGIAN 5837 0x10FB, // 10FB ; COMMON 5838 0x10FC, // 10FC..10FF; GEORGIAN 5839 0x1100, // 1100..11FF; HANGUL 5840 0x1200, // 1200..1248; ETHIOPIC 5841 0x1249, // 1249 ; UNKNOWN 5842 0x124A, // 124A..124D; ETHIOPIC 5843 0x124E, // 124E..124F; UNKNOWN 5844 0x1250, // 1250..1256; ETHIOPIC 5845 0x1257, // 1257 ; UNKNOWN 5846 0x1258, // 1258 ; ETHIOPIC 5847 0x1259, // 1259 ; UNKNOWN 5848 0x125A, // 125A..125D; ETHIOPIC 5849 0x125E, // 125E..125F; UNKNOWN 5850 0x1260, // 1260..1288; 
ETHIOPIC 5851 0x1289, // 1289 ; UNKNOWN 5852 0x128A, // 128A..128D; ETHIOPIC 5853 0x128E, // 128E..128F; UNKNOWN 5854 0x1290, // 1290..12B0; ETHIOPIC 5855 0x12B1, // 12B1 ; UNKNOWN 5856 0x12B2, // 12B2..12B5; ETHIOPIC 5857 0x12B6, // 12B6..12B7; UNKNOWN 5858 0x12B8, // 12B8..12BE; ETHIOPIC 5859 0x12BF, // 12BF ; UNKNOWN 5860 0x12C0, // 12C0 ; ETHIOPIC 5861 0x12C1, // 12C1 ; UNKNOWN 5862 0x12C2, // 12C2..12C5; ETHIOPIC 5863 0x12C6, // 12C6..12C7; UNKNOWN 5864 0x12C8, // 12C8..12D6; ETHIOPIC 5865 0x12D7, // 12D7 ; UNKNOWN 5866 0x12D8, // 12D8..1310; ETHIOPIC 5867 0x1311, // 1311 ; UNKNOWN 5868 0x1312, // 1312..1315; ETHIOPIC 5869 0x1316, // 1316..1317; UNKNOWN 5870 0x1318, // 1318..135A; ETHIOPIC 5871 0x135B, // 135B..135C; UNKNOWN 5872 0x135D, // 135D..137C; ETHIOPIC 5873 0x137D, // 137D..137F; UNKNOWN 5874 0x1380, // 1380..1399; ETHIOPIC 5875 0x139A, // 139A..139F; UNKNOWN 5876 0x13A0, // 13A0..13F5; CHEROKEE 5877 0x13F6, // 13F6..13F7; UNKNOWN 5878 0x13F8, // 13F8..13FD; CHEROKEE 5879 0x13FE, // 13FE..13FF; UNKNOWN 5880 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 5881 0x1680, // 1680..169C; OGHAM 5882 0x169D, // 169D..169F; UNKNOWN 5883 0x16A0, // 16A0..16EA; RUNIC 5884 0x16EB, // 16EB..16ED; COMMON 5885 0x16EE, // 16EE..16F8; RUNIC 5886 0x16F9, // 16F9..16FF; UNKNOWN 5887 0x1700, // 1700..1715; TAGALOG 5888 0x1716, // 1716..171E; UNKNOWN 5889 0x171F, // 171F ; TAGALOG 5890 0x1720, // 1720..1734; HANUNOO 5891 0x1735, // 1735..1736; COMMON 5892 0x1737, // 1737..173F; UNKNOWN 5893 0x1740, // 1740..1753; BUHID 5894 0x1754, // 1754..175F; UNKNOWN 5895 0x1760, // 1760..176C; TAGBANWA 5896 0x176D, // 176D ; UNKNOWN 5897 0x176E, // 176E..1770; TAGBANWA 5898 0x1771, // 1771 ; UNKNOWN 5899 0x1772, // 1772..1773; TAGBANWA 5900 0x1774, // 1774..177F; UNKNOWN 5901 0x1780, // 1780..17DD; KHMER 5902 0x17DE, // 17DE..17DF; UNKNOWN 5903 0x17E0, // 17E0..17E9; KHMER 5904 0x17EA, // 17EA..17EF; UNKNOWN 5905 0x17F0, // 17F0..17F9; KHMER 5906 0x17FA, // 17FA..17FF; UNKNOWN 5907 
0x1800, // 1800..1801; MONGOLIAN 5908 0x1802, // 1802..1803; COMMON 5909 0x1804, // 1804 ; MONGOLIAN 5910 0x1805, // 1805 ; COMMON 5911 0x1806, // 1806..1819; MONGOLIAN 5912 0x181A, // 181A..181F; UNKNOWN 5913 0x1820, // 1820..1878; MONGOLIAN 5914 0x1879, // 1879..187F; UNKNOWN 5915 0x1880, // 1880..18AA; MONGOLIAN 5916 0x18AB, // 18AB..18AF; UNKNOWN 5917 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 5918 0x18F6, // 18F6..18FF; UNKNOWN 5919 0x1900, // 1900..191E; LIMBU 5920 0x191F, // 191F ; UNKNOWN 5921 0x1920, // 1920..192B; LIMBU 5922 0x192C, // 192C..192F; UNKNOWN 5923 0x1930, // 1930..193B; LIMBU 5924 0x193C, // 193C..193F; UNKNOWN 5925 0x1940, // 1940 ; LIMBU 5926 0x1941, // 1941..1943; UNKNOWN 5927 0x1944, // 1944..194F; LIMBU 5928 0x1950, // 1950..196D; TAI_LE 5929 0x196E, // 196E..196F; UNKNOWN 5930 0x1970, // 1970..1974; TAI_LE 5931 0x1975, // 1975..197F; UNKNOWN 5932 0x1980, // 1980..19AB; NEW_TAI_LUE 5933 0x19AC, // 19AC..19AF; UNKNOWN 5934 0x19B0, // 19B0..19C9; NEW_TAI_LUE 5935 0x19CA, // 19CA..19CF; UNKNOWN 5936 0x19D0, // 19D0..19DA; NEW_TAI_LUE 5937 0x19DB, // 19DB..19DD; UNKNOWN 5938 0x19DE, // 19DE..19DF; NEW_TAI_LUE 5939 0x19E0, // 19E0..19FF; KHMER 5940 0x1A00, // 1A00..1A1B; BUGINESE 5941 0x1A1C, // 1A1C..1A1D; UNKNOWN 5942 0x1A1E, // 1A1E..1A1F; BUGINESE 5943 0x1A20, // 1A20..1A5E; TAI_THAM 5944 0x1A5F, // 1A5F ; UNKNOWN 5945 0x1A60, // 1A60..1A7C; TAI_THAM 5946 0x1A7D, // 1A7D..1A7E; UNKNOWN 5947 0x1A7F, // 1A7F..1A89; TAI_THAM 5948 0x1A8A, // 1A8A..1A8F; UNKNOWN 5949 0x1A90, // 1A90..1A99; TAI_THAM 5950 0x1A9A, // 1A9A..1A9F; UNKNOWN 5951 0x1AA0, // 1AA0..1AAD; TAI_THAM 5952 0x1AAE, // 1AAE..1AAF; UNKNOWN 5953 0x1AB0, // 1AB0..1ACE; INHERITED 5954 0x1ACF, // 1ACF..1AFF; UNKNOWN 5955 0x1B00, // 1B00..1B4C; BALINESE 5956 0x1B4D, // 1B4D..1B4F; UNKNOWN 5957 0x1B50, // 1B50..1B7E; BALINESE 5958 0x1B7F, // 1B7F ; UNKNOWN 5959 0x1B80, // 1B80..1BBF; SUNDANESE 5960 0x1BC0, // 1BC0..1BF3; BATAK 5961 0x1BF4, // 1BF4..1BFB; UNKNOWN 5962 0x1BFC, // 
1BFC..1BFF; BATAK 5963 0x1C00, // 1C00..1C37; LEPCHA 5964 0x1C38, // 1C38..1C3A; UNKNOWN 5965 0x1C3B, // 1C3B..1C49; LEPCHA 5966 0x1C4A, // 1C4A..1C4C; UNKNOWN 5967 0x1C4D, // 1C4D..1C4F; LEPCHA 5968 0x1C50, // 1C50..1C7F; OL_CHIKI 5969 0x1C80, // 1C80..1C88; CYRILLIC 5970 0x1C89, // 1C89..1C8F; UNKNOWN 5971 0x1C90, // 1C90..1CBA; GEORGIAN 5972 0x1CBB, // 1CBB..1CBC; UNKNOWN 5973 0x1CBD, // 1CBD..1CBF; GEORGIAN 5974 0x1CC0, // 1CC0..1CC7; SUNDANESE 5975 0x1CC8, // 1CC8..1CCF; UNKNOWN 5976 0x1CD0, // 1CD0..1CD2; INHERITED 5977 0x1CD3, // 1CD3 ; COMMON 5978 0x1CD4, // 1CD4..1CE0; INHERITED 5979 0x1CE1, // 1CE1 ; COMMON 5980 0x1CE2, // 1CE2..1CE8; INHERITED 5981 0x1CE9, // 1CE9..1CEC; COMMON 5982 0x1CED, // 1CED ; INHERITED 5983 0x1CEE, // 1CEE..1CF3; COMMON 5984 0x1CF4, // 1CF4 ; INHERITED 5985 0x1CF5, // 1CF5..1CF7; COMMON 5986 0x1CF8, // 1CF8..1CF9; INHERITED 5987 0x1CFA, // 1CFA ; COMMON 5988 0x1CFB, // 1CFB..1CFF; UNKNOWN 5989 0x1D00, // 1D00..1D25; LATIN 5990 0x1D26, // 1D26..1D2A; GREEK 5991 0x1D2B, // 1D2B ; CYRILLIC 5992 0x1D2C, // 1D2C..1D5C; LATIN 5993 0x1D5D, // 1D5D..1D61; GREEK 5994 0x1D62, // 1D62..1D65; LATIN 5995 0x1D66, // 1D66..1D6A; GREEK 5996 0x1D6B, // 1D6B..1D77; LATIN 5997 0x1D78, // 1D78 ; CYRILLIC 5998 0x1D79, // 1D79..1DBE; LATIN 5999 0x1DBF, // 1DBF ; GREEK 6000 0x1DC0, // 1DC0..1DFF; INHERITED 6001 0x1E00, // 1E00..1EFF; LATIN 6002 0x1F00, // 1F00..1F15; GREEK 6003 0x1F16, // 1F16..1F17; UNKNOWN 6004 0x1F18, // 1F18..1F1D; GREEK 6005 0x1F1E, // 1F1E..1F1F; UNKNOWN 6006 0x1F20, // 1F20..1F45; GREEK 6007 0x1F46, // 1F46..1F47; UNKNOWN 6008 0x1F48, // 1F48..1F4D; GREEK 6009 0x1F4E, // 1F4E..1F4F; UNKNOWN 6010 0x1F50, // 1F50..1F57; GREEK 6011 0x1F58, // 1F58 ; UNKNOWN 6012 0x1F59, // 1F59 ; GREEK 6013 0x1F5A, // 1F5A ; UNKNOWN 6014 0x1F5B, // 1F5B ; GREEK 6015 0x1F5C, // 1F5C ; UNKNOWN 6016 0x1F5D, // 1F5D ; GREEK 6017 0x1F5E, // 1F5E ; UNKNOWN 6018 0x1F5F, // 1F5F..1F7D; GREEK 6019 0x1F7E, // 1F7E..1F7F; UNKNOWN 6020 0x1F80, // 1F80..1FB4; 
GREEK 6021 0x1FB5, // 1FB5 ; UNKNOWN 6022 0x1FB6, // 1FB6..1FC4; GREEK 6023 0x1FC5, // 1FC5 ; UNKNOWN 6024 0x1FC6, // 1FC6..1FD3; GREEK 6025 0x1FD4, // 1FD4..1FD5; UNKNOWN 6026 0x1FD6, // 1FD6..1FDB; GREEK 6027 0x1FDC, // 1FDC ; UNKNOWN 6028 0x1FDD, // 1FDD..1FEF; GREEK 6029 0x1FF0, // 1FF0..1FF1; UNKNOWN 6030 0x1FF2, // 1FF2..1FF4; GREEK 6031 0x1FF5, // 1FF5 ; UNKNOWN 6032 0x1FF6, // 1FF6..1FFE; GREEK 6033 0x1FFF, // 1FFF ; UNKNOWN 6034 0x2000, // 2000..200B; COMMON 6035 0x200C, // 200C..200D; INHERITED 6036 0x200E, // 200E..2064; COMMON 6037 0x2065, // 2065 ; UNKNOWN 6038 0x2066, // 2066..2070; COMMON 6039 0x2071, // 2071 ; LATIN 6040 0x2072, // 2072..2073; UNKNOWN 6041 0x2074, // 2074..207E; COMMON 6042 0x207F, // 207F ; LATIN 6043 0x2080, // 2080..208E; COMMON 6044 0x208F, // 208F ; UNKNOWN 6045 0x2090, // 2090..209C; LATIN 6046 0x209D, // 209D..209F; UNKNOWN 6047 0x20A0, // 20A0..20C0; COMMON 6048 0x20C1, // 20C1..20CF; UNKNOWN 6049 0x20D0, // 20D0..20F0; INHERITED 6050 0x20F1, // 20F1..20FF; UNKNOWN 6051 0x2100, // 2100..2125; COMMON 6052 0x2126, // 2126 ; GREEK 6053 0x2127, // 2127..2129; COMMON 6054 0x212A, // 212A..212B; LATIN 6055 0x212C, // 212C..2131; COMMON 6056 0x2132, // 2132 ; LATIN 6057 0x2133, // 2133..214D; COMMON 6058 0x214E, // 214E ; LATIN 6059 0x214F, // 214F..215F; COMMON 6060 0x2160, // 2160..2188; LATIN 6061 0x2189, // 2189..218B; COMMON 6062 0x218C, // 218C..218F; UNKNOWN 6063 0x2190, // 2190..2426; COMMON 6064 0x2427, // 2427..243F; UNKNOWN 6065 0x2440, // 2440..244A; COMMON 6066 0x244B, // 244B..245F; UNKNOWN 6067 0x2460, // 2460..27FF; COMMON 6068 0x2800, // 2800..28FF; BRAILLE 6069 0x2900, // 2900..2B73; COMMON 6070 0x2B74, // 2B74..2B75; UNKNOWN 6071 0x2B76, // 2B76..2B95; COMMON 6072 0x2B96, // 2B96 ; UNKNOWN 6073 0x2B97, // 2B97..2BFF; COMMON 6074 0x2C00, // 2C00..2C5F; GLAGOLITIC 6075 0x2C60, // 2C60..2C7F; LATIN 6076 0x2C80, // 2C80..2CF3; COPTIC 6077 0x2CF4, // 2CF4..2CF8; UNKNOWN 6078 0x2CF9, // 2CF9..2CFF; COPTIC 6079 0x2D00, 
// 2D00..2D25; GEORGIAN 6080 0x2D26, // 2D26 ; UNKNOWN 6081 0x2D27, // 2D27 ; GEORGIAN 6082 0x2D28, // 2D28..2D2C; UNKNOWN 6083 0x2D2D, // 2D2D ; GEORGIAN 6084 0x2D2E, // 2D2E..2D2F; UNKNOWN 6085 0x2D30, // 2D30..2D67; TIFINAGH 6086 0x2D68, // 2D68..2D6E; UNKNOWN 6087 0x2D6F, // 2D6F..2D70; TIFINAGH 6088 0x2D71, // 2D71..2D7E; UNKNOWN 6089 0x2D7F, // 2D7F ; TIFINAGH 6090 0x2D80, // 2D80..2D96; ETHIOPIC 6091 0x2D97, // 2D97..2D9F; UNKNOWN 6092 0x2DA0, // 2DA0..2DA6; ETHIOPIC 6093 0x2DA7, // 2DA7 ; UNKNOWN 6094 0x2DA8, // 2DA8..2DAE; ETHIOPIC 6095 0x2DAF, // 2DAF ; UNKNOWN 6096 0x2DB0, // 2DB0..2DB6; ETHIOPIC 6097 0x2DB7, // 2DB7 ; UNKNOWN 6098 0x2DB8, // 2DB8..2DBE; ETHIOPIC 6099 0x2DBF, // 2DBF ; UNKNOWN 6100 0x2DC0, // 2DC0..2DC6; ETHIOPIC 6101 0x2DC7, // 2DC7 ; UNKNOWN 6102 0x2DC8, // 2DC8..2DCE; ETHIOPIC 6103 0x2DCF, // 2DCF ; UNKNOWN 6104 0x2DD0, // 2DD0..2DD6; ETHIOPIC 6105 0x2DD7, // 2DD7 ; UNKNOWN 6106 0x2DD8, // 2DD8..2DDE; ETHIOPIC 6107 0x2DDF, // 2DDF ; UNKNOWN 6108 0x2DE0, // 2DE0..2DFF; CYRILLIC 6109 0x2E00, // 2E00..2E5D; COMMON 6110 0x2E5E, // 2E5E..2E7F; UNKNOWN 6111 0x2E80, // 2E80..2E99; HAN 6112 0x2E9A, // 2E9A ; UNKNOWN 6113 0x2E9B, // 2E9B..2EF3; HAN 6114 0x2EF4, // 2EF4..2EFF; UNKNOWN 6115 0x2F00, // 2F00..2FD5; HAN 6116 0x2FD6, // 2FD6..2FEF; UNKNOWN 6117 0x2FF0, // 2FF0..2FFB; COMMON 6118 0x2FFC, // 2FFC..2FFF; UNKNOWN 6119 0x3000, // 3000..3004; COMMON 6120 0x3005, // 3005 ; HAN 6121 0x3006, // 3006 ; COMMON 6122 0x3007, // 3007 ; HAN 6123 0x3008, // 3008..3020; COMMON 6124 0x3021, // 3021..3029; HAN 6125 0x302A, // 302A..302D; INHERITED 6126 0x302E, // 302E..302F; HANGUL 6127 0x3030, // 3030..3037; COMMON 6128 0x3038, // 3038..303B; HAN 6129 0x303C, // 303C..303F; COMMON 6130 0x3040, // 3040 ; UNKNOWN 6131 0x3041, // 3041..3096; HIRAGANA 6132 0x3097, // 3097..3098; UNKNOWN 6133 0x3099, // 3099..309A; INHERITED 6134 0x309B, // 309B..309C; COMMON 6135 0x309D, // 309D..309F; HIRAGANA 6136 0x30A0, // 30A0 ; COMMON 6137 0x30A1, // 30A1..30FA; 
KATAKANA 6138 0x30FB, // 30FB..30FC; COMMON 6139 0x30FD, // 30FD..30FF; KATAKANA 6140 0x3100, // 3100..3104; UNKNOWN 6141 0x3105, // 3105..312F; BOPOMOFO 6142 0x3130, // 3130 ; UNKNOWN 6143 0x3131, // 3131..318E; HANGUL 6144 0x318F, // 318F ; UNKNOWN 6145 0x3190, // 3190..319F; COMMON 6146 0x31A0, // 31A0..31BF; BOPOMOFO 6147 0x31C0, // 31C0..31E3; COMMON 6148 0x31E4, // 31E4..31EF; UNKNOWN 6149 0x31F0, // 31F0..31FF; KATAKANA 6150 0x3200, // 3200..321E; HANGUL 6151 0x321F, // 321F ; UNKNOWN 6152 0x3220, // 3220..325F; COMMON 6153 0x3260, // 3260..327E; HANGUL 6154 0x327F, // 327F..32CF; COMMON 6155 0x32D0, // 32D0..32FE; KATAKANA 6156 0x32FF, // 32FF ; COMMON 6157 0x3300, // 3300..3357; KATAKANA 6158 0x3358, // 3358..33FF; COMMON 6159 0x3400, // 3400..4DBF; HAN 6160 0x4DC0, // 4DC0..4DFF; COMMON 6161 0x4E00, // 4E00..9FFF; HAN 6162 0xA000, // A000..A48C; YI 6163 0xA48D, // A48D..A48F; UNKNOWN 6164 0xA490, // A490..A4C6; YI 6165 0xA4C7, // A4C7..A4CF; UNKNOWN 6166 0xA4D0, // A4D0..A4FF; LISU 6167 0xA500, // A500..A62B; VAI 6168 0xA62C, // A62C..A63F; UNKNOWN 6169 0xA640, // A640..A69F; CYRILLIC 6170 0xA6A0, // A6A0..A6F7; BAMUM 6171 0xA6F8, // A6F8..A6FF; UNKNOWN 6172 0xA700, // A700..A721; COMMON 6173 0xA722, // A722..A787; LATIN 6174 0xA788, // A788..A78A; COMMON 6175 0xA78B, // A78B..A7CA; LATIN 6176 0xA7CB, // A7CB..A7CF; UNKNOWN 6177 0xA7D0, // A7D0..A7D1; LATIN 6178 0xA7D2, // A7D2 ; UNKNOWN 6179 0xA7D3, // A7D3 ; LATIN 6180 0xA7D4, // A7D4 ; UNKNOWN 6181 0xA7D5, // A7D5..A7D9; LATIN 6182 0xA7DA, // A7DA..A7F1; UNKNOWN 6183 0xA7F2, // A7F2..A7FF; LATIN 6184 0xA800, // A800..A82C; SYLOTI_NAGRI 6185 0xA82D, // A82D..A82F; UNKNOWN 6186 0xA830, // A830..A839; COMMON 6187 0xA83A, // A83A..A83F; UNKNOWN 6188 0xA840, // A840..A877; PHAGS_PA 6189 0xA878, // A878..A87F; UNKNOWN 6190 0xA880, // A880..A8C5; SAURASHTRA 6191 0xA8C6, // A8C6..A8CD; UNKNOWN 6192 0xA8CE, // A8CE..A8D9; SAURASHTRA 6193 0xA8DA, // A8DA..A8DF; UNKNOWN 6194 0xA8E0, // A8E0..A8FF; DEVANAGARI 6195 
0xA900, // A900..A92D; KAYAH_LI 6196 0xA92E, // A92E ; COMMON 6197 0xA92F, // A92F ; KAYAH_LI 6198 0xA930, // A930..A953; REJANG 6199 0xA954, // A954..A95E; UNKNOWN 6200 0xA95F, // A95F ; REJANG 6201 0xA960, // A960..A97C; HANGUL 6202 0xA97D, // A97D..A97F; UNKNOWN 6203 0xA980, // A980..A9CD; JAVANESE 6204 0xA9CE, // A9CE ; UNKNOWN 6205 0xA9CF, // A9CF ; COMMON 6206 0xA9D0, // A9D0..A9D9; JAVANESE 6207 0xA9DA, // A9DA..A9DD; UNKNOWN 6208 0xA9DE, // A9DE..A9DF; JAVANESE 6209 0xA9E0, // A9E0..A9FE; MYANMAR 6210 0xA9FF, // A9FF ; UNKNOWN 6211 0xAA00, // AA00..AA36; CHAM 6212 0xAA37, // AA37..AA3F; UNKNOWN 6213 0xAA40, // AA40..AA4D; CHAM 6214 0xAA4E, // AA4E..AA4F; UNKNOWN 6215 0xAA50, // AA50..AA59; CHAM 6216 0xAA5A, // AA5A..AA5B; UNKNOWN 6217 0xAA5C, // AA5C..AA5F; CHAM 6218 0xAA60, // AA60..AA7F; MYANMAR 6219 0xAA80, // AA80..AAC2; TAI_VIET 6220 0xAAC3, // AAC3..AADA; UNKNOWN 6221 0xAADB, // AADB..AADF; TAI_VIET 6222 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 6223 0xAAF7, // AAF7..AB00; UNKNOWN 6224 0xAB01, // AB01..AB06; ETHIOPIC 6225 0xAB07, // AB07..AB08; UNKNOWN 6226 0xAB09, // AB09..AB0E; ETHIOPIC 6227 0xAB0F, // AB0F..AB10; UNKNOWN 6228 0xAB11, // AB11..AB16; ETHIOPIC 6229 0xAB17, // AB17..AB1F; UNKNOWN 6230 0xAB20, // AB20..AB26; ETHIOPIC 6231 0xAB27, // AB27 ; UNKNOWN 6232 0xAB28, // AB28..AB2E; ETHIOPIC 6233 0xAB2F, // AB2F ; UNKNOWN 6234 0xAB30, // AB30..AB5A; LATIN 6235 0xAB5B, // AB5B ; COMMON 6236 0xAB5C, // AB5C..AB64; LATIN 6237 0xAB65, // AB65 ; GREEK 6238 0xAB66, // AB66..AB69; LATIN 6239 0xAB6A, // AB6A..AB6B; COMMON 6240 0xAB6C, // AB6C..AB6F; UNKNOWN 6241 0xAB70, // AB70..ABBF; CHEROKEE 6242 0xABC0, // ABC0..ABED; MEETEI_MAYEK 6243 0xABEE, // ABEE..ABEF; UNKNOWN 6244 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 6245 0xABFA, // ABFA..ABFF; UNKNOWN 6246 0xAC00, // AC00..D7A3; HANGUL 6247 0xD7A4, // D7A4..D7AF; UNKNOWN 6248 0xD7B0, // D7B0..D7C6; HANGUL 6249 0xD7C7, // D7C7..D7CA; UNKNOWN 6250 0xD7CB, // D7CB..D7FB; HANGUL 6251 0xD7FC, // D7FC..F8FF; UNKNOWN 6252 
0xF900, // F900..FA6D; HAN 6253 0xFA6E, // FA6E..FA6F; UNKNOWN 6254 0xFA70, // FA70..FAD9; HAN 6255 0xFADA, // FADA..FAFF; UNKNOWN 6256 0xFB00, // FB00..FB06; LATIN 6257 0xFB07, // FB07..FB12; UNKNOWN 6258 0xFB13, // FB13..FB17; ARMENIAN 6259 0xFB18, // FB18..FB1C; UNKNOWN 6260 0xFB1D, // FB1D..FB36; HEBREW 6261 0xFB37, // FB37 ; UNKNOWN 6262 0xFB38, // FB38..FB3C; HEBREW 6263 0xFB3D, // FB3D ; UNKNOWN 6264 0xFB3E, // FB3E ; HEBREW 6265 0xFB3F, // FB3F ; UNKNOWN 6266 0xFB40, // FB40..FB41; HEBREW 6267 0xFB42, // FB42 ; UNKNOWN 6268 0xFB43, // FB43..FB44; HEBREW 6269 0xFB45, // FB45 ; UNKNOWN 6270 0xFB46, // FB46..FB4F; HEBREW 6271 0xFB50, // FB50..FBC2; ARABIC 6272 0xFBC3, // FBC3..FBD2; UNKNOWN 6273 0xFBD3, // FBD3..FD3D; ARABIC 6274 0xFD3E, // FD3E..FD3F; COMMON 6275 0xFD40, // FD40..FD8F; ARABIC 6276 0xFD90, // FD90..FD91; UNKNOWN 6277 0xFD92, // FD92..FDC7; ARABIC 6278 0xFDC8, // FDC8..FDCE; UNKNOWN 6279 0xFDCF, // FDCF ; ARABIC 6280 0xFDD0, // FDD0..FDEF; UNKNOWN 6281 0xFDF0, // FDF0..FDFF; ARABIC 6282 0xFE00, // FE00..FE0F; INHERITED 6283 0xFE10, // FE10..FE19; COMMON 6284 0xFE1A, // FE1A..FE1F; UNKNOWN 6285 0xFE20, // FE20..FE2D; INHERITED 6286 0xFE2E, // FE2E..FE2F; CYRILLIC 6287 0xFE30, // FE30..FE52; COMMON 6288 0xFE53, // FE53 ; UNKNOWN 6289 0xFE54, // FE54..FE66; COMMON 6290 0xFE67, // FE67 ; UNKNOWN 6291 0xFE68, // FE68..FE6B; COMMON 6292 0xFE6C, // FE6C..FE6F; UNKNOWN 6293 0xFE70, // FE70..FE74; ARABIC 6294 0xFE75, // FE75 ; UNKNOWN 6295 0xFE76, // FE76..FEFC; ARABIC 6296 0xFEFD, // FEFD..FEFE; UNKNOWN 6297 0xFEFF, // FEFF ; COMMON 6298 0xFF00, // FF00 ; UNKNOWN 6299 0xFF01, // FF01..FF20; COMMON 6300 0xFF21, // FF21..FF3A; LATIN 6301 0xFF3B, // FF3B..FF40; COMMON 6302 0xFF41, // FF41..FF5A; LATIN 6303 0xFF5B, // FF5B..FF65; COMMON 6304 0xFF66, // FF66..FF6F; KATAKANA 6305 0xFF70, // FF70 ; COMMON 6306 0xFF71, // FF71..FF9D; KATAKANA 6307 0xFF9E, // FF9E..FF9F; COMMON 6308 0xFFA0, // FFA0..FFBE; HANGUL 6309 0xFFBF, // FFBF..FFC1; UNKNOWN 6310 0xFFC2, 
// FFC2..FFC7; HANGUL 6311 0xFFC8, // FFC8..FFC9; UNKNOWN 6312 0xFFCA, // FFCA..FFCF; HANGUL 6313 0xFFD0, // FFD0..FFD1; UNKNOWN 6314 0xFFD2, // FFD2..FFD7; HANGUL 6315 0xFFD8, // FFD8..FFD9; UNKNOWN 6316 0xFFDA, // FFDA..FFDC; HANGUL 6317 0xFFDD, // FFDD..FFDF; UNKNOWN 6318 0xFFE0, // FFE0..FFE6; COMMON 6319 0xFFE7, // FFE7 ; UNKNOWN 6320 0xFFE8, // FFE8..FFEE; COMMON 6321 0xFFEF, // FFEF..FFF8; UNKNOWN 6322 0xFFF9, // FFF9..FFFD; COMMON 6323 0xFFFE, // FFFE..FFFF; UNKNOWN 6324 0x10000, // 10000..1000B; LINEAR_B 6325 0x1000C, // 1000C ; UNKNOWN 6326 0x1000D, // 1000D..10026; LINEAR_B 6327 0x10027, // 10027 ; UNKNOWN 6328 0x10028, // 10028..1003A; LINEAR_B 6329 0x1003B, // 1003B ; UNKNOWN 6330 0x1003C, // 1003C..1003D; LINEAR_B 6331 0x1003E, // 1003E ; UNKNOWN 6332 0x1003F, // 1003F..1004D; LINEAR_B 6333 0x1004E, // 1004E..1004F; UNKNOWN 6334 0x10050, // 10050..1005D; LINEAR_B 6335 0x1005E, // 1005E..1007F; UNKNOWN 6336 0x10080, // 10080..100FA; LINEAR_B 6337 0x100FB, // 100FB..100FF; UNKNOWN 6338 0x10100, // 10100..10102; COMMON 6339 0x10103, // 10103..10106; UNKNOWN 6340 0x10107, // 10107..10133; COMMON 6341 0x10134, // 10134..10136; UNKNOWN 6342 0x10137, // 10137..1013F; COMMON 6343 0x10140, // 10140..1018E; GREEK 6344 0x1018F, // 1018F ; UNKNOWN 6345 0x10190, // 10190..1019C; COMMON 6346 0x1019D, // 1019D..1019F; UNKNOWN 6347 0x101A0, // 101A0 ; GREEK 6348 0x101A1, // 101A1..101CF; UNKNOWN 6349 0x101D0, // 101D0..101FC; COMMON 6350 0x101FD, // 101FD ; INHERITED 6351 0x101FE, // 101FE..1027F; UNKNOWN 6352 0x10280, // 10280..1029C; LYCIAN 6353 0x1029D, // 1029D..1029F; UNKNOWN 6354 0x102A0, // 102A0..102D0; CARIAN 6355 0x102D1, // 102D1..102DF; UNKNOWN 6356 0x102E0, // 102E0 ; INHERITED 6357 0x102E1, // 102E1..102FB; COMMON 6358 0x102FC, // 102FC..102FF; UNKNOWN 6359 0x10300, // 10300..10323; OLD_ITALIC 6360 0x10324, // 10324..1032C; UNKNOWN 6361 0x1032D, // 1032D..1032F; OLD_ITALIC 6362 0x10330, // 10330..1034A; GOTHIC 6363 0x1034B, // 1034B..1034F; UNKNOWN 6364 
0x10350, // 10350..1037A; OLD_PERMIC 6365 0x1037B, // 1037B..1037F; UNKNOWN 6366 0x10380, // 10380..1039D; UGARITIC 6367 0x1039E, // 1039E ; UNKNOWN 6368 0x1039F, // 1039F ; UGARITIC 6369 0x103A0, // 103A0..103C3; OLD_PERSIAN 6370 0x103C4, // 103C4..103C7; UNKNOWN 6371 0x103C8, // 103C8..103D5; OLD_PERSIAN 6372 0x103D6, // 103D6..103FF; UNKNOWN 6373 0x10400, // 10400..1044F; DESERET 6374 0x10450, // 10450..1047F; SHAVIAN 6375 0x10480, // 10480..1049D; OSMANYA 6376 0x1049E, // 1049E..1049F; UNKNOWN 6377 0x104A0, // 104A0..104A9; OSMANYA 6378 0x104AA, // 104AA..104AF; UNKNOWN 6379 0x104B0, // 104B0..104D3; OSAGE 6380 0x104D4, // 104D4..104D7; UNKNOWN 6381 0x104D8, // 104D8..104FB; OSAGE 6382 0x104FC, // 104FC..104FF; UNKNOWN 6383 0x10500, // 10500..10527; ELBASAN 6384 0x10528, // 10528..1052F; UNKNOWN 6385 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 6386 0x10564, // 10564..1056E; UNKNOWN 6387 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 6388 0x10570, // 10570..1057A; VITHKUQI 6389 0x1057B, // 1057B ; UNKNOWN 6390 0x1057C, // 1057C..1058A; VITHKUQI 6391 0x1058B, // 1058B ; UNKNOWN 6392 0x1058C, // 1058C..10592; VITHKUQI 6393 0x10593, // 10593 ; UNKNOWN 6394 0x10594, // 10594..10595; VITHKUQI 6395 0x10596, // 10596 ; UNKNOWN 6396 0x10597, // 10597..105A1; VITHKUQI 6397 0x105A2, // 105A2 ; UNKNOWN 6398 0x105A3, // 105A3..105B1; VITHKUQI 6399 0x105B2, // 105B2 ; UNKNOWN 6400 0x105B3, // 105B3..105B9; VITHKUQI 6401 0x105BA, // 105BA ; UNKNOWN 6402 0x105BB, // 105BB..105BC; VITHKUQI 6403 0x105BD, // 105BD..105FF; UNKNOWN 6404 0x10600, // 10600..10736; LINEAR_A 6405 0x10737, // 10737..1073F; UNKNOWN 6406 0x10740, // 10740..10755; LINEAR_A 6407 0x10756, // 10756..1075F; UNKNOWN 6408 0x10760, // 10760..10767; LINEAR_A 6409 0x10768, // 10768..1077F; UNKNOWN 6410 0x10780, // 10780..10785; LATIN 6411 0x10786, // 10786 ; UNKNOWN 6412 0x10787, // 10787..107B0; LATIN 6413 0x107B1, // 107B1 ; UNKNOWN 6414 0x107B2, // 107B2..107BA; LATIN 6415 0x107BB, // 107BB..107FF; UNKNOWN 6416 
0x10800, // 10800..10805; CYPRIOT 6417 0x10806, // 10806..10807; UNKNOWN 6418 0x10808, // 10808 ; CYPRIOT 6419 0x10809, // 10809 ; UNKNOWN 6420 0x1080A, // 1080A..10835; CYPRIOT 6421 0x10836, // 10836 ; UNKNOWN 6422 0x10837, // 10837..10838; CYPRIOT 6423 0x10839, // 10839..1083B; UNKNOWN 6424 0x1083C, // 1083C ; CYPRIOT 6425 0x1083D, // 1083D..1083E; UNKNOWN 6426 0x1083F, // 1083F ; CYPRIOT 6427 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 6428 0x10856, // 10856 ; UNKNOWN 6429 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 6430 0x10860, // 10860..1087F; PALMYRENE 6431 0x10880, // 10880..1089E; NABATAEAN 6432 0x1089F, // 1089F..108A6; UNKNOWN 6433 0x108A7, // 108A7..108AF; NABATAEAN 6434 0x108B0, // 108B0..108DF; UNKNOWN 6435 0x108E0, // 108E0..108F2; HATRAN 6436 0x108F3, // 108F3 ; UNKNOWN 6437 0x108F4, // 108F4..108F5; HATRAN 6438 0x108F6, // 108F6..108FA; UNKNOWN 6439 0x108FB, // 108FB..108FF; HATRAN 6440 0x10900, // 10900..1091B; PHOENICIAN 6441 0x1091C, // 1091C..1091E; UNKNOWN 6442 0x1091F, // 1091F ; PHOENICIAN 6443 0x10920, // 10920..10939; LYDIAN 6444 0x1093A, // 1093A..1093E; UNKNOWN 6445 0x1093F, // 1093F ; LYDIAN 6446 0x10940, // 10940..1097F; UNKNOWN 6447 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 6448 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 6449 0x109B8, // 109B8..109BB; UNKNOWN 6450 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 6451 0x109D0, // 109D0..109D1; UNKNOWN 6452 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE 6453 0x10A00, // 10A00..10A03; KHAROSHTHI 6454 0x10A04, // 10A04 ; UNKNOWN 6455 0x10A05, // 10A05..10A06; KHAROSHTHI 6456 0x10A07, // 10A07..10A0B; UNKNOWN 6457 0x10A0C, // 10A0C..10A13; KHAROSHTHI 6458 0x10A14, // 10A14 ; UNKNOWN 6459 0x10A15, // 10A15..10A17; KHAROSHTHI 6460 0x10A18, // 10A18 ; UNKNOWN 6461 0x10A19, // 10A19..10A35; KHAROSHTHI 6462 0x10A36, // 10A36..10A37; UNKNOWN 6463 0x10A38, // 10A38..10A3A; KHAROSHTHI 6464 0x10A3B, // 10A3B..10A3E; UNKNOWN 6465 0x10A3F, // 10A3F..10A48; KHAROSHTHI 6466 0x10A49, // 10A49..10A4F; UNKNOWN 
6467 0x10A50, // 10A50..10A58; KHAROSHTHI 6468 0x10A59, // 10A59..10A5F; UNKNOWN 6469 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 6470 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 6471 0x10AA0, // 10AA0..10ABF; UNKNOWN 6472 0x10AC0, // 10AC0..10AE6; MANICHAEAN 6473 0x10AE7, // 10AE7..10AEA; UNKNOWN 6474 0x10AEB, // 10AEB..10AF6; MANICHAEAN 6475 0x10AF7, // 10AF7..10AFF; UNKNOWN 6476 0x10B00, // 10B00..10B35; AVESTAN 6477 0x10B36, // 10B36..10B38; UNKNOWN 6478 0x10B39, // 10B39..10B3F; AVESTAN 6479 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 6480 0x10B56, // 10B56..10B57; UNKNOWN 6481 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 6482 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 6483 0x10B73, // 10B73..10B77; UNKNOWN 6484 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 6485 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 6486 0x10B92, // 10B92..10B98; UNKNOWN 6487 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 6488 0x10B9D, // 10B9D..10BA8; UNKNOWN 6489 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 6490 0x10BB0, // 10BB0..10BFF; UNKNOWN 6491 0x10C00, // 10C00..10C48; OLD_TURKIC 6492 0x10C49, // 10C49..10C7F; UNKNOWN 6493 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 6494 0x10CB3, // 10CB3..10CBF; UNKNOWN 6495 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 6496 0x10CF3, // 10CF3..10CF9; UNKNOWN 6497 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 6498 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 6499 0x10D28, // 10D28..10D2F; UNKNOWN 6500 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 6501 0x10D3A, // 10D3A..10E5F; UNKNOWN 6502 0x10E60, // 10E60..10E7E; ARABIC 6503 0x10E7F, // 10E7F ; UNKNOWN 6504 0x10E80, // 10E80..10EA9; YEZIDI 6505 0x10EAA, // 10EAA ; UNKNOWN 6506 0x10EAB, // 10EAB..10EAD; YEZIDI 6507 0x10EAE, // 10EAE..10EAF; UNKNOWN 6508 0x10EB0, // 10EB0..10EB1; YEZIDI 6509 0x10EB2, // 10EB2..10EFC; UNKNOWN 6510 0x10EFD, // 10EFD..10EFF; ARABIC 6511 0x10F00, // 10F00..10F27; OLD_SOGDIAN 6512 0x10F28, // 10F28..10F2F; UNKNOWN 6513 0x10F30, // 10F30..10F59; SOGDIAN 6514 0x10F5A, // 10F5A..10F6F; 
UNKNOWN 6515 0x10F70, // 10F70..10F89; OLD_UYGHUR 6516 0x10F8A, // 10F8A..10FAF; UNKNOWN 6517 0x10FB0, // 10FB0..10FCB; CHORASMIAN 6518 0x10FCC, // 10FCC..10FDF; UNKNOWN 6519 0x10FE0, // 10FE0..10FF6; ELYMAIC 6520 0x10FF7, // 10FF7..10FFF; UNKNOWN 6521 0x11000, // 11000..1104D; BRAHMI 6522 0x1104E, // 1104E..11051; UNKNOWN 6523 0x11052, // 11052..11075; BRAHMI 6524 0x11076, // 11076..1107E; UNKNOWN 6525 0x1107F, // 1107F ; BRAHMI 6526 0x11080, // 11080..110C2; KAITHI 6527 0x110C3, // 110C3..110CC; UNKNOWN 6528 0x110CD, // 110CD ; KAITHI 6529 0x110CE, // 110CE..110CF; UNKNOWN 6530 0x110D0, // 110D0..110E8; SORA_SOMPENG 6531 0x110E9, // 110E9..110EF; UNKNOWN 6532 0x110F0, // 110F0..110F9; SORA_SOMPENG 6533 0x110FA, // 110FA..110FF; UNKNOWN 6534 0x11100, // 11100..11134; CHAKMA 6535 0x11135, // 11135 ; UNKNOWN 6536 0x11136, // 11136..11147; CHAKMA 6537 0x11148, // 11148..1114F; UNKNOWN 6538 0x11150, // 11150..11176; MAHAJANI 6539 0x11177, // 11177..1117F; UNKNOWN 6540 0x11180, // 11180..111DF; SHARADA 6541 0x111E0, // 111E0 ; UNKNOWN 6542 0x111E1, // 111E1..111F4; SINHALA 6543 0x111F5, // 111F5..111FF; UNKNOWN 6544 0x11200, // 11200..11211; KHOJKI 6545 0x11212, // 11212 ; UNKNOWN 6546 0x11213, // 11213..11241; KHOJKI 6547 0x11242, // 11242..1127F; UNKNOWN 6548 0x11280, // 11280..11286; MULTANI 6549 0x11287, // 11287 ; UNKNOWN 6550 0x11288, // 11288 ; MULTANI 6551 0x11289, // 11289 ; UNKNOWN 6552 0x1128A, // 1128A..1128D; MULTANI 6553 0x1128E, // 1128E ; UNKNOWN 6554 0x1128F, // 1128F..1129D; MULTANI 6555 0x1129E, // 1129E ; UNKNOWN 6556 0x1129F, // 1129F..112A9; MULTANI 6557 0x112AA, // 112AA..112AF; UNKNOWN 6558 0x112B0, // 112B0..112EA; KHUDAWADI 6559 0x112EB, // 112EB..112EF; UNKNOWN 6560 0x112F0, // 112F0..112F9; KHUDAWADI 6561 0x112FA, // 112FA..112FF; UNKNOWN 6562 0x11300, // 11300..11303; GRANTHA 6563 0x11304, // 11304 ; UNKNOWN 6564 0x11305, // 11305..1130C; GRANTHA 6565 0x1130D, // 1130D..1130E; UNKNOWN 6566 0x1130F, // 1130F..11310; GRANTHA 6567 0x11311, // 
11311..11312; UNKNOWN 6568 0x11313, // 11313..11328; GRANTHA 6569 0x11329, // 11329 ; UNKNOWN 6570 0x1132A, // 1132A..11330; GRANTHA 6571 0x11331, // 11331 ; UNKNOWN 6572 0x11332, // 11332..11333; GRANTHA 6573 0x11334, // 11334 ; UNKNOWN 6574 0x11335, // 11335..11339; GRANTHA 6575 0x1133A, // 1133A ; UNKNOWN 6576 0x1133B, // 1133B ; INHERITED 6577 0x1133C, // 1133C..11344; GRANTHA 6578 0x11345, // 11345..11346; UNKNOWN 6579 0x11347, // 11347..11348; GRANTHA 6580 0x11349, // 11349..1134A; UNKNOWN 6581 0x1134B, // 1134B..1134D; GRANTHA 6582 0x1134E, // 1134E..1134F; UNKNOWN 6583 0x11350, // 11350 ; GRANTHA 6584 0x11351, // 11351..11356; UNKNOWN 6585 0x11357, // 11357 ; GRANTHA 6586 0x11358, // 11358..1135C; UNKNOWN 6587 0x1135D, // 1135D..11363; GRANTHA 6588 0x11364, // 11364..11365; UNKNOWN 6589 0x11366, // 11366..1136C; GRANTHA 6590 0x1136D, // 1136D..1136F; UNKNOWN 6591 0x11370, // 11370..11374; GRANTHA 6592 0x11375, // 11375..113FF; UNKNOWN 6593 0x11400, // 11400..1145B; NEWA 6594 0x1145C, // 1145C ; UNKNOWN 6595 0x1145D, // 1145D..11461; NEWA 6596 0x11462, // 11462..1147F; UNKNOWN 6597 0x11480, // 11480..114C7; TIRHUTA 6598 0x114C8, // 114C8..114CF; UNKNOWN 6599 0x114D0, // 114D0..114D9; TIRHUTA 6600 0x114DA, // 114DA..1157F; UNKNOWN 6601 0x11580, // 11580..115B5; SIDDHAM 6602 0x115B6, // 115B6..115B7; UNKNOWN 6603 0x115B8, // 115B8..115DD; SIDDHAM 6604 0x115DE, // 115DE..115FF; UNKNOWN 6605 0x11600, // 11600..11644; MODI 6606 0x11645, // 11645..1164F; UNKNOWN 6607 0x11650, // 11650..11659; MODI 6608 0x1165A, // 1165A..1165F; UNKNOWN 6609 0x11660, // 11660..1166C; MONGOLIAN 6610 0x1166D, // 1166D..1167F; UNKNOWN 6611 0x11680, // 11680..116B9; TAKRI 6612 0x116BA, // 116BA..116BF; UNKNOWN 6613 0x116C0, // 116C0..116C9; TAKRI 6614 0x116CA, // 116CA..116FF; UNKNOWN 6615 0x11700, // 11700..1171A; AHOM 6616 0x1171B, // 1171B..1171C; UNKNOWN 6617 0x1171D, // 1171D..1172B; AHOM 6618 0x1172C, // 1172C..1172F; UNKNOWN 6619 0x11730, // 11730..11746; AHOM 6620 0x11747, // 
11747..117FF; UNKNOWN 6621 0x11800, // 11800..1183B; DOGRA 6622 0x1183C, // 1183C..1189F; UNKNOWN 6623 0x118A0, // 118A0..118F2; WARANG_CITI 6624 0x118F3, // 118F3..118FE; UNKNOWN 6625 0x118FF, // 118FF ; WARANG_CITI 6626 0x11900, // 11900..11906; DIVES_AKURU 6627 0x11907, // 11907..11908; UNKNOWN 6628 0x11909, // 11909 ; DIVES_AKURU 6629 0x1190A, // 1190A..1190B; UNKNOWN 6630 0x1190C, // 1190C..11913; DIVES_AKURU 6631 0x11914, // 11914 ; UNKNOWN 6632 0x11915, // 11915..11916; DIVES_AKURU 6633 0x11917, // 11917 ; UNKNOWN 6634 0x11918, // 11918..11935; DIVES_AKURU 6635 0x11936, // 11936 ; UNKNOWN 6636 0x11937, // 11937..11938; DIVES_AKURU 6637 0x11939, // 11939..1193A; UNKNOWN 6638 0x1193B, // 1193B..11946; DIVES_AKURU 6639 0x11947, // 11947..1194F; UNKNOWN 6640 0x11950, // 11950..11959; DIVES_AKURU 6641 0x1195A, // 1195A..1199F; UNKNOWN 6642 0x119A0, // 119A0..119A7; NANDINAGARI 6643 0x119A8, // 119A8..119A9; UNKNOWN 6644 0x119AA, // 119AA..119D7; NANDINAGARI 6645 0x119D8, // 119D8..119D9; UNKNOWN 6646 0x119DA, // 119DA..119E4; NANDINAGARI 6647 0x119E5, // 119E5..119FF; UNKNOWN 6648 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6649 0x11A48, // 11A48..11A4F; UNKNOWN 6650 0x11A50, // 11A50..11AA2; SOYOMBO 6651 0x11AA3, // 11AA3..11AAF; UNKNOWN 6652 0x11AB0, // 11AB0..11ABF; CANADIAN_ABORIGINAL 6653 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6654 0x11AF9, // 11AF9..11AFF; UNKNOWN 6655 0x11B00, // 11B00..11B09; DEVANAGARI 6656 0x11B0A, // 11B0A..11BFF; UNKNOWN 6657 0x11C00, // 11C00..11C08; BHAIKSUKI 6658 0x11C09, // 11C09 ; UNKNOWN 6659 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6660 0x11C37, // 11C37 ; UNKNOWN 6661 0x11C38, // 11C38..11C45; BHAIKSUKI 6662 0x11C46, // 11C46..11C4F; UNKNOWN 6663 0x11C50, // 11C50..11C6C; BHAIKSUKI 6664 0x11C6D, // 11C6D..11C6F; UNKNOWN 6665 0x11C70, // 11C70..11C8F; MARCHEN 6666 0x11C90, // 11C90..11C91; UNKNOWN 6667 0x11C92, // 11C92..11CA7; MARCHEN 6668 0x11CA8, // 11CA8 ; UNKNOWN 6669 0x11CA9, // 11CA9..11CB6; MARCHEN 6670 0x11CB7, // 
11CB7..11CFF; UNKNOWN 6671 0x11D00, // 11D00..11D06; MASARAM_GONDI 6672 0x11D07, // 11D07 ; UNKNOWN 6673 0x11D08, // 11D08..11D09; MASARAM_GONDI 6674 0x11D0A, // 11D0A ; UNKNOWN 6675 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6676 0x11D37, // 11D37..11D39; UNKNOWN 6677 0x11D3A, // 11D3A ; MASARAM_GONDI 6678 0x11D3B, // 11D3B ; UNKNOWN 6679 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6680 0x11D3E, // 11D3E ; UNKNOWN 6681 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6682 0x11D48, // 11D48..11D4F; UNKNOWN 6683 0x11D50, // 11D50..11D59; MASARAM_GONDI 6684 0x11D5A, // 11D5A..11D5F; UNKNOWN 6685 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6686 0x11D66, // 11D66 ; UNKNOWN 6687 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6688 0x11D69, // 11D69 ; UNKNOWN 6689 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6690 0x11D8F, // 11D8F ; UNKNOWN 6691 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6692 0x11D92, // 11D92 ; UNKNOWN 6693 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6694 0x11D99, // 11D99..11D9F; UNKNOWN 6695 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6696 0x11DAA, // 11DAA..11EDF; UNKNOWN 6697 0x11EE0, // 11EE0..11EF8; MAKASAR 6698 0x11EF9, // 11EF9..11EFF; UNKNOWN 6699 0x11F00, // 11F00..11F10; KAWI 6700 0x11F11, // 11F11 ; UNKNOWN 6701 0x11F12, // 11F12..11F3A; KAWI 6702 0x11F3B, // 11F3B..11F3D; UNKNOWN 6703 0x11F3E, // 11F3E..11F59; KAWI 6704 0x11F5A, // 11F5A..11FAF; UNKNOWN 6705 0x11FB0, // 11FB0 ; LISU 6706 0x11FB1, // 11FB1..11FBF; UNKNOWN 6707 0x11FC0, // 11FC0..11FF1; TAMIL 6708 0x11FF2, // 11FF2..11FFE; UNKNOWN 6709 0x11FFF, // 11FFF ; TAMIL 6710 0x12000, // 12000..12399; CUNEIFORM 6711 0x1239A, // 1239A..123FF; UNKNOWN 6712 0x12400, // 12400..1246E; CUNEIFORM 6713 0x1246F, // 1246F ; UNKNOWN 6714 0x12470, // 12470..12474; CUNEIFORM 6715 0x12475, // 12475..1247F; UNKNOWN 6716 0x12480, // 12480..12543; CUNEIFORM 6717 0x12544, // 12544..12F8F; UNKNOWN 6718 0x12F90, // 12F90..12FF2; CYPRO_MINOAN 6719 0x12FF3, // 12FF3..12FFF; UNKNOWN 6720 0x13000, // 13000..13455; EGYPTIAN_HIEROGLYPHS 6721 0x13456, // 
13456..143FF; UNKNOWN 6722 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6723 0x14647, // 14647..167FF; UNKNOWN 6724 0x16800, // 16800..16A38; BAMUM 6725 0x16A39, // 16A39..16A3F; UNKNOWN 6726 0x16A40, // 16A40..16A5E; MRO 6727 0x16A5F, // 16A5F ; UNKNOWN 6728 0x16A60, // 16A60..16A69; MRO 6729 0x16A6A, // 16A6A..16A6D; UNKNOWN 6730 0x16A6E, // 16A6E..16A6F; MRO 6731 0x16A70, // 16A70..16ABE; TANGSA 6732 0x16ABF, // 16ABF ; UNKNOWN 6733 0x16AC0, // 16AC0..16AC9; TANGSA 6734 0x16ACA, // 16ACA..16ACF; UNKNOWN 6735 0x16AD0, // 16AD0..16AED; BASSA_VAH 6736 0x16AEE, // 16AEE..16AEF; UNKNOWN 6737 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6738 0x16AF6, // 16AF6..16AFF; UNKNOWN 6739 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6740 0x16B46, // 16B46..16B4F; UNKNOWN 6741 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6742 0x16B5A, // 16B5A ; UNKNOWN 6743 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6744 0x16B62, // 16B62 ; UNKNOWN 6745 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6746 0x16B78, // 16B78..16B7C; UNKNOWN 6747 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6748 0x16B90, // 16B90..16E3F; UNKNOWN 6749 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6750 0x16E9B, // 16E9B..16EFF; UNKNOWN 6751 0x16F00, // 16F00..16F4A; MIAO 6752 0x16F4B, // 16F4B..16F4E; UNKNOWN 6753 0x16F4F, // 16F4F..16F87; MIAO 6754 0x16F88, // 16F88..16F8E; UNKNOWN 6755 0x16F8F, // 16F8F..16F9F; MIAO 6756 0x16FA0, // 16FA0..16FDF; UNKNOWN 6757 0x16FE0, // 16FE0 ; TANGUT 6758 0x16FE1, // 16FE1 ; NUSHU 6759 0x16FE2, // 16FE2..16FE3; HAN 6760 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT 6761 0x16FE5, // 16FE5..16FEF; UNKNOWN 6762 0x16FF0, // 16FF0..16FF1; HAN 6763 0x16FF2, // 16FF2..16FFF; UNKNOWN 6764 0x17000, // 17000..187F7; TANGUT 6765 0x187F8, // 187F8..187FF; UNKNOWN 6766 0x18800, // 18800..18AFF; TANGUT 6767 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT 6768 0x18CD6, // 18CD6..18CFF; UNKNOWN 6769 0x18D00, // 18D00..18D08; TANGUT 6770 0x18D09, // 18D09..1AFEF; UNKNOWN 6771 0x1AFF0, // 1AFF0..1AFF3; KATAKANA 6772 0x1AFF4, // 1AFF4 ; UNKNOWN 
6773 0x1AFF5, // 1AFF5..1AFFB; KATAKANA 6774 0x1AFFC, // 1AFFC ; UNKNOWN 6775 0x1AFFD, // 1AFFD..1AFFE; KATAKANA 6776 0x1AFFF, // 1AFFF ; UNKNOWN 6777 0x1B000, // 1B000 ; KATAKANA 6778 0x1B001, // 1B001..1B11F; HIRAGANA 6779 0x1B120, // 1B120..1B122; KATAKANA 6780 0x1B123, // 1B123..1B131; UNKNOWN 6781 0x1B132, // 1B132 ; HIRAGANA 6782 0x1B133, // 1B133..1B14F; UNKNOWN 6783 0x1B150, // 1B150..1B152; HIRAGANA 6784 0x1B153, // 1B153..1B154; UNKNOWN 6785 0x1B155, // 1B155 ; KATAKANA 6786 0x1B156, // 1B156..1B163; UNKNOWN 6787 0x1B164, // 1B164..1B167; KATAKANA 6788 0x1B168, // 1B168..1B16F; UNKNOWN 6789 0x1B170, // 1B170..1B2FB; NUSHU 6790 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6791 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6792 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 6793 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6794 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6795 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6796 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6797 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6798 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6799 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6800 0x1BCA0, // 1BCA0..1BCA3; COMMON 6801 0x1BCA4, // 1BCA4..1CEFF; UNKNOWN 6802 0x1CF00, // 1CF00..1CF2D; INHERITED 6803 0x1CF2E, // 1CF2E..1CF2F; UNKNOWN 6804 0x1CF30, // 1CF30..1CF46; INHERITED 6805 0x1CF47, // 1CF47..1CF4F; UNKNOWN 6806 0x1CF50, // 1CF50..1CFC3; COMMON 6807 0x1CFC4, // 1CFC4..1CFFF; UNKNOWN 6808 0x1D000, // 1D000..1D0F5; COMMON 6809 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6810 0x1D100, // 1D100..1D126; COMMON 6811 0x1D127, // 1D127..1D128; UNKNOWN 6812 0x1D129, // 1D129..1D166; COMMON 6813 0x1D167, // 1D167..1D169; INHERITED 6814 0x1D16A, // 1D16A..1D17A; COMMON 6815 0x1D17B, // 1D17B..1D182; INHERITED 6816 0x1D183, // 1D183..1D184; COMMON 6817 0x1D185, // 1D185..1D18B; INHERITED 6818 0x1D18C, // 1D18C..1D1A9; COMMON 6819 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6820 0x1D1AE, // 1D1AE..1D1EA; COMMON 6821 0x1D1EB, // 1D1EB..1D1FF; UNKNOWN 6822 0x1D200, // 1D200..1D245; GREEK 6823 0x1D246, // 1D246..1D2BF; UNKNOWN 6824 0x1D2C0, // 
1D2C0..1D2D3; COMMON 6825 0x1D2D4, // 1D2D4..1D2DF; UNKNOWN 6826 0x1D2E0, // 1D2E0..1D2F3; COMMON 6827 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN 6828 0x1D300, // 1D300..1D356; COMMON 6829 0x1D357, // 1D357..1D35F; UNKNOWN 6830 0x1D360, // 1D360..1D378; COMMON 6831 0x1D379, // 1D379..1D3FF; UNKNOWN 6832 0x1D400, // 1D400..1D454; COMMON 6833 0x1D455, // 1D455 ; UNKNOWN 6834 0x1D456, // 1D456..1D49C; COMMON 6835 0x1D49D, // 1D49D ; UNKNOWN 6836 0x1D49E, // 1D49E..1D49F; COMMON 6837 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 6838 0x1D4A2, // 1D4A2 ; COMMON 6839 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 6840 0x1D4A5, // 1D4A5..1D4A6; COMMON 6841 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 6842 0x1D4A9, // 1D4A9..1D4AC; COMMON 6843 0x1D4AD, // 1D4AD ; UNKNOWN 6844 0x1D4AE, // 1D4AE..1D4B9; COMMON 6845 0x1D4BA, // 1D4BA ; UNKNOWN 6846 0x1D4BB, // 1D4BB ; COMMON 6847 0x1D4BC, // 1D4BC ; UNKNOWN 6848 0x1D4BD, // 1D4BD..1D4C3; COMMON 6849 0x1D4C4, // 1D4C4 ; UNKNOWN 6850 0x1D4C5, // 1D4C5..1D505; COMMON 6851 0x1D506, // 1D506 ; UNKNOWN 6852 0x1D507, // 1D507..1D50A; COMMON 6853 0x1D50B, // 1D50B..1D50C; UNKNOWN 6854 0x1D50D, // 1D50D..1D514; COMMON 6855 0x1D515, // 1D515 ; UNKNOWN 6856 0x1D516, // 1D516..1D51C; COMMON 6857 0x1D51D, // 1D51D ; UNKNOWN 6858 0x1D51E, // 1D51E..1D539; COMMON 6859 0x1D53A, // 1D53A ; UNKNOWN 6860 0x1D53B, // 1D53B..1D53E; COMMON 6861 0x1D53F, // 1D53F ; UNKNOWN 6862 0x1D540, // 1D540..1D544; COMMON 6863 0x1D545, // 1D545 ; UNKNOWN 6864 0x1D546, // 1D546 ; COMMON 6865 0x1D547, // 1D547..1D549; UNKNOWN 6866 0x1D54A, // 1D54A..1D550; COMMON 6867 0x1D551, // 1D551 ; UNKNOWN 6868 0x1D552, // 1D552..1D6A5; COMMON 6869 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 6870 0x1D6A8, // 1D6A8..1D7CB; COMMON 6871 0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 6872 0x1D7CE, // 1D7CE..1D7FF; COMMON 6873 0x1D800, // 1D800..1DA8B; SIGNWRITING 6874 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 6875 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 6876 0x1DAA0, // 1DAA0 ; UNKNOWN 6877 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 6878 0x1DAB0, // 
1DAB0..1DEFF; UNKNOWN 6879 0x1DF00, // 1DF00..1DF1E; LATIN 6880 0x1DF1F, // 1DF1F..1DF24; UNKNOWN 6881 0x1DF25, // 1DF25..1DF2A; LATIN 6882 0x1DF2B, // 1DF2B..1DFFF; UNKNOWN 6883 0x1E000, // 1E000..1E006; GLAGOLITIC 6884 0x1E007, // 1E007 ; UNKNOWN 6885 0x1E008, // 1E008..1E018; GLAGOLITIC 6886 0x1E019, // 1E019..1E01A; UNKNOWN 6887 0x1E01B, // 1E01B..1E021; GLAGOLITIC 6888 0x1E022, // 1E022 ; UNKNOWN 6889 0x1E023, // 1E023..1E024; GLAGOLITIC 6890 0x1E025, // 1E025 ; UNKNOWN 6891 0x1E026, // 1E026..1E02A; GLAGOLITIC 6892 0x1E02B, // 1E02B..1E02F; UNKNOWN 6893 0x1E030, // 1E030..1E06D; CYRILLIC 6894 0x1E06E, // 1E06E..1E08E; UNKNOWN 6895 0x1E08F, // 1E08F ; CYRILLIC 6896 0x1E090, // 1E090..1E0FF; UNKNOWN 6897 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 6898 0x1E12D, // 1E12D..1E12F; UNKNOWN 6899 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 6900 0x1E13E, // 1E13E..1E13F; UNKNOWN 6901 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 6902 0x1E14A, // 1E14A..1E14D; UNKNOWN 6903 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 6904 0x1E150, // 1E150..1E28F; UNKNOWN 6905 0x1E290, // 1E290..1E2AE; TOTO 6906 0x1E2AF, // 1E2AF..1E2BF; UNKNOWN 6907 0x1E2C0, // 1E2C0..1E2F9; WANCHO 6908 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 6909 0x1E2FF, // 1E2FF ; WANCHO 6910 0x1E300, // 1E300..1E4CF; UNKNOWN 6911 0x1E4D0, // 1E4D0..1E4F9; NAG_MUNDARI 6912 0x1E4FA, // 1E4FA..1E7DF; UNKNOWN 6913 0x1E7E0, // 1E7E0..1E7E6; ETHIOPIC 6914 0x1E7E7, // 1E7E7 ; UNKNOWN 6915 0x1E7E8, // 1E7E8..1E7EB; ETHIOPIC 6916 0x1E7EC, // 1E7EC ; UNKNOWN 6917 0x1E7ED, // 1E7ED..1E7EE; ETHIOPIC 6918 0x1E7EF, // 1E7EF ; UNKNOWN 6919 0x1E7F0, // 1E7F0..1E7FE; ETHIOPIC 6920 0x1E7FF, // 1E7FF ; UNKNOWN 6921 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 6922 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 6923 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 6924 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 6925 0x1E900, // 1E900..1E94B; ADLAM 6926 0x1E94C, // 1E94C..1E94F; UNKNOWN 6927 0x1E950, // 1E950..1E959; ADLAM 6928 0x1E95A, // 1E95A..1E95D; 
UNKNOWN 6929 0x1E95E, // 1E95E..1E95F; ADLAM 6930 0x1E960, // 1E960..1EC70; UNKNOWN 6931 0x1EC71, // 1EC71..1ECB4; COMMON 6932 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 6933 0x1ED01, // 1ED01..1ED3D; COMMON 6934 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 6935 0x1EE00, // 1EE00..1EE03; ARABIC 6936 0x1EE04, // 1EE04 ; UNKNOWN 6937 0x1EE05, // 1EE05..1EE1F; ARABIC 6938 0x1EE20, // 1EE20 ; UNKNOWN 6939 0x1EE21, // 1EE21..1EE22; ARABIC 6940 0x1EE23, // 1EE23 ; UNKNOWN 6941 0x1EE24, // 1EE24 ; ARABIC 6942 0x1EE25, // 1EE25..1EE26; UNKNOWN 6943 0x1EE27, // 1EE27 ; ARABIC 6944 0x1EE28, // 1EE28 ; UNKNOWN 6945 0x1EE29, // 1EE29..1EE32; ARABIC 6946 0x1EE33, // 1EE33 ; UNKNOWN 6947 0x1EE34, // 1EE34..1EE37; ARABIC 6948 0x1EE38, // 1EE38 ; UNKNOWN 6949 0x1EE39, // 1EE39 ; ARABIC 6950 0x1EE3A, // 1EE3A ; UNKNOWN 6951 0x1EE3B, // 1EE3B ; ARABIC 6952 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 6953 0x1EE42, // 1EE42 ; ARABIC 6954 0x1EE43, // 1EE43..1EE46; UNKNOWN 6955 0x1EE47, // 1EE47 ; ARABIC 6956 0x1EE48, // 1EE48 ; UNKNOWN 6957 0x1EE49, // 1EE49 ; ARABIC 6958 0x1EE4A, // 1EE4A ; UNKNOWN 6959 0x1EE4B, // 1EE4B ; ARABIC 6960 0x1EE4C, // 1EE4C ; UNKNOWN 6961 0x1EE4D, // 1EE4D..1EE4F; ARABIC 6962 0x1EE50, // 1EE50 ; UNKNOWN 6963 0x1EE51, // 1EE51..1EE52; ARABIC 6964 0x1EE53, // 1EE53 ; UNKNOWN 6965 0x1EE54, // 1EE54 ; ARABIC 6966 0x1EE55, // 1EE55..1EE56; UNKNOWN 6967 0x1EE57, // 1EE57 ; ARABIC 6968 0x1EE58, // 1EE58 ; UNKNOWN 6969 0x1EE59, // 1EE59 ; ARABIC 6970 0x1EE5A, // 1EE5A ; UNKNOWN 6971 0x1EE5B, // 1EE5B ; ARABIC 6972 0x1EE5C, // 1EE5C ; UNKNOWN 6973 0x1EE5D, // 1EE5D ; ARABIC 6974 0x1EE5E, // 1EE5E ; UNKNOWN 6975 0x1EE5F, // 1EE5F ; ARABIC 6976 0x1EE60, // 1EE60 ; UNKNOWN 6977 0x1EE61, // 1EE61..1EE62; ARABIC 6978 0x1EE63, // 1EE63 ; UNKNOWN 6979 0x1EE64, // 1EE64 ; ARABIC 6980 0x1EE65, // 1EE65..1EE66; UNKNOWN 6981 0x1EE67, // 1EE67..1EE6A; ARABIC 6982 0x1EE6B, // 1EE6B ; UNKNOWN 6983 0x1EE6C, // 1EE6C..1EE72; ARABIC 6984 0x1EE73, // 1EE73 ; UNKNOWN 6985 0x1EE74, // 1EE74..1EE77; ARABIC 6986 
0x1EE78, // 1EE78 ; UNKNOWN 6987 0x1EE79, // 1EE79..1EE7C; ARABIC 6988 0x1EE7D, // 1EE7D ; UNKNOWN 6989 0x1EE7E, // 1EE7E ; ARABIC 6990 0x1EE7F, // 1EE7F ; UNKNOWN 6991 0x1EE80, // 1EE80..1EE89; ARABIC 6992 0x1EE8A, // 1EE8A ; UNKNOWN 6993 0x1EE8B, // 1EE8B..1EE9B; ARABIC 6994 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 6995 0x1EEA1, // 1EEA1..1EEA3; ARABIC 6996 0x1EEA4, // 1EEA4 ; UNKNOWN 6997 0x1EEA5, // 1EEA5..1EEA9; ARABIC 6998 0x1EEAA, // 1EEAA ; UNKNOWN 6999 0x1EEAB, // 1EEAB..1EEBB; ARABIC 7000 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 7001 0x1EEF0, // 1EEF0..1EEF1; ARABIC 7002 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 7003 0x1F000, // 1F000..1F02B; COMMON 7004 0x1F02C, // 1F02C..1F02F; UNKNOWN 7005 0x1F030, // 1F030..1F093; COMMON 7006 0x1F094, // 1F094..1F09F; UNKNOWN 7007 0x1F0A0, // 1F0A0..1F0AE; COMMON 7008 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 7009 0x1F0B1, // 1F0B1..1F0BF; COMMON 7010 0x1F0C0, // 1F0C0 ; UNKNOWN 7011 0x1F0C1, // 1F0C1..1F0CF; COMMON 7012 0x1F0D0, // 1F0D0 ; UNKNOWN 7013 0x1F0D1, // 1F0D1..1F0F5; COMMON 7014 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 7015 0x1F100, // 1F100..1F1AD; COMMON 7016 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN 7017 0x1F1E6, // 1F1E6..1F1FF; COMMON 7018 0x1F200, // 1F200 ; HIRAGANA 7019 0x1F201, // 1F201..1F202; COMMON 7020 0x1F203, // 1F203..1F20F; UNKNOWN 7021 0x1F210, // 1F210..1F23B; COMMON 7022 0x1F23C, // 1F23C..1F23F; UNKNOWN 7023 0x1F240, // 1F240..1F248; COMMON 7024 0x1F249, // 1F249..1F24F; UNKNOWN 7025 0x1F250, // 1F250..1F251; COMMON 7026 0x1F252, // 1F252..1F25F; UNKNOWN 7027 0x1F260, // 1F260..1F265; COMMON 7028 0x1F266, // 1F266..1F2FF; UNKNOWN 7029 0x1F300, // 1F300..1F6D7; COMMON 7030 0x1F6D8, // 1F6D8..1F6DB; UNKNOWN 7031 0x1F6DC, // 1F6DC..1F6EC; COMMON 7032 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 7033 0x1F6F0, // 1F6F0..1F6FC; COMMON 7034 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN 7035 0x1F700, // 1F700..1F776; COMMON 7036 0x1F777, // 1F777..1F77A; UNKNOWN 7037 0x1F77B, // 1F77B..1F7D9; COMMON 7038 0x1F7DA, // 1F7DA..1F7DF; UNKNOWN 7039 0x1F7E0, // 
1F7E0..1F7EB; COMMON 7040 0x1F7EC, // 1F7EC..1F7EF; UNKNOWN 7041 0x1F7F0, // 1F7F0 ; COMMON 7042 0x1F7F1, // 1F7F1..1F7FF; UNKNOWN 7043 0x1F800, // 1F800..1F80B; COMMON 7044 0x1F80C, // 1F80C..1F80F; UNKNOWN 7045 0x1F810, // 1F810..1F847; COMMON 7046 0x1F848, // 1F848..1F84F; UNKNOWN 7047 0x1F850, // 1F850..1F859; COMMON 7048 0x1F85A, // 1F85A..1F85F; UNKNOWN 7049 0x1F860, // 1F860..1F887; COMMON 7050 0x1F888, // 1F888..1F88F; UNKNOWN 7051 0x1F890, // 1F890..1F8AD; COMMON 7052 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN 7053 0x1F8B0, // 1F8B0..1F8B1; COMMON 7054 0x1F8B2, // 1F8B2..1F8FF; UNKNOWN 7055 0x1F900, // 1F900..1FA53; COMMON 7056 0x1FA54, // 1FA54..1FA5F; UNKNOWN 7057 0x1FA60, // 1FA60..1FA6D; COMMON 7058 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 7059 0x1FA70, // 1FA70..1FA7C; COMMON 7060 0x1FA7D, // 1FA7D..1FA7F; UNKNOWN 7061 0x1FA80, // 1FA80..1FA88; COMMON 7062 0x1FA89, // 1FA89..1FA8F; UNKNOWN 7063 0x1FA90, // 1FA90..1FABD; COMMON 7064 0x1FABE, // 1FABE ; UNKNOWN 7065 0x1FABF, // 1FABF..1FAC5; COMMON 7066 0x1FAC6, // 1FAC6..1FACD; UNKNOWN 7067 0x1FACE, // 1FACE..1FADB; COMMON 7068 0x1FADC, // 1FADC..1FADF; UNKNOWN 7069 0x1FAE0, // 1FAE0..1FAE8; COMMON 7070 0x1FAE9, // 1FAE9..1FAEF; UNKNOWN 7071 0x1FAF0, // 1FAF0..1FAF8; COMMON 7072 0x1FAF9, // 1FAF9..1FAFF; UNKNOWN 7073 0x1FB00, // 1FB00..1FB92; COMMON 7074 0x1FB93, // 1FB93 ; UNKNOWN 7075 0x1FB94, // 1FB94..1FBCA; COMMON 7076 0x1FBCB, // 1FBCB..1FBEF; UNKNOWN 7077 0x1FBF0, // 1FBF0..1FBF9; COMMON 7078 0x1FBFA, // 1FBFA..1FFFF; UNKNOWN 7079 0x20000, // 20000..2A6DF; HAN 7080 0x2A6E0, // 2A6E0..2A6FF; UNKNOWN 7081 0x2A700, // 2A700..2B739; HAN 7082 0x2B73A, // 2B73A..2B73F; UNKNOWN 7083 0x2B740, // 2B740..2B81D; HAN 7084 0x2B81E, // 2B81E..2B81F; UNKNOWN 7085 0x2B820, // 2B820..2CEA1; HAN 7086 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 7087 0x2CEB0, // 2CEB0..2EBE0; HAN 7088 0x2EBE1, // 2EBE1..2F7FF; UNKNOWN 7089 0x2F800, // 2F800..2FA1D; HAN 7090 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN 7091 0x30000, // 30000..3134A; HAN 7092 0x3134B, 
// 3134B..3134F; UNKNOWN 7093 0x31350, // 31350..323AF; HAN 7094 0x323B0, // 323B0..E0000; UNKNOWN 7095 0xE0001, // E0001 ; COMMON 7096 0xE0002, // E0002..E001F; UNKNOWN 7097 0xE0020, // E0020..E007F; COMMON 7098 0xE0080, // E0080..E00FF; UNKNOWN 7099 0xE0100, // E0100..E01EF; INHERITED 7100 0xE01F0, // E01F0..10FFFF; UNKNOWN 7101 }; 7102 7103 private static final UnicodeScript[] scripts = { 7104 COMMON, // 0000..0040 7105 LATIN, // 0041..005A 7106 COMMON, // 005B..0060 7107 LATIN, // 0061..007A 7108 COMMON, // 007B..00A9 7109 LATIN, // 00AA 7110 COMMON, // 00AB..00B9 7111 LATIN, // 00BA 7112 COMMON, // 00BB..00BF 7113 LATIN, // 00C0..00D6 7114 COMMON, // 00D7 7115 LATIN, // 00D8..00F6 7116 COMMON, // 00F7 7117 LATIN, // 00F8..02B8 7118 COMMON, // 02B9..02DF 7119 LATIN, // 02E0..02E4 7120 COMMON, // 02E5..02E9 7121 BOPOMOFO, // 02EA..02EB 7122 COMMON, // 02EC..02FF 7123 INHERITED, // 0300..036F 7124 GREEK, // 0370..0373 7125 COMMON, // 0374 7126 GREEK, // 0375..0377 7127 UNKNOWN, // 0378..0379 7128 GREEK, // 037A..037D 7129 COMMON, // 037E 7130 GREEK, // 037F 7131 UNKNOWN, // 0380..0383 7132 GREEK, // 0384 7133 COMMON, // 0385 7134 GREEK, // 0386 7135 COMMON, // 0387 7136 GREEK, // 0388..038A 7137 UNKNOWN, // 038B 7138 GREEK, // 038C 7139 UNKNOWN, // 038D 7140 GREEK, // 038E..03A1 7141 UNKNOWN, // 03A2 7142 GREEK, // 03A3..03E1 7143 COPTIC, // 03E2..03EF 7144 GREEK, // 03F0..03FF 7145 CYRILLIC, // 0400..0484 7146 INHERITED, // 0485..0486 7147 CYRILLIC, // 0487..052F 7148 UNKNOWN, // 0530 7149 ARMENIAN, // 0531..0556 7150 UNKNOWN, // 0557..0558 7151 ARMENIAN, // 0559..058A 7152 UNKNOWN, // 058B..058C 7153 ARMENIAN, // 058D..058F 7154 UNKNOWN, // 0590 7155 HEBREW, // 0591..05C7 7156 UNKNOWN, // 05C8..05CF 7157 HEBREW, // 05D0..05EA 7158 UNKNOWN, // 05EB..05EE 7159 HEBREW, // 05EF..05F4 7160 UNKNOWN, // 05F5..05FF 7161 ARABIC, // 0600..0604 7162 COMMON, // 0605 7163 ARABIC, // 0606..060B 7164 COMMON, // 060C 7165 ARABIC, // 060D..061A 7166 COMMON, // 061B 7167 ARABIC, 
// 061C..061E 7168 COMMON, // 061F 7169 ARABIC, // 0620..063F 7170 COMMON, // 0640 7171 ARABIC, // 0641..064A 7172 INHERITED, // 064B..0655 7173 ARABIC, // 0656..066F 7174 INHERITED, // 0670 7175 ARABIC, // 0671..06DC 7176 COMMON, // 06DD 7177 ARABIC, // 06DE..06FF 7178 SYRIAC, // 0700..070D 7179 UNKNOWN, // 070E 7180 SYRIAC, // 070F..074A 7181 UNKNOWN, // 074B..074C 7182 SYRIAC, // 074D..074F 7183 ARABIC, // 0750..077F 7184 THAANA, // 0780..07B1 7185 UNKNOWN, // 07B2..07BF 7186 NKO, // 07C0..07FA 7187 UNKNOWN, // 07FB..07FC 7188 NKO, // 07FD..07FF 7189 SAMARITAN, // 0800..082D 7190 UNKNOWN, // 082E..082F 7191 SAMARITAN, // 0830..083E 7192 UNKNOWN, // 083F 7193 MANDAIC, // 0840..085B 7194 UNKNOWN, // 085C..085D 7195 MANDAIC, // 085E 7196 UNKNOWN, // 085F 7197 SYRIAC, // 0860..086A 7198 UNKNOWN, // 086B..086F 7199 ARABIC, // 0870..088E 7200 UNKNOWN, // 088F 7201 ARABIC, // 0890..0891 7202 UNKNOWN, // 0892..0897 7203 ARABIC, // 0898..08E1 7204 COMMON, // 08E2 7205 ARABIC, // 08E3..08FF 7206 DEVANAGARI, // 0900..0950 7207 INHERITED, // 0951..0954 7208 DEVANAGARI, // 0955..0963 7209 COMMON, // 0964..0965 7210 DEVANAGARI, // 0966..097F 7211 BENGALI, // 0980..0983 7212 UNKNOWN, // 0984 7213 BENGALI, // 0985..098C 7214 UNKNOWN, // 098D..098E 7215 BENGALI, // 098F..0990 7216 UNKNOWN, // 0991..0992 7217 BENGALI, // 0993..09A8 7218 UNKNOWN, // 09A9 7219 BENGALI, // 09AA..09B0 7220 UNKNOWN, // 09B1 7221 BENGALI, // 09B2 7222 UNKNOWN, // 09B3..09B5 7223 BENGALI, // 09B6..09B9 7224 UNKNOWN, // 09BA..09BB 7225 BENGALI, // 09BC..09C4 7226 UNKNOWN, // 09C5..09C6 7227 BENGALI, // 09C7..09C8 7228 UNKNOWN, // 09C9..09CA 7229 BENGALI, // 09CB..09CE 7230 UNKNOWN, // 09CF..09D6 7231 BENGALI, // 09D7 7232 UNKNOWN, // 09D8..09DB 7233 BENGALI, // 09DC..09DD 7234 UNKNOWN, // 09DE 7235 BENGALI, // 09DF..09E3 7236 UNKNOWN, // 09E4..09E5 7237 BENGALI, // 09E6..09FE 7238 UNKNOWN, // 09FF..0A00 7239 GURMUKHI, // 0A01..0A03 7240 UNKNOWN, // 0A04 7241 GURMUKHI, // 0A05..0A0A 7242 UNKNOWN, // 
0A0B..0A0E 7243 GURMUKHI, // 0A0F..0A10 7244 UNKNOWN, // 0A11..0A12 7245 GURMUKHI, // 0A13..0A28 7246 UNKNOWN, // 0A29 7247 GURMUKHI, // 0A2A..0A30 7248 UNKNOWN, // 0A31 7249 GURMUKHI, // 0A32..0A33 7250 UNKNOWN, // 0A34 7251 GURMUKHI, // 0A35..0A36 7252 UNKNOWN, // 0A37 7253 GURMUKHI, // 0A38..0A39 7254 UNKNOWN, // 0A3A..0A3B 7255 GURMUKHI, // 0A3C 7256 UNKNOWN, // 0A3D 7257 GURMUKHI, // 0A3E..0A42 7258 UNKNOWN, // 0A43..0A46 7259 GURMUKHI, // 0A47..0A48 7260 UNKNOWN, // 0A49..0A4A 7261 GURMUKHI, // 0A4B..0A4D 7262 UNKNOWN, // 0A4E..0A50 7263 GURMUKHI, // 0A51 7264 UNKNOWN, // 0A52..0A58 7265 GURMUKHI, // 0A59..0A5C 7266 UNKNOWN, // 0A5D 7267 GURMUKHI, // 0A5E 7268 UNKNOWN, // 0A5F..0A65 7269 GURMUKHI, // 0A66..0A76 7270 UNKNOWN, // 0A77..0A80 7271 GUJARATI, // 0A81..0A83 7272 UNKNOWN, // 0A84 7273 GUJARATI, // 0A85..0A8D 7274 UNKNOWN, // 0A8E 7275 GUJARATI, // 0A8F..0A91 7276 UNKNOWN, // 0A92 7277 GUJARATI, // 0A93..0AA8 7278 UNKNOWN, // 0AA9 7279 GUJARATI, // 0AAA..0AB0 7280 UNKNOWN, // 0AB1 7281 GUJARATI, // 0AB2..0AB3 7282 UNKNOWN, // 0AB4 7283 GUJARATI, // 0AB5..0AB9 7284 UNKNOWN, // 0ABA..0ABB 7285 GUJARATI, // 0ABC..0AC5 7286 UNKNOWN, // 0AC6 7287 GUJARATI, // 0AC7..0AC9 7288 UNKNOWN, // 0ACA 7289 GUJARATI, // 0ACB..0ACD 7290 UNKNOWN, // 0ACE..0ACF 7291 GUJARATI, // 0AD0 7292 UNKNOWN, // 0AD1..0ADF 7293 GUJARATI, // 0AE0..0AE3 7294 UNKNOWN, // 0AE4..0AE5 7295 GUJARATI, // 0AE6..0AF1 7296 UNKNOWN, // 0AF2..0AF8 7297 GUJARATI, // 0AF9..0AFF 7298 UNKNOWN, // 0B00 7299 ORIYA, // 0B01..0B03 7300 UNKNOWN, // 0B04 7301 ORIYA, // 0B05..0B0C 7302 UNKNOWN, // 0B0D..0B0E 7303 ORIYA, // 0B0F..0B10 7304 UNKNOWN, // 0B11..0B12 7305 ORIYA, // 0B13..0B28 7306 UNKNOWN, // 0B29 7307 ORIYA, // 0B2A..0B30 7308 UNKNOWN, // 0B31 7309 ORIYA, // 0B32..0B33 7310 UNKNOWN, // 0B34 7311 ORIYA, // 0B35..0B39 7312 UNKNOWN, // 0B3A..0B3B 7313 ORIYA, // 0B3C..0B44 7314 UNKNOWN, // 0B45..0B46 7315 ORIYA, // 0B47..0B48 7316 UNKNOWN, // 0B49..0B4A 7317 ORIYA, // 0B4B..0B4D 7318 UNKNOWN, // 
0B4E..0B54 7319 ORIYA, // 0B55..0B57 7320 UNKNOWN, // 0B58..0B5B 7321 ORIYA, // 0B5C..0B5D 7322 UNKNOWN, // 0B5E 7323 ORIYA, // 0B5F..0B63 7324 UNKNOWN, // 0B64..0B65 7325 ORIYA, // 0B66..0B77 7326 UNKNOWN, // 0B78..0B81 7327 TAMIL, // 0B82..0B83 7328 UNKNOWN, // 0B84 7329 TAMIL, // 0B85..0B8A 7330 UNKNOWN, // 0B8B..0B8D 7331 TAMIL, // 0B8E..0B90 7332 UNKNOWN, // 0B91 7333 TAMIL, // 0B92..0B95 7334 UNKNOWN, // 0B96..0B98 7335 TAMIL, // 0B99..0B9A 7336 UNKNOWN, // 0B9B 7337 TAMIL, // 0B9C 7338 UNKNOWN, // 0B9D 7339 TAMIL, // 0B9E..0B9F 7340 UNKNOWN, // 0BA0..0BA2 7341 TAMIL, // 0BA3..0BA4 7342 UNKNOWN, // 0BA5..0BA7 7343 TAMIL, // 0BA8..0BAA 7344 UNKNOWN, // 0BAB..0BAD 7345 TAMIL, // 0BAE..0BB9 7346 UNKNOWN, // 0BBA..0BBD 7347 TAMIL, // 0BBE..0BC2 7348 UNKNOWN, // 0BC3..0BC5 7349 TAMIL, // 0BC6..0BC8 7350 UNKNOWN, // 0BC9 7351 TAMIL, // 0BCA..0BCD 7352 UNKNOWN, // 0BCE..0BCF 7353 TAMIL, // 0BD0 7354 UNKNOWN, // 0BD1..0BD6 7355 TAMIL, // 0BD7 7356 UNKNOWN, // 0BD8..0BE5 7357 TAMIL, // 0BE6..0BFA 7358 UNKNOWN, // 0BFB..0BFF 7359 TELUGU, // 0C00..0C0C 7360 UNKNOWN, // 0C0D 7361 TELUGU, // 0C0E..0C10 7362 UNKNOWN, // 0C11 7363 TELUGU, // 0C12..0C28 7364 UNKNOWN, // 0C29 7365 TELUGU, // 0C2A..0C39 7366 UNKNOWN, // 0C3A..0C3B 7367 TELUGU, // 0C3C..0C44 7368 UNKNOWN, // 0C45 7369 TELUGU, // 0C46..0C48 7370 UNKNOWN, // 0C49 7371 TELUGU, // 0C4A..0C4D 7372 UNKNOWN, // 0C4E..0C54 7373 TELUGU, // 0C55..0C56 7374 UNKNOWN, // 0C57 7375 TELUGU, // 0C58..0C5A 7376 UNKNOWN, // 0C5B..0C5C 7377 TELUGU, // 0C5D 7378 UNKNOWN, // 0C5E..0C5F 7379 TELUGU, // 0C60..0C63 7380 UNKNOWN, // 0C64..0C65 7381 TELUGU, // 0C66..0C6F 7382 UNKNOWN, // 0C70..0C76 7383 TELUGU, // 0C77..0C7F 7384 KANNADA, // 0C80..0C8C 7385 UNKNOWN, // 0C8D 7386 KANNADA, // 0C8E..0C90 7387 UNKNOWN, // 0C91 7388 KANNADA, // 0C92..0CA8 7389 UNKNOWN, // 0CA9 7390 KANNADA, // 0CAA..0CB3 7391 UNKNOWN, // 0CB4 7392 KANNADA, // 0CB5..0CB9 7393 UNKNOWN, // 0CBA..0CBB 7394 KANNADA, // 0CBC..0CC4 7395 UNKNOWN, // 0CC5 7396 
KANNADA, // 0CC6..0CC8 7397 UNKNOWN, // 0CC9 7398 KANNADA, // 0CCA..0CCD 7399 UNKNOWN, // 0CCE..0CD4 7400 KANNADA, // 0CD5..0CD6 7401 UNKNOWN, // 0CD7..0CDC 7402 KANNADA, // 0CDD..0CDE 7403 UNKNOWN, // 0CDF 7404 KANNADA, // 0CE0..0CE3 7405 UNKNOWN, // 0CE4..0CE5 7406 KANNADA, // 0CE6..0CEF 7407 UNKNOWN, // 0CF0 7408 KANNADA, // 0CF1..0CF3 7409 UNKNOWN, // 0CF4..0CFF 7410 MALAYALAM, // 0D00..0D0C 7411 UNKNOWN, // 0D0D 7412 MALAYALAM, // 0D0E..0D10 7413 UNKNOWN, // 0D11 7414 MALAYALAM, // 0D12..0D44 7415 UNKNOWN, // 0D45 7416 MALAYALAM, // 0D46..0D48 7417 UNKNOWN, // 0D49 7418 MALAYALAM, // 0D4A..0D4F 7419 UNKNOWN, // 0D50..0D53 7420 MALAYALAM, // 0D54..0D63 7421 UNKNOWN, // 0D64..0D65 7422 MALAYALAM, // 0D66..0D7F 7423 UNKNOWN, // 0D80 7424 SINHALA, // 0D81..0D83 7425 UNKNOWN, // 0D84 7426 SINHALA, // 0D85..0D96 7427 UNKNOWN, // 0D97..0D99 7428 SINHALA, // 0D9A..0DB1 7429 UNKNOWN, // 0DB2 7430 SINHALA, // 0DB3..0DBB 7431 UNKNOWN, // 0DBC 7432 SINHALA, // 0DBD 7433 UNKNOWN, // 0DBE..0DBF 7434 SINHALA, // 0DC0..0DC6 7435 UNKNOWN, // 0DC7..0DC9 7436 SINHALA, // 0DCA 7437 UNKNOWN, // 0DCB..0DCE 7438 SINHALA, // 0DCF..0DD4 7439 UNKNOWN, // 0DD5 7440 SINHALA, // 0DD6 7441 UNKNOWN, // 0DD7 7442 SINHALA, // 0DD8..0DDF 7443 UNKNOWN, // 0DE0..0DE5 7444 SINHALA, // 0DE6..0DEF 7445 UNKNOWN, // 0DF0..0DF1 7446 SINHALA, // 0DF2..0DF4 7447 UNKNOWN, // 0DF5..0E00 7448 THAI, // 0E01..0E3A 7449 UNKNOWN, // 0E3B..0E3E 7450 COMMON, // 0E3F 7451 THAI, // 0E40..0E5B 7452 UNKNOWN, // 0E5C..0E80 7453 LAO, // 0E81..0E82 7454 UNKNOWN, // 0E83 7455 LAO, // 0E84 7456 UNKNOWN, // 0E85 7457 LAO, // 0E86..0E8A 7458 UNKNOWN, // 0E8B 7459 LAO, // 0E8C..0EA3 7460 UNKNOWN, // 0EA4 7461 LAO, // 0EA5 7462 UNKNOWN, // 0EA6 7463 LAO, // 0EA7..0EBD 7464 UNKNOWN, // 0EBE..0EBF 7465 LAO, // 0EC0..0EC4 7466 UNKNOWN, // 0EC5 7467 LAO, // 0EC6 7468 UNKNOWN, // 0EC7 7469 LAO, // 0EC8..0ECE 7470 UNKNOWN, // 0ECF 7471 LAO, // 0ED0..0ED9 7472 UNKNOWN, // 0EDA..0EDB 7473 LAO, // 0EDC..0EDF 7474 UNKNOWN, // 
0EE0..0EFF 7475 TIBETAN, // 0F00..0F47 7476 UNKNOWN, // 0F48 7477 TIBETAN, // 0F49..0F6C 7478 UNKNOWN, // 0F6D..0F70 7479 TIBETAN, // 0F71..0F97 7480 UNKNOWN, // 0F98 7481 TIBETAN, // 0F99..0FBC 7482 UNKNOWN, // 0FBD 7483 TIBETAN, // 0FBE..0FCC 7484 UNKNOWN, // 0FCD 7485 TIBETAN, // 0FCE..0FD4 7486 COMMON, // 0FD5..0FD8 7487 TIBETAN, // 0FD9..0FDA 7488 UNKNOWN, // 0FDB..0FFF 7489 MYANMAR, // 1000..109F 7490 GEORGIAN, // 10A0..10C5 7491 UNKNOWN, // 10C6 7492 GEORGIAN, // 10C7 7493 UNKNOWN, // 10C8..10CC 7494 GEORGIAN, // 10CD 7495 UNKNOWN, // 10CE..10CF 7496 GEORGIAN, // 10D0..10FA 7497 COMMON, // 10FB 7498 GEORGIAN, // 10FC..10FF 7499 HANGUL, // 1100..11FF 7500 ETHIOPIC, // 1200..1248 7501 UNKNOWN, // 1249 7502 ETHIOPIC, // 124A..124D 7503 UNKNOWN, // 124E..124F 7504 ETHIOPIC, // 1250..1256 7505 UNKNOWN, // 1257 7506 ETHIOPIC, // 1258 7507 UNKNOWN, // 1259 7508 ETHIOPIC, // 125A..125D 7509 UNKNOWN, // 125E..125F 7510 ETHIOPIC, // 1260..1288 7511 UNKNOWN, // 1289 7512 ETHIOPIC, // 128A..128D 7513 UNKNOWN, // 128E..128F 7514 ETHIOPIC, // 1290..12B0 7515 UNKNOWN, // 12B1 7516 ETHIOPIC, // 12B2..12B5 7517 UNKNOWN, // 12B6..12B7 7518 ETHIOPIC, // 12B8..12BE 7519 UNKNOWN, // 12BF 7520 ETHIOPIC, // 12C0 7521 UNKNOWN, // 12C1 7522 ETHIOPIC, // 12C2..12C5 7523 UNKNOWN, // 12C6..12C7 7524 ETHIOPIC, // 12C8..12D6 7525 UNKNOWN, // 12D7 7526 ETHIOPIC, // 12D8..1310 7527 UNKNOWN, // 1311 7528 ETHIOPIC, // 1312..1315 7529 UNKNOWN, // 1316..1317 7530 ETHIOPIC, // 1318..135A 7531 UNKNOWN, // 135B..135C 7532 ETHIOPIC, // 135D..137C 7533 UNKNOWN, // 137D..137F 7534 ETHIOPIC, // 1380..1399 7535 UNKNOWN, // 139A..139F 7536 CHEROKEE, // 13A0..13F5 7537 UNKNOWN, // 13F6..13F7 7538 CHEROKEE, // 13F8..13FD 7539 UNKNOWN, // 13FE..13FF 7540 CANADIAN_ABORIGINAL, // 1400..167F 7541 OGHAM, // 1680..169C 7542 UNKNOWN, // 169D..169F 7543 RUNIC, // 16A0..16EA 7544 COMMON, // 16EB..16ED 7545 RUNIC, // 16EE..16F8 7546 UNKNOWN, // 16F9..16FF 7547 TAGALOG, // 1700..1715 7548 UNKNOWN, // 1716..171E 
7549 TAGALOG, // 171F 7550 HANUNOO, // 1720..1734 7551 COMMON, // 1735..1736 7552 UNKNOWN, // 1737..173F 7553 BUHID, // 1740..1753 7554 UNKNOWN, // 1754..175F 7555 TAGBANWA, // 1760..176C 7556 UNKNOWN, // 176D 7557 TAGBANWA, // 176E..1770 7558 UNKNOWN, // 1771 7559 TAGBANWA, // 1772..1773 7560 UNKNOWN, // 1774..177F 7561 KHMER, // 1780..17DD 7562 UNKNOWN, // 17DE..17DF 7563 KHMER, // 17E0..17E9 7564 UNKNOWN, // 17EA..17EF 7565 KHMER, // 17F0..17F9 7566 UNKNOWN, // 17FA..17FF 7567 MONGOLIAN, // 1800..1801 7568 COMMON, // 1802..1803 7569 MONGOLIAN, // 1804 7570 COMMON, // 1805 7571 MONGOLIAN, // 1806..1819 7572 UNKNOWN, // 181A..181F 7573 MONGOLIAN, // 1820..1878 7574 UNKNOWN, // 1879..187F 7575 MONGOLIAN, // 1880..18AA 7576 UNKNOWN, // 18AB..18AF 7577 CANADIAN_ABORIGINAL, // 18B0..18F5 7578 UNKNOWN, // 18F6..18FF 7579 LIMBU, // 1900..191E 7580 UNKNOWN, // 191F 7581 LIMBU, // 1920..192B 7582 UNKNOWN, // 192C..192F 7583 LIMBU, // 1930..193B 7584 UNKNOWN, // 193C..193F 7585 LIMBU, // 1940 7586 UNKNOWN, // 1941..1943 7587 LIMBU, // 1944..194F 7588 TAI_LE, // 1950..196D 7589 UNKNOWN, // 196E..196F 7590 TAI_LE, // 1970..1974 7591 UNKNOWN, // 1975..197F 7592 NEW_TAI_LUE, // 1980..19AB 7593 UNKNOWN, // 19AC..19AF 7594 NEW_TAI_LUE, // 19B0..19C9 7595 UNKNOWN, // 19CA..19CF 7596 NEW_TAI_LUE, // 19D0..19DA 7597 UNKNOWN, // 19DB..19DD 7598 NEW_TAI_LUE, // 19DE..19DF 7599 KHMER, // 19E0..19FF 7600 BUGINESE, // 1A00..1A1B 7601 UNKNOWN, // 1A1C..1A1D 7602 BUGINESE, // 1A1E..1A1F 7603 TAI_THAM, // 1A20..1A5E 7604 UNKNOWN, // 1A5F 7605 TAI_THAM, // 1A60..1A7C 7606 UNKNOWN, // 1A7D..1A7E 7607 TAI_THAM, // 1A7F..1A89 7608 UNKNOWN, // 1A8A..1A8F 7609 TAI_THAM, // 1A90..1A99 7610 UNKNOWN, // 1A9A..1A9F 7611 TAI_THAM, // 1AA0..1AAD 7612 UNKNOWN, // 1AAE..1AAF 7613 INHERITED, // 1AB0..1ACE 7614 UNKNOWN, // 1ACF..1AFF 7615 BALINESE, // 1B00..1B4C 7616 UNKNOWN, // 1B4D..1B4F 7617 BALINESE, // 1B50..1B7E 7618 UNKNOWN, // 1B7F 7619 SUNDANESE, // 1B80..1BBF 7620 BATAK, // 1BC0..1BF3 7621 
UNKNOWN, // 1BF4..1BFB 7622 BATAK, // 1BFC..1BFF 7623 LEPCHA, // 1C00..1C37 7624 UNKNOWN, // 1C38..1C3A 7625 LEPCHA, // 1C3B..1C49 7626 UNKNOWN, // 1C4A..1C4C 7627 LEPCHA, // 1C4D..1C4F 7628 OL_CHIKI, // 1C50..1C7F 7629 CYRILLIC, // 1C80..1C88 7630 UNKNOWN, // 1C89..1C8F 7631 GEORGIAN, // 1C90..1CBA 7632 UNKNOWN, // 1CBB..1CBC 7633 GEORGIAN, // 1CBD..1CBF 7634 SUNDANESE, // 1CC0..1CC7 7635 UNKNOWN, // 1CC8..1CCF 7636 INHERITED, // 1CD0..1CD2 7637 COMMON, // 1CD3 7638 INHERITED, // 1CD4..1CE0 7639 COMMON, // 1CE1 7640 INHERITED, // 1CE2..1CE8 7641 COMMON, // 1CE9..1CEC 7642 INHERITED, // 1CED 7643 COMMON, // 1CEE..1CF3 7644 INHERITED, // 1CF4 7645 COMMON, // 1CF5..1CF7 7646 INHERITED, // 1CF8..1CF9 7647 COMMON, // 1CFA 7648 UNKNOWN, // 1CFB..1CFF 7649 LATIN, // 1D00..1D25 7650 GREEK, // 1D26..1D2A 7651 CYRILLIC, // 1D2B 7652 LATIN, // 1D2C..1D5C 7653 GREEK, // 1D5D..1D61 7654 LATIN, // 1D62..1D65 7655 GREEK, // 1D66..1D6A 7656 LATIN, // 1D6B..1D77 7657 CYRILLIC, // 1D78 7658 LATIN, // 1D79..1DBE 7659 GREEK, // 1DBF 7660 INHERITED, // 1DC0..1DFF 7661 LATIN, // 1E00..1EFF 7662 GREEK, // 1F00..1F15 7663 UNKNOWN, // 1F16..1F17 7664 GREEK, // 1F18..1F1D 7665 UNKNOWN, // 1F1E..1F1F 7666 GREEK, // 1F20..1F45 7667 UNKNOWN, // 1F46..1F47 7668 GREEK, // 1F48..1F4D 7669 UNKNOWN, // 1F4E..1F4F 7670 GREEK, // 1F50..1F57 7671 UNKNOWN, // 1F58 7672 GREEK, // 1F59 7673 UNKNOWN, // 1F5A 7674 GREEK, // 1F5B 7675 UNKNOWN, // 1F5C 7676 GREEK, // 1F5D 7677 UNKNOWN, // 1F5E 7678 GREEK, // 1F5F..1F7D 7679 UNKNOWN, // 1F7E..1F7F 7680 GREEK, // 1F80..1FB4 7681 UNKNOWN, // 1FB5 7682 GREEK, // 1FB6..1FC4 7683 UNKNOWN, // 1FC5 7684 GREEK, // 1FC6..1FD3 7685 UNKNOWN, // 1FD4..1FD5 7686 GREEK, // 1FD6..1FDB 7687 UNKNOWN, // 1FDC 7688 GREEK, // 1FDD..1FEF 7689 UNKNOWN, // 1FF0..1FF1 7690 GREEK, // 1FF2..1FF4 7691 UNKNOWN, // 1FF5 7692 GREEK, // 1FF6..1FFE 7693 UNKNOWN, // 1FFF 7694 COMMON, // 2000..200B 7695 INHERITED, // 200C..200D 7696 COMMON, // 200E..2064 7697 UNKNOWN, // 2065 7698 COMMON, // 
2066..2070 7699 LATIN, // 2071 7700 UNKNOWN, // 2072..2073 7701 COMMON, // 2074..207E 7702 LATIN, // 207F 7703 COMMON, // 2080..208E 7704 UNKNOWN, // 208F 7705 LATIN, // 2090..209C 7706 UNKNOWN, // 209D..209F 7707 COMMON, // 20A0..20C0 7708 UNKNOWN, // 20C1..20CF 7709 INHERITED, // 20D0..20F0 7710 UNKNOWN, // 20F1..20FF 7711 COMMON, // 2100..2125 7712 GREEK, // 2126 7713 COMMON, // 2127..2129 7714 LATIN, // 212A..212B 7715 COMMON, // 212C..2131 7716 LATIN, // 2132 7717 COMMON, // 2133..214D 7718 LATIN, // 214E 7719 COMMON, // 214F..215F 7720 LATIN, // 2160..2188 7721 COMMON, // 2189..218B 7722 UNKNOWN, // 218C..218F 7723 COMMON, // 2190..2426 7724 UNKNOWN, // 2427..243F 7725 COMMON, // 2440..244A 7726 UNKNOWN, // 244B..245F 7727 COMMON, // 2460..27FF 7728 BRAILLE, // 2800..28FF 7729 COMMON, // 2900..2B73 7730 UNKNOWN, // 2B74..2B75 7731 COMMON, // 2B76..2B95 7732 UNKNOWN, // 2B96 7733 COMMON, // 2B97..2BFF 7734 GLAGOLITIC, // 2C00..2C5F 7735 LATIN, // 2C60..2C7F 7736 COPTIC, // 2C80..2CF3 7737 UNKNOWN, // 2CF4..2CF8 7738 COPTIC, // 2CF9..2CFF 7739 GEORGIAN, // 2D00..2D25 7740 UNKNOWN, // 2D26 7741 GEORGIAN, // 2D27 7742 UNKNOWN, // 2D28..2D2C 7743 GEORGIAN, // 2D2D 7744 UNKNOWN, // 2D2E..2D2F 7745 TIFINAGH, // 2D30..2D67 7746 UNKNOWN, // 2D68..2D6E 7747 TIFINAGH, // 2D6F..2D70 7748 UNKNOWN, // 2D71..2D7E 7749 TIFINAGH, // 2D7F 7750 ETHIOPIC, // 2D80..2D96 7751 UNKNOWN, // 2D97..2D9F 7752 ETHIOPIC, // 2DA0..2DA6 7753 UNKNOWN, // 2DA7 7754 ETHIOPIC, // 2DA8..2DAE 7755 UNKNOWN, // 2DAF 7756 ETHIOPIC, // 2DB0..2DB6 7757 UNKNOWN, // 2DB7 7758 ETHIOPIC, // 2DB8..2DBE 7759 UNKNOWN, // 2DBF 7760 ETHIOPIC, // 2DC0..2DC6 7761 UNKNOWN, // 2DC7 7762 ETHIOPIC, // 2DC8..2DCE 7763 UNKNOWN, // 2DCF 7764 ETHIOPIC, // 2DD0..2DD6 7765 UNKNOWN, // 2DD7 7766 ETHIOPIC, // 2DD8..2DDE 7767 UNKNOWN, // 2DDF 7768 CYRILLIC, // 2DE0..2DFF 7769 COMMON, // 2E00..2E5D 7770 UNKNOWN, // 2E5E..2E7F 7771 HAN, // 2E80..2E99 7772 UNKNOWN, // 2E9A 7773 HAN, // 2E9B..2EF3 7774 UNKNOWN, // 2EF4..2EFF 
7775 HAN, // 2F00..2FD5 7776 UNKNOWN, // 2FD6..2FEF 7777 COMMON, // 2FF0..2FFB 7778 UNKNOWN, // 2FFC..2FFF 7779 COMMON, // 3000..3004 7780 HAN, // 3005 7781 COMMON, // 3006 7782 HAN, // 3007 7783 COMMON, // 3008..3020 7784 HAN, // 3021..3029 7785 INHERITED, // 302A..302D 7786 HANGUL, // 302E..302F 7787 COMMON, // 3030..3037 7788 HAN, // 3038..303B 7789 COMMON, // 303C..303F 7790 UNKNOWN, // 3040 7791 HIRAGANA, // 3041..3096 7792 UNKNOWN, // 3097..3098 7793 INHERITED, // 3099..309A 7794 COMMON, // 309B..309C 7795 HIRAGANA, // 309D..309F 7796 COMMON, // 30A0 7797 KATAKANA, // 30A1..30FA 7798 COMMON, // 30FB..30FC 7799 KATAKANA, // 30FD..30FF 7800 UNKNOWN, // 3100..3104 7801 BOPOMOFO, // 3105..312F 7802 UNKNOWN, // 3130 7803 HANGUL, // 3131..318E 7804 UNKNOWN, // 318F 7805 COMMON, // 3190..319F 7806 BOPOMOFO, // 31A0..31BF 7807 COMMON, // 31C0..31E3 7808 UNKNOWN, // 31E4..31EF 7809 KATAKANA, // 31F0..31FF 7810 HANGUL, // 3200..321E 7811 UNKNOWN, // 321F 7812 COMMON, // 3220..325F 7813 HANGUL, // 3260..327E 7814 COMMON, // 327F..32CF 7815 KATAKANA, // 32D0..32FE 7816 COMMON, // 32FF 7817 KATAKANA, // 3300..3357 7818 COMMON, // 3358..33FF 7819 HAN, // 3400..4DBF 7820 COMMON, // 4DC0..4DFF 7821 HAN, // 4E00..9FFF 7822 YI, // A000..A48C 7823 UNKNOWN, // A48D..A48F 7824 YI, // A490..A4C6 7825 UNKNOWN, // A4C7..A4CF 7826 LISU, // A4D0..A4FF 7827 VAI, // A500..A62B 7828 UNKNOWN, // A62C..A63F 7829 CYRILLIC, // A640..A69F 7830 BAMUM, // A6A0..A6F7 7831 UNKNOWN, // A6F8..A6FF 7832 COMMON, // A700..A721 7833 LATIN, // A722..A787 7834 COMMON, // A788..A78A 7835 LATIN, // A78B..A7CA 7836 UNKNOWN, // A7CB..A7CF 7837 LATIN, // A7D0..A7D1 7838 UNKNOWN, // A7D2 7839 LATIN, // A7D3 7840 UNKNOWN, // A7D4 7841 LATIN, // A7D5..A7D9 7842 UNKNOWN, // A7DA..A7F1 7843 LATIN, // A7F2..A7FF 7844 SYLOTI_NAGRI, // A800..A82C 7845 UNKNOWN, // A82D..A82F 7846 COMMON, // A830..A839 7847 UNKNOWN, // A83A..A83F 7848 PHAGS_PA, // A840..A877 7849 UNKNOWN, // A878..A87F 7850 SAURASHTRA, // A880..A8C5 
7851 UNKNOWN, // A8C6..A8CD 7852 SAURASHTRA, // A8CE..A8D9 7853 UNKNOWN, // A8DA..A8DF 7854 DEVANAGARI, // A8E0..A8FF 7855 KAYAH_LI, // A900..A92D 7856 COMMON, // A92E 7857 KAYAH_LI, // A92F 7858 REJANG, // A930..A953 7859 UNKNOWN, // A954..A95E 7860 REJANG, // A95F 7861 HANGUL, // A960..A97C 7862 UNKNOWN, // A97D..A97F 7863 JAVANESE, // A980..A9CD 7864 UNKNOWN, // A9CE 7865 COMMON, // A9CF 7866 JAVANESE, // A9D0..A9D9 7867 UNKNOWN, // A9DA..A9DD 7868 JAVANESE, // A9DE..A9DF 7869 MYANMAR, // A9E0..A9FE 7870 UNKNOWN, // A9FF 7871 CHAM, // AA00..AA36 7872 UNKNOWN, // AA37..AA3F 7873 CHAM, // AA40..AA4D 7874 UNKNOWN, // AA4E..AA4F 7875 CHAM, // AA50..AA59 7876 UNKNOWN, // AA5A..AA5B 7877 CHAM, // AA5C..AA5F 7878 MYANMAR, // AA60..AA7F 7879 TAI_VIET, // AA80..AAC2 7880 UNKNOWN, // AAC3..AADA 7881 TAI_VIET, // AADB..AADF 7882 MEETEI_MAYEK, // AAE0..AAF6 7883 UNKNOWN, // AAF7..AB00 7884 ETHIOPIC, // AB01..AB06 7885 UNKNOWN, // AB07..AB08 7886 ETHIOPIC, // AB09..AB0E 7887 UNKNOWN, // AB0F..AB10 7888 ETHIOPIC, // AB11..AB16 7889 UNKNOWN, // AB17..AB1F 7890 ETHIOPIC, // AB20..AB26 7891 UNKNOWN, // AB27 7892 ETHIOPIC, // AB28..AB2E 7893 UNKNOWN, // AB2F 7894 LATIN, // AB30..AB5A 7895 COMMON, // AB5B 7896 LATIN, // AB5C..AB64 7897 GREEK, // AB65 7898 LATIN, // AB66..AB69 7899 COMMON, // AB6A..AB6B 7900 UNKNOWN, // AB6C..AB6F 7901 CHEROKEE, // AB70..ABBF 7902 MEETEI_MAYEK, // ABC0..ABED 7903 UNKNOWN, // ABEE..ABEF 7904 MEETEI_MAYEK, // ABF0..ABF9 7905 UNKNOWN, // ABFA..ABFF 7906 HANGUL, // AC00..D7A3 7907 UNKNOWN, // D7A4..D7AF 7908 HANGUL, // D7B0..D7C6 7909 UNKNOWN, // D7C7..D7CA 7910 HANGUL, // D7CB..D7FB 7911 UNKNOWN, // D7FC..F8FF 7912 HAN, // F900..FA6D 7913 UNKNOWN, // FA6E..FA6F 7914 HAN, // FA70..FAD9 7915 UNKNOWN, // FADA..FAFF 7916 LATIN, // FB00..FB06 7917 UNKNOWN, // FB07..FB12 7918 ARMENIAN, // FB13..FB17 7919 UNKNOWN, // FB18..FB1C 7920 HEBREW, // FB1D..FB36 7921 UNKNOWN, // FB37 7922 HEBREW, // FB38..FB3C 7923 UNKNOWN, // FB3D 7924 HEBREW, // FB3E 7925 UNKNOWN, 
// FB3F 7926 HEBREW, // FB40..FB41 7927 UNKNOWN, // FB42 7928 HEBREW, // FB43..FB44 7929 UNKNOWN, // FB45 7930 HEBREW, // FB46..FB4F 7931 ARABIC, // FB50..FBC2 7932 UNKNOWN, // FBC3..FBD2 7933 ARABIC, // FBD3..FD3D 7934 COMMON, // FD3E..FD3F 7935 ARABIC, // FD40..FD8F 7936 UNKNOWN, // FD90..FD91 7937 ARABIC, // FD92..FDC7 7938 UNKNOWN, // FDC8..FDCE 7939 ARABIC, // FDCF 7940 UNKNOWN, // FDD0..FDEF 7941 ARABIC, // FDF0..FDFF 7942 INHERITED, // FE00..FE0F 7943 COMMON, // FE10..FE19 7944 UNKNOWN, // FE1A..FE1F 7945 INHERITED, // FE20..FE2D 7946 CYRILLIC, // FE2E..FE2F 7947 COMMON, // FE30..FE52 7948 UNKNOWN, // FE53 7949 COMMON, // FE54..FE66 7950 UNKNOWN, // FE67 7951 COMMON, // FE68..FE6B 7952 UNKNOWN, // FE6C..FE6F 7953 ARABIC, // FE70..FE74 7954 UNKNOWN, // FE75 7955 ARABIC, // FE76..FEFC 7956 UNKNOWN, // FEFD..FEFE 7957 COMMON, // FEFF 7958 UNKNOWN, // FF00 7959 COMMON, // FF01..FF20 7960 LATIN, // FF21..FF3A 7961 COMMON, // FF3B..FF40 7962 LATIN, // FF41..FF5A 7963 COMMON, // FF5B..FF65 7964 KATAKANA, // FF66..FF6F 7965 COMMON, // FF70 7966 KATAKANA, // FF71..FF9D 7967 COMMON, // FF9E..FF9F 7968 HANGUL, // FFA0..FFBE 7969 UNKNOWN, // FFBF..FFC1 7970 HANGUL, // FFC2..FFC7 7971 UNKNOWN, // FFC8..FFC9 7972 HANGUL, // FFCA..FFCF 7973 UNKNOWN, // FFD0..FFD1 7974 HANGUL, // FFD2..FFD7 7975 UNKNOWN, // FFD8..FFD9 7976 HANGUL, // FFDA..FFDC 7977 UNKNOWN, // FFDD..FFDF 7978 COMMON, // FFE0..FFE6 7979 UNKNOWN, // FFE7 7980 COMMON, // FFE8..FFEE 7981 UNKNOWN, // FFEF..FFF8 7982 COMMON, // FFF9..FFFD 7983 UNKNOWN, // FFFE..FFFF 7984 LINEAR_B, // 10000..1000B 7985 UNKNOWN, // 1000C 7986 LINEAR_B, // 1000D..10026 7987 UNKNOWN, // 10027 7988 LINEAR_B, // 10028..1003A 7989 UNKNOWN, // 1003B 7990 LINEAR_B, // 1003C..1003D 7991 UNKNOWN, // 1003E 7992 LINEAR_B, // 1003F..1004D 7993 UNKNOWN, // 1004E..1004F 7994 LINEAR_B, // 10050..1005D 7995 UNKNOWN, // 1005E..1007F 7996 LINEAR_B, // 10080..100FA 7997 UNKNOWN, // 100FB..100FF 7998 COMMON, // 10100..10102 7999 UNKNOWN, // 
10103..10106 8000 COMMON, // 10107..10133 8001 UNKNOWN, // 10134..10136 8002 COMMON, // 10137..1013F 8003 GREEK, // 10140..1018E 8004 UNKNOWN, // 1018F 8005 COMMON, // 10190..1019C 8006 UNKNOWN, // 1019D..1019F 8007 GREEK, // 101A0 8008 UNKNOWN, // 101A1..101CF 8009 COMMON, // 101D0..101FC 8010 INHERITED, // 101FD 8011 UNKNOWN, // 101FE..1027F 8012 LYCIAN, // 10280..1029C 8013 UNKNOWN, // 1029D..1029F 8014 CARIAN, // 102A0..102D0 8015 UNKNOWN, // 102D1..102DF 8016 INHERITED, // 102E0 8017 COMMON, // 102E1..102FB 8018 UNKNOWN, // 102FC..102FF 8019 OLD_ITALIC, // 10300..10323 8020 UNKNOWN, // 10324..1032C 8021 OLD_ITALIC, // 1032D..1032F 8022 GOTHIC, // 10330..1034A 8023 UNKNOWN, // 1034B..1034F 8024 OLD_PERMIC, // 10350..1037A 8025 UNKNOWN, // 1037B..1037F 8026 UGARITIC, // 10380..1039D 8027 UNKNOWN, // 1039E 8028 UGARITIC, // 1039F 8029 OLD_PERSIAN, // 103A0..103C3 8030 UNKNOWN, // 103C4..103C7 8031 OLD_PERSIAN, // 103C8..103D5 8032 UNKNOWN, // 103D6..103FF 8033 DESERET, // 10400..1044F 8034 SHAVIAN, // 10450..1047F 8035 OSMANYA, // 10480..1049D 8036 UNKNOWN, // 1049E..1049F 8037 OSMANYA, // 104A0..104A9 8038 UNKNOWN, // 104AA..104AF 8039 OSAGE, // 104B0..104D3 8040 UNKNOWN, // 104D4..104D7 8041 OSAGE, // 104D8..104FB 8042 UNKNOWN, // 104FC..104FF 8043 ELBASAN, // 10500..10527 8044 UNKNOWN, // 10528..1052F 8045 CAUCASIAN_ALBANIAN, // 10530..10563 8046 UNKNOWN, // 10564..1056E 8047 CAUCASIAN_ALBANIAN, // 1056F 8048 VITHKUQI, // 10570..1057A 8049 UNKNOWN, // 1057B 8050 VITHKUQI, // 1057C..1058A 8051 UNKNOWN, // 1058B 8052 VITHKUQI, // 1058C..10592 8053 UNKNOWN, // 10593 8054 VITHKUQI, // 10594..10595 8055 UNKNOWN, // 10596 8056 VITHKUQI, // 10597..105A1 8057 UNKNOWN, // 105A2 8058 VITHKUQI, // 105A3..105B1 8059 UNKNOWN, // 105B2 8060 VITHKUQI, // 105B3..105B9 8061 UNKNOWN, // 105BA 8062 VITHKUQI, // 105BB..105BC 8063 UNKNOWN, // 105BD..105FF 8064 LINEAR_A, // 10600..10736 8065 UNKNOWN, // 10737..1073F 8066 LINEAR_A, // 10740..10755 8067 UNKNOWN, // 10756..1075F 8068 
LINEAR_A, // 10760..10767 8069 UNKNOWN, // 10768..1077F 8070 LATIN, // 10780..10785 8071 UNKNOWN, // 10786 8072 LATIN, // 10787..107B0 8073 UNKNOWN, // 107B1 8074 LATIN, // 107B2..107BA 8075 UNKNOWN, // 107BB..107FF 8076 CYPRIOT, // 10800..10805 8077 UNKNOWN, // 10806..10807 8078 CYPRIOT, // 10808 8079 UNKNOWN, // 10809 8080 CYPRIOT, // 1080A..10835 8081 UNKNOWN, // 10836 8082 CYPRIOT, // 10837..10838 8083 UNKNOWN, // 10839..1083B 8084 CYPRIOT, // 1083C 8085 UNKNOWN, // 1083D..1083E 8086 CYPRIOT, // 1083F 8087 IMPERIAL_ARAMAIC, // 10840..10855 8088 UNKNOWN, // 10856 8089 IMPERIAL_ARAMAIC, // 10857..1085F 8090 PALMYRENE, // 10860..1087F 8091 NABATAEAN, // 10880..1089E 8092 UNKNOWN, // 1089F..108A6 8093 NABATAEAN, // 108A7..108AF 8094 UNKNOWN, // 108B0..108DF 8095 HATRAN, // 108E0..108F2 8096 UNKNOWN, // 108F3 8097 HATRAN, // 108F4..108F5 8098 UNKNOWN, // 108F6..108FA 8099 HATRAN, // 108FB..108FF 8100 PHOENICIAN, // 10900..1091B 8101 UNKNOWN, // 1091C..1091E 8102 PHOENICIAN, // 1091F 8103 LYDIAN, // 10920..10939 8104 UNKNOWN, // 1093A..1093E 8105 LYDIAN, // 1093F 8106 UNKNOWN, // 10940..1097F 8107 MEROITIC_HIEROGLYPHS, // 10980..1099F 8108 MEROITIC_CURSIVE, // 109A0..109B7 8109 UNKNOWN, // 109B8..109BB 8110 MEROITIC_CURSIVE, // 109BC..109CF 8111 UNKNOWN, // 109D0..109D1 8112 MEROITIC_CURSIVE, // 109D2..109FF 8113 KHAROSHTHI, // 10A00..10A03 8114 UNKNOWN, // 10A04 8115 KHAROSHTHI, // 10A05..10A06 8116 UNKNOWN, // 10A07..10A0B 8117 KHAROSHTHI, // 10A0C..10A13 8118 UNKNOWN, // 10A14 8119 KHAROSHTHI, // 10A15..10A17 8120 UNKNOWN, // 10A18 8121 KHAROSHTHI, // 10A19..10A35 8122 UNKNOWN, // 10A36..10A37 8123 KHAROSHTHI, // 10A38..10A3A 8124 UNKNOWN, // 10A3B..10A3E 8125 KHAROSHTHI, // 10A3F..10A48 8126 UNKNOWN, // 10A49..10A4F 8127 KHAROSHTHI, // 10A50..10A58 8128 UNKNOWN, // 10A59..10A5F 8129 OLD_SOUTH_ARABIAN, // 10A60..10A7F 8130 OLD_NORTH_ARABIAN, // 10A80..10A9F 8131 UNKNOWN, // 10AA0..10ABF 8132 MANICHAEAN, // 10AC0..10AE6 8133 UNKNOWN, // 10AE7..10AEA 8134 
MANICHAEAN, // 10AEB..10AF6 8135 UNKNOWN, // 10AF7..10AFF 8136 AVESTAN, // 10B00..10B35 8137 UNKNOWN, // 10B36..10B38 8138 AVESTAN, // 10B39..10B3F 8139 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 8140 UNKNOWN, // 10B56..10B57 8141 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 8142 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 8143 UNKNOWN, // 10B73..10B77 8144 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 8145 PSALTER_PAHLAVI, // 10B80..10B91 8146 UNKNOWN, // 10B92..10B98 8147 PSALTER_PAHLAVI, // 10B99..10B9C 8148 UNKNOWN, // 10B9D..10BA8 8149 PSALTER_PAHLAVI, // 10BA9..10BAF 8150 UNKNOWN, // 10BB0..10BFF 8151 OLD_TURKIC, // 10C00..10C48 8152 UNKNOWN, // 10C49..10C7F 8153 OLD_HUNGARIAN, // 10C80..10CB2 8154 UNKNOWN, // 10CB3..10CBF 8155 OLD_HUNGARIAN, // 10CC0..10CF2 8156 UNKNOWN, // 10CF3..10CF9 8157 OLD_HUNGARIAN, // 10CFA..10CFF 8158 HANIFI_ROHINGYA, // 10D00..10D27 8159 UNKNOWN, // 10D28..10D2F 8160 HANIFI_ROHINGYA, // 10D30..10D39 8161 UNKNOWN, // 10D3A..10E5F 8162 ARABIC, // 10E60..10E7E 8163 UNKNOWN, // 10E7F 8164 YEZIDI, // 10E80..10EA9 8165 UNKNOWN, // 10EAA 8166 YEZIDI, // 10EAB..10EAD 8167 UNKNOWN, // 10EAE..10EAF 8168 YEZIDI, // 10EB0..10EB1 8169 UNKNOWN, // 10EB2..10EFC 8170 ARABIC, // 10EFD..10EFF 8171 OLD_SOGDIAN, // 10F00..10F27 8172 UNKNOWN, // 10F28..10F2F 8173 SOGDIAN, // 10F30..10F59 8174 UNKNOWN, // 10F5A..10F6F 8175 OLD_UYGHUR, // 10F70..10F89 8176 UNKNOWN, // 10F8A..10FAF 8177 CHORASMIAN, // 10FB0..10FCB 8178 UNKNOWN, // 10FCC..10FDF 8179 ELYMAIC, // 10FE0..10FF6 8180 UNKNOWN, // 10FF7..10FFF 8181 BRAHMI, // 11000..1104D 8182 UNKNOWN, // 1104E..11051 8183 BRAHMI, // 11052..11075 8184 UNKNOWN, // 11076..1107E 8185 BRAHMI, // 1107F 8186 KAITHI, // 11080..110C2 8187 UNKNOWN, // 110C3..110CC 8188 KAITHI, // 110CD 8189 UNKNOWN, // 110CE..110CF 8190 SORA_SOMPENG, // 110D0..110E8 8191 UNKNOWN, // 110E9..110EF 8192 SORA_SOMPENG, // 110F0..110F9 8193 UNKNOWN, // 110FA..110FF 8194 CHAKMA, // 11100..11134 8195 UNKNOWN, // 11135 8196 CHAKMA, // 11136..11147 8197 UNKNOWN, // 
11148..1114F 8198 MAHAJANI, // 11150..11176 8199 UNKNOWN, // 11177..1117F 8200 SHARADA, // 11180..111DF 8201 UNKNOWN, // 111E0 8202 SINHALA, // 111E1..111F4 8203 UNKNOWN, // 111F5..111FF 8204 KHOJKI, // 11200..11211 8205 UNKNOWN, // 11212 8206 KHOJKI, // 11213..11241 8207 UNKNOWN, // 11242..1127F 8208 MULTANI, // 11280..11286 8209 UNKNOWN, // 11287 8210 MULTANI, // 11288 8211 UNKNOWN, // 11289 8212 MULTANI, // 1128A..1128D 8213 UNKNOWN, // 1128E 8214 MULTANI, // 1128F..1129D 8215 UNKNOWN, // 1129E 8216 MULTANI, // 1129F..112A9 8217 UNKNOWN, // 112AA..112AF 8218 KHUDAWADI, // 112B0..112EA 8219 UNKNOWN, // 112EB..112EF 8220 KHUDAWADI, // 112F0..112F9 8221 UNKNOWN, // 112FA..112FF 8222 GRANTHA, // 11300..11303 8223 UNKNOWN, // 11304 8224 GRANTHA, // 11305..1130C 8225 UNKNOWN, // 1130D..1130E 8226 GRANTHA, // 1130F..11310 8227 UNKNOWN, // 11311..11312 8228 GRANTHA, // 11313..11328 8229 UNKNOWN, // 11329 8230 GRANTHA, // 1132A..11330 8231 UNKNOWN, // 11331 8232 GRANTHA, // 11332..11333 8233 UNKNOWN, // 11334 8234 GRANTHA, // 11335..11339 8235 UNKNOWN, // 1133A 8236 INHERITED, // 1133B 8237 GRANTHA, // 1133C..11344 8238 UNKNOWN, // 11345..11346 8239 GRANTHA, // 11347..11348 8240 UNKNOWN, // 11349..1134A 8241 GRANTHA, // 1134B..1134D 8242 UNKNOWN, // 1134E..1134F 8243 GRANTHA, // 11350 8244 UNKNOWN, // 11351..11356 8245 GRANTHA, // 11357 8246 UNKNOWN, // 11358..1135C 8247 GRANTHA, // 1135D..11363 8248 UNKNOWN, // 11364..11365 8249 GRANTHA, // 11366..1136C 8250 UNKNOWN, // 1136D..1136F 8251 GRANTHA, // 11370..11374 8252 UNKNOWN, // 11375..113FF 8253 NEWA, // 11400..1145B 8254 UNKNOWN, // 1145C 8255 NEWA, // 1145D..11461 8256 UNKNOWN, // 11462..1147F 8257 TIRHUTA, // 11480..114C7 8258 UNKNOWN, // 114C8..114CF 8259 TIRHUTA, // 114D0..114D9 8260 UNKNOWN, // 114DA..1157F 8261 SIDDHAM, // 11580..115B5 8262 UNKNOWN, // 115B6..115B7 8263 SIDDHAM, // 115B8..115DD 8264 UNKNOWN, // 115DE..115FF 8265 MODI, // 11600..11644 8266 UNKNOWN, // 11645..1164F 8267 MODI, // 11650..11659 8268 
UNKNOWN, // 1165A..1165F 8269 MONGOLIAN, // 11660..1166C 8270 UNKNOWN, // 1166D..1167F 8271 TAKRI, // 11680..116B9 8272 UNKNOWN, // 116BA..116BF 8273 TAKRI, // 116C0..116C9 8274 UNKNOWN, // 116CA..116FF 8275 AHOM, // 11700..1171A 8276 UNKNOWN, // 1171B..1171C 8277 AHOM, // 1171D..1172B 8278 UNKNOWN, // 1172C..1172F 8279 AHOM, // 11730..11746 8280 UNKNOWN, // 11747..117FF 8281 DOGRA, // 11800..1183B 8282 UNKNOWN, // 1183C..1189F 8283 WARANG_CITI, // 118A0..118F2 8284 UNKNOWN, // 118F3..118FE 8285 WARANG_CITI, // 118FF 8286 DIVES_AKURU, // 11900..11906 8287 UNKNOWN, // 11907..11908 8288 DIVES_AKURU, // 11909 8289 UNKNOWN, // 1190A..1190B 8290 DIVES_AKURU, // 1190C..11913 8291 UNKNOWN, // 11914 8292 DIVES_AKURU, // 11915..11916 8293 UNKNOWN, // 11917 8294 DIVES_AKURU, // 11918..11935 8295 UNKNOWN, // 11936 8296 DIVES_AKURU, // 11937..11938 8297 UNKNOWN, // 11939..1193A 8298 DIVES_AKURU, // 1193B..11946 8299 UNKNOWN, // 11947..1194F 8300 DIVES_AKURU, // 11950..11959 8301 UNKNOWN, // 1195A..1199F 8302 NANDINAGARI, // 119A0..119A7 8303 UNKNOWN, // 119A8..119A9 8304 NANDINAGARI, // 119AA..119D7 8305 UNKNOWN, // 119D8..119D9 8306 NANDINAGARI, // 119DA..119E4 8307 UNKNOWN, // 119E5..119FF 8308 ZANABAZAR_SQUARE, // 11A00..11A47 8309 UNKNOWN, // 11A48..11A4F 8310 SOYOMBO, // 11A50..11AA2 8311 UNKNOWN, // 11AA3..11AAF 8312 CANADIAN_ABORIGINAL, // 11AB0..11ABF 8313 PAU_CIN_HAU, // 11AC0..11AF8 8314 UNKNOWN, // 11AF9..11AFF 8315 DEVANAGARI, // 11B00..11B09 8316 UNKNOWN, // 11B0A..11BFF 8317 BHAIKSUKI, // 11C00..11C08 8318 UNKNOWN, // 11C09 8319 BHAIKSUKI, // 11C0A..11C36 8320 UNKNOWN, // 11C37 8321 BHAIKSUKI, // 11C38..11C45 8322 UNKNOWN, // 11C46..11C4F 8323 BHAIKSUKI, // 11C50..11C6C 8324 UNKNOWN, // 11C6D..11C6F 8325 MARCHEN, // 11C70..11C8F 8326 UNKNOWN, // 11C90..11C91 8327 MARCHEN, // 11C92..11CA7 8328 UNKNOWN, // 11CA8 8329 MARCHEN, // 11CA9..11CB6 8330 UNKNOWN, // 11CB7..11CFF 8331 MASARAM_GONDI, // 11D00..11D06 8332 UNKNOWN, // 11D07 8333 MASARAM_GONDI, // 11D08..11D09 
8334 UNKNOWN, // 11D0A 8335 MASARAM_GONDI, // 11D0B..11D36 8336 UNKNOWN, // 11D37..11D39 8337 MASARAM_GONDI, // 11D3A 8338 UNKNOWN, // 11D3B 8339 MASARAM_GONDI, // 11D3C..11D3D 8340 UNKNOWN, // 11D3E 8341 MASARAM_GONDI, // 11D3F..11D47 8342 UNKNOWN, // 11D48..11D4F 8343 MASARAM_GONDI, // 11D50..11D59 8344 UNKNOWN, // 11D5A..11D5F 8345 GUNJALA_GONDI, // 11D60..11D65 8346 UNKNOWN, // 11D66 8347 GUNJALA_GONDI, // 11D67..11D68 8348 UNKNOWN, // 11D69 8349 GUNJALA_GONDI, // 11D6A..11D8E 8350 UNKNOWN, // 11D8F 8351 GUNJALA_GONDI, // 11D90..11D91 8352 UNKNOWN, // 11D92 8353 GUNJALA_GONDI, // 11D93..11D98 8354 UNKNOWN, // 11D99..11D9F 8355 GUNJALA_GONDI, // 11DA0..11DA9 8356 UNKNOWN, // 11DAA..11EDF 8357 MAKASAR, // 11EE0..11EF8 8358 UNKNOWN, // 11EF9..11EFF 8359 KAWI, // 11F00..11F10 8360 UNKNOWN, // 11F11 8361 KAWI, // 11F12..11F3A 8362 UNKNOWN, // 11F3B..11F3D 8363 KAWI, // 11F3E..11F59 8364 UNKNOWN, // 11F5A..11FAF 8365 LISU, // 11FB0 8366 UNKNOWN, // 11FB1..11FBF 8367 TAMIL, // 11FC0..11FF1 8368 UNKNOWN, // 11FF2..11FFE 8369 TAMIL, // 11FFF 8370 CUNEIFORM, // 12000..12399 8371 UNKNOWN, // 1239A..123FF 8372 CUNEIFORM, // 12400..1246E 8373 UNKNOWN, // 1246F 8374 CUNEIFORM, // 12470..12474 8375 UNKNOWN, // 12475..1247F 8376 CUNEIFORM, // 12480..12543 8377 UNKNOWN, // 12544..12F8F 8378 CYPRO_MINOAN, // 12F90..12FF2 8379 UNKNOWN, // 12FF3..12FFF 8380 EGYPTIAN_HIEROGLYPHS, // 13000..13455 8381 UNKNOWN, // 13456..143FF 8382 ANATOLIAN_HIEROGLYPHS, // 14400..14646 8383 UNKNOWN, // 14647..167FF 8384 BAMUM, // 16800..16A38 8385 UNKNOWN, // 16A39..16A3F 8386 MRO, // 16A40..16A5E 8387 UNKNOWN, // 16A5F 8388 MRO, // 16A60..16A69 8389 UNKNOWN, // 16A6A..16A6D 8390 MRO, // 16A6E..16A6F 8391 TANGSA, // 16A70..16ABE 8392 UNKNOWN, // 16ABF 8393 TANGSA, // 16AC0..16AC9 8394 UNKNOWN, // 16ACA..16ACF 8395 BASSA_VAH, // 16AD0..16AED 8396 UNKNOWN, // 16AEE..16AEF 8397 BASSA_VAH, // 16AF0..16AF5 8398 UNKNOWN, // 16AF6..16AFF 8399 PAHAWH_HMONG, // 16B00..16B45 8400 UNKNOWN, // 16B46..16B4F 8401 
PAHAWH_HMONG, // 16B50..16B59 8402 UNKNOWN, // 16B5A 8403 PAHAWH_HMONG, // 16B5B..16B61 8404 UNKNOWN, // 16B62 8405 PAHAWH_HMONG, // 16B63..16B77 8406 UNKNOWN, // 16B78..16B7C 8407 PAHAWH_HMONG, // 16B7D..16B8F 8408 UNKNOWN, // 16B90..16E3F 8409 MEDEFAIDRIN, // 16E40..16E9A 8410 UNKNOWN, // 16E9B..16EFF 8411 MIAO, // 16F00..16F4A 8412 UNKNOWN, // 16F4B..16F4E 8413 MIAO, // 16F4F..16F87 8414 UNKNOWN, // 16F88..16F8E 8415 MIAO, // 16F8F..16F9F 8416 UNKNOWN, // 16FA0..16FDF 8417 TANGUT, // 16FE0 8418 NUSHU, // 16FE1 8419 HAN, // 16FE2..16FE3 8420 KHITAN_SMALL_SCRIPT, // 16FE4 8421 UNKNOWN, // 16FE5..16FEF 8422 HAN, // 16FF0..16FF1 8423 UNKNOWN, // 16FF2..16FFF 8424 TANGUT, // 17000..187F7 8425 UNKNOWN, // 187F8..187FF 8426 TANGUT, // 18800..18AFF 8427 KHITAN_SMALL_SCRIPT, // 18B00..18CD5 8428 UNKNOWN, // 18CD6..18CFF 8429 TANGUT, // 18D00..18D08 8430 UNKNOWN, // 18D09..1AFEF 8431 KATAKANA, // 1AFF0..1AFF3 8432 UNKNOWN, // 1AFF4 8433 KATAKANA, // 1AFF5..1AFFB 8434 UNKNOWN, // 1AFFC 8435 KATAKANA, // 1AFFD..1AFFE 8436 UNKNOWN, // 1AFFF 8437 KATAKANA, // 1B000 8438 HIRAGANA, // 1B001..1B11F 8439 KATAKANA, // 1B120..1B122 8440 UNKNOWN, // 1B123..1B131 8441 HIRAGANA, // 1B132 8442 UNKNOWN, // 1B133..1B14F 8443 HIRAGANA, // 1B150..1B152 8444 UNKNOWN, // 1B153..1B154 8445 KATAKANA, // 1B155 8446 UNKNOWN, // 1B156..1B163 8447 KATAKANA, // 1B164..1B167 8448 UNKNOWN, // 1B168..1B16F 8449 NUSHU, // 1B170..1B2FB 8450 UNKNOWN, // 1B2FC..1BBFF 8451 DUPLOYAN, // 1BC00..1BC6A 8452 UNKNOWN, // 1BC6B..1BC6F 8453 DUPLOYAN, // 1BC70..1BC7C 8454 UNKNOWN, // 1BC7D..1BC7F 8455 DUPLOYAN, // 1BC80..1BC88 8456 UNKNOWN, // 1BC89..1BC8F 8457 DUPLOYAN, // 1BC90..1BC99 8458 UNKNOWN, // 1BC9A..1BC9B 8459 DUPLOYAN, // 1BC9C..1BC9F 8460 COMMON, // 1BCA0..1BCA3 8461 UNKNOWN, // 1BCA4..1CEFF 8462 INHERITED, // 1CF00..1CF2D 8463 UNKNOWN, // 1CF2E..1CF2F 8464 INHERITED, // 1CF30..1CF46 8465 UNKNOWN, // 1CF47..1CF4F 8466 COMMON, // 1CF50..1CFC3 8467 UNKNOWN, // 1CFC4..1CFFF 8468 COMMON, // 1D000..1D0F5 
8469 UNKNOWN, // 1D0F6..1D0FF 8470 COMMON, // 1D100..1D126 8471 UNKNOWN, // 1D127..1D128 8472 COMMON, // 1D129..1D166 8473 INHERITED, // 1D167..1D169 8474 COMMON, // 1D16A..1D17A 8475 INHERITED, // 1D17B..1D182 8476 COMMON, // 1D183..1D184 8477 INHERITED, // 1D185..1D18B 8478 COMMON, // 1D18C..1D1A9 8479 INHERITED, // 1D1AA..1D1AD 8480 COMMON, // 1D1AE..1D1EA 8481 UNKNOWN, // 1D1EB..1D1FF 8482 GREEK, // 1D200..1D245 8483 UNKNOWN, // 1D246..1D2BF 8484 COMMON, // 1D2C0..1D2D3 8485 UNKNOWN, // 1D2D4..1D2DF 8486 COMMON, // 1D2E0..1D2F3 8487 UNKNOWN, // 1D2F4..1D2FF 8488 COMMON, // 1D300..1D356 8489 UNKNOWN, // 1D357..1D35F 8490 COMMON, // 1D360..1D378 8491 UNKNOWN, // 1D379..1D3FF 8492 COMMON, // 1D400..1D454 8493 UNKNOWN, // 1D455 8494 COMMON, // 1D456..1D49C 8495 UNKNOWN, // 1D49D 8496 COMMON, // 1D49E..1D49F 8497 UNKNOWN, // 1D4A0..1D4A1 8498 COMMON, // 1D4A2 8499 UNKNOWN, // 1D4A3..1D4A4 8500 COMMON, // 1D4A5..1D4A6 8501 UNKNOWN, // 1D4A7..1D4A8 8502 COMMON, // 1D4A9..1D4AC 8503 UNKNOWN, // 1D4AD 8504 COMMON, // 1D4AE..1D4B9 8505 UNKNOWN, // 1D4BA 8506 COMMON, // 1D4BB 8507 UNKNOWN, // 1D4BC 8508 COMMON, // 1D4BD..1D4C3 8509 UNKNOWN, // 1D4C4 8510 COMMON, // 1D4C5..1D505 8511 UNKNOWN, // 1D506 8512 COMMON, // 1D507..1D50A 8513 UNKNOWN, // 1D50B..1D50C 8514 COMMON, // 1D50D..1D514 8515 UNKNOWN, // 1D515 8516 COMMON, // 1D516..1D51C 8517 UNKNOWN, // 1D51D 8518 COMMON, // 1D51E..1D539 8519 UNKNOWN, // 1D53A 8520 COMMON, // 1D53B..1D53E 8521 UNKNOWN, // 1D53F 8522 COMMON, // 1D540..1D544 8523 UNKNOWN, // 1D545 8524 COMMON, // 1D546 8525 UNKNOWN, // 1D547..1D549 8526 COMMON, // 1D54A..1D550 8527 UNKNOWN, // 1D551 8528 COMMON, // 1D552..1D6A5 8529 UNKNOWN, // 1D6A6..1D6A7 8530 COMMON, // 1D6A8..1D7CB 8531 UNKNOWN, // 1D7CC..1D7CD 8532 COMMON, // 1D7CE..1D7FF 8533 SIGNWRITING, // 1D800..1DA8B 8534 UNKNOWN, // 1DA8C..1DA9A 8535 SIGNWRITING, // 1DA9B..1DA9F 8536 UNKNOWN, // 1DAA0 8537 SIGNWRITING, // 1DAA1..1DAAF 8538 UNKNOWN, // 1DAB0..1DEFF 8539 LATIN, // 1DF00..1DF1E 
8540 UNKNOWN, // 1DF1F..1DF24 8541 LATIN, // 1DF25..1DF2A 8542 UNKNOWN, // 1DF2B..1DFFF 8543 GLAGOLITIC, // 1E000..1E006 8544 UNKNOWN, // 1E007 8545 GLAGOLITIC, // 1E008..1E018 8546 UNKNOWN, // 1E019..1E01A 8547 GLAGOLITIC, // 1E01B..1E021 8548 UNKNOWN, // 1E022 8549 GLAGOLITIC, // 1E023..1E024 8550 UNKNOWN, // 1E025 8551 GLAGOLITIC, // 1E026..1E02A 8552 UNKNOWN, // 1E02B..1E02F 8553 CYRILLIC, // 1E030..1E06D 8554 UNKNOWN, // 1E06E..1E08E 8555 CYRILLIC, // 1E08F 8556 UNKNOWN, // 1E090..1E0FF 8557 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 8558 UNKNOWN, // 1E12D..1E12F 8559 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 8560 UNKNOWN, // 1E13E..1E13F 8561 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 8562 UNKNOWN, // 1E14A..1E14D 8563 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 8564 UNKNOWN, // 1E150..1E28F 8565 TOTO, // 1E290..1E2AE 8566 UNKNOWN, // 1E2AF..1E2BF 8567 WANCHO, // 1E2C0..1E2F9 8568 UNKNOWN, // 1E2FA..1E2FE 8569 WANCHO, // 1E2FF 8570 UNKNOWN, // 1E300..1E4CF 8571 NAG_MUNDARI, // 1E4D0..1E4F9 8572 UNKNOWN, // 1E4FA..1E7DF 8573 ETHIOPIC, // 1E7E0..1E7E6 8574 UNKNOWN, // 1E7E7 8575 ETHIOPIC, // 1E7E8..1E7EB 8576 UNKNOWN, // 1E7EC 8577 ETHIOPIC, // 1E7ED..1E7EE 8578 UNKNOWN, // 1E7EF 8579 ETHIOPIC, // 1E7F0..1E7FE 8580 UNKNOWN, // 1E7FF 8581 MENDE_KIKAKUI, // 1E800..1E8C4 8582 UNKNOWN, // 1E8C5..1E8C6 8583 MENDE_KIKAKUI, // 1E8C7..1E8D6 8584 UNKNOWN, // 1E8D7..1E8FF 8585 ADLAM, // 1E900..1E94B 8586 UNKNOWN, // 1E94C..1E94F 8587 ADLAM, // 1E950..1E959 8588 UNKNOWN, // 1E95A..1E95D 8589 ADLAM, // 1E95E..1E95F 8590 UNKNOWN, // 1E960..1EC70 8591 COMMON, // 1EC71..1ECB4 8592 UNKNOWN, // 1ECB5..1ED00 8593 COMMON, // 1ED01..1ED3D 8594 UNKNOWN, // 1ED3E..1EDFF 8595 ARABIC, // 1EE00..1EE03 8596 UNKNOWN, // 1EE04 8597 ARABIC, // 1EE05..1EE1F 8598 UNKNOWN, // 1EE20 8599 ARABIC, // 1EE21..1EE22 8600 UNKNOWN, // 1EE23 8601 ARABIC, // 1EE24 8602 UNKNOWN, // 1EE25..1EE26 8603 ARABIC, // 1EE27 8604 UNKNOWN, // 1EE28 8605 ARABIC, // 1EE29..1EE32 8606 UNKNOWN, // 1EE33 8607 ARABIC, // 
1EE34..1EE37 8608 UNKNOWN, // 1EE38 8609 ARABIC, // 1EE39 8610 UNKNOWN, // 1EE3A 8611 ARABIC, // 1EE3B 8612 UNKNOWN, // 1EE3C..1EE41 8613 ARABIC, // 1EE42 8614 UNKNOWN, // 1EE43..1EE46 8615 ARABIC, // 1EE47 8616 UNKNOWN, // 1EE48 8617 ARABIC, // 1EE49 8618 UNKNOWN, // 1EE4A 8619 ARABIC, // 1EE4B 8620 UNKNOWN, // 1EE4C 8621 ARABIC, // 1EE4D..1EE4F 8622 UNKNOWN, // 1EE50 8623 ARABIC, // 1EE51..1EE52 8624 UNKNOWN, // 1EE53 8625 ARABIC, // 1EE54 8626 UNKNOWN, // 1EE55..1EE56 8627 ARABIC, // 1EE57 8628 UNKNOWN, // 1EE58 8629 ARABIC, // 1EE59 8630 UNKNOWN, // 1EE5A 8631 ARABIC, // 1EE5B 8632 UNKNOWN, // 1EE5C 8633 ARABIC, // 1EE5D 8634 UNKNOWN, // 1EE5E 8635 ARABIC, // 1EE5F 8636 UNKNOWN, // 1EE60 8637 ARABIC, // 1EE61..1EE62 8638 UNKNOWN, // 1EE63 8639 ARABIC, // 1EE64 8640 UNKNOWN, // 1EE65..1EE66 8641 ARABIC, // 1EE67..1EE6A 8642 UNKNOWN, // 1EE6B 8643 ARABIC, // 1EE6C..1EE72 8644 UNKNOWN, // 1EE73 8645 ARABIC, // 1EE74..1EE77 8646 UNKNOWN, // 1EE78 8647 ARABIC, // 1EE79..1EE7C 8648 UNKNOWN, // 1EE7D 8649 ARABIC, // 1EE7E 8650 UNKNOWN, // 1EE7F 8651 ARABIC, // 1EE80..1EE89 8652 UNKNOWN, // 1EE8A 8653 ARABIC, // 1EE8B..1EE9B 8654 UNKNOWN, // 1EE9C..1EEA0 8655 ARABIC, // 1EEA1..1EEA3 8656 UNKNOWN, // 1EEA4 8657 ARABIC, // 1EEA5..1EEA9 8658 UNKNOWN, // 1EEAA 8659 ARABIC, // 1EEAB..1EEBB 8660 UNKNOWN, // 1EEBC..1EEEF 8661 ARABIC, // 1EEF0..1EEF1 8662 UNKNOWN, // 1EEF2..1EFFF 8663 COMMON, // 1F000..1F02B 8664 UNKNOWN, // 1F02C..1F02F 8665 COMMON, // 1F030..1F093 8666 UNKNOWN, // 1F094..1F09F 8667 COMMON, // 1F0A0..1F0AE 8668 UNKNOWN, // 1F0AF..1F0B0 8669 COMMON, // 1F0B1..1F0BF 8670 UNKNOWN, // 1F0C0 8671 COMMON, // 1F0C1..1F0CF 8672 UNKNOWN, // 1F0D0 8673 COMMON, // 1F0D1..1F0F5 8674 UNKNOWN, // 1F0F6..1F0FF 8675 COMMON, // 1F100..1F1AD 8676 UNKNOWN, // 1F1AE..1F1E5 8677 COMMON, // 1F1E6..1F1FF 8678 HIRAGANA, // 1F200 8679 COMMON, // 1F201..1F202 8680 UNKNOWN, // 1F203..1F20F 8681 COMMON, // 1F210..1F23B 8682 UNKNOWN, // 1F23C..1F23F 8683 COMMON, // 1F240..1F248 8684 
UNKNOWN, // 1F249..1F24F 8685 COMMON, // 1F250..1F251 8686 UNKNOWN, // 1F252..1F25F 8687 COMMON, // 1F260..1F265 8688 UNKNOWN, // 1F266..1F2FF 8689 COMMON, // 1F300..1F6D7 8690 UNKNOWN, // 1F6D8..1F6DB 8691 COMMON, // 1F6DC..1F6EC 8692 UNKNOWN, // 1F6ED..1F6EF 8693 COMMON, // 1F6F0..1F6FC 8694 UNKNOWN, // 1F6FD..1F6FF 8695 COMMON, // 1F700..1F776 8696 UNKNOWN, // 1F777..1F77A 8697 COMMON, // 1F77B..1F7D9 8698 UNKNOWN, // 1F7DA..1F7DF 8699 COMMON, // 1F7E0..1F7EB 8700 UNKNOWN, // 1F7EC..1F7EF 8701 COMMON, // 1F7F0 8702 UNKNOWN, // 1F7F1..1F7FF 8703 COMMON, // 1F800..1F80B 8704 UNKNOWN, // 1F80C..1F80F 8705 COMMON, // 1F810..1F847 8706 UNKNOWN, // 1F848..1F84F 8707 COMMON, // 1F850..1F859 8708 UNKNOWN, // 1F85A..1F85F 8709 COMMON, // 1F860..1F887 8710 UNKNOWN, // 1F888..1F88F 8711 COMMON, // 1F890..1F8AD 8712 UNKNOWN, // 1F8AE..1F8AF 8713 COMMON, // 1F8B0..1F8B1 8714 UNKNOWN, // 1F8B2..1F8FF 8715 COMMON, // 1F900..1FA53 8716 UNKNOWN, // 1FA54..1FA5F 8717 COMMON, // 1FA60..1FA6D 8718 UNKNOWN, // 1FA6E..1FA6F 8719 COMMON, // 1FA70..1FA7C 8720 UNKNOWN, // 1FA7D..1FA7F 8721 COMMON, // 1FA80..1FA88 8722 UNKNOWN, // 1FA89..1FA8F 8723 COMMON, // 1FA90..1FABD 8724 UNKNOWN, // 1FABE 8725 COMMON, // 1FABF..1FAC5 8726 UNKNOWN, // 1FAC6..1FACD 8727 COMMON, // 1FACE..1FADB 8728 UNKNOWN, // 1FADC..1FADF 8729 COMMON, // 1FAE0..1FAE8 8730 UNKNOWN, // 1FAE9..1FAEF 8731 COMMON, // 1FAF0..1FAF8 8732 UNKNOWN, // 1FAF9..1FAFF 8733 COMMON, // 1FB00..1FB92 8734 UNKNOWN, // 1FB93 8735 COMMON, // 1FB94..1FBCA 8736 UNKNOWN, // 1FBCB..1FBEF 8737 COMMON, // 1FBF0..1FBF9 8738 UNKNOWN, // 1FBFA..1FFFF 8739 HAN, // 20000..2A6DF 8740 UNKNOWN, // 2A6E0..2A6FF 8741 HAN, // 2A700..2B739 8742 UNKNOWN, // 2B73A..2B73F 8743 HAN, // 2B740..2B81D 8744 UNKNOWN, // 2B81E..2B81F 8745 HAN, // 2B820..2CEA1 8746 UNKNOWN, // 2CEA2..2CEAF 8747 HAN, // 2CEB0..2EBE0 8748 UNKNOWN, // 2EBE1..2F7FF 8749 HAN, // 2F800..2FA1D 8750 UNKNOWN, // 2FA1E..2FFFF 8751 HAN, // 30000..3134A 8752 UNKNOWN, // 3134B..3134F 8753 HAN, 
// 31350..323AF 8754 UNKNOWN, // 323B0..E0000 8755 COMMON, // E0001 8756 UNKNOWN, // E0002..E001F 8757 COMMON, // E0020..E007F 8758 UNKNOWN, // E0080..E00FF 8759 INHERITED, // E0100..E01EF 8760 UNKNOWN, // E01F0..10FFFF 8761 }; 8762 8763 private static final HashMap<String, Character.UnicodeScript> aliases; 8764 static { 8765 aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1); 8766 aliases.put("ADLM", ADLAM); 8767 aliases.put("AGHB", CAUCASIAN_ALBANIAN); 8768 aliases.put("AHOM", AHOM); 8769 aliases.put("ARAB", ARABIC); 8770 aliases.put("ARMI", IMPERIAL_ARAMAIC); 8771 aliases.put("ARMN", ARMENIAN); 8772 aliases.put("AVST", AVESTAN); 8773 aliases.put("BALI", BALINESE); 8774 aliases.put("BAMU", BAMUM); 8775 aliases.put("BASS", BASSA_VAH); 8776 aliases.put("BATK", BATAK); 8777 aliases.put("BENG", BENGALI); 8778 aliases.put("BHKS", BHAIKSUKI); 8779 aliases.put("BOPO", BOPOMOFO); 8780 aliases.put("BRAH", BRAHMI); 8781 aliases.put("BRAI", BRAILLE); 8782 aliases.put("BUGI", BUGINESE); 8783 aliases.put("BUHD", BUHID); 8784 aliases.put("CAKM", CHAKMA); 8785 aliases.put("CANS", CANADIAN_ABORIGINAL); 8786 aliases.put("CARI", CARIAN); 8787 aliases.put("CHAM", CHAM); 8788 aliases.put("CHER", CHEROKEE); 8789 aliases.put("CHRS", CHORASMIAN); 8790 aliases.put("COPT", COPTIC); 8791 aliases.put("CPMN", CYPRO_MINOAN); 8792 aliases.put("CPRT", CYPRIOT); 8793 aliases.put("CYRL", CYRILLIC); 8794 aliases.put("DEVA", DEVANAGARI); 8795 aliases.put("DIAK", DIVES_AKURU); 8796 aliases.put("DOGR", DOGRA); 8797 aliases.put("DSRT", DESERET); 8798 aliases.put("DUPL", DUPLOYAN); 8799 aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS); 8800 aliases.put("ELBA", ELBASAN); 8801 aliases.put("ELYM", ELYMAIC); 8802 aliases.put("ETHI", ETHIOPIC); 8803 aliases.put("GEOR", GEORGIAN); 8804 aliases.put("GLAG", GLAGOLITIC); 8805 aliases.put("GONG", GUNJALA_GONDI); 8806 aliases.put("GONM", MASARAM_GONDI); 8807 aliases.put("GOTH", GOTHIC); 8808 aliases.put("GRAN", GRANTHA); 8809 aliases.put("GREK", GREEK); 8810 
aliases.put("GUJR", GUJARATI); 8811 aliases.put("GURU", GURMUKHI); 8812 aliases.put("HANG", HANGUL); 8813 aliases.put("HANI", HAN); 8814 aliases.put("HANO", HANUNOO); 8815 aliases.put("HATR", HATRAN); 8816 aliases.put("HEBR", HEBREW); 8817 aliases.put("HIRA", HIRAGANA); 8818 aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS); 8819 aliases.put("HMNG", PAHAWH_HMONG); 8820 aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG); 8821 aliases.put("HUNG", OLD_HUNGARIAN); 8822 aliases.put("ITAL", OLD_ITALIC); 8823 aliases.put("JAVA", JAVANESE); 8824 aliases.put("KALI", KAYAH_LI); 8825 aliases.put("KANA", KATAKANA); 8826 aliases.put("KAWI", KAWI); 8827 aliases.put("KHAR", KHAROSHTHI); 8828 aliases.put("KHMR", KHMER); 8829 aliases.put("KHOJ", KHOJKI); 8830 aliases.put("KITS", KHITAN_SMALL_SCRIPT); 8831 aliases.put("KNDA", KANNADA); 8832 aliases.put("KTHI", KAITHI); 8833 aliases.put("LANA", TAI_THAM); 8834 aliases.put("LAOO", LAO); 8835 aliases.put("LATN", LATIN); 8836 aliases.put("LEPC", LEPCHA); 8837 aliases.put("LIMB", LIMBU); 8838 aliases.put("LINA", LINEAR_A); 8839 aliases.put("LINB", LINEAR_B); 8840 aliases.put("LISU", LISU); 8841 aliases.put("LYCI", LYCIAN); 8842 aliases.put("LYDI", LYDIAN); 8843 aliases.put("MAHJ", MAHAJANI); 8844 aliases.put("MAKA", MAKASAR); 8845 aliases.put("MAND", MANDAIC); 8846 aliases.put("MANI", MANICHAEAN); 8847 aliases.put("MARC", MARCHEN); 8848 aliases.put("MEDF", MEDEFAIDRIN); 8849 aliases.put("MEND", MENDE_KIKAKUI); 8850 aliases.put("MERC", MEROITIC_CURSIVE); 8851 aliases.put("MERO", MEROITIC_HIEROGLYPHS); 8852 aliases.put("MLYM", MALAYALAM); 8853 aliases.put("MODI", MODI); 8854 aliases.put("MONG", MONGOLIAN); 8855 aliases.put("MROO", MRO); 8856 aliases.put("MTEI", MEETEI_MAYEK); 8857 aliases.put("MULT", MULTANI); 8858 aliases.put("MYMR", MYANMAR); 8859 aliases.put("NAGM", NAG_MUNDARI); 8860 aliases.put("NAND", NANDINAGARI); 8861 aliases.put("NARB", OLD_NORTH_ARABIAN); 8862 aliases.put("NBAT", NABATAEAN); 8863 aliases.put("NEWA", NEWA); 8864 
aliases.put("NKOO", NKO); 8865 aliases.put("NSHU", NUSHU); 8866 aliases.put("OGAM", OGHAM); 8867 aliases.put("OLCK", OL_CHIKI); 8868 aliases.put("ORKH", OLD_TURKIC); 8869 aliases.put("ORYA", ORIYA); 8870 aliases.put("OSGE", OSAGE); 8871 aliases.put("OSMA", OSMANYA); 8872 aliases.put("OUGR", OLD_UYGHUR); 8873 aliases.put("PALM", PALMYRENE); 8874 aliases.put("PAUC", PAU_CIN_HAU); 8875 aliases.put("PERM", OLD_PERMIC); 8876 aliases.put("PHAG", PHAGS_PA); 8877 aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI); 8878 aliases.put("PHLP", PSALTER_PAHLAVI); 8879 aliases.put("PHNX", PHOENICIAN); 8880 aliases.put("PLRD", MIAO); 8881 aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN); 8882 aliases.put("RJNG", REJANG); 8883 aliases.put("ROHG", HANIFI_ROHINGYA); 8884 aliases.put("RUNR", RUNIC); 8885 aliases.put("SAMR", SAMARITAN); 8886 aliases.put("SARB", OLD_SOUTH_ARABIAN); 8887 aliases.put("SAUR", SAURASHTRA); 8888 aliases.put("SGNW", SIGNWRITING); 8889 aliases.put("SHAW", SHAVIAN); 8890 aliases.put("SHRD", SHARADA); 8891 aliases.put("SIDD", SIDDHAM); 8892 aliases.put("SIND", KHUDAWADI); 8893 aliases.put("SINH", SINHALA); 8894 aliases.put("SOGD", SOGDIAN); 8895 aliases.put("SOGO", OLD_SOGDIAN); 8896 aliases.put("SORA", SORA_SOMPENG); 8897 aliases.put("SOYO", SOYOMBO); 8898 aliases.put("SUND", SUNDANESE); 8899 aliases.put("SYLO", SYLOTI_NAGRI); 8900 aliases.put("SYRC", SYRIAC); 8901 aliases.put("TAGB", TAGBANWA); 8902 aliases.put("TAKR", TAKRI); 8903 aliases.put("TALE", TAI_LE); 8904 aliases.put("TALU", NEW_TAI_LUE); 8905 aliases.put("TAML", TAMIL); 8906 aliases.put("TANG", TANGUT); 8907 aliases.put("TAVT", TAI_VIET); 8908 aliases.put("TELU", TELUGU); 8909 aliases.put("TFNG", TIFINAGH); 8910 aliases.put("TGLG", TAGALOG); 8911 aliases.put("THAA", THAANA); 8912 aliases.put("THAI", THAI); 8913 aliases.put("TIBT", TIBETAN); 8914 aliases.put("TIRH", TIRHUTA); 8915 aliases.put("TNSA", TANGSA); 8916 aliases.put("TOTO", TOTO); 8917 aliases.put("UGAR", UGARITIC); 8918 aliases.put("VAII", VAI); 8919 
aliases.put("VITH", VITHKUQI); 8920 aliases.put("WARA", WARANG_CITI); 8921 aliases.put("WCHO", WANCHO); 8922 aliases.put("XPEO", OLD_PERSIAN); 8923 aliases.put("XSUX", CUNEIFORM); 8924 aliases.put("YEZI", YEZIDI); 8925 aliases.put("YIII", YI); 8926 aliases.put("ZANB", ZANABAZAR_SQUARE); 8927 aliases.put("ZINH", INHERITED); 8928 aliases.put("ZYYY", COMMON); 8929 aliases.put("ZZZZ", UNKNOWN); 8930 } 8931 8932 /** 8933 * Returns the enum constant representing the Unicode script of which 8934 * the given character (Unicode code point) is assigned to. 8935 * 8936 * @param codePoint the character (Unicode code point) in question. 8937 * @return The {@code UnicodeScript} constant representing the 8938 * Unicode script of which this character is assigned to. 8939 * 8940 * @throws IllegalArgumentException if the specified 8941 * {@code codePoint} is an invalid Unicode code point. 8942 * @see Character#isValidCodePoint(int) 8943 * 8944 */ of(int codePoint)8945 public static UnicodeScript of(int codePoint) { 8946 if (!isValidCodePoint(codePoint)) 8947 throw new IllegalArgumentException( 8948 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8949 int type = getType(codePoint); 8950 // leave SURROGATE and PRIVATE_USE for table lookup 8951 if (type == UNASSIGNED) 8952 return UNKNOWN; 8953 int index = Arrays.binarySearch(scriptStarts, codePoint); 8954 if (index < 0) 8955 index = -index - 2; 8956 return scripts[index]; 8957 } 8958 8959 /** 8960 * Returns the UnicodeScript constant with the given Unicode script 8961 * name or the script name alias. Script names and their aliases are 8962 * determined by The Unicode Standard. The files {@code Scripts.txt} 8963 * and {@code PropertyValueAliases.txt} define script names 8964 * and the script name aliases for a particular version of the 8965 * standard. The {@link Character} class specifies the version of 8966 * the standard that it supports. 
8967 * <p> 8968 * Character case is ignored for all of the valid script names. 8969 * The en_US locale's case mapping rules are used to provide 8970 * case-insensitive string comparisons for script name validation. 8971 * 8972 * @param scriptName A {@code UnicodeScript} name. 8973 * @return The {@code UnicodeScript} constant identified 8974 * by {@code scriptName} 8975 * @throws IllegalArgumentException if {@code scriptName} is an 8976 * invalid name 8977 * @throws NullPointerException if {@code scriptName} is null 8978 */ forName(String scriptName)8979 public static final UnicodeScript forName(String scriptName) { 8980 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 8981 //.replace(' ', '_')); 8982 UnicodeScript sc = aliases.get(scriptName); 8983 if (sc != null) 8984 return sc; 8985 return valueOf(scriptName); 8986 } 8987 } 8988 8989 /** 8990 * The value of the {@code Character}. 8991 * 8992 * @serial 8993 */ 8994 private final char value; 8995 8996 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 8997 @java.io.Serial 8998 private static final long serialVersionUID = 3786198910865385080L; 8999 9000 /** 9001 * Constructs a newly allocated {@code Character} object that 9002 * represents the specified {@code char} value. 9003 * 9004 * @param value the value to be represented by the 9005 * {@code Character} object. 9006 * 9007 * @deprecated 9008 * It is rarely appropriate to use this constructor. The static factory 9009 * {@link #valueOf(char)} is generally a better choice, as it is 9010 * likely to yield significantly better space and time performance. 9011 */ 9012 // Android-changed: not yet forRemoval on Android. 
9013 @Deprecated(since="9"/*, forRemoval = true*/) Character(char value)9014 public Character(char value) { 9015 this.value = value; 9016 } 9017 9018 private static final class CharacterCache { CharacterCache()9019 private CharacterCache(){} 9020 9021 @Stable 9022 static final Character[] cache; 9023 static Character[] archivedCache; 9024 9025 static { 9026 int size = 127 + 1; 9027 9028 // Load and use the archived cache if it exists 9029 // Android-removed: CDS is not used on Android. 9030 // CDS.initializeFromArchive(CharacterCache.class); 9031 if (archivedCache == null || archivedCache.length != size) { 9032 Character[] c = new Character[size]; 9033 for (int i = 0; i < size; i++) { 9034 c[i] = new Character((char) i); 9035 } 9036 archivedCache = c; 9037 } 9038 cache = archivedCache; 9039 } 9040 } 9041 9042 /** 9043 * Returns a {@code Character} instance representing the specified 9044 * {@code char} value. 9045 * If a new {@code Character} instance is not required, this method 9046 * should generally be used in preference to the constructor 9047 * {@link #Character(char)}, as this method is likely to yield 9048 * significantly better space and time performance by caching 9049 * frequently requested values. 9050 * 9051 * This method will always cache values in the range {@code 9052 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 9053 * cache other values outside of this range. 9054 * 9055 * @param c a char value. 9056 * @return a {@code Character} instance representing {@code c}. 9057 * @since 1.5 9058 */ 9059 @IntrinsicCandidate valueOf(char c)9060 public static Character valueOf(char c) { 9061 if (c <= 127) { // must cache 9062 return CharacterCache.cache[(int)c]; 9063 } 9064 return new Character(c); 9065 } 9066 9067 /** 9068 * Returns the value of this {@code Character} object. 9069 * @return the primitive {@code char} value represented by 9070 * this object. 
9071 */ 9072 @IntrinsicCandidate charValue()9073 public char charValue() { 9074 return value; 9075 } 9076 9077 /** 9078 * Returns a hash code for this {@code Character}; equal to the result 9079 * of invoking {@code charValue()}. 9080 * 9081 * @return a hash code value for this {@code Character} 9082 */ 9083 @Override hashCode()9084 public int hashCode() { 9085 return Character.hashCode(value); 9086 } 9087 9088 /** 9089 * Returns a hash code for a {@code char} value; compatible with 9090 * {@code Character.hashCode()}. 9091 * 9092 * @since 1.8 9093 * 9094 * @param value The {@code char} for which to return a hash code. 9095 * @return a hash code value for a {@code char} value. 9096 */ hashCode(char value)9097 public static int hashCode(char value) { 9098 return (int)value; 9099 } 9100 9101 /** 9102 * Compares this object against the specified object. 9103 * The result is {@code true} if and only if the argument is not 9104 * {@code null} and is a {@code Character} object that 9105 * represents the same {@code char} value as this object. 9106 * 9107 * @param obj the object to compare with. 9108 * @return {@code true} if the objects are the same; 9109 * {@code false} otherwise. 9110 */ equals(Object obj)9111 public boolean equals(Object obj) { 9112 if (obj instanceof Character) { 9113 return value == ((Character)obj).charValue(); 9114 } 9115 return false; 9116 } 9117 9118 /** 9119 * Returns a {@code String} object representing this 9120 * {@code Character}'s value. The result is a string of 9121 * length 1 whose sole component is the primitive 9122 * {@code char} value represented by this 9123 * {@code Character} object. 9124 * 9125 * @return a string representation of this object. 9126 */ 9127 @Override toString()9128 public String toString() { 9129 return String.valueOf(value); 9130 } 9131 9132 // Android-removed: reference to Character.toString(int) in javadoc. 9133 /** 9134 * Returns a {@code String} object representing the 9135 * specified {@code char}. 
The result is a string of length 9136 * 1 consisting solely of the specified {@code char}. 9137 * 9138 * @param c the {@code char} to be converted 9139 * @return the string representation of the specified {@code char} 9140 * @since 1.4 9141 */ toString(char c)9142 public static String toString(char c) { 9143 return String.valueOf(c); 9144 } 9145 9146 // BEGIN Android-removed: expose after String.valueOfCodePoint() is imported. 9147 /** 9148 * Returns a {@code String} object representing the 9149 * specified character (Unicode code point). The result is a string of 9150 * length 1 or 2, consisting solely of the specified {@code codePoint}. 9151 * 9152 * @param codePoint the {@code codePoint} to be converted 9153 * @return the string representation of the specified {@code codePoint} 9154 * @throws IllegalArgumentException if the specified 9155 * {@code codePoint} is not a {@linkplain #isValidCodePoint 9156 * valid Unicode code point}. 9157 * @since 11 9158 */ toString(int codePoint)9159 public static String toString(int codePoint) { 9160 return String.valueOfCodePoint(codePoint); 9161 } 9162 9163 /** 9164 * Determines whether the specified code point is a valid 9165 * <a href="http://www.unicode.org/glossary/#code_point"> 9166 * Unicode code point value</a>. 9167 * 9168 * @param codePoint the Unicode code point to be tested 9169 * @return {@code true} if the specified code point value is between 9170 * {@link #MIN_CODE_POINT} and 9171 * {@link #MAX_CODE_POINT} inclusive; 9172 * {@code false} otherwise. 9173 * @since 1.5 9174 */ isValidCodePoint(int codePoint)9175 public static boolean isValidCodePoint(int codePoint) { 9176 // Optimized form of: 9177 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 9178 int plane = codePoint >>> 16; 9179 return plane < ((MAX_CODE_POINT + 1) >>> 16); 9180 } 9181 9182 /** 9183 * Determines whether the specified character (Unicode code point) 9184 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 
9185 * Such code points can be represented using a single {@code char}. 9186 * 9187 * @param codePoint the character (Unicode code point) to be to 9188 * @return {@code true} if the specified code point is between 9189 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 9190 * {@code false} otherwise. 9191 * @since 1.7 9192 */ isBmpCodePoint(int codePoint)9193 public static boolean isBmpCodePoint(int codePoint) { 9194 return codePoint >>> 16 == 0; 9195 // Optimized form of: 9196 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 9197 // We consistently use logical shift (>>>) to facilitate 9198 // additional runtime optimizations. 9199 } 9200 9201 /** 9202 * Determines whether the specified character (Unicode code point) 9203 * is in the <a href="#supplementary">supplementary character</a> range. 9204 * 9205 * @param codePoint the character (Unicode code point) to be tested 9206 * @return {@code true} if the specified code point is between 9207 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 9208 * {@link #MAX_CODE_POINT} inclusive; 9209 * {@code false} otherwise. 9210 * @since 1.5 9211 */ isSupplementaryCodePoint(int codePoint)9212 public static boolean isSupplementaryCodePoint(int codePoint) { 9213 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 9214 && codePoint < MAX_CODE_POINT + 1; 9215 } 9216 9217 /** 9218 * Determines if the given {@code char} value is a 9219 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9220 * Unicode high-surrogate code unit</a> 9221 * (also known as <i>leading-surrogate code unit</i>). 9222 * 9223 * <p>Such values do not represent characters by themselves, 9224 * but are used in the representation of 9225 * <a href="#supplementary">supplementary characters</a> 9226 * in the UTF-16 encoding. 9227 * 9228 * @param ch the {@code char} value to be tested. 
9229 * @return {@code true} if the {@code char} value is between 9230 * {@link #MIN_HIGH_SURROGATE} and 9231 * {@link #MAX_HIGH_SURROGATE} inclusive; 9232 * {@code false} otherwise. 9233 * @see Character#isLowSurrogate(char) 9234 * @see Character.UnicodeBlock#of(int) 9235 * @since 1.5 9236 */ isHighSurrogate(char ch)9237 public static boolean isHighSurrogate(char ch) { 9238 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 9239 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 9240 } 9241 9242 /** 9243 * Determines if the given {@code char} value is a 9244 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9245 * Unicode low-surrogate code unit</a> 9246 * (also known as <i>trailing-surrogate code unit</i>). 9247 * 9248 * <p>Such values do not represent characters by themselves, 9249 * but are used in the representation of 9250 * <a href="#supplementary">supplementary characters</a> 9251 * in the UTF-16 encoding. 9252 * 9253 * @param ch the {@code char} value to be tested. 9254 * @return {@code true} if the {@code char} value is between 9255 * {@link #MIN_LOW_SURROGATE} and 9256 * {@link #MAX_LOW_SURROGATE} inclusive; 9257 * {@code false} otherwise. 9258 * @see Character#isHighSurrogate(char) 9259 * @since 1.5 9260 */ isLowSurrogate(char ch)9261 public static boolean isLowSurrogate(char ch) { 9262 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 9263 } 9264 9265 /** 9266 * Determines if the given {@code char} value is a Unicode 9267 * <i>surrogate code unit</i>. 9268 * 9269 * <p>Such values do not represent characters by themselves, 9270 * but are used in the representation of 9271 * <a href="#supplementary">supplementary characters</a> 9272 * in the UTF-16 encoding. 9273 * 9274 * <p>A char value is a surrogate code unit if and only if it is either 9275 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 9276 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 
9277 * 9278 * @param ch the {@code char} value to be tested. 9279 * @return {@code true} if the {@code char} value is between 9280 * {@link #MIN_SURROGATE} and 9281 * {@link #MAX_SURROGATE} inclusive; 9282 * {@code false} otherwise. 9283 * @since 1.7 9284 */ isSurrogate(char ch)9285 public static boolean isSurrogate(char ch) { 9286 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 9287 } 9288 9289 /** 9290 * Determines whether the specified pair of {@code char} 9291 * values is a valid 9292 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9293 * Unicode surrogate pair</a>. 9294 * 9295 * <p>This method is equivalent to the expression: 9296 * <blockquote><pre>{@code 9297 * isHighSurrogate(high) && isLowSurrogate(low) 9298 * }</pre></blockquote> 9299 * 9300 * @param high the high-surrogate code value to be tested 9301 * @param low the low-surrogate code value to be tested 9302 * @return {@code true} if the specified high and 9303 * low-surrogate code values represent a valid surrogate pair; 9304 * {@code false} otherwise. 9305 * @since 1.5 9306 */ isSurrogatePair(char high, char low)9307 public static boolean isSurrogatePair(char high, char low) { 9308 return isHighSurrogate(high) && isLowSurrogate(low); 9309 } 9310 9311 /** 9312 * Determines the number of {@code char} values needed to 9313 * represent the specified character (Unicode code point). If the 9314 * specified character is equal to or greater than 0x10000, then 9315 * the method returns 2. Otherwise, the method returns 1. 9316 * 9317 * <p>This method doesn't validate the specified character to be a 9318 * valid Unicode code point. The caller must validate the 9319 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 9320 * if necessary. 9321 * 9322 * @param codePoint the character (Unicode code point) to be tested. 9323 * @return 2 if the character is a valid supplementary character; 1 otherwise. 
9324 * @see Character#isSupplementaryCodePoint(int) 9325 * @since 1.5 9326 */ charCount(int codePoint)9327 public static int charCount(int codePoint) { 9328 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 9329 } 9330 9331 /** 9332 * Converts the specified surrogate pair to its supplementary code 9333 * point value. This method does not validate the specified 9334 * surrogate pair. The caller must validate it using {@link 9335 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 9336 * 9337 * @param high the high-surrogate code unit 9338 * @param low the low-surrogate code unit 9339 * @return the supplementary code point composed from the 9340 * specified surrogate pair. 9341 * @since 1.5 9342 */ toCodePoint(char high, char low)9343 public static int toCodePoint(char high, char low) { 9344 // Optimized form of: 9345 // return ((high - MIN_HIGH_SURROGATE) << 10) 9346 // + (low - MIN_LOW_SURROGATE) 9347 // + MIN_SUPPLEMENTARY_CODE_POINT; 9348 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 9349 - (MIN_HIGH_SURROGATE << 10) 9350 - MIN_LOW_SURROGATE); 9351 } 9352 9353 /** 9354 * Returns the code point at the given index of the 9355 * {@code CharSequence}. If the {@code char} value at 9356 * the given index in the {@code CharSequence} is in the 9357 * high-surrogate range, the following index is less than the 9358 * length of the {@code CharSequence}, and the 9359 * {@code char} value at the following index is in the 9360 * low-surrogate range, then the supplementary code point 9361 * corresponding to this surrogate pair is returned. Otherwise, 9362 * the {@code char} value at the given index is returned. 9363 * 9364 * @param seq a sequence of {@code char} values (Unicode code 9365 * units) 9366 * @param index the index to the {@code char} values (Unicode 9367 * code units) in {@code seq} to be converted 9368 * @return the Unicode code point at the given index 9369 * @throws NullPointerException if {@code seq} is null. 
9370 * @throws IndexOutOfBoundsException if the value 9371 * {@code index} is negative or not less than 9372 * {@link CharSequence#length() seq.length()}. 9373 * @since 1.5 9374 */ codePointAt(CharSequence seq, int index)9375 public static int codePointAt(CharSequence seq, int index) { 9376 char c1 = seq.charAt(index); 9377 if (isHighSurrogate(c1) && ++index < seq.length()) { 9378 char c2 = seq.charAt(index); 9379 if (isLowSurrogate(c2)) { 9380 return toCodePoint(c1, c2); 9381 } 9382 } 9383 return c1; 9384 } 9385 9386 /** 9387 * Returns the code point at the given index of the 9388 * {@code char} array. If the {@code char} value at 9389 * the given index in the {@code char} array is in the 9390 * high-surrogate range, the following index is less than the 9391 * length of the {@code char} array, and the 9392 * {@code char} value at the following index is in the 9393 * low-surrogate range, then the supplementary code point 9394 * corresponding to this surrogate pair is returned. Otherwise, 9395 * the {@code char} value at the given index is returned. 9396 * 9397 * @param a the {@code char} array 9398 * @param index the index to the {@code char} values (Unicode 9399 * code units) in the {@code char} array to be converted 9400 * @return the Unicode code point at the given index 9401 * @throws NullPointerException if {@code a} is null. 9402 * @throws IndexOutOfBoundsException if the value 9403 * {@code index} is negative or not less than 9404 * the length of the {@code char} array. 9405 * @since 1.5 9406 */ codePointAt(char[] a, int index)9407 public static int codePointAt(char[] a, int index) { 9408 return codePointAtImpl(a, index, a.length); 9409 } 9410 9411 /** 9412 * Returns the code point at the given index of the 9413 * {@code char} array, where only array elements with 9414 * {@code index} less than {@code limit} can be used. 
If 9415 * the {@code char} value at the given index in the 9416 * {@code char} array is in the high-surrogate range, the 9417 * following index is less than the {@code limit}, and the 9418 * {@code char} value at the following index is in the 9419 * low-surrogate range, then the supplementary code point 9420 * corresponding to this surrogate pair is returned. Otherwise, 9421 * the {@code char} value at the given index is returned. 9422 * 9423 * @param a the {@code char} array 9424 * @param index the index to the {@code char} values (Unicode 9425 * code units) in the {@code char} array to be converted 9426 * @param limit the index after the last array element that 9427 * can be used in the {@code char} array 9428 * @return the Unicode code point at the given index 9429 * @throws NullPointerException if {@code a} is null. 9430 * @throws IndexOutOfBoundsException if the {@code index} 9431 * argument is negative or not less than the {@code limit} 9432 * argument, or if the {@code limit} argument is negative or 9433 * greater than the length of the {@code char} array. 9434 * @since 1.5 9435 */ codePointAt(char[] a, int index, int limit)9436 public static int codePointAt(char[] a, int index, int limit) { 9437 if (index >= limit || index < 0 || limit > a.length) { 9438 throw new IndexOutOfBoundsException(); 9439 } 9440 return codePointAtImpl(a, index, limit); 9441 } 9442 9443 // throws ArrayIndexOutOfBoundsException if index out of bounds codePointAtImpl(char[] a, int index, int limit)9444 static int codePointAtImpl(char[] a, int index, int limit) { 9445 char c1 = a[index]; 9446 if (isHighSurrogate(c1) && ++index < limit) { 9447 char c2 = a[index]; 9448 if (isLowSurrogate(c2)) { 9449 return toCodePoint(c1, c2); 9450 } 9451 } 9452 return c1; 9453 } 9454 9455 /** 9456 * Returns the code point preceding the given index of the 9457 * {@code CharSequence}. 
If the {@code char} value at 9458 * {@code (index - 1)} in the {@code CharSequence} is in 9459 * the low-surrogate range, {@code (index - 2)} is not 9460 * negative, and the {@code char} value at {@code (index - 2)} 9461 * in the {@code CharSequence} is in the 9462 * high-surrogate range, then the supplementary code point 9463 * corresponding to this surrogate pair is returned. Otherwise, 9464 * the {@code char} value at {@code (index - 1)} is 9465 * returned. 9466 * 9467 * @param seq the {@code CharSequence} instance 9468 * @param index the index following the code point that should be returned 9469 * @return the Unicode code point value before the given index. 9470 * @throws NullPointerException if {@code seq} is null. 9471 * @throws IndexOutOfBoundsException if the {@code index} 9472 * argument is less than 1 or greater than {@link 9473 * CharSequence#length() seq.length()}. 9474 * @since 1.5 9475 */ codePointBefore(CharSequence seq, int index)9476 public static int codePointBefore(CharSequence seq, int index) { 9477 char c2 = seq.charAt(--index); 9478 if (isLowSurrogate(c2) && index > 0) { 9479 char c1 = seq.charAt(--index); 9480 if (isHighSurrogate(c1)) { 9481 return toCodePoint(c1, c2); 9482 } 9483 } 9484 return c2; 9485 } 9486 9487 /** 9488 * Returns the code point preceding the given index of the 9489 * {@code char} array. If the {@code char} value at 9490 * {@code (index - 1)} in the {@code char} array is in 9491 * the low-surrogate range, {@code (index - 2)} is not 9492 * negative, and the {@code char} value at {@code (index - 2)} 9493 * in the {@code char} array is in the 9494 * high-surrogate range, then the supplementary code point 9495 * corresponding to this surrogate pair is returned. Otherwise, 9496 * the {@code char} value at {@code (index - 1)} is 9497 * returned. 
9498 * 9499 * @param a the {@code char} array 9500 * @param index the index following the code point that should be returned 9501 * @return the Unicode code point value before the given index. 9502 * @throws NullPointerException if {@code a} is null. 9503 * @throws IndexOutOfBoundsException if the {@code index} 9504 * argument is less than 1 or greater than the length of the 9505 * {@code char} array 9506 * @since 1.5 9507 */ codePointBefore(char[] a, int index)9508 public static int codePointBefore(char[] a, int index) { 9509 return codePointBeforeImpl(a, index, 0); 9510 } 9511 9512 /** 9513 * Returns the code point preceding the given index of the 9514 * {@code char} array, where only array elements with 9515 * {@code index} greater than or equal to {@code start} 9516 * can be used. If the {@code char} value at {@code (index - 1)} 9517 * in the {@code char} array is in the 9518 * low-surrogate range, {@code (index - 2)} is not less than 9519 * {@code start}, and the {@code char} value at 9520 * {@code (index - 2)} in the {@code char} array is in 9521 * the high-surrogate range, then the supplementary code point 9522 * corresponding to this surrogate pair is returned. Otherwise, 9523 * the {@code char} value at {@code (index - 1)} is 9524 * returned. 9525 * 9526 * @param a the {@code char} array 9527 * @param index the index following the code point that should be returned 9528 * @param start the index of the first array element in the 9529 * {@code char} array 9530 * @return the Unicode code point value before the given index. 9531 * @throws NullPointerException if {@code a} is null. 9532 * @throws IndexOutOfBoundsException if the {@code index} 9533 * argument is not greater than the {@code start} argument or 9534 * is greater than the length of the {@code char} array, or 9535 * if the {@code start} argument is negative or not less than 9536 * the length of the {@code char} array. 
9537 * @since 1.5 9538 */ codePointBefore(char[] a, int index, int start)9539 public static int codePointBefore(char[] a, int index, int start) { 9540 if (index <= start || start < 0 || index > a.length) { 9541 throw new IndexOutOfBoundsException(); 9542 } 9543 return codePointBeforeImpl(a, index, start); 9544 } 9545 9546 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds codePointBeforeImpl(char[] a, int index, int start)9547 static int codePointBeforeImpl(char[] a, int index, int start) { 9548 char c2 = a[--index]; 9549 if (isLowSurrogate(c2) && index > start) { 9550 char c1 = a[--index]; 9551 if (isHighSurrogate(c1)) { 9552 return toCodePoint(c1, c2); 9553 } 9554 } 9555 return c2; 9556 } 9557 9558 /** 9559 * Returns the leading surrogate (a 9560 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9561 * high surrogate code unit</a>) of the 9562 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9563 * surrogate pair</a> 9564 * representing the specified supplementary character (Unicode 9565 * code point) in the UTF-16 encoding. If the specified character 9566 * is not a 9567 * <a href="Character.html#supplementary">supplementary character</a>, 9568 * an unspecified {@code char} is returned. 9569 * 9570 * <p>If 9571 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9572 * is {@code true}, then 9573 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 9574 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 9575 * are also always {@code true}. 
9576 * 9577 * @param codePoint a supplementary character (Unicode code point) 9578 * @return the leading surrogate code unit used to represent the 9579 * character in the UTF-16 encoding 9580 * @since 1.7 9581 */ highSurrogate(int codePoint)9582 public static char highSurrogate(int codePoint) { 9583 return (char) ((codePoint >>> 10) 9584 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 9585 } 9586 9587 /** 9588 * Returns the trailing surrogate (a 9589 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9590 * low surrogate code unit</a>) of the 9591 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9592 * surrogate pair</a> 9593 * representing the specified supplementary character (Unicode 9594 * code point) in the UTF-16 encoding. If the specified character 9595 * is not a 9596 * <a href="Character.html#supplementary">supplementary character</a>, 9597 * an unspecified {@code char} is returned. 9598 * 9599 * <p>If 9600 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9601 * is {@code true}, then 9602 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 9603 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 9604 * are also always {@code true}. 9605 * 9606 * @param codePoint a supplementary character (Unicode code point) 9607 * @return the trailing surrogate code unit used to represent the 9608 * character in the UTF-16 encoding 9609 * @since 1.7 9610 */ lowSurrogate(int codePoint)9611 public static char lowSurrogate(int codePoint) { 9612 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 9613 } 9614 9615 /** 9616 * Converts the specified character (Unicode code point) to its 9617 * UTF-16 representation. If the specified code point is a BMP 9618 * (Basic Multilingual Plane or Plane 0) value, the same value is 9619 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 9620 * specified code point is a supplementary character, its 9621 * surrogate values are stored in {@code dst[dstIndex]} 9622 * (high-surrogate) and {@code dst[dstIndex+1]} 9623 * (low-surrogate), and 2 is returned. 9624 * 9625 * @param codePoint the character (Unicode code point) to be converted. 9626 * @param dst an array of {@code char} in which the 9627 * {@code codePoint}'s UTF-16 value is stored. 9628 * @param dstIndex the start index into the {@code dst} 9629 * array where the converted value is stored. 9630 * @return 1 if the code point is a BMP code point, 2 if the 9631 * code point is a supplementary code point. 9632 * @throws IllegalArgumentException if the specified 9633 * {@code codePoint} is not a valid Unicode code point. 9634 * @throws NullPointerException if the specified {@code dst} is null. 9635 * @throws IndexOutOfBoundsException if {@code dstIndex} 9636 * is negative or not less than {@code dst.length}, or if 9637 * {@code dst} at {@code dstIndex} doesn't have enough 9638 * array element(s) to store the resulting {@code char} 9639 * value(s). (If {@code dstIndex} is equal to 9640 * {@code dst.length-1} and the specified 9641 * {@code codePoint} is a supplementary character, the 9642 * high-surrogate value is not stored in 9643 * {@code dst[dstIndex]}.) 9644 * @since 1.5 9645 */ toChars(int codePoint, char[] dst, int dstIndex)9646 public static int toChars(int codePoint, char[] dst, int dstIndex) { 9647 if (isBmpCodePoint(codePoint)) { 9648 dst[dstIndex] = (char) codePoint; 9649 return 1; 9650 } else if (isValidCodePoint(codePoint)) { 9651 toSurrogates(codePoint, dst, dstIndex); 9652 return 2; 9653 } else { 9654 throw new IllegalArgumentException( 9655 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9656 } 9657 } 9658 9659 /** 9660 * Converts the specified character (Unicode code point) to its 9661 * UTF-16 representation stored in a {@code char} array. 
If 9662 * the specified code point is a BMP (Basic Multilingual Plane or 9663 * Plane 0) value, the resulting {@code char} array has 9664 * the same value as {@code codePoint}. If the specified code 9665 * point is a supplementary code point, the resulting 9666 * {@code char} array has the corresponding surrogate pair. 9667 * 9668 * @param codePoint a Unicode code point 9669 * @return a {@code char} array having 9670 * {@code codePoint}'s UTF-16 representation. 9671 * @throws IllegalArgumentException if the specified 9672 * {@code codePoint} is not a valid Unicode code point. 9673 * @since 1.5 9674 */ toChars(int codePoint)9675 public static char[] toChars(int codePoint) { 9676 if (isBmpCodePoint(codePoint)) { 9677 return new char[] { (char) codePoint }; 9678 } else if (isValidCodePoint(codePoint)) { 9679 char[] result = new char[2]; 9680 toSurrogates(codePoint, result, 0); 9681 return result; 9682 } else { 9683 throw new IllegalArgumentException( 9684 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9685 } 9686 } 9687 toSurrogates(int codePoint, char[] dst, int index)9688 static void toSurrogates(int codePoint, char[] dst, int index) { 9689 // We write elements "backwards" to guarantee all-or-nothing 9690 dst[index+1] = lowSurrogate(codePoint); 9691 dst[index] = highSurrogate(codePoint); 9692 } 9693 9694 /** 9695 * Returns the number of Unicode code points in the text range of 9696 * the specified char sequence. The text range begins at the 9697 * specified {@code beginIndex} and extends to the 9698 * {@code char} at index {@code endIndex - 1}. Thus the 9699 * length (in {@code char}s) of the text range is 9700 * {@code endIndex-beginIndex}. Unpaired surrogates within 9701 * the text range count as one code point each. 9702 * 9703 * @param seq the char sequence 9704 * @param beginIndex the index to the first {@code char} of 9705 * the text range. 9706 * @param endIndex the index after the last {@code char} of 9707 * the text range. 
9708 * @return the number of Unicode code points in the specified text 9709 * range 9710 * @throws NullPointerException if {@code seq} is null. 9711 * @throws IndexOutOfBoundsException if the 9712 * {@code beginIndex} is negative, or {@code endIndex} 9713 * is larger than the length of the given sequence, or 9714 * {@code beginIndex} is larger than {@code endIndex}. 9715 * @since 1.5 9716 */ codePointCount(CharSequence seq, int beginIndex, int endIndex)9717 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 9718 Objects.checkFromToIndex(beginIndex, endIndex, seq.length()); 9719 int n = endIndex - beginIndex; 9720 for (int i = beginIndex; i < endIndex; ) { 9721 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 9722 isLowSurrogate(seq.charAt(i))) { 9723 n--; 9724 i++; 9725 } 9726 } 9727 return n; 9728 } 9729 9730 /** 9731 * Returns the number of Unicode code points in a subarray of the 9732 * {@code char} array argument. The {@code offset} 9733 * argument is the index of the first {@code char} of the 9734 * subarray and the {@code count} argument specifies the 9735 * length of the subarray in {@code char}s. Unpaired 9736 * surrogates within the subarray count as one code point each. 9737 * 9738 * @param a the {@code char} array 9739 * @param offset the index of the first {@code char} in the 9740 * given {@code char} array 9741 * @param count the length of the subarray in {@code char}s 9742 * @return the number of Unicode code points in the specified subarray 9743 * @throws NullPointerException if {@code a} is null. 9744 * @throws IndexOutOfBoundsException if {@code offset} or 9745 * {@code count} is negative, or if {@code offset + 9746 * count} is larger than the length of the given array. 
9747 * @since 1.5 9748 */ codePointCount(char[] a, int offset, int count)9749 public static int codePointCount(char[] a, int offset, int count) { 9750 Objects.checkFromIndexSize(offset, count, a.length); 9751 return codePointCountImpl(a, offset, count); 9752 } 9753 codePointCountImpl(char[] a, int offset, int count)9754 static int codePointCountImpl(char[] a, int offset, int count) { 9755 int endIndex = offset + count; 9756 int n = count; 9757 for (int i = offset; i < endIndex; ) { 9758 if (isHighSurrogate(a[i++]) && i < endIndex && 9759 isLowSurrogate(a[i])) { 9760 n--; 9761 i++; 9762 } 9763 } 9764 return n; 9765 } 9766 9767 /** 9768 * Returns the index within the given char sequence that is offset 9769 * from the given {@code index} by {@code codePointOffset} 9770 * code points. Unpaired surrogates within the text range given by 9771 * {@code index} and {@code codePointOffset} count as 9772 * one code point each. 9773 * 9774 * @param seq the char sequence 9775 * @param index the index to be offset 9776 * @param codePointOffset the offset in code points 9777 * @return the index within the char sequence 9778 * @throws NullPointerException if {@code seq} is null. 9779 * @throws IndexOutOfBoundsException if {@code index} 9780 * is negative or larger than the length of the char sequence, 9781 * or if {@code codePointOffset} is positive and the 9782 * subsequence starting with {@code index} has fewer than 9783 * {@code codePointOffset} code points, or if 9784 * {@code codePointOffset} is negative and the subsequence 9785 * before {@code index} has fewer than the absolute value 9786 * of {@code codePointOffset} code points. 
9787 * @since 1.5 9788 */ offsetByCodePoints(CharSequence seq, int index, int codePointOffset)9789 public static int offsetByCodePoints(CharSequence seq, int index, 9790 int codePointOffset) { 9791 int length = seq.length(); 9792 if (index < 0 || index > length) { 9793 throw new IndexOutOfBoundsException(); 9794 } 9795 9796 int x = index; 9797 if (codePointOffset >= 0) { 9798 int i; 9799 for (i = 0; x < length && i < codePointOffset; i++) { 9800 if (isHighSurrogate(seq.charAt(x++)) && x < length && 9801 isLowSurrogate(seq.charAt(x))) { 9802 x++; 9803 } 9804 } 9805 if (i < codePointOffset) { 9806 throw new IndexOutOfBoundsException(); 9807 } 9808 } else { 9809 int i; 9810 for (i = codePointOffset; x > 0 && i < 0; i++) { 9811 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 9812 isHighSurrogate(seq.charAt(x-1))) { 9813 x--; 9814 } 9815 } 9816 if (i < 0) { 9817 throw new IndexOutOfBoundsException(); 9818 } 9819 } 9820 return x; 9821 } 9822 9823 /** 9824 * Returns the index within the given {@code char} subarray 9825 * that is offset from the given {@code index} by 9826 * {@code codePointOffset} code points. The 9827 * {@code start} and {@code count} arguments specify a 9828 * subarray of the {@code char} array. Unpaired surrogates 9829 * within the text range given by {@code index} and 9830 * {@code codePointOffset} count as one code point each. 9831 * 9832 * @param a the {@code char} array 9833 * @param start the index of the first {@code char} of the 9834 * subarray 9835 * @param count the length of the subarray in {@code char}s 9836 * @param index the index to be offset 9837 * @param codePointOffset the offset in code points 9838 * @return the index within the subarray 9839 * @throws NullPointerException if {@code a} is null. 
9840 * @throws IndexOutOfBoundsException 9841 * if {@code start} or {@code count} is negative, 9842 * or if {@code start + count} is larger than the length of 9843 * the given array, 9844 * or if {@code index} is less than {@code start} or 9845 * larger then {@code start + count}, 9846 * or if {@code codePointOffset} is positive and the text range 9847 * starting with {@code index} and ending with {@code start + count - 1} 9848 * has fewer than {@code codePointOffset} code 9849 * points, 9850 * or if {@code codePointOffset} is negative and the text range 9851 * starting with {@code start} and ending with {@code index - 1} 9852 * has fewer than the absolute value of 9853 * {@code codePointOffset} code points. 9854 * @since 1.5 9855 */ offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset)9856 public static int offsetByCodePoints(char[] a, int start, int count, 9857 int index, int codePointOffset) { 9858 if (count > a.length-start || start < 0 || count < 0 9859 || index < start || index > start+count) { 9860 throw new IndexOutOfBoundsException(); 9861 } 9862 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 9863 } 9864 offsetByCodePointsImpl(char[]a, int start, int count, int index, int codePointOffset)9865 static int offsetByCodePointsImpl(char[]a, int start, int count, 9866 int index, int codePointOffset) { 9867 int x = index; 9868 if (codePointOffset >= 0) { 9869 int limit = start + count; 9870 int i; 9871 for (i = 0; x < limit && i < codePointOffset; i++) { 9872 if (isHighSurrogate(a[x++]) && x < limit && 9873 isLowSurrogate(a[x])) { 9874 x++; 9875 } 9876 } 9877 if (i < codePointOffset) { 9878 throw new IndexOutOfBoundsException(); 9879 } 9880 } else { 9881 int i; 9882 for (i = codePointOffset; x > start && i < 0; i++) { 9883 if (isLowSurrogate(a[--x]) && x > start && 9884 isHighSurrogate(a[x-1])) { 9885 x--; 9886 } 9887 } 9888 if (i < 0) { 9889 throw new IndexOutOfBoundsException(); 9890 } 9891 } 9892 return x; 
9893 } 9894 9895 /** 9896 * Determines if the specified character is a lowercase character. 9897 * <p> 9898 * A character is lowercase if its general category type, provided 9899 * by {@code Character.getType(ch)}, is 9900 * {@code LOWERCASE_LETTER}, or it has contributory property 9901 * Other_Lowercase as defined by the Unicode Standard. 9902 * <p> 9903 * The following are examples of lowercase characters: 9904 * <blockquote><pre> 9905 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9906 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9907 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9908 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9909 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9910 * </pre></blockquote> 9911 * <p> Many other Unicode characters are lowercase too. 9912 * 9913 * <p><b>Note:</b> This method cannot handle <a 9914 * href="#supplementary"> supplementary characters</a>. To support 9915 * all Unicode characters, including supplementary characters, use 9916 * the {@link #isLowerCase(int)} method. 9917 * 9918 * @param ch the character to be tested. 9919 * @return {@code true} if the character is lowercase; 9920 * {@code false} otherwise. 9921 * @see Character#isLowerCase(char) 9922 * @see Character#isTitleCase(char) 9923 * @see Character#toLowerCase(char) 9924 * @see Character#getType(char) 9925 */ isLowerCase(char ch)9926 public static boolean isLowerCase(char ch) { 9927 return isLowerCase((int)ch); 9928 } 9929 9930 /** 9931 * Determines if the specified character (Unicode code point) is a 9932 * lowercase character. 9933 * <p> 9934 * A character is lowercase if its general category type, provided 9935 * by {@link Character#getType getType(codePoint)}, is 9936 * {@code LOWERCASE_LETTER}, or it has contributory property 9937 * Other_Lowercase as defined by the Unicode Standard. 
9938 * <p> 9939 * The following are examples of lowercase characters: 9940 * <blockquote><pre> 9941 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9942 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9943 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9944 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9945 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9946 * </pre></blockquote> 9947 * <p> Many other Unicode characters are lowercase too. 9948 * 9949 * @param codePoint the character (Unicode code point) to be tested. 9950 * @return {@code true} if the character is lowercase; 9951 * {@code false} otherwise. 9952 * @see Character#isLowerCase(int) 9953 * @see Character#isTitleCase(int) 9954 * @see Character#toLowerCase(int) 9955 * @see Character#getType(int) 9956 * @since 1.5 9957 */ 9958 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 9959 /* 9960 public static boolean isLowerCase(int codePoint) { 9961 return CharacterData.of(codePoint).isLowerCase(codePoint); 9962 } 9963 */ isLowerCase(int codePoint)9964 public static boolean isLowerCase(int codePoint) { 9965 return isLowerCaseImpl(codePoint); 9966 } 9967 9968 @FastNative isLowerCaseImpl(int codePoint)9969 static native boolean isLowerCaseImpl(int codePoint); 9970 // END Android-changed: Reimplement methods natively on top of ICU4C. 9971 9972 /** 9973 * Determines if the specified character is an uppercase character. 9974 * <p> 9975 * A character is uppercase if its general category type, provided by 9976 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 9977 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 
9978 * <p> 9979 * The following are examples of uppercase characters: 9980 * <blockquote><pre> 9981 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9982 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9983 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9984 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9985 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9986 * </pre></blockquote> 9987 * <p> Many other Unicode characters are uppercase too. 9988 * 9989 * <p><b>Note:</b> This method cannot handle <a 9990 * href="#supplementary"> supplementary characters</a>. To support 9991 * all Unicode characters, including supplementary characters, use 9992 * the {@link #isUpperCase(int)} method. 9993 * 9994 * @param ch the character to be tested. 9995 * @return {@code true} if the character is uppercase; 9996 * {@code false} otherwise. 9997 * @see Character#isLowerCase(char) 9998 * @see Character#isTitleCase(char) 9999 * @see Character#toUpperCase(char) 10000 * @see Character#getType(char) 10001 * @since 1.0 10002 */ isUpperCase(char ch)10003 public static boolean isUpperCase(char ch) { 10004 return isUpperCase((int)ch); 10005 } 10006 10007 /** 10008 * Determines if the specified character (Unicode code point) is an uppercase character. 10009 * <p> 10010 * A character is uppercase if its general category type, provided by 10011 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 10012 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 
10013 * <p> 10014 * The following are examples of uppercase characters: 10015 * <blockquote><pre> 10016 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 10017 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 10018 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 10019 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 10020 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 10021 * </pre></blockquote> 10022 * <p> Many other Unicode characters are uppercase too. 10023 * 10024 * @param codePoint the character (Unicode code point) to be tested. 10025 * @return {@code true} if the character is uppercase; 10026 * {@code false} otherwise. 10027 * @see Character#isLowerCase(int) 10028 * @see Character#isTitleCase(int) 10029 * @see Character#toUpperCase(int) 10030 * @see Character#getType(int) 10031 * @since 1.5 10032 */ 10033 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10034 /* 10035 public static boolean isUpperCase(int codePoint) { 10036 return CharacterData.of(codePoint).isUpperCase(codePoint); 10037 } 10038 */ isUpperCase(int codePoint)10039 public static boolean isUpperCase(int codePoint) { 10040 return isUpperCaseImpl(codePoint); 10041 } 10042 10043 @FastNative isUpperCaseImpl(int codePoint)10044 static native boolean isUpperCaseImpl(int codePoint); 10045 // END Android-changed: Reimplement methods natively on top of ICU4C. 10046 10047 /** 10048 * Determines if the specified character is a titlecase character. 10049 * <p> 10050 * A character is a titlecase character if its general 10051 * category type, provided by {@code Character.getType(ch)}, 10052 * is {@code TITLECASE_LETTER}. 10053 * <p> 10054 * Some characters look like pairs of Latin letters. For example, there 10055 * is an uppercase letter that looks like "LJ" and has a corresponding 10056 * lowercase letter that looks like "lj". 
A third form, which looks like "Lj", 10057 * is the appropriate form to use when rendering a word in lowercase 10058 * with initial capitals, as for a book title. 10059 * <p> 10060 * These are some of the Unicode characters for which this method returns 10061 * {@code true}: 10062 * <ul> 10063 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10064 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10065 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10066 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10067 * </ul> 10068 * <p> Many other Unicode characters are titlecase too. 10069 * 10070 * <p><b>Note:</b> This method cannot handle <a 10071 * href="#supplementary"> supplementary characters</a>. To support 10072 * all Unicode characters, including supplementary characters, use 10073 * the {@link #isTitleCase(int)} method. 10074 * 10075 * @param ch the character to be tested. 10076 * @return {@code true} if the character is titlecase; 10077 * {@code false} otherwise. 10078 * @see Character#isLowerCase(char) 10079 * @see Character#isUpperCase(char) 10080 * @see Character#toTitleCase(char) 10081 * @see Character#getType(char) 10082 * @since 1.0.2 10083 */ isTitleCase(char ch)10084 public static boolean isTitleCase(char ch) { 10085 return isTitleCase((int)ch); 10086 } 10087 10088 /** 10089 * Determines if the specified character (Unicode code point) is a titlecase character. 10090 * <p> 10091 * A character is a titlecase character if its general 10092 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10093 * is {@code TITLECASE_LETTER}. 10094 * <p> 10095 * Some characters look like pairs of Latin letters. For example, there 10096 * is an uppercase letter that looks like "LJ" and has a corresponding 10097 * lowercase letter that looks like "lj". 
A third form, which looks like "Lj", 10098 * is the appropriate form to use when rendering a word in lowercase 10099 * with initial capitals, as for a book title. 10100 * <p> 10101 * These are some of the Unicode characters for which this method returns 10102 * {@code true}: 10103 * <ul> 10104 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 10105 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 10106 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 10107 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 10108 * </ul> 10109 * <p> Many other Unicode characters are titlecase too. 10110 * 10111 * @param codePoint the character (Unicode code point) to be tested. 10112 * @return {@code true} if the character is titlecase; 10113 * {@code false} otherwise. 10114 * @see Character#isLowerCase(int) 10115 * @see Character#isUpperCase(int) 10116 * @see Character#toTitleCase(int) 10117 * @see Character#getType(int) 10118 * @since 1.5 10119 */ 10120 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10121 /* 10122 public static boolean isTitleCase(int codePoint) { 10123 return getType(codePoint) == Character.TITLECASE_LETTER; 10124 } 10125 */ isTitleCase(int codePoint)10126 public static boolean isTitleCase(int codePoint) { 10127 return isTitleCaseImpl(codePoint); 10128 } 10129 10130 @FastNative isTitleCaseImpl(int codePoint)10131 static native boolean isTitleCaseImpl(int codePoint); 10132 // END Android-changed: Reimplement methods natively on top of ICU4C. 10133 10134 /** 10135 * Determines if the specified character is a digit. 10136 * <p> 10137 * A character is a digit if its general category type, provided 10138 * by {@code Character.getType(ch)}, is 10139 * {@code DECIMAL_DIGIT_NUMBER}. 
10140 * <p> 10141 * Some Unicode character ranges that contain digits: 10142 * <ul> 10143 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10144 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10145 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10146 * Arabic-Indic digits 10147 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10148 * Extended Arabic-Indic digits 10149 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10150 * Devanagari digits 10151 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10152 * Fullwidth digits 10153 * </ul> 10154 * 10155 * Many other character ranges contain digits as well. 10156 * 10157 * <p><b>Note:</b> This method cannot handle <a 10158 * href="#supplementary"> supplementary characters</a>. To support 10159 * all Unicode characters, including supplementary characters, use 10160 * the {@link #isDigit(int)} method. 10161 * 10162 * @param ch the character to be tested. 10163 * @return {@code true} if the character is a digit; 10164 * {@code false} otherwise. 10165 * @see Character#digit(char, int) 10166 * @see Character#forDigit(int, int) 10167 * @see Character#getType(char) 10168 */ isDigit(char ch)10169 public static boolean isDigit(char ch) { 10170 return isDigit((int)ch); 10171 } 10172 10173 /** 10174 * Determines if the specified character (Unicode code point) is a digit. 10175 * <p> 10176 * A character is a digit if its general category type, provided 10177 * by {@link Character#getType(int) getType(codePoint)}, is 10178 * {@code DECIMAL_DIGIT_NUMBER}. 
10179 * <p> 10180 * Some Unicode character ranges that contain digits: 10181 * <ul> 10182 * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'}, 10183 * ISO-LATIN-1 digits ({@code '0'} through {@code '9'}) 10184 * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'}, 10185 * Arabic-Indic digits 10186 * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'}, 10187 * Extended Arabic-Indic digits 10188 * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'}, 10189 * Devanagari digits 10190 * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'}, 10191 * Fullwidth digits 10192 * </ul> 10193 * 10194 * Many other character ranges contain digits as well. 10195 * 10196 * @param codePoint the character (Unicode code point) to be tested. 10197 * @return {@code true} if the character is a digit; 10198 * {@code false} otherwise. 10199 * @see Character#forDigit(int, int) 10200 * @see Character#getType(int) 10201 * @since 1.5 10202 */ 10203 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10204 /* 10205 public static boolean isDigit(int codePoint) { 10206 return CharacterData.of(codePoint).isDigit(codePoint); 10207 } 10208 */ isDigit(int codePoint)10209 public static boolean isDigit(int codePoint) { 10210 return isDigitImpl(codePoint); 10211 } 10212 10213 @FastNative isDigitImpl(int codePoint)10214 static native boolean isDigitImpl(int codePoint); 10215 // END Android-changed: Reimplement methods natively on top of ICU4C. 10216 10217 /** 10218 * Determines if a character is defined in Unicode. 10219 * <p> 10220 * A character is defined if at least one of the following is true: 10221 * <ul> 10222 * <li>It has an entry in the UnicodeData file. 10223 * <li>It has a value in a range defined by the UnicodeData file. 10224 * </ul> 10225 * 10226 * <p><b>Note:</b> This method cannot handle <a 10227 * href="#supplementary"> supplementary characters</a>. 
To support 10228 * all Unicode characters, including supplementary characters, use 10229 * the {@link #isDefined(int)} method. 10230 * 10231 * @param ch the character to be tested 10232 * @return {@code true} if the character has a defined meaning 10233 * in Unicode; {@code false} otherwise. 10234 * @see Character#isDigit(char) 10235 * @see Character#isLetter(char) 10236 * @see Character#isLetterOrDigit(char) 10237 * @see Character#isLowerCase(char) 10238 * @see Character#isTitleCase(char) 10239 * @see Character#isUpperCase(char) 10240 * @since 1.0.2 10241 */ isDefined(char ch)10242 public static boolean isDefined(char ch) { 10243 return isDefined((int)ch); 10244 } 10245 10246 /** 10247 * Determines if a character (Unicode code point) is defined in Unicode. 10248 * <p> 10249 * A character is defined if at least one of the following is true: 10250 * <ul> 10251 * <li>It has an entry in the UnicodeData file. 10252 * <li>It has a value in a range defined by the UnicodeData file. 10253 * </ul> 10254 * 10255 * @param codePoint the character (Unicode code point) to be tested. 10256 * @return {@code true} if the character has a defined meaning 10257 * in Unicode; {@code false} otherwise. 10258 * @see Character#isDigit(int) 10259 * @see Character#isLetter(int) 10260 * @see Character#isLetterOrDigit(int) 10261 * @see Character#isLowerCase(int) 10262 * @see Character#isTitleCase(int) 10263 * @see Character#isUpperCase(int) 10264 * @since 1.5 10265 */ 10266 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10267 /* 10268 public static boolean isDefined(int codePoint) { 10269 return getType(codePoint) != Character.UNASSIGNED; 10270 } 10271 */ isDefined(int codePoint)10272 public static boolean isDefined(int codePoint) { 10273 return isDefinedImpl(codePoint); 10274 } 10275 10276 @FastNative isDefinedImpl(int codePoint)10277 static native boolean isDefinedImpl(int codePoint); 10278 // END Android-changed: Reimplement methods natively on top of ICU4C. 
10279 10280 /** 10281 * Determines if the specified character is a letter. 10282 * <p> 10283 * A character is considered to be a letter if its general 10284 * category type, provided by {@code Character.getType(ch)}, 10285 * is any of the following: 10286 * <ul> 10287 * <li> {@code UPPERCASE_LETTER} 10288 * <li> {@code LOWERCASE_LETTER} 10289 * <li> {@code TITLECASE_LETTER} 10290 * <li> {@code MODIFIER_LETTER} 10291 * <li> {@code OTHER_LETTER} 10292 * </ul> 10293 * 10294 * Not all letters have case. Many characters are 10295 * letters but are neither uppercase nor lowercase nor titlecase. 10296 * 10297 * <p><b>Note:</b> This method cannot handle <a 10298 * href="#supplementary"> supplementary characters</a>. To support 10299 * all Unicode characters, including supplementary characters, use 10300 * the {@link #isLetter(int)} method. 10301 * 10302 * @param ch the character to be tested. 10303 * @return {@code true} if the character is a letter; 10304 * {@code false} otherwise. 10305 * @see Character#isDigit(char) 10306 * @see Character#isJavaIdentifierStart(char) 10307 * @see Character#isJavaLetter(char) 10308 * @see Character#isJavaLetterOrDigit(char) 10309 * @see Character#isLetterOrDigit(char) 10310 * @see Character#isLowerCase(char) 10311 * @see Character#isTitleCase(char) 10312 * @see Character#isUnicodeIdentifierStart(char) 10313 * @see Character#isUpperCase(char) 10314 */ isLetter(char ch)10315 public static boolean isLetter(char ch) { 10316 return isLetter((int)ch); 10317 } 10318 10319 /** 10320 * Determines if the specified character (Unicode code point) is a letter. 
10321 * <p> 10322 * A character is considered to be a letter if its general 10323 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 10324 * is any of the following: 10325 * <ul> 10326 * <li> {@code UPPERCASE_LETTER} 10327 * <li> {@code LOWERCASE_LETTER} 10328 * <li> {@code TITLECASE_LETTER} 10329 * <li> {@code MODIFIER_LETTER} 10330 * <li> {@code OTHER_LETTER} 10331 * </ul> 10332 * 10333 * Not all letters have case. Many characters are 10334 * letters but are neither uppercase nor lowercase nor titlecase. 10335 * 10336 * @param codePoint the character (Unicode code point) to be tested. 10337 * @return {@code true} if the character is a letter; 10338 * {@code false} otherwise. 10339 * @see Character#isDigit(int) 10340 * @see Character#isJavaIdentifierStart(int) 10341 * @see Character#isLetterOrDigit(int) 10342 * @see Character#isLowerCase(int) 10343 * @see Character#isTitleCase(int) 10344 * @see Character#isUnicodeIdentifierStart(int) 10345 * @see Character#isUpperCase(int) 10346 * @since 1.5 10347 */ 10348 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10349 /* 10350 public static boolean isLetter(int codePoint) { 10351 return ((((1 << Character.UPPERCASE_LETTER) | 10352 (1 << Character.LOWERCASE_LETTER) | 10353 (1 << Character.TITLECASE_LETTER) | 10354 (1 << Character.MODIFIER_LETTER) | 10355 (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1) 10356 != 0; 10357 } 10358 */ isLetter(int codePoint)10359 public static boolean isLetter(int codePoint) { 10360 return isLetterImpl(codePoint); 10361 } 10362 10363 @FastNative isLetterImpl(int codePoint)10364 static native boolean isLetterImpl(int codePoint); 10365 // END Android-changed: Reimplement methods natively on top of ICU4C. 10366 10367 /** 10368 * Determines if the specified character is a letter or digit. 
10369 * <p> 10370 * A character is considered to be a letter or digit if either 10371 * {@code Character.isLetter(char ch)} or 10372 * {@code Character.isDigit(char ch)} returns 10373 * {@code true} for the character. 10374 * 10375 * <p><b>Note:</b> This method cannot handle <a 10376 * href="#supplementary"> supplementary characters</a>. To support 10377 * all Unicode characters, including supplementary characters, use 10378 * the {@link #isLetterOrDigit(int)} method. 10379 * 10380 * @param ch the character to be tested. 10381 * @return {@code true} if the character is a letter or digit; 10382 * {@code false} otherwise. 10383 * @see Character#isDigit(char) 10384 * @see Character#isJavaIdentifierPart(char) 10385 * @see Character#isJavaLetter(char) 10386 * @see Character#isJavaLetterOrDigit(char) 10387 * @see Character#isLetter(char) 10388 * @see Character#isUnicodeIdentifierPart(char) 10389 * @since 1.0.2 10390 */ isLetterOrDigit(char ch)10391 public static boolean isLetterOrDigit(char ch) { 10392 return isLetterOrDigit((int)ch); 10393 } 10394 10395 /** 10396 * Determines if the specified character (Unicode code point) is a letter or digit. 10397 * <p> 10398 * A character is considered to be a letter or digit if either 10399 * {@link #isLetter(int) isLetter(codePoint)} or 10400 * {@link #isDigit(int) isDigit(codePoint)} returns 10401 * {@code true} for the character. 10402 * 10403 * @param codePoint the character (Unicode code point) to be tested. 10404 * @return {@code true} if the character is a letter or digit; 10405 * {@code false} otherwise. 10406 * @see Character#isDigit(int) 10407 * @see Character#isJavaIdentifierPart(int) 10408 * @see Character#isLetter(int) 10409 * @see Character#isUnicodeIdentifierPart(int) 10410 * @since 1.5 10411 */ 10412 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 
10413 /* 10414 public static boolean isLetterOrDigit(int codePoint) { 10415 return ((((1 << Character.UPPERCASE_LETTER) | 10416 (1 << Character.LOWERCASE_LETTER) | 10417 (1 << Character.TITLECASE_LETTER) | 10418 (1 << Character.MODIFIER_LETTER) | 10419 (1 << Character.OTHER_LETTER) | 10420 (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1) 10421 != 0; 10422 } 10423 */ isLetterOrDigit(int codePoint)10424 public static boolean isLetterOrDigit(int codePoint) { 10425 return isLetterOrDigitImpl(codePoint); 10426 } 10427 10428 @FastNative isLetterOrDigitImpl(int codePoint)10429 static native boolean isLetterOrDigitImpl(int codePoint); 10430 // END Android-changed: Reimplement methods natively on top of ICU4C. 10431 10432 /** 10433 * Determines if the specified character is permissible as the first 10434 * character in a Java identifier. 10435 * <p> 10436 * A character may start a Java identifier if and only if 10437 * one of the following conditions is true: 10438 * <ul> 10439 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10440 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10441 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10442 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10443 * </ul> 10444 * 10445 * @param ch the character to be tested. 10446 * @return {@code true} if the character may start a Java 10447 * identifier; {@code false} otherwise. 10448 * @see Character#isJavaLetterOrDigit(char) 10449 * @see Character#isJavaIdentifierStart(char) 10450 * @see Character#isJavaIdentifierPart(char) 10451 * @see Character#isLetter(char) 10452 * @see Character#isLetterOrDigit(char) 10453 * @see Character#isUnicodeIdentifierStart(char) 10454 * @since 1.0.2 10455 * @deprecated Replaced by isJavaIdentifierStart(char). 
10456 */ 10457 @Deprecated(since="1.1") isJavaLetter(char ch)10458 public static boolean isJavaLetter(char ch) { 10459 return isJavaIdentifierStart(ch); 10460 } 10461 10462 /** 10463 * Determines if the specified character may be part of a Java 10464 * identifier as other than the first character. 10465 * <p> 10466 * A character may be part of a Java identifier if and only if one 10467 * of the following conditions is true: 10468 * <ul> 10469 * <li> it is a letter 10470 * <li> it is a currency symbol (such as {@code '$'}) 10471 * <li> it is a connecting punctuation character (such as {@code '_'}) 10472 * <li> it is a digit 10473 * <li> it is a numeric letter (such as a Roman numeral character) 10474 * <li> it is a combining mark 10475 * <li> it is a non-spacing mark 10476 * <li> {@code isIdentifierIgnorable} returns 10477 * {@code true} for the character. 10478 * </ul> 10479 * 10480 * @param ch the character to be tested. 10481 * @return {@code true} if the character may be part of a 10482 * Java identifier; {@code false} otherwise. 10483 * @see Character#isJavaLetter(char) 10484 * @see Character#isJavaIdentifierStart(char) 10485 * @see Character#isJavaIdentifierPart(char) 10486 * @see Character#isLetter(char) 10487 * @see Character#isLetterOrDigit(char) 10488 * @see Character#isUnicodeIdentifierPart(char) 10489 * @see Character#isIdentifierIgnorable(char) 10490 * @since 1.0.2 10491 * @deprecated Replaced by isJavaIdentifierPart(char). 10492 */ 10493 @Deprecated(since="1.1") isJavaLetterOrDigit(char ch)10494 public static boolean isJavaLetterOrDigit(char ch) { 10495 return isJavaIdentifierPart(ch); 10496 } 10497 10498 /** 10499 * Determines if the specified character (Unicode code point) is alphabetic. 
10500 * <p> 10501 * A character is considered to be alphabetic if its general category type, 10502 * provided by {@link Character#getType(int) getType(codePoint)}, is any of 10503 * the following: 10504 * <ul> 10505 * <li> {@code UPPERCASE_LETTER} 10506 * <li> {@code LOWERCASE_LETTER} 10507 * <li> {@code TITLECASE_LETTER} 10508 * <li> {@code MODIFIER_LETTER} 10509 * <li> {@code OTHER_LETTER} 10510 * <li> {@code LETTER_NUMBER} 10511 * </ul> 10512 * or it has contributory property Other_Alphabetic as defined by the 10513 * Unicode Standard. 10514 * 10515 * @param codePoint the character (Unicode code point) to be tested. 10516 * @return {@code true} if the character is a Unicode alphabet 10517 * character, {@code false} otherwise. 10518 * @since 1.7 10519 */ 10520 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10521 /* 10522 public static boolean isAlphabetic(int codePoint) { 10523 return (((((1 << Character.UPPERCASE_LETTER) | 10524 (1 << Character.LOWERCASE_LETTER) | 10525 (1 << Character.TITLECASE_LETTER) | 10526 (1 << Character.MODIFIER_LETTER) | 10527 (1 << Character.OTHER_LETTER) | 10528 (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || 10529 CharacterData.of(codePoint).isOtherAlphabetic(codePoint); 10530 } 10531 */ isAlphabetic(int codePoint)10532 public static boolean isAlphabetic(int codePoint) { 10533 return isAlphabeticImpl(codePoint); 10534 } 10535 10536 @FastNative isAlphabeticImpl(int codePoint)10537 static native boolean isAlphabeticImpl(int codePoint); 10538 // END Android-changed: Reimplement methods natively on top of ICU4C. 10539 10540 /** 10541 * Determines if the specified character (Unicode code point) is a CJKV 10542 * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by 10543 * the Unicode Standard. 10544 * 10545 * @param codePoint the character (Unicode code point) to be tested. 
10546 * @return {@code true} if the character is a Unicode ideograph 10547 * character, {@code false} otherwise. 10548 * @since 1.7 10549 */ 10550 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10551 /* 10552 public static boolean isIdeographic(int codePoint) { 10553 return CharacterData.of(codePoint).isIdeographic(codePoint); 10554 } 10555 */ isIdeographic(int codePoint)10556 public static boolean isIdeographic(int codePoint) { 10557 return isIdeographicImpl(codePoint); 10558 } 10559 @FastNative isIdeographicImpl(int codePoint)10560 static native boolean isIdeographicImpl(int codePoint); 10561 // END Android-changed: Reimplement methods natively on top of ICU4C. 10562 10563 // Android-changed: Removed @see tag (target does not exist on Android): 10564 // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10565 /** 10566 * Determines if the specified character is 10567 * permissible as the first character in a Java identifier. 10568 * <p> 10569 * A character may start a Java identifier if and only if 10570 * one of the following conditions is true: 10571 * <ul> 10572 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10573 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10574 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10575 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10576 * </ul> 10577 * 10578 * <p><b>Note:</b> This method cannot handle <a 10579 * href="#supplementary"> supplementary characters</a>. To support 10580 * all Unicode characters, including supplementary characters, use 10581 * the {@link #isJavaIdentifierStart(int)} method. 10582 * 10583 * @param ch the character to be tested. 10584 * @return {@code true} if the character may start a Java identifier; 10585 * {@code false} otherwise. 
10586 * @see Character#isJavaIdentifierPart(char) 10587 * @see Character#isLetter(char) 10588 * @see Character#isUnicodeIdentifierStart(char) 10589 * @since 1.1 10590 */ 10591 @SuppressWarnings("doclint:reference") // cross-module links isJavaIdentifierStart(char ch)10592 public static boolean isJavaIdentifierStart(char ch) { 10593 return isJavaIdentifierStart((int)ch); 10594 } 10595 10596 // Android-changed: Removed @see tag (target does not exist on Android): 10597 // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10598 /** 10599 * Determines if the character (Unicode code point) is 10600 * permissible as the first character in a Java identifier. 10601 * <p> 10602 * A character may start a Java identifier if and only if 10603 * one of the following conditions is true: 10604 * <ul> 10605 * <li> {@link #isLetter(int) isLetter(codePoint)} 10606 * returns {@code true} 10607 * <li> {@link #getType(int) getType(codePoint)} 10608 * returns {@code LETTER_NUMBER} 10609 * <li> the referenced character is a currency symbol (such as {@code '$'}) 10610 * <li> the referenced character is a connecting punctuation character 10611 * (such as {@code '_'}). 10612 * </ul> 10613 * 10614 * @param codePoint the character (Unicode code point) to be tested. 10615 * @return {@code true} if the character may start a Java identifier; 10616 * {@code false} otherwise. 10617 * @see Character#isJavaIdentifierPart(int) 10618 * @see Character#isLetter(int) 10619 * @see Character#isUnicodeIdentifierStart(int) 10620 * @since 1.5 10621 */ 10622 // BEGIN Android-changed: Use ICU. 10623 /* 10624 public static boolean isJavaIdentifierStart(int codePoint) { 10625 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 10626 } 10627 */ 10628 @SuppressWarnings("doclint:reference") // cross-module links isJavaIdentifierStart(int codePoint)10629 public static boolean isJavaIdentifierStart(int codePoint) { 10630 // Use precomputed bitmasks to optimize the ASCII range. 
10631 if (codePoint < 64) { 10632 return (codePoint == '$'); // There's only one character in this range. 10633 } else if (codePoint < 128) { 10634 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 10635 } 10636 return ((1 << getType(codePoint)) 10637 & ((1 << UPPERCASE_LETTER) 10638 | (1 << LOWERCASE_LETTER) 10639 | (1 << TITLECASE_LETTER) 10640 | (1 << MODIFIER_LETTER) 10641 | (1 << OTHER_LETTER) 10642 | (1 << CURRENCY_SYMBOL) 10643 | (1 << CONNECTOR_PUNCTUATION) 10644 | (1 << LETTER_NUMBER))) != 0; 10645 } 10646 // END Android-changed: Use ICU. 10647 10648 // Android-changed: Removed @see tag (target does not exist on Android): 10649 // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10650 /** 10651 * Determines if the specified character may be part of a Java 10652 * identifier as other than the first character. 10653 * <p> 10654 * A character may be part of a Java identifier if any of the following 10655 * conditions are true: 10656 * <ul> 10657 * <li> it is a letter 10658 * <li> it is a currency symbol (such as {@code '$'}) 10659 * <li> it is a connecting punctuation character (such as {@code '_'}) 10660 * <li> it is a digit 10661 * <li> it is a numeric letter (such as a Roman numeral character) 10662 * <li> it is a combining mark 10663 * <li> it is a non-spacing mark 10664 * <li> {@code isIdentifierIgnorable} returns 10665 * {@code true} for the character 10666 * </ul> 10667 * 10668 * <p><b>Note:</b> This method cannot handle <a 10669 * href="#supplementary"> supplementary characters</a>. To support 10670 * all Unicode characters, including supplementary characters, use 10671 * the {@link #isJavaIdentifierPart(int)} method. 10672 * 10673 * @param ch the character to be tested. 10674 * @return {@code true} if the character may be part of a 10675 * Java identifier; {@code false} otherwise. 
10676 * @see Character#isIdentifierIgnorable(char) 10677 * @see Character#isJavaIdentifierStart(char) 10678 * @see Character#isLetterOrDigit(char) 10679 * @see Character#isUnicodeIdentifierPart(char) 10680 * @since 1.1 10681 */ 10682 @SuppressWarnings("doclint:reference") // cross-module links isJavaIdentifierPart(char ch)10683 public static boolean isJavaIdentifierPart(char ch) { 10684 return isJavaIdentifierPart((int)ch); 10685 } 10686 10687 // Android-changed: Removed @see tag (target does not exist on Android): 10688 // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10689 /** 10690 * Determines if the character (Unicode code point) may be part of a Java 10691 * identifier as other than the first character. 10692 * <p> 10693 * A character may be part of a Java identifier if any of the following 10694 * conditions are true: 10695 * <ul> 10696 * <li> it is a letter 10697 * <li> it is a currency symbol (such as {@code '$'}) 10698 * <li> it is a connecting punctuation character (such as {@code '_'}) 10699 * <li> it is a digit 10700 * <li> it is a numeric letter (such as a Roman numeral character) 10701 * <li> it is a combining mark 10702 * <li> it is a non-spacing mark 10703 * <li> {@link #isIdentifierIgnorable(int) 10704 * isIdentifierIgnorable(codePoint)} returns {@code true} for 10705 * the code point 10706 * </ul> 10707 * 10708 * @param codePoint the character (Unicode code point) to be tested. 10709 * @return {@code true} if the character may be part of a 10710 * Java identifier; {@code false} otherwise. 10711 * @see Character#isIdentifierIgnorable(int) 10712 * @see Character#isJavaIdentifierStart(int) 10713 * @see Character#isLetterOrDigit(int) 10714 * @see Character#isUnicodeIdentifierPart(int) 10715 * @since 1.5 10716 */ 10717 // BEGIN Android-changed: Use ICU. 
10718 /* 10719 public static boolean isJavaIdentifierPart(int codePoint) { 10720 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 10721 } 10722 */ 10723 @SuppressWarnings("doclint:reference") // cross-module links isJavaIdentifierPart(int codePoint)10724 public static boolean isJavaIdentifierPart(int codePoint) { 10725 // Use precomputed bitmasks to optimize the ASCII range. 10726 if (codePoint < 64) { 10727 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 10728 } else if (codePoint < 128) { 10729 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 10730 } 10731 return ((1 << getType(codePoint)) 10732 & ((1 << UPPERCASE_LETTER) 10733 | (1 << LOWERCASE_LETTER) 10734 | (1 << TITLECASE_LETTER) 10735 | (1 << MODIFIER_LETTER) 10736 | (1 << OTHER_LETTER) 10737 | (1 << CURRENCY_SYMBOL) 10738 | (1 << CONNECTOR_PUNCTUATION) 10739 | (1 << DECIMAL_DIGIT_NUMBER) 10740 | (1 << LETTER_NUMBER) 10741 | (1 << FORMAT) 10742 | (1 << COMBINING_SPACING_MARK) 10743 | (1 << NON_SPACING_MARK))) != 0 10744 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 10745 || (codePoint >= 0x7f && codePoint <= 0x9f); 10746 } 10747 // END Android-changed: Use ICU. 10748 10749 /** 10750 * Determines if the specified character is permissible as the 10751 * first character in a Unicode identifier. 10752 * <p> 10753 * A character may start a Unicode identifier if and only if 10754 * one of the following conditions is true: 10755 * <ul> 10756 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10757 * <li> {@link #getType(char) getType(ch)} returns 10758 * {@code LETTER_NUMBER}. 10759 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10760 * {@code Other_ID_Start}</a> character. 
10761 * </ul> 10762 * <p> 10763 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10764 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10765 * with the following profile of UAX31: 10766 * <pre> 10767 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10768 * </pre> 10769 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10770 * compatibility. 10771 * 10772 * <p><b>Note:</b> This method cannot handle <a 10773 * href="#supplementary"> supplementary characters</a>. To support 10774 * all Unicode characters, including supplementary characters, use 10775 * the {@link #isUnicodeIdentifierStart(int)} method. 10776 * 10777 * @param ch the character to be tested. 10778 * @return {@code true} if the character may start a Unicode 10779 * identifier; {@code false} otherwise. 10780 * 10781 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10782 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10783 * @see Character#isJavaIdentifierStart(char) 10784 * @see Character#isLetter(char) 10785 * @see Character#isUnicodeIdentifierPart(char) 10786 * @since 1.1 10787 */ isUnicodeIdentifierStart(char ch)10788 public static boolean isUnicodeIdentifierStart(char ch) { 10789 return isUnicodeIdentifierStart((int)ch); 10790 } 10791 10792 /** 10793 * Determines if the specified character (Unicode code point) is permissible as the 10794 * first character in a Unicode identifier. 10795 * <p> 10796 * A character may start a Unicode identifier if and only if 10797 * one of the following conditions is true: 10798 * <ul> 10799 * <li> {@link #isLetter(int) isLetter(codePoint)} 10800 * returns {@code true} 10801 * <li> {@link #getType(int) getType(codePoint)} 10802 * returns {@code LETTER_NUMBER}. 10803 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10804 * {@code Other_ID_Start}</a> character. 
10805 * </ul> 10806 * <p> 10807 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10808 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10809 * with the following profile of UAX31: 10810 * <pre> 10811 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10812 * </pre> 10813 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10814 * compatibility. 10815 * 10816 * @param codePoint the character (Unicode code point) to be tested. 10817 * @return {@code true} if the character may start a Unicode 10818 * identifier; {@code false} otherwise. 10819 * 10820 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10821 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10822 * @see Character#isJavaIdentifierStart(int) 10823 * @see Character#isLetter(int) 10824 * @see Character#isUnicodeIdentifierPart(int) 10825 * @since 1.5 10826 */ 10827 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10828 /* 10829 public static boolean isUnicodeIdentifierStart(int codePoint) { 10830 return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint); 10831 } 10832 */ isUnicodeIdentifierStart(int codePoint)10833 public static boolean isUnicodeIdentifierStart(int codePoint) { 10834 return isUnicodeIdentifierStartImpl(codePoint); 10835 } 10836 10837 @FastNative isUnicodeIdentifierStartImpl(int codePoint)10838 static native boolean isUnicodeIdentifierStartImpl(int codePoint); 10839 // END Android-changed: Reimplement methods natively on top of ICU4C. 10840 10841 /** 10842 * Determines if the specified character may be part of a Unicode 10843 * identifier as other than the first character. 
10844 * <p> 10845 * A character may be part of a Unicode identifier if and only if 10846 * one of the following statements is true: 10847 * <ul> 10848 * <li> it is a letter 10849 * <li> it is a connecting punctuation character (such as {@code '_'}) 10850 * <li> it is a digit 10851 * <li> it is a numeric letter (such as a Roman numeral character) 10852 * <li> it is a combining mark 10853 * <li> it is a non-spacing mark 10854 * <li> {@code isIdentifierIgnorable} returns 10855 * {@code true} for this character. 10856 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10857 * {@code Other_ID_Start}</a> character. 10858 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10859 * {@code Other_ID_Continue}</a> character. 10860 * </ul> 10861 * <p> 10862 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10863 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10864 * with the following profile of UAX31: 10865 * <pre> 10866 * Continue := Start + ID_Continue + ignorable 10867 * Medial := empty 10868 * ignorable := isIdentifierIgnorable(char) returns true for the character 10869 * </pre> 10870 * {@code ignorable} is added to {@code Continue} for backward 10871 * compatibility. 10872 * 10873 * <p><b>Note:</b> This method cannot handle <a 10874 * href="#supplementary"> supplementary characters</a>. To support 10875 * all Unicode characters, including supplementary characters, use 10876 * the {@link #isUnicodeIdentifierPart(int)} method. 10877 * 10878 * @param ch the character to be tested. 10879 * @return {@code true} if the character may be part of a 10880 * Unicode identifier; {@code false} otherwise. 
10881 * 10882 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10883 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10884 * @see Character#isIdentifierIgnorable(char) 10885 * @see Character#isJavaIdentifierPart(char) 10886 * @see Character#isLetterOrDigit(char) 10887 * @see Character#isUnicodeIdentifierStart(char) 10888 * @since 1.1 10889 */ isUnicodeIdentifierPart(char ch)10890 public static boolean isUnicodeIdentifierPart(char ch) { 10891 return isUnicodeIdentifierPart((int)ch); 10892 } 10893 10894 /** 10895 * Determines if the specified character (Unicode code point) may be part of a Unicode 10896 * identifier as other than the first character. 10897 * <p> 10898 * A character may be part of a Unicode identifier if and only if 10899 * one of the following statements is true: 10900 * <ul> 10901 * <li> it is a letter 10902 * <li> it is a connecting punctuation character (such as {@code '_'}) 10903 * <li> it is a digit 10904 * <li> it is a numeric letter (such as a Roman numeral character) 10905 * <li> it is a combining mark 10906 * <li> it is a non-spacing mark 10907 * <li> {@code isIdentifierIgnorable} returns 10908 * {@code true} for this character. 10909 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10910 * {@code Other_ID_Start}</a> character. 10911 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10912 * {@code Other_ID_Continue}</a> character. 
10913 * </ul> 10914 * <p> 10915 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10916 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10917 * with the following profile of UAX31: 10918 * <pre> 10919 * Continue := Start + ID_Continue + ignorable 10920 * Medial := empty 10921 * ignorable := isIdentifierIgnorable(int) returns true for the character 10922 * </pre> 10923 * {@code ignorable} is added to {@code Continue} for backward 10924 * compatibility. 10925 * 10926 * @param codePoint the character (Unicode code point) to be tested. 10927 * @return {@code true} if the character may be part of a 10928 * Unicode identifier; {@code false} otherwise. 10929 * 10930 * @spec https://www.unicode.org/reports/tr44 Unicode Character Database 10931 * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax 10932 * @see Character#isIdentifierIgnorable(int) 10933 * @see Character#isJavaIdentifierPart(int) 10934 * @see Character#isLetterOrDigit(int) 10935 * @see Character#isUnicodeIdentifierStart(int) 10936 * @since 1.5 10937 */ 10938 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10939 /* 10940 public static boolean isUnicodeIdentifierPart(int codePoint) { 10941 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 10942 } 10943 */ isUnicodeIdentifierPart(int codePoint)10944 public static boolean isUnicodeIdentifierPart(int codePoint) { 10945 return isUnicodeIdentifierPartImpl(codePoint); 10946 } 10947 10948 @FastNative isUnicodeIdentifierPartImpl(int codePoint)10949 static native boolean isUnicodeIdentifierPartImpl(int codePoint); 10950 // END Android-changed: Reimplement methods natively on top of ICU4C. 10951 10952 /** 10953 * Determines if the specified character should be regarded as 10954 * an ignorable character in a Java identifier or a Unicode identifier. 
10955 * <p> 10956 * The following Unicode characters are ignorable in a Java identifier 10957 * or a Unicode identifier: 10958 * <ul> 10959 * <li>ISO control characters that are not whitespace 10960 * <ul> 10961 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10962 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10963 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10964 * </ul> 10965 * 10966 * <li>all characters that have the {@code FORMAT} general 10967 * category value 10968 * </ul> 10969 * 10970 * <p><b>Note:</b> This method cannot handle <a 10971 * href="#supplementary"> supplementary characters</a>. To support 10972 * all Unicode characters, including supplementary characters, use 10973 * the {@link #isIdentifierIgnorable(int)} method. 10974 * 10975 * @param ch the character to be tested. 10976 * @return {@code true} if the character is an ignorable control 10977 * character that may be part of a Java or Unicode identifier; 10978 * {@code false} otherwise. 10979 * @see Character#isJavaIdentifierPart(char) 10980 * @see Character#isUnicodeIdentifierPart(char) 10981 * @since 1.1 10982 */ isIdentifierIgnorable(char ch)10983 public static boolean isIdentifierIgnorable(char ch) { 10984 return isIdentifierIgnorable((int)ch); 10985 } 10986 10987 /** 10988 * Determines if the specified character (Unicode code point) should be regarded as 10989 * an ignorable character in a Java identifier or a Unicode identifier. 
10990 * <p> 10991 * The following Unicode characters are ignorable in a Java identifier 10992 * or a Unicode identifier: 10993 * <ul> 10994 * <li>ISO control characters that are not whitespace 10995 * <ul> 10996 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10997 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10998 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10999 * </ul> 11000 * 11001 * <li>all characters that have the {@code FORMAT} general 11002 * category value 11003 * </ul> 11004 * 11005 * @param codePoint the character (Unicode code point) to be tested. 11006 * @return {@code true} if the character is an ignorable control 11007 * character that may be part of a Java or Unicode identifier; 11008 * {@code false} otherwise. 11009 * @see Character#isJavaIdentifierPart(int) 11010 * @see Character#isUnicodeIdentifierPart(int) 11011 * @since 1.5 11012 */ 11013 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11014 /* 11015 public static boolean isIdentifierIgnorable(int codePoint) { 11016 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 11017 } 11018 */ isIdentifierIgnorable(int codePoint)11019 public static boolean isIdentifierIgnorable(int codePoint) { 11020 return isIdentifierIgnorableImpl(codePoint); 11021 } 11022 11023 @FastNative isIdentifierIgnorableImpl(int codePoint)11024 static native boolean isIdentifierIgnorableImpl(int codePoint); 11025 // END Android-changed: Reimplement methods natively on top of ICU4C. 11026 11027 /** 11028 * Determines if the specified character (Unicode code point) is an Emoji. 11029 * <p> 11030 * A character is considered to be an Emoji if and only if it has the {@code Emoji} 11031 * property, defined in 11032 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11033 * Unicode Emoji (Technical Standard #51)</a>. 11034 * 11035 * @param codePoint the character (Unicode code point) to be tested. 
11036 * @return {@code true} if the character is an Emoji; 11037 * {@code false} otherwise. 11038 * @since 21 11039 */ isEmoji(int codePoint)11040 public static boolean isEmoji(int codePoint) { 11041 // Android-changed: Use ICU. 11042 // return CharacterData.of(codePoint).isEmoji(codePoint); 11043 return ICU.hasBinaryProperty(codePoint, UProperty.EMOJI); 11044 } 11045 11046 /** 11047 * Determines if the specified character (Unicode code point) has the 11048 * Emoji Presentation property by default. 11049 * <p> 11050 * A character is considered to have the Emoji Presentation property if and 11051 * only if it has the {@code Emoji_Presentation} property, defined in 11052 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11053 * Unicode Emoji (Technical Standard #51)</a>. 11054 * 11055 * @param codePoint the character (Unicode code point) to be tested. 11056 * @return {@code true} if the character has the Emoji Presentation 11057 * property; {@code false} otherwise. 11058 * @since 21 11059 */ isEmojiPresentation(int codePoint)11060 public static boolean isEmojiPresentation(int codePoint) { 11061 // Android-changed: Use ICU. 11062 // return CharacterData.of(codePoint).isEmojiPresentation(codePoint); 11063 return ICU.hasBinaryProperty(codePoint, UProperty.EMOJI_PRESENTATION); 11064 } 11065 11066 /** 11067 * Determines if the specified character (Unicode code point) is an 11068 * Emoji Modifier. 11069 * <p> 11070 * A character is considered to be an Emoji Modifier if and only if it has 11071 * the {@code Emoji_Modifier} property, defined in 11072 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11073 * Unicode Emoji (Technical Standard #51)</a>. 11074 * 11075 * @param codePoint the character (Unicode code point) to be tested. 11076 * @return {@code true} if the character is an Emoji Modifier; 11077 * {@code false} otherwise. 
11078 * @since 21 11079 */ isEmojiModifier(int codePoint)11080 public static boolean isEmojiModifier(int codePoint) { 11081 // Android-changed: Use ICU. 11082 // return CharacterData.of(codePoint).isEmojiModifier(codePoint); 11083 return ICU.hasBinaryProperty(codePoint, UProperty.EMOJI_MODIFIER); 11084 } 11085 11086 /** 11087 * Determines if the specified character (Unicode code point) is an 11088 * Emoji Modifier Base. 11089 * <p> 11090 * A character is considered to be an Emoji Modifier Base if and only if it has 11091 * the {@code Emoji_Modifier_Base} property, defined in 11092 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11093 * Unicode Emoji (Technical Standard #51)</a>. 11094 * 11095 * @param codePoint the character (Unicode code point) to be tested. 11096 * @return {@code true} if the character is an Emoji Modifier Base; 11097 * {@code false} otherwise. 11098 * @since 21 11099 */ isEmojiModifierBase(int codePoint)11100 public static boolean isEmojiModifierBase(int codePoint) { 11101 // Android-changed: Use ICU. 11102 // return CharacterData.of(codePoint).isEmojiModifierBase(codePoint); 11103 return ICU.hasBinaryProperty(codePoint, UProperty.EMOJI_MODIFIER_BASE); 11104 } 11105 11106 /** 11107 * Determines if the specified character (Unicode code point) is an 11108 * Emoji Component. 11109 * <p> 11110 * A character is considered to be an Emoji Component if and only if it has 11111 * the {@code Emoji_Component} property, defined in 11112 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11113 * Unicode Emoji (Technical Standard #51)</a>. 11114 * 11115 * @param codePoint the character (Unicode code point) to be tested. 11116 * @return {@code true} if the character is an Emoji Component; 11117 * {@code false} otherwise. 11118 * @since 21 11119 */ isEmojiComponent(int codePoint)11120 public static boolean isEmojiComponent(int codePoint) { 11121 // Android-changed: Use ICU. 
11122 // return CharacterData.of(codePoint).isEmojiComponent(codePoint); 11123 return ICU.hasBinaryProperty(codePoint, UProperty.EMOJI_COMPONENT); 11124 } 11125 11126 /** 11127 * Determines if the specified character (Unicode code point) is 11128 * an Extended Pictographic. 11129 * <p> 11130 * A character is considered to be an Extended Pictographic if and only if it has 11131 * the {@code Extended_Pictographic} property, defined in 11132 * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files"> 11133 * Unicode Emoji (Technical Standard #51)</a>. 11134 * 11135 * @param codePoint the character (Unicode code point) to be tested. 11136 * @return {@code true} if the character is an Extended Pictographic; 11137 * {@code false} otherwise. 11138 * @since 21 11139 */ isExtendedPictographic(int codePoint)11140 public static boolean isExtendedPictographic(int codePoint) { 11141 // Android-changed: Use ICU. 11142 // return CharacterData.of(codePoint).isExtendedPictographic(codePoint); 11143 return ICU.hasBinaryProperty(codePoint, UProperty.EXTENDED_PICTOGRAPHIC); 11144 } 11145 11146 /** 11147 * Converts the character argument to lowercase using case 11148 * mapping information from the UnicodeData file. 11149 * <p> 11150 * Note that 11151 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 11152 * does not always return {@code true} for some ranges of 11153 * characters, particularly those that are symbols or ideographs. 11154 * 11155 * <p>In general, {@link String#toLowerCase()} should be used to map 11156 * characters to lowercase. {@code String} case mapping methods 11157 * have several benefits over {@code Character} case mapping methods. 11158 * {@code String} case mapping methods can perform locale-sensitive 11159 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11160 * the {@code Character} case mapping methods cannot. 
11161 * 11162 * <p><b>Note:</b> This method cannot handle <a 11163 * href="#supplementary"> supplementary characters</a>. To support 11164 * all Unicode characters, including supplementary characters, use 11165 * the {@link #toLowerCase(int)} method. 11166 * 11167 * @param ch the character to be converted. 11168 * @return the lowercase equivalent of the character, if any; 11169 * otherwise, the character itself. 11170 * @see Character#isLowerCase(char) 11171 * @see String#toLowerCase() 11172 */ toLowerCase(char ch)11173 public static char toLowerCase(char ch) { 11174 return (char)toLowerCase((int)ch); 11175 } 11176 11177 /** 11178 * Converts the character (Unicode code point) argument to 11179 * lowercase using case mapping information from the UnicodeData 11180 * file. 11181 * 11182 * <p> Note that 11183 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 11184 * does not always return {@code true} for some ranges of 11185 * characters, particularly those that are symbols or ideographs. 11186 * 11187 * <p>In general, {@link String#toLowerCase()} should be used to map 11188 * characters to lowercase. {@code String} case mapping methods 11189 * have several benefits over {@code Character} case mapping methods. 11190 * {@code String} case mapping methods can perform locale-sensitive 11191 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11192 * the {@code Character} case mapping methods cannot. 11193 * 11194 * @param codePoint the character (Unicode code point) to be converted. 11195 * @return the lowercase equivalent of the character (Unicode code 11196 * point), if any; otherwise, the character itself. 11197 * @see Character#isLowerCase(int) 11198 * @see String#toLowerCase() 11199 * 11200 * @since 1.5 11201 */ 11202 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 
11203 /* 11204 public static int toLowerCase(int codePoint) { 11205 return CharacterData.of(codePoint).toLowerCase(codePoint); 11206 } 11207 */ toLowerCase(int codePoint)11208 public static int toLowerCase(int codePoint) { 11209 if (codePoint >= 'A' && codePoint <= 'Z') { 11210 return codePoint + ('a' - 'A'); 11211 } 11212 11213 // All ASCII codepoints except the ones above remain unchanged. 11214 if (codePoint < 0x80) { 11215 return codePoint; 11216 } 11217 11218 return toLowerCaseImpl(codePoint); 11219 } 11220 11221 @FastNative toLowerCaseImpl(int codePoint)11222 static native int toLowerCaseImpl(int codePoint); 11223 // END Android-changed: Reimplement methods natively on top of ICU4C. 11224 11225 /** 11226 * Converts the character argument to uppercase using case mapping 11227 * information from the UnicodeData file. 11228 * <p> 11229 * Note that 11230 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 11231 * does not always return {@code true} for some ranges of 11232 * characters, particularly those that are symbols or ideographs. 11233 * 11234 * <p>In general, {@link String#toUpperCase()} should be used to map 11235 * characters to uppercase. {@code String} case mapping methods 11236 * have several benefits over {@code Character} case mapping methods. 11237 * {@code String} case mapping methods can perform locale-sensitive 11238 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11239 * the {@code Character} case mapping methods cannot. 11240 * 11241 * <p><b>Note:</b> This method cannot handle <a 11242 * href="#supplementary"> supplementary characters</a>. To support 11243 * all Unicode characters, including supplementary characters, use 11244 * the {@link #toUpperCase(int)} method. 11245 * 11246 * @param ch the character to be converted. 11247 * @return the uppercase equivalent of the character, if any; 11248 * otherwise, the character itself. 
11249 * @see Character#isUpperCase(char) 11250 * @see String#toUpperCase() 11251 */ toUpperCase(char ch)11252 public static char toUpperCase(char ch) { 11253 return (char)toUpperCase((int)ch); 11254 } 11255 11256 /** 11257 * Converts the character (Unicode code point) argument to 11258 * uppercase using case mapping information from the UnicodeData 11259 * file. 11260 * 11261 * <p>Note that 11262 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 11263 * does not always return {@code true} for some ranges of 11264 * characters, particularly those that are symbols or ideographs. 11265 * 11266 * <p>In general, {@link String#toUpperCase()} should be used to map 11267 * characters to uppercase. {@code String} case mapping methods 11268 * have several benefits over {@code Character} case mapping methods. 11269 * {@code String} case mapping methods can perform locale-sensitive 11270 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 11271 * the {@code Character} case mapping methods cannot. 11272 * 11273 * @param codePoint the character (Unicode code point) to be converted. 11274 * @return the uppercase equivalent of the character, if any; 11275 * otherwise, the character itself. 11276 * @see Character#isUpperCase(int) 11277 * @see String#toUpperCase() 11278 * 11279 * @since 1.5 11280 */ 11281 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11282 /* 11283 public static int toUpperCase(int codePoint) { 11284 return CharacterData.of(codePoint).toUpperCase(codePoint); 11285 } 11286 */ toUpperCase(int codePoint)11287 public static int toUpperCase(int codePoint) { 11288 if (codePoint >= 'a' && codePoint <= 'z') { 11289 return codePoint - ('a' - 'A'); 11290 } 11291 11292 // All ASCII codepoints except the ones above remain unchanged. 
11293 if (codePoint < 0x80) { 11294 return codePoint; 11295 } 11296 11297 return toUpperCaseImpl(codePoint); 11298 } 11299 11300 @FastNative toUpperCaseImpl(int codePoint)11301 static native int toUpperCaseImpl(int codePoint); 11302 // END Android-changed: Reimplement methods natively on top of ICU4C. 11303 11304 /** 11305 * Converts the character argument to titlecase using case mapping 11306 * information from the UnicodeData file. If a character has no 11307 * explicit titlecase mapping and is not itself a titlecase char 11308 * according to UnicodeData, then the uppercase mapping is 11309 * returned as an equivalent titlecase mapping. If the 11310 * {@code char} argument is already a titlecase 11311 * {@code char}, the same {@code char} value will be 11312 * returned. 11313 * <p> 11314 * Note that 11315 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 11316 * does not always return {@code true} for some ranges of 11317 * characters. 11318 * 11319 * <p><b>Note:</b> This method cannot handle <a 11320 * href="#supplementary"> supplementary characters</a>. To support 11321 * all Unicode characters, including supplementary characters, use 11322 * the {@link #toTitleCase(int)} method. 11323 * 11324 * @param ch the character to be converted. 11325 * @return the titlecase equivalent of the character, if any; 11326 * otherwise, the character itself. 11327 * @see Character#isTitleCase(char) 11328 * @see Character#toLowerCase(char) 11329 * @see Character#toUpperCase(char) 11330 * @since 1.0.2 11331 */ toTitleCase(char ch)11332 public static char toTitleCase(char ch) { 11333 return (char)toTitleCase((int)ch); 11334 } 11335 11336 /** 11337 * Converts the character (Unicode code point) argument to titlecase using case mapping 11338 * information from the UnicodeData file. 
If a character has no 11339 * explicit titlecase mapping and is not itself a titlecase char 11340 * according to UnicodeData, then the uppercase mapping is 11341 * returned as an equivalent titlecase mapping. If the 11342 * character argument is already a titlecase 11343 * character, the same character value will be 11344 * returned. 11345 * 11346 * <p>Note that 11347 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 11348 * does not always return {@code true} for some ranges of 11349 * characters. 11350 * 11351 * @param codePoint the character (Unicode code point) to be converted. 11352 * @return the titlecase equivalent of the character, if any; 11353 * otherwise, the character itself. 11354 * @see Character#isTitleCase(int) 11355 * @see Character#toLowerCase(int) 11356 * @see Character#toUpperCase(int) 11357 * @since 1.5 11358 */ 11359 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11360 /* 11361 public static int toTitleCase(int codePoint) { 11362 return CharacterData.of(codePoint).toTitleCase(codePoint); 11363 } 11364 */ toTitleCase(int codePoint)11365 public static int toTitleCase(int codePoint) { 11366 return toTitleCaseImpl(codePoint); 11367 } 11368 11369 @FastNative toTitleCaseImpl(int codePoint)11370 static native int toTitleCaseImpl(int codePoint); 11371 // END Android-changed: Reimplement methods natively on top of ICU4C. 11372 11373 /** 11374 * Returns the numeric value of the character {@code ch} in the 11375 * specified radix. 11376 * <p> 11377 * If the radix is not in the range {@code MIN_RADIX} ≤ 11378 * {@code radix} ≤ {@code MAX_RADIX} or if the 11379 * value of {@code ch} is not a valid digit in the specified 11380 * radix, {@code -1} is returned. 
A character is a valid digit 11381 * if at least one of the following is true: 11382 * <ul> 11383 * <li>The method {@code isDigit} is {@code true} of the character 11384 * and the Unicode decimal digit value of the character (or its 11385 * single-character decomposition) is less than the specified radix. 11386 * In this case the decimal digit value is returned. 11387 * <li>The character is one of the uppercase Latin letters 11388 * {@code 'A'} through {@code 'Z'} and its code is less than 11389 * {@code radix + 'A' - 10}. 11390 * In this case, {@code ch - 'A' + 10} 11391 * is returned. 11392 * <li>The character is one of the lowercase Latin letters 11393 * {@code 'a'} through {@code 'z'} and its code is less than 11394 * {@code radix + 'a' - 10}. 11395 * In this case, {@code ch - 'a' + 10} 11396 * is returned. 11397 * <li>The character is one of the fullwidth uppercase Latin letters A 11398 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11399 * and its code is less than 11400 * {@code radix + '\u005CuFF21' - 10}. 11401 * In this case, {@code ch - '\u005CuFF21' + 10} 11402 * is returned. 11403 * <li>The character is one of the fullwidth lowercase Latin letters a 11404 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11405 * and its code is less than 11406 * {@code radix + '\u005CuFF41' - 10}. 11407 * In this case, {@code ch - '\u005CuFF41' + 10} 11408 * is returned. 11409 * </ul> 11410 * 11411 * <p><b>Note:</b> This method cannot handle <a 11412 * href="#supplementary"> supplementary characters</a>. To support 11413 * all Unicode characters, including supplementary characters, use 11414 * the {@link #digit(int, int)} method. 11415 * 11416 * @param ch the character to be converted. 11417 * @param radix the radix. 11418 * @return the numeric value represented by the character in the 11419 * specified radix. 
11420 * @see Character#forDigit(int, int) 11421 * @see Character#isDigit(char) 11422 */ digit(char ch, int radix)11423 public static int digit(char ch, int radix) { 11424 return digit((int)ch, radix); 11425 } 11426 11427 /** 11428 * Returns the numeric value of the specified character (Unicode 11429 * code point) in the specified radix. 11430 * 11431 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 11432 * {@code radix} ≤ {@code MAX_RADIX} or if the 11433 * character is not a valid digit in the specified 11434 * radix, {@code -1} is returned. A character is a valid digit 11435 * if at least one of the following is true: 11436 * <ul> 11437 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 11438 * and the Unicode decimal digit value of the character (or its 11439 * single-character decomposition) is less than the specified radix. 11440 * In this case the decimal digit value is returned. 11441 * <li>The character is one of the uppercase Latin letters 11442 * {@code 'A'} through {@code 'Z'} and its code is less than 11443 * {@code radix + 'A' - 10}. 11444 * In this case, {@code codePoint - 'A' + 10} 11445 * is returned. 11446 * <li>The character is one of the lowercase Latin letters 11447 * {@code 'a'} through {@code 'z'} and its code is less than 11448 * {@code radix + 'a' - 10}. 11449 * In this case, {@code codePoint - 'a' + 10} 11450 * is returned. 11451 * <li>The character is one of the fullwidth uppercase Latin letters A 11452 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 11453 * and its code is less than 11454 * {@code radix + '\u005CuFF21' - 10}. 11455 * In this case, 11456 * {@code codePoint - '\u005CuFF21' + 10} 11457 * is returned. 11458 * <li>The character is one of the fullwidth lowercase Latin letters a 11459 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 11460 * and its code is less than 11461 * {@code radix + '\u005CuFF41'- 10}. 
11462 * In this case, 11463 * {@code codePoint - '\u005CuFF41' + 10} 11464 * is returned. 11465 * </ul> 11466 * 11467 * @param codePoint the character (Unicode code point) to be converted. 11468 * @param radix the radix. 11469 * @return the numeric value represented by the character in the 11470 * specified radix. 11471 * @see Character#forDigit(int, int) 11472 * @see Character#isDigit(int) 11473 * @since 1.5 11474 */ 11475 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11476 /* 11477 public static int digit(int codePoint, int radix) { 11478 return CharacterData.of(codePoint).digit(codePoint, radix); 11479 } 11480 */ digit(int codePoint, int radix)11481 public static int digit(int codePoint, int radix) { 11482 if (radix < MIN_RADIX || radix > MAX_RADIX) { 11483 return -1; 11484 } 11485 if (codePoint < 128) { 11486 // Optimized for ASCII 11487 int result = -1; 11488 if ('0' <= codePoint && codePoint <= '9') { 11489 result = codePoint - '0'; 11490 } else if ('a' <= codePoint && codePoint <= 'z') { 11491 result = 10 + (codePoint - 'a'); 11492 } else if ('A' <= codePoint && codePoint <= 'Z') { 11493 result = 10 + (codePoint - 'A'); 11494 } 11495 return result < radix ? result : -1; 11496 } 11497 return digitImpl(codePoint, radix); 11498 } 11499 11500 @FastNative digitImpl(int codePoint, int radix)11501 native static int digitImpl(int codePoint, int radix); 11502 // END Android-changed: Reimplement methods natively on top of ICU4C. 11503 11504 /** 11505 * Returns the {@code int} value that the specified Unicode 11506 * character represents. For example, the character 11507 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 11508 * an int with a value of 50. 
11509 * <p> 11510 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11511 * {@code '\u005Cu005A'}), lowercase 11512 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11513 * full width variant ({@code '\u005CuFF21'} through 11514 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11515 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11516 * through 35. This is independent of the Unicode specification, 11517 * which does not assign numeric values to these {@code char} 11518 * values. 11519 * <p> 11520 * If the character does not have a numeric value, then -1 is returned. 11521 * If the character has a numeric value that cannot be represented as a 11522 * nonnegative integer (for example, a fractional value), then -2 11523 * is returned. 11524 * 11525 * <p><b>Note:</b> This method cannot handle <a 11526 * href="#supplementary"> supplementary characters</a>. To support 11527 * all Unicode characters, including supplementary characters, use 11528 * the {@link #getNumericValue(int)} method. 11529 * 11530 * @param ch the character to be converted. 11531 * @return the numeric value of the character, as a nonnegative {@code int} 11532 * value; -2 if the character has a numeric value but the value 11533 * can not be represented as a nonnegative {@code int} value; 11534 * -1 if the character has no numeric value. 11535 * @see Character#forDigit(int, int) 11536 * @see Character#isDigit(char) 11537 * @since 1.1 11538 */ getNumericValue(char ch)11539 public static int getNumericValue(char ch) { 11540 return getNumericValue((int)ch); 11541 } 11542 11543 /** 11544 * Returns the {@code int} value that the specified 11545 * character (Unicode code point) represents. For example, the character 11546 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 11547 * an {@code int} with a value of 50. 
11548 * <p> 11549 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11550 * {@code '\u005Cu005A'}), lowercase 11551 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11552 * full width variant ({@code '\u005CuFF21'} through 11553 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11554 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11555 * through 35. This is independent of the Unicode specification, 11556 * which does not assign numeric values to these {@code char} 11557 * values. 11558 * <p> 11559 * If the character does not have a numeric value, then -1 is returned. 11560 * If the character has a numeric value that cannot be represented as a 11561 * nonnegative integer (for example, a fractional value), then -2 11562 * is returned. 11563 * 11564 * @param codePoint the character (Unicode code point) to be converted. 11565 * @return the numeric value of the character, as a nonnegative {@code int} 11566 * value; -2 if the character has a numeric value but the value 11567 * can not be represented as a nonnegative {@code int} value; 11568 * -1 if the character has no numeric value. 11569 * @see Character#forDigit(int, int) 11570 * @see Character#isDigit(int) 11571 * @since 1.5 11572 */ 11573 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11574 /* 11575 public static int getNumericValue(int codePoint) { 11576 return CharacterData.of(codePoint).getNumericValue(codePoint); 11577 } 11578 */ getNumericValue(int codePoint)11579 public static int getNumericValue(int codePoint) { 11580 // This is both an optimization and papers over differences between Java and ICU. 
11581 if (codePoint < 128) { 11582 if (codePoint >= '0' && codePoint <= '9') { 11583 return codePoint - '0'; 11584 } 11585 if (codePoint >= 'a' && codePoint <= 'z') { 11586 return codePoint - ('a' - 10); 11587 } 11588 if (codePoint >= 'A' && codePoint <= 'Z') { 11589 return codePoint - ('A' - 10); 11590 } 11591 return -1; 11592 } 11593 // Full-width uppercase A-Z. 11594 if (codePoint >= 0xff21 && codePoint <= 0xff3a) { 11595 return codePoint - 0xff17; 11596 } 11597 // Full-width lowercase a-z. 11598 if (codePoint >= 0xff41 && codePoint <= 0xff5a) { 11599 return codePoint - 0xff37; 11600 } 11601 return getNumericValueImpl(codePoint); 11602 } 11603 11604 @FastNative getNumericValueImpl(int codePoint)11605 native static int getNumericValueImpl(int codePoint); 11606 // END Android-changed: Reimplement methods natively on top of ICU4C. 11607 11608 /** 11609 * Determines if the specified character is ISO-LATIN-1 white space. 11610 * This method returns {@code true} for the following five 11611 * characters only: 11612 * <table class="striped"> 11613 * <caption style="display:none">truechars</caption> 11614 * <thead> 11615 * <tr><th scope="col">Character 11616 * <th scope="col">Code 11617 * <th scope="col">Name 11618 * </thead> 11619 * <tbody> 11620 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 11621 * <td>{@code HORIZONTAL TABULATION}</td></tr> 11622 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 11623 * <td>{@code NEW LINE}</td></tr> 11624 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 11625 * <td>{@code FORM FEED}</td></tr> 11626 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 11627 * <td>{@code CARRIAGE RETURN}</td></tr> 11628 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 11629 * <td>{@code SPACE}</td></tr> 11630 * </tbody> 11631 * </table> 11632 * 11633 * @param ch the character to be tested. 
11634 * @return {@code true} if the character is ISO-LATIN-1 white 11635 * space; {@code false} otherwise. 11636 * @see Character#isSpaceChar(char) 11637 * @see Character#isWhitespace(char) 11638 * @deprecated Replaced by isWhitespace(char). 11639 */ 11640 @Deprecated(since="1.1") isSpace(char ch)11641 public static boolean isSpace(char ch) { 11642 return (ch <= 0x0020) && 11643 (((((1L << 0x0009) | 11644 (1L << 0x000A) | 11645 (1L << 0x000C) | 11646 (1L << 0x000D) | 11647 (1L << 0x0020)) >> ch) & 1L) != 0); 11648 } 11649 11650 11651 /** 11652 * Determines if the specified character is a Unicode space character. 11653 * A character is considered to be a space character if and only if 11654 * it is specified to be a space character by the Unicode Standard. This 11655 * method returns true if the character's general category type is any of 11656 * the following: 11657 * <ul> 11658 * <li> {@code SPACE_SEPARATOR} 11659 * <li> {@code LINE_SEPARATOR} 11660 * <li> {@code PARAGRAPH_SEPARATOR} 11661 * </ul> 11662 * 11663 * <p><b>Note:</b> This method cannot handle <a 11664 * href="#supplementary"> supplementary characters</a>. To support 11665 * all Unicode characters, including supplementary characters, use 11666 * the {@link #isSpaceChar(int)} method. 11667 * 11668 * @param ch the character to be tested. 11669 * @return {@code true} if the character is a space character; 11670 * {@code false} otherwise. 11671 * @see Character#isWhitespace(char) 11672 * @since 1.1 11673 */ isSpaceChar(char ch)11674 public static boolean isSpaceChar(char ch) { 11675 return isSpaceChar((int)ch); 11676 } 11677 11678 /** 11679 * Determines if the specified character (Unicode code point) is a 11680 * Unicode space character. A character is considered to be a 11681 * space character if and only if it is specified to be a space 11682 * character by the Unicode Standard. 
This method returns true if 11683 * the character's general category type is any of the following: 11684 * 11685 * <ul> 11686 * <li> {@link #SPACE_SEPARATOR} 11687 * <li> {@link #LINE_SEPARATOR} 11688 * <li> {@link #PARAGRAPH_SEPARATOR} 11689 * </ul> 11690 * 11691 * @param codePoint the character (Unicode code point) to be tested. 11692 * @return {@code true} if the character is a space character; 11693 * {@code false} otherwise. 11694 * @see Character#isWhitespace(int) 11695 * @since 1.5 11696 */ 11697 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11698 /* 11699 public static boolean isSpaceChar(int codePoint) { 11700 return ((((1 << Character.SPACE_SEPARATOR) | 11701 (1 << Character.LINE_SEPARATOR) | 11702 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 11703 != 0; 11704 } 11705 */ isSpaceChar(int codePoint)11706 public static boolean isSpaceChar(int codePoint) { 11707 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 11708 // SPACE or NO-BREAK SPACE? 11709 if (codePoint == 0x20 || codePoint == 0xa0) { 11710 return true; 11711 } 11712 if (codePoint < 0x1000) { 11713 return false; 11714 } 11715 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 11716 if (codePoint == 0x1680 || codePoint == 0x180e) { 11717 return true; 11718 } 11719 if (codePoint < 0x2000) { 11720 return false; 11721 } 11722 if (codePoint <= 0xffff) { 11723 // Other whitespace from General Punctuation... 11724 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f || 11725 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 11726 } 11727 // Let icu4c worry about non-BMP code points. 11728 return isSpaceCharImpl(codePoint); 11729 } 11730 11731 @FastNative isSpaceCharImpl(int codePoint)11732 static native boolean isSpaceCharImpl(int codePoint); 11733 // END Android-changed: Reimplement methods natively on top of ICU4C. 
11734 11735 /** 11736 * Determines if the specified character is white space according to Java. 11737 * A character is a Java whitespace character if and only if it satisfies 11738 * one of the following criteria: 11739 * <ul> 11740 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 11741 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 11742 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11743 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11744 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11745 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11746 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11747 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11748 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11749 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11750 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11751 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11752 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11753 * </ul> 11754 * 11755 * <p><b>Note:</b> This method cannot handle <a 11756 * href="#supplementary"> supplementary characters</a>. To support 11757 * all Unicode characters, including supplementary characters, use 11758 * the {@link #isWhitespace(int)} method. 11759 * 11760 * @param ch the character to be tested. 11761 * @return {@code true} if the character is a Java whitespace 11762 * character; {@code false} otherwise. 11763 * @see Character#isSpaceChar(char) 11764 * @since 1.1 11765 */ isWhitespace(char ch)11766 public static boolean isWhitespace(char ch) { 11767 return isWhitespace((int)ch); 11768 } 11769 11770 /** 11771 * Determines if the specified character (Unicode code point) is 11772 * white space according to Java. 
A character is a Java 11773 * whitespace character if and only if it satisfies one of the 11774 * following criteria: 11775 * <ul> 11776 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 11777 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 11778 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11779 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11780 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11781 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11782 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11783 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11784 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11785 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11786 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11787 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11788 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11789 * </ul> 11790 * 11791 * @param codePoint the character (Unicode code point) to be tested. 11792 * @return {@code true} if the character is a Java whitespace 11793 * character; {@code false} otherwise. 11794 * @see Character#isSpaceChar(int) 11795 * @since 1.5 11796 */ 11797 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11798 /* 11799 public static boolean isWhitespace(int codePoint) { 11800 return CharacterData.of(codePoint).isWhitespace(codePoint); 11801 } 11802 */ isWhitespace(int codePoint)11803 public static boolean isWhitespace(int codePoint) { 11804 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 11805 // Any ASCII whitespace character? 11806 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) { 11807 return true; 11808 } 11809 if (codePoint < 0x1000) { 11810 return false; 11811 } 11812 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 
11813 if (codePoint == 0x1680 || codePoint == 0x180e) { 11814 return true; 11815 } 11816 if (codePoint < 0x2000) { 11817 return false; 11818 } 11819 // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE). 11820 if (codePoint == 0x2007 || codePoint == 0x202f) { 11821 return false; 11822 } 11823 if (codePoint <= 0xffff) { 11824 // Other whitespace from General Punctuation... 11825 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f || 11826 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 11827 } 11828 // Let icu4c worry about non-BMP code points. 11829 return isWhitespaceImpl(codePoint); 11830 } 11831 11832 @FastNative isWhitespaceImpl(int codePoint)11833 native static boolean isWhitespaceImpl(int codePoint); 11834 // END Android-changed: Reimplement methods natively on top of ICU4C. 11835 11836 /** 11837 * Determines if the specified character is an ISO control 11838 * character. A character is considered to be an ISO control 11839 * character if its code is in the range {@code '\u005Cu0000'} 11840 * through {@code '\u005Cu001F'} or in the range 11841 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11842 * 11843 * <p><b>Note:</b> This method cannot handle <a 11844 * href="#supplementary"> supplementary characters</a>. To support 11845 * all Unicode characters, including supplementary characters, use 11846 * the {@link #isISOControl(int)} method. 11847 * 11848 * @param ch the character to be tested. 11849 * @return {@code true} if the character is an ISO control character; 11850 * {@code false} otherwise. 11851 * 11852 * @see Character#isSpaceChar(char) 11853 * @see Character#isWhitespace(char) 11854 * @since 1.1 11855 */ isISOControl(char ch)11856 public static boolean isISOControl(char ch) { 11857 return isISOControl((int)ch); 11858 } 11859 11860 /** 11861 * Determines if the referenced character (Unicode code point) is an ISO control 11862 * character. 
A character is considered to be an ISO control 11863 * character if its code is in the range {@code '\u005Cu0000'} 11864 * through {@code '\u005Cu001F'} or in the range 11865 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11866 * 11867 * @param codePoint the character (Unicode code point) to be tested. 11868 * @return {@code true} if the character is an ISO control character; 11869 * {@code false} otherwise. 11870 * @see Character#isSpaceChar(int) 11871 * @see Character#isWhitespace(int) 11872 * @since 1.5 11873 */ isISOControl(int codePoint)11874 public static boolean isISOControl(int codePoint) { 11875 // Optimized form of: 11876 // (codePoint >= 0x00 && codePoint <= 0x1F) || 11877 // (codePoint >= 0x7F && codePoint <= 0x9F); 11878 return codePoint <= 0x9F && 11879 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 11880 } 11881 11882 /** 11883 * Returns a value indicating a character's general category. 11884 * 11885 * <p><b>Note:</b> This method cannot handle <a 11886 * href="#supplementary"> supplementary characters</a>. To support 11887 * all Unicode characters, including supplementary characters, use 11888 * the {@link #getType(int)} method. 11889 * 11890 * @param ch the character to be tested. 11891 * @return a value of type {@code int} representing the 11892 * character's general category. 
11893 * @see Character#COMBINING_SPACING_MARK 11894 * @see Character#CONNECTOR_PUNCTUATION 11895 * @see Character#CONTROL 11896 * @see Character#CURRENCY_SYMBOL 11897 * @see Character#DASH_PUNCTUATION 11898 * @see Character#DECIMAL_DIGIT_NUMBER 11899 * @see Character#ENCLOSING_MARK 11900 * @see Character#END_PUNCTUATION 11901 * @see Character#FINAL_QUOTE_PUNCTUATION 11902 * @see Character#FORMAT 11903 * @see Character#INITIAL_QUOTE_PUNCTUATION 11904 * @see Character#LETTER_NUMBER 11905 * @see Character#LINE_SEPARATOR 11906 * @see Character#LOWERCASE_LETTER 11907 * @see Character#MATH_SYMBOL 11908 * @see Character#MODIFIER_LETTER 11909 * @see Character#MODIFIER_SYMBOL 11910 * @see Character#NON_SPACING_MARK 11911 * @see Character#OTHER_LETTER 11912 * @see Character#OTHER_NUMBER 11913 * @see Character#OTHER_PUNCTUATION 11914 * @see Character#OTHER_SYMBOL 11915 * @see Character#PARAGRAPH_SEPARATOR 11916 * @see Character#PRIVATE_USE 11917 * @see Character#SPACE_SEPARATOR 11918 * @see Character#START_PUNCTUATION 11919 * @see Character#SURROGATE 11920 * @see Character#TITLECASE_LETTER 11921 * @see Character#UNASSIGNED 11922 * @see Character#UPPERCASE_LETTER 11923 * @since 1.1 11924 */ getType(char ch)11925 public static int getType(char ch) { 11926 return getType((int)ch); 11927 } 11928 11929 /** 11930 * Returns a value indicating a character's general category. 11931 * 11932 * @param codePoint the character (Unicode code point) to be tested. 11933 * @return a value of type {@code int} representing the 11934 * character's general category. 
11935 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 11936 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 11937 * @see Character#CONTROL CONTROL 11938 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 11939 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 11940 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 11941 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 11942 * @see Character#END_PUNCTUATION END_PUNCTUATION 11943 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 11944 * @see Character#FORMAT FORMAT 11945 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 11946 * @see Character#LETTER_NUMBER LETTER_NUMBER 11947 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 11948 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 11949 * @see Character#MATH_SYMBOL MATH_SYMBOL 11950 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 11951 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 11952 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 11953 * @see Character#OTHER_LETTER OTHER_LETTER 11954 * @see Character#OTHER_NUMBER OTHER_NUMBER 11955 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 11956 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 11957 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 11958 * @see Character#PRIVATE_USE PRIVATE_USE 11959 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 11960 * @see Character#START_PUNCTUATION START_PUNCTUATION 11961 * @see Character#SURROGATE SURROGATE 11962 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 11963 * @see Character#UNASSIGNED UNASSIGNED 11964 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 11965 * @since 1.5 11966 */ 11967 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 
11968 /* 11969 public static int getType(int codePoint) { 11970 return CharacterData.of(codePoint).getType(codePoint); 11971 } 11972 */ getType(int codePoint)11973 public static int getType(int codePoint) { 11974 int type = getTypeImpl(codePoint); 11975 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 11976 if (type <= Character.FORMAT) { 11977 return type; 11978 } 11979 return (type + 1); 11980 } 11981 11982 @FastNative getTypeImpl(int codePoint)11983 static native int getTypeImpl(int codePoint); 11984 // END Android-changed: Reimplement methods natively on top of ICU4C. 11985 11986 /** 11987 * Determines the character representation for a specific digit in 11988 * the specified radix. If the value of {@code radix} is not a 11989 * valid radix, or the value of {@code digit} is not a valid 11990 * digit in the specified radix, the null character 11991 * ({@code '\u005Cu0000'}) is returned. 11992 * <p> 11993 * The {@code radix} argument is valid if it is greater than or 11994 * equal to {@code MIN_RADIX} and less than or equal to 11995 * {@code MAX_RADIX}. The {@code digit} argument is valid if 11996 * {@code 0 <= digit < radix}. 11997 * <p> 11998 * If the digit is less than 10, then 11999 * {@code '0' + digit} is returned. Otherwise, the value 12000 * {@code 'a' + digit - 10} is returned. 12001 * 12002 * @param digit the number to convert to a character. 12003 * @param radix the radix. 12004 * @return the {@code char} representation of the specified digit 12005 * in the specified radix. 
12006 * @see Character#MIN_RADIX 12007 * @see Character#MAX_RADIX 12008 * @see Character#digit(char, int) 12009 */ forDigit(int digit, int radix)12010 public static char forDigit(int digit, int radix) { 12011 if ((digit >= radix) || (digit < 0)) { 12012 return '\0'; 12013 } 12014 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 12015 return '\0'; 12016 } 12017 if (digit < 10) { 12018 return (char)('0' + digit); 12019 } 12020 return (char)('a' - 10 + digit); 12021 } 12022 12023 /** 12024 * Returns the Unicode directionality property for the given 12025 * character. Character directionality is used to calculate the 12026 * visual ordering of text. The directionality value of undefined 12027 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 12028 * 12029 * <p><b>Note:</b> This method cannot handle <a 12030 * href="#supplementary"> supplementary characters</a>. To support 12031 * all Unicode characters, including supplementary characters, use 12032 * the {@link #getDirectionality(int)} method. 12033 * 12034 * @param ch {@code char} for which the directionality property 12035 * is requested. 12036 * @return the directionality property of the {@code char} value. 
12037 * 12038 * @see Character#DIRECTIONALITY_UNDEFINED 12039 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 12040 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 12041 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 12042 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 12043 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 12044 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 12045 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 12046 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 12047 * @see Character#DIRECTIONALITY_NONSPACING_MARK 12048 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 12049 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 12050 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 12051 * @see Character#DIRECTIONALITY_WHITESPACE 12052 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 12053 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 12054 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 12055 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 12056 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 12057 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 12058 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 12059 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 12060 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 12061 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 12062 * @since 1.4 12063 */ getDirectionality(char ch)12064 public static byte getDirectionality(char ch) { 12065 return getDirectionality((int)ch); 12066 } 12067 12068 /** 12069 * Returns the Unicode directionality property for the given 12070 * character (Unicode code point). Character directionality is 12071 * used to calculate the visual ordering of text. The 12072 * directionality value of undefined character is {@link 12073 * #DIRECTIONALITY_UNDEFINED}. 12074 * 12075 * @param codePoint the character (Unicode code point) for which 12076 * the directionality property is requested. 
12077 * @return the directionality property of the character. 12078 * 12079 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 12080 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 12081 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 12082 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 12083 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 12084 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 12085 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 12086 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 12087 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 12088 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 12089 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 12090 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 12091 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 12092 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 12093 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 12094 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 12095 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 12096 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 12097 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 12098 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 12099 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 12100 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 12101 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 12102 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 12103 * @since 1.5 12104 */ 12105 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 12106 /* 12107 public static byte getDirectionality(int codePoint) { 12108 return CharacterData.of(codePoint).getDirectionality(codePoint); 12109 } 12110 */ getDirectionality(int codePoint)12111 public static byte getDirectionality(int codePoint) { 12112 if (getType(codePoint) == Character.UNASSIGNED) { 12113 return Character.DIRECTIONALITY_UNDEFINED; 12114 } 12115 12116 byte directionality = getDirectionalityImpl(codePoint); 12117 if (directionality >= 0 && directionality < DIRECTIONALITY.length) { 12118 return DIRECTIONALITY[directionality]; 12119 } 12120 return Character.DIRECTIONALITY_UNDEFINED; 12121 } 12122 12123 @FastNative getDirectionalityImpl(int codePoint)12124 native static byte getDirectionalityImpl(int codePoint); 12125 // END Android-changed: Reimplement methods natively on top of ICU4C. 12126 12127 /** 12128 * Determines whether the character is mirrored according to the 12129 * Unicode specification. Mirrored characters should have their 12130 * glyphs horizontally mirrored when displayed in text that is 12131 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 12132 * PARENTHESIS is semantically defined to be an <i>opening 12133 * parenthesis</i>. This will appear as a "(" in text that is 12134 * left-to-right but as a ")" in text that is right-to-left. 12135 * 12136 * <p><b>Note:</b> This method cannot handle <a 12137 * href="#supplementary"> supplementary characters</a>. To support 12138 * all Unicode characters, including supplementary characters, use 12139 * the {@link #isMirrored(int)} method. 
12140 * 12141 * @param ch {@code char} for which the mirrored property is requested 12142 * @return {@code true} if the char is mirrored, {@code false} 12143 * if the {@code char} is not mirrored or is not defined. 12144 * @since 1.4 12145 */ isMirrored(char ch)12146 public static boolean isMirrored(char ch) { 12147 return isMirrored((int)ch); 12148 } 12149 12150 /** 12151 * Determines whether the specified character (Unicode code point) 12152 * is mirrored according to the Unicode specification. Mirrored 12153 * characters should have their glyphs horizontally mirrored when 12154 * displayed in text that is right-to-left. For example, 12155 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 12156 * defined to be an <i>opening parenthesis</i>. This will appear 12157 * as a "(" in text that is left-to-right but as a ")" in text 12158 * that is right-to-left. 12159 * 12160 * @param codePoint the character (Unicode code point) to be tested. 12161 * @return {@code true} if the character is mirrored, {@code false} 12162 * if the character is not mirrored or is not defined. 12163 * @since 1.5 12164 */ 12165 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 12166 /* 12167 public static boolean isMirrored(int codePoint) { 12168 return CharacterData.of(codePoint).isMirrored(codePoint); 12169 } 12170 */ isMirrored(int codePoint)12171 public static boolean isMirrored(int codePoint) { 12172 return isMirroredImpl(codePoint); 12173 } 12174 12175 @FastNative isMirroredImpl(int codePoint)12176 native static boolean isMirroredImpl(int codePoint); 12177 // END Android-changed: Reimplement methods natively on top of ICU4C. 12178 12179 /** 12180 * Compares two {@code Character} objects numerically. 12181 * 12182 * @param anotherCharacter the {@code Character} to be compared. 
12183 * @return the value {@code 0} if the argument {@code Character} 12184 * is equal to this {@code Character}; a value less than 12185 * {@code 0} if this {@code Character} is numerically less 12186 * than the {@code Character} argument; and a value greater than 12187 * {@code 0} if this {@code Character} is numerically greater 12188 * than the {@code Character} argument (unsigned comparison). 12189 * Note that this is strictly a numerical comparison; it is not 12190 * locale-dependent. 12191 * @since 1.2 12192 */ compareTo(Character anotherCharacter)12193 public int compareTo(Character anotherCharacter) { 12194 return compare(this.value, anotherCharacter.value); 12195 } 12196 12197 /** 12198 * Compares two {@code char} values numerically. 12199 * The value returned is identical to what would be returned by: 12200 * <pre> 12201 * Character.valueOf(x).compareTo(Character.valueOf(y)) 12202 * </pre> 12203 * 12204 * @param x the first {@code char} to compare 12205 * @param y the second {@code char} to compare 12206 * @return the value {@code 0} if {@code x == y}; 12207 * a value less than {@code 0} if {@code x < y}; and 12208 * a value greater than {@code 0} if {@code x > y} 12209 * @since 1.7 12210 */ compare(char x, char y)12211 public static int compare(char x, char y) { 12212 return x - y; 12213 } 12214 12215 // BEGIN Android-removed: Use ICU. 12216 /* 12217 * Converts the character (Unicode code point) argument to uppercase using 12218 * information from the UnicodeData file. 12219 * 12220 * @param codePoint the character (Unicode code point) to be converted. 12221 * @return either the uppercase equivalent of the character, if 12222 * any, or an error flag ({@code Character.ERROR}) 12223 * that indicates that a 1:M {@code char} mapping exists. 
12224 * @see Character#isLowerCase(char) 12225 * @see Character#isUpperCase(char) 12226 * @see Character#toLowerCase(char) 12227 * @see Character#toTitleCase(char) 12228 * @since 1.4 12229 * 12230 static int toUpperCaseEx(int codePoint) { 12231 assert isValidCodePoint(codePoint); 12232 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 12233 } 12234 12235 /** 12236 * Converts the character (Unicode code point) argument to uppercase using case 12237 * mapping information from the SpecialCasing file in the Unicode 12238 * specification. If a character has no explicit uppercase 12239 * mapping, then the {@code char} itself is returned in the 12240 * {@code char[]}. 12241 * 12242 * @param codePoint the character (Unicode code point) to be converted. 12243 * @return a {@code char[]} with the uppercased character. 12244 * @since 1.4 12245 * 12246 static char[] toUpperCaseCharArray(int codePoint) { 12247 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 12248 assert isBmpCodePoint(codePoint); 12249 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 12250 } 12251 */ 12252 // END Android-removed: Use ICU. 12253 12254 /** 12255 * The number of bits used to represent a {@code char} value in unsigned 12256 * binary form, constant {@code 16}. 12257 * 12258 * @since 1.5 12259 */ 12260 public static final int SIZE = 16; 12261 12262 /** 12263 * The number of bytes used to represent a {@code char} value in unsigned 12264 * binary form. 12265 * 12266 * @since 1.8 12267 */ 12268 public static final int BYTES = SIZE / Byte.SIZE; 12269 12270 /** 12271 * Returns the value obtained by reversing the order of the bytes in the 12272 * specified {@code char} value. 12273 * 12274 * @param ch The {@code char} of which to reverse the byte order. 12275 * @return the value obtained by reversing (or, equivalently, swapping) 12276 * the bytes in the specified {@code char} value. 
12277 * @since 1.5 12278 */ 12279 @IntrinsicCandidate reverseBytes(char ch)12280 public static char reverseBytes(char ch) { 12281 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 12282 } 12283 12284 /** 12285 * Returns the name of the specified character 12286 * {@code codePoint}, or null if the code point is 12287 * {@link #UNASSIGNED unassigned}. 12288 * <p> 12289 * If the specified character is not assigned a name by 12290 * the <i>UnicodeData</i> file (part of the Unicode Character 12291 * Database maintained by the Unicode Consortium), the returned 12292 * name is the same as the result of the expression: 12293 * 12294 * <blockquote>{@code 12295 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 12296 * + " " 12297 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12298 * 12299 * }</blockquote> 12300 * 12301 * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name 12302 * returned by this method follows the naming scheme in the 12303 * "Unicode Name Property" section of the Unicode Standard. For other 12304 * code points, such as Hangul/Ideographs, The name generation rule above 12305 * differs from the one defined in the Unicode Standard. 12306 * 12307 * @param codePoint the character (Unicode code point) 12308 * 12309 * @return the name of the specified character, or null if 12310 * the code point is unassigned. 12311 * 12312 * @throws IllegalArgumentException if the specified 12313 * {@code codePoint} is not a valid Unicode 12314 * code point. 12315 * 12316 * @since 1.7 12317 */ getName(int codePoint)12318 public static String getName(int codePoint) { 12319 if (!isValidCodePoint(codePoint)) { 12320 throw new IllegalArgumentException( 12321 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 12322 } 12323 // Android-changed: Use ICU. 
12324 // String name = CharacterName.get(codePoint); 12325 String name = getNameImpl(codePoint); 12326 if (name != null) 12327 return name; 12328 if (getType(codePoint) == UNASSIGNED) 12329 return null; 12330 UnicodeBlock block = UnicodeBlock.of(codePoint); 12331 if (block != null) 12332 return block.toString().replace('_', ' ') + " " 12333 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12334 // should never come here 12335 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12336 } 12337 12338 // BEGIN Android-removed: expose after CharacterName.getCodePoint() is imported. 12339 /** 12340 * Returns the code point value of the Unicode character specified by 12341 * the given character name. 12342 * <p> 12343 * If a character is not assigned a name by the <i>UnicodeData</i> 12344 * file (part of the Unicode Character Database maintained by the Unicode 12345 * Consortium), its name is defined as the result of the expression: 12346 * 12347 * <blockquote>{@code 12348 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 12349 * + " " 12350 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 12351 * 12352 * }</blockquote> 12353 * <p> 12354 * The {@code name} matching is case insensitive, with any leading and 12355 * trailing whitespace character removed. 12356 * 12357 * For the code points in the <i>UnicodeData</i> file, this method 12358 * recognizes the name which conforms to the name defined in the 12359 * "Unicode Name Property" section in the Unicode Standard. For other 12360 * code points, this method recognizes the name generated with 12361 * {@link #getName(int)} method. 12362 * 12363 * @param name the character name 12364 * 12365 * @return the code point value of the character specified by its name. 12366 * 12367 * @throws IllegalArgumentException if the specified {@code name} 12368 * is not a valid character name. 
12369 * @throws NullPointerException if {@code name} is {@code null} 12370 * 12371 * @since 9 12372 */ codePointOf(String name)12373 public static int codePointOf(String name) { 12374 name = name.trim().toUpperCase(Locale.ROOT); 12375 // Android-changed: Use ICU4C. 12376 // int cp = CharacterName.getInstance().getCodePoint(name); 12377 int cp = codePointOfImpl(name); 12378 if (cp != -1) 12379 return cp; 12380 try { 12381 int off = name.lastIndexOf(' '); 12382 if (off != -1) { 12383 cp = Integer.parseInt(name, off + 1, name.length(), 16); 12384 if (isValidCodePoint(cp) && name.equals(getName(cp))) 12385 return cp; 12386 } 12387 } catch (Exception x) {} 12388 throw new IllegalArgumentException("Unrecognized character name :" + name); 12389 } 12390 // END Android-removed: expose after CharacterName.getCodePoint() is imported. 12391 12392 // Android-added: Use ICU. 12393 // Implement getNameImpl() and codePointOfImpl() natively. getNameImpl(int codePoint)12394 private static native String getNameImpl(int codePoint); 12395 12396 @FastNative codePointOfImpl(String name)12397 private static native int codePointOfImpl(String name); 12398 } 12399