1 /* 2 * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import dalvik.annotation.optimization.FastNative; 29 // Android-removed: CDS is not used on Android. 30 // import jdk.internal.misc.CDS; 31 import jdk.internal.vm.annotation.IntrinsicCandidate; 32 33 import java.util.Arrays; 34 import java.util.HashMap; 35 import java.util.Locale; 36 import java.util.Map; 37 38 // BEGIN Android-removed: dynamic constants not supported on Android. 
39 /* 40 import java.lang.constant.Constable; 41 import java.lang.constant.DynamicConstantDesc; 42 import java.util.Optional; 43 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST; 44 import static java.lang.constant.ConstantDescs.CD_char; 45 import static java.lang.constant.ConstantDescs.CD_int; 46 import static java.lang.constant.ConstantDescs.DEFAULT_NAME; 47 */ 48 // END Android-removed: dynamic constants not supported on Android. 49 50 // Android-changed: Remove reference to a specific unicode standard version 51 /** 52 * The {@code Character} class wraps a value of the primitive 53 * type {@code char} in an object. An object of class 54 * {@code Character} contains a single field whose type is 55 * {@code char}. 56 * <p> 57 * In addition, this class provides several methods for determining 58 * a character's category (lowercase letter, digit, etc.) and for converting 59 * characters from uppercase to lowercase and vice versa. 60 * <p> 61 * Character information is based on the Unicode Standard. 62 * <p> 63 * The methods and data of class {@code Character} are defined by 64 * the information in the <i>UnicodeData</i> file that is part of the 65 * Unicode Character Database maintained by the Unicode 66 * Consortium. This file specifies various properties including name 67 * and general category for every defined Unicode code point or 68 * character range.
The file is available 82 * from the Unicode Consortium at 83 * <a href="http://www.unicode.org">http://www.unicode.org</a>. 84 * <p> 85 * Character information is based on the Unicode Standard, version 13.0. 86 * <p> 87 * The Java platform has supported different versions of the Unicode 88 * Standard over time. Upgrades to newer versions of the Unicode Standard 89 * occurred in the following Java releases, each indicating the new version: 90 * <table class="striped"> 91 * <caption style="display:none">Shows Java releases and supported Unicode versions</caption> 92 * <thead> 93 * <tr><th scope="col">Java release</th> 94 * <th scope="col">Unicode version</th></tr> 95 * </thead> 96 * <tbody> 97 * <tr><td>Java SE 15</td> 98 * <td>Unicode 13.0</td></tr> 99 * <tr><td>Java SE 13</td> 100 * <td>Unicode 12.1</td></tr> 101 * <tr><td>Java SE 12</td> 102 * <td>Unicode 11.0</td></tr> 103 * <tr><td>Java SE 11</td> 104 * <td>Unicode 10.0</td></tr> 105 * <tr><td>Java SE 9</td> 106 * <td>Unicode 8.0</td></tr> 107 * <tr><td>Java SE 8</td> 108 * <td>Unicode 6.2</td></tr> 109 * <tr><td>Java SE 7</td> 110 * <td>Unicode 6.0</td></tr> 111 * <tr><td>Java SE 5.0</td> 112 * <td>Unicode 4.0</td></tr> 113 * <tr><td>Java SE 1.4</td> 114 * <td>Unicode 3.0</td></tr> 115 * <tr><td>JDK 1.1</td> 116 * <td>Unicode 2.0</td></tr> 117 * <tr><td>JDK 1.0.2</td> 118 * <td>Unicode 1.1.5</td></tr> 119 * </tbody> 120 * </table> 121 * Variations from these base Unicode versions, such as recognized appendixes, 122 * are documented elsewhere. 123 * <h2><a id="unicode">Unicode Character Representations</a></h2> 124 * 125 * <p>The {@code char} data type (and therefore the value that a 126 * {@code Character} object encapsulates) are based on the 127 * original Unicode specification, which defined characters as 128 * fixed-width 16-bit entities. The Unicode Standard has since been 129 * changed to allow for characters whose representation requires more 130 * than 16 bits. 
The range of legal <em>code point</em>s is now 131 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>. 132 * (Refer to the <a 133 * href="http://www.unicode.org/reports/tr27/#notation"><i> 134 * definition</i></a> of the U+<i>n</i> notation in the Unicode 135 * Standard.) 136 * 137 * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is 138 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>. 139 * <a id="supplementary">Characters</a> whose code points are greater 140 * than U+FFFF are called <em>supplementary character</em>s. The Java 141 * platform uses the UTF-16 representation in {@code char} arrays and 142 * in the {@code String} and {@code StringBuffer} classes. In 143 * this representation, supplementary characters are represented as a pair 144 * of {@code char} values, the first from the <em>high-surrogates</em> 145 * range, (\uD800-\uDBFF), the second from the 146 * <em>low-surrogates</em> range (\uDC00-\uDFFF). 147 * 148 * <p>A {@code char} value, therefore, represents Basic 149 * Multilingual Plane (BMP) code points, including the surrogate 150 * code points, or code units of the UTF-16 encoding. An 151 * {@code int} value represents all Unicode code points, 152 * including supplementary code points. The lower (least significant) 153 * 21 bits of {@code int} are used to represent Unicode code 154 * points and the upper (most significant) 11 bits must be zero. 155 * Unless otherwise specified, the behavior with respect to 156 * supplementary characters and surrogate {@code char} values is 157 * as follows: 158 * 159 * <ul> 160 * <li>The methods that only accept a {@code char} value cannot support 161 * supplementary characters. They treat {@code char} values from the 162 * surrogate ranges as undefined characters. For example, 163 * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though 164 * this specific value if followed by any low-surrogate value in a string 165 * would represent a letter. 
166 * 167 * <li>The methods that accept an {@code int} value support all 168 * Unicode characters, including supplementary characters. For 169 * example, {@code Character.isLetter(0x2F81A)} returns 170 * {@code true} because the code point value represents a letter 171 * (a CJK ideograph). 172 * </ul> 173 * 174 * <p>In the Java SE API documentation, <em>Unicode code point</em> is 175 * used for character values in the range between U+0000 and U+10FFFF, 176 * and <em>Unicode code unit</em> is used for 16-bit 177 * {@code char} values that are code units of the <em>UTF-16</em> 178 * encoding. For more information on Unicode terminology, refer to the 179 * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>. 180 * 181 * <!-- Android-removed: paragraph on ValueBased 182 * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a> 183 * class; programmers should treat instances that are 184 * {@linkplain #equals(Object) equal} as interchangeable and should not 185 * use instances for synchronization, or unpredictable behavior may 186 * occur. For example, in a future release, synchronization may fail. 187 * --> 188 * 189 * @author Lee Boynton 190 * @author Guy Steele 191 * @author Akira Tanaka 192 * @author Martin Buchholz 193 * @author Ulf Zibis 194 * @since 1.0 195 */ 196 @jdk.internal.ValueBased 197 public final 198 class Character implements java.io.Serializable, Comparable<Character> { 199 // Android-removed: no Constable support. 200 // , Constable 201 /** 202 * The minimum radix available for conversion to and from strings. 203 * The constant value of this field is the smallest value permitted 204 * for the radix argument in radix-conversion methods such as the 205 * {@code digit} method, the {@code forDigit} method, and the 206 * {@code toString} method of class {@code Integer}. 
207 * 208 * @see Character#digit(char, int) 209 * @see Character#forDigit(int, int) 210 * @see Integer#toString(int, int) 211 * @see Integer#valueOf(String) 212 */ 213 public static final int MIN_RADIX = 2; 214 215 /** 216 * The maximum radix available for conversion to and from strings. 217 * The constant value of this field is the largest value permitted 218 * for the radix argument in radix-conversion methods such as the 219 * {@code digit} method, the {@code forDigit} method, and the 220 * {@code toString} method of class {@code Integer}. 221 * 222 * @see Character#digit(char, int) 223 * @see Character#forDigit(int, int) 224 * @see Integer#toString(int, int) 225 * @see Integer#valueOf(String) 226 */ 227 public static final int MAX_RADIX = 36; 228 229 /** 230 * The constant value of this field is the smallest value of type 231 * {@code char}, {@code '\u005Cu0000'}. 232 * 233 * @since 1.0.2 234 */ 235 public static final char MIN_VALUE = '\u0000'; 236 237 /** 238 * The constant value of this field is the largest value of type 239 * {@code char}, {@code '\u005CuFFFF'}. 240 * 241 * @since 1.0.2 242 */ 243 public static final char MAX_VALUE = '\uFFFF'; 244 245 /** 246 * The {@code Class} instance representing the primitive type 247 * {@code char}. 248 * 249 * @since 1.1 250 */ 251 @SuppressWarnings("unchecked") 252 public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char"); 253 254 /* 255 * Normative general types 256 */ 257 258 /* 259 * General character types 260 */ 261 262 /** 263 * General category "Cn" in the Unicode specification. 264 * @since 1.1 265 */ 266 public static final byte UNASSIGNED = 0; 267 268 /** 269 * General category "Lu" in the Unicode specification. 270 * @since 1.1 271 */ 272 public static final byte UPPERCASE_LETTER = 1; 273 274 /** 275 * General category "Ll" in the Unicode specification. 
276 * @since 1.1 277 */ 278 public static final byte LOWERCASE_LETTER = 2; 279 280 /** 281 * General category "Lt" in the Unicode specification. 282 * @since 1.1 283 */ 284 public static final byte TITLECASE_LETTER = 3; 285 286 /** 287 * General category "Lm" in the Unicode specification. 288 * @since 1.1 289 */ 290 public static final byte MODIFIER_LETTER = 4; 291 292 /** 293 * General category "Lo" in the Unicode specification. 294 * @since 1.1 295 */ 296 public static final byte OTHER_LETTER = 5; 297 298 /** 299 * General category "Mn" in the Unicode specification. 300 * @since 1.1 301 */ 302 public static final byte NON_SPACING_MARK = 6; 303 304 /** 305 * General category "Me" in the Unicode specification. 306 * @since 1.1 307 */ 308 public static final byte ENCLOSING_MARK = 7; 309 310 /** 311 * General category "Mc" in the Unicode specification. 312 * @since 1.1 313 */ 314 public static final byte COMBINING_SPACING_MARK = 8; 315 316 /** 317 * General category "Nd" in the Unicode specification. 318 * @since 1.1 319 */ 320 public static final byte DECIMAL_DIGIT_NUMBER = 9; 321 322 /** 323 * General category "Nl" in the Unicode specification. 324 * @since 1.1 325 */ 326 public static final byte LETTER_NUMBER = 10; 327 328 /** 329 * General category "No" in the Unicode specification. 330 * @since 1.1 331 */ 332 public static final byte OTHER_NUMBER = 11; 333 334 /** 335 * General category "Zs" in the Unicode specification. 336 * @since 1.1 337 */ 338 public static final byte SPACE_SEPARATOR = 12; 339 340 /** 341 * General category "Zl" in the Unicode specification. 342 * @since 1.1 343 */ 344 public static final byte LINE_SEPARATOR = 13; 345 346 /** 347 * General category "Zp" in the Unicode specification. 348 * @since 1.1 349 */ 350 public static final byte PARAGRAPH_SEPARATOR = 14; 351 352 /** 353 * General category "Cc" in the Unicode specification. 
354 * @since 1.1 355 */ 356 public static final byte CONTROL = 15; 357 358 /** 359 * General category "Cf" in the Unicode specification. 360 * @since 1.1 361 */ 362 public static final byte FORMAT = 16; 363 364 /** 365 * General category "Co" in the Unicode specification. 366 * @since 1.1 367 */ 368 public static final byte PRIVATE_USE = 18; 369 370 /** 371 * General category "Cs" in the Unicode specification. 372 * @since 1.1 373 */ 374 public static final byte SURROGATE = 19; 375 376 /** 377 * General category "Pd" in the Unicode specification. 378 * @since 1.1 379 */ 380 public static final byte DASH_PUNCTUATION = 20; 381 382 /** 383 * General category "Ps" in the Unicode specification. 384 * @since 1.1 385 */ 386 public static final byte START_PUNCTUATION = 21; 387 388 /** 389 * General category "Pe" in the Unicode specification. 390 * @since 1.1 391 */ 392 public static final byte END_PUNCTUATION = 22; 393 394 /** 395 * General category "Pc" in the Unicode specification. 396 * @since 1.1 397 */ 398 public static final byte CONNECTOR_PUNCTUATION = 23; 399 400 /** 401 * General category "Po" in the Unicode specification. 402 * @since 1.1 403 */ 404 public static final byte OTHER_PUNCTUATION = 24; 405 406 /** 407 * General category "Sm" in the Unicode specification. 408 * @since 1.1 409 */ 410 public static final byte MATH_SYMBOL = 25; 411 412 /** 413 * General category "Sc" in the Unicode specification. 414 * @since 1.1 415 */ 416 public static final byte CURRENCY_SYMBOL = 26; 417 418 /** 419 * General category "Sk" in the Unicode specification. 420 * @since 1.1 421 */ 422 public static final byte MODIFIER_SYMBOL = 27; 423 424 /** 425 * General category "So" in the Unicode specification. 426 * @since 1.1 427 */ 428 public static final byte OTHER_SYMBOL = 28; 429 430 /** 431 * General category "Pi" in the Unicode specification. 
432 * @since 1.4 433 */ 434 public static final byte INITIAL_QUOTE_PUNCTUATION = 29; 435 436 /** 437 * General category "Pf" in the Unicode specification. 438 * @since 1.4 439 */ 440 public static final byte FINAL_QUOTE_PUNCTUATION = 30; 441 442 /** 443 * Error flag. Use int (code point) to avoid confusion with U+FFFF. 444 */ 445 static final int ERROR = 0xFFFFFFFF; 446 447 448 /** 449 * Undefined bidirectional character type. Undefined {@code char} 450 * values have undefined directionality in the Unicode specification. 451 * @since 1.4 452 */ 453 public static final byte DIRECTIONALITY_UNDEFINED = -1; 454 455 /** 456 * Strong bidirectional character type "L" in the Unicode specification. 457 * @since 1.4 458 */ 459 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; 460 461 /** 462 * Strong bidirectional character type "R" in the Unicode specification. 463 * @since 1.4 464 */ 465 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; 466 467 /** 468 * Strong bidirectional character type "AL" in the Unicode specification. 469 * @since 1.4 470 */ 471 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; 472 473 /** 474 * Weak bidirectional character type "EN" in the Unicode specification. 475 * @since 1.4 476 */ 477 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; 478 479 /** 480 * Weak bidirectional character type "ES" in the Unicode specification. 481 * @since 1.4 482 */ 483 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; 484 485 /** 486 * Weak bidirectional character type "ET" in the Unicode specification. 487 * @since 1.4 488 */ 489 public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; 490 491 /** 492 * Weak bidirectional character type "AN" in the Unicode specification. 493 * @since 1.4 494 */ 495 public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; 496 497 /** 498 * Weak bidirectional character type "CS" in the Unicode specification. 
499 * @since 1.4 500 */ 501 public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; 502 503 /** 504 * Weak bidirectional character type "NSM" in the Unicode specification. 505 * @since 1.4 506 */ 507 public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; 508 509 /** 510 * Weak bidirectional character type "BN" in the Unicode specification. 511 * @since 1.4 512 */ 513 public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; 514 515 /** 516 * Neutral bidirectional character type "B" in the Unicode specification. 517 * @since 1.4 518 */ 519 public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; 520 521 /** 522 * Neutral bidirectional character type "S" in the Unicode specification. 523 * @since 1.4 524 */ 525 public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; 526 527 /** 528 * Neutral bidirectional character type "WS" in the Unicode specification. 529 * @since 1.4 530 */ 531 public static final byte DIRECTIONALITY_WHITESPACE = 12; 532 533 /** 534 * Neutral bidirectional character type "ON" in the Unicode specification. 535 * @since 1.4 536 */ 537 public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; 538 539 /** 540 * Strong bidirectional character type "LRE" in the Unicode specification. 541 * @since 1.4 542 */ 543 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; 544 545 /** 546 * Strong bidirectional character type "LRO" in the Unicode specification. 547 * @since 1.4 548 */ 549 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; 550 551 /** 552 * Strong bidirectional character type "RLE" in the Unicode specification. 553 * @since 1.4 554 */ 555 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; 556 557 /** 558 * Strong bidirectional character type "RLO" in the Unicode specification. 
559 * @since 1.4 560 */ 561 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; 562 563 /** 564 * Weak bidirectional character type "PDF" in the Unicode specification. 565 * @since 1.4 566 */ 567 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; 568 569 /** 570 * Weak bidirectional character type "LRI" in the Unicode specification. 571 * @since 9 572 */ 573 public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19; 574 575 /** 576 * Weak bidirectional character type "RLI" in the Unicode specification. 577 * @since 9 578 */ 579 public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20; 580 581 /** 582 * Weak bidirectional character type "FSI" in the Unicode specification. 583 * @since 9 584 */ 585 public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21; 586 587 /** 588 * Weak bidirectional character type "PDI" in the Unicode specification. 589 * @since 9 590 */ 591 public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22; 592 593 /** 594 * The minimum value of a 595 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 596 * Unicode high-surrogate code unit</a> 597 * in the UTF-16 encoding, constant {@code '\u005CuD800'}. 598 * A high-surrogate is also known as a <i>leading-surrogate</i>. 599 * 600 * @since 1.5 601 */ 602 public static final char MIN_HIGH_SURROGATE = '\uD800'; 603 604 /** 605 * The maximum value of a 606 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 607 * Unicode high-surrogate code unit</a> 608 * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}. 609 * A high-surrogate is also known as a <i>leading-surrogate</i>. 610 * 611 * @since 1.5 612 */ 613 public static final char MAX_HIGH_SURROGATE = '\uDBFF'; 614 615 /** 616 * The minimum value of a 617 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 618 * Unicode low-surrogate code unit</a> 619 * in the UTF-16 encoding, constant {@code '\u005CuDC00'}. 
620 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 621 * 622 * @since 1.5 623 */ 624 public static final char MIN_LOW_SURROGATE = '\uDC00'; 625 626 /** 627 * The maximum value of a 628 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 629 * Unicode low-surrogate code unit</a> 630 * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}. 631 * A low-surrogate is also known as a <i>trailing-surrogate</i>. 632 * 633 * @since 1.5 634 */ 635 public static final char MAX_LOW_SURROGATE = '\uDFFF'; 636 637 /** 638 * The minimum value of a Unicode surrogate code unit in the 639 * UTF-16 encoding, constant {@code '\u005CuD800'}. 640 * 641 * @since 1.5 642 */ 643 public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; 644 645 /** 646 * The maximum value of a Unicode surrogate code unit in the 647 * UTF-16 encoding, constant {@code '\u005CuDFFF'}. 648 * 649 * @since 1.5 650 */ 651 public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; 652 653 /** 654 * The minimum value of a 655 * <a href="http://www.unicode.org/glossary/#supplementary_code_point"> 656 * Unicode supplementary code point</a>, constant {@code U+10000}. 657 * 658 * @since 1.5 659 */ 660 public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; 661 662 /** 663 * The minimum value of a 664 * <a href="http://www.unicode.org/glossary/#code_point"> 665 * Unicode code point</a>, constant {@code U+0000}. 666 * 667 * @since 1.5 668 */ 669 public static final int MIN_CODE_POINT = 0x000000; 670 671 /** 672 * The maximum value of a 673 * <a href="http://www.unicode.org/glossary/#code_point"> 674 * Unicode code point</a>, constant {@code U+10FFFF}. 675 * 676 * @since 1.5 677 */ 678 public static final int MAX_CODE_POINT = 0X10FFFF; 679 680 // BEGIN Android-added: Use ICU. 681 // The indices in int[] DIRECTIONALITY are based on icu4c's u_charDirection(), 682 // accessed via getDirectionalityImpl(), implemented in Character.cpp. 
683 private static final byte[] DIRECTIONALITY = new byte[] { 684 DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, 685 DIRECTIONALITY_EUROPEAN_NUMBER, 686 DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, 687 DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, 688 DIRECTIONALITY_ARABIC_NUMBER, 689 DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, 690 DIRECTIONALITY_PARAGRAPH_SEPARATOR, 691 DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, 692 DIRECTIONALITY_OTHER_NEUTRALS, 693 DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, 694 DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, 695 DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, 696 DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, 697 DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, 698 DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, 699 DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; 700 // END Android-added: Use ICU. 701 702 // BEGIN Android-removed: dynamic constants not supported on Android. 703 /** 704 * Returns an {@link Optional} containing the nominal descriptor for this 705 * instance. 706 * 707 * @return an {@link Optional} describing the {@linkplain Character} instance 708 * @since 15 709 * 710 @Override 711 public Optional<DynamicConstantDesc<Character>> describeConstable() { 712 return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value)); 713 } 714 */ 715 // END Android-removed: dynamic constants not supported on Android. 716 717 /** 718 * Instances of this class represent particular subsets of the Unicode 719 * character set. The only family of subsets defined in the 720 * {@code Character} class is {@link Character.UnicodeBlock}. 721 * Other portions of the Java API may define other subsets for their 722 * own purposes. 723 * 724 * @since 1.2 725 */ 726 public static class Subset { 727 728 private String name; 729 730 /** 731 * Constructs a new {@code Subset} instance. 
732 * 733 * @param name The name of this subset 734 * @throws NullPointerException if name is {@code null} 735 */ Subset(String name)736 protected Subset(String name) { 737 if (name == null) { 738 throw new NullPointerException("name"); 739 } 740 this.name = name; 741 } 742 743 /** 744 * Compares two {@code Subset} objects for equality. 745 * This method returns {@code true} if and only if 746 * {@code this} and the argument refer to the same 747 * object; since this method is {@code final}, this 748 * guarantee holds for all subclasses. 749 */ equals(Object obj)750 public final boolean equals(Object obj) { 751 return (this == obj); 752 } 753 754 /** 755 * Returns the standard hash code as defined by the 756 * {@link Object#hashCode} method. This method 757 * is {@code final} in order to ensure that the 758 * {@code equals} and {@code hashCode} methods will 759 * be consistent in all subclasses. 760 */ hashCode()761 public final int hashCode() { 762 return super.hashCode(); 763 } 764 765 /** 766 * Returns the name of this subset. 767 */ toString()768 public final String toString() { 769 return name; 770 } 771 } 772 773 // See http://www.unicode.org/Public/UNIDATA/Blocks.txt 774 // for the latest specification of Unicode Blocks. 775 776 /** 777 * A family of character subsets representing the character blocks in the 778 * Unicode specification. Character blocks generally define characters 779 * used for a specific script or purpose. A character is contained by 780 * at most one Unicode block. 781 * 782 * @since 1.2 783 */ 784 public static final class UnicodeBlock extends Subset { 785 /** 786 * 684 - the expected number of entities 787 * 0.75 - the default load factor of HashMap 788 */ 789 private static final int NUM_ENTITIES = 684; 790 private static Map<String, UnicodeBlock> map = 791 new HashMap<>((int)(NUM_ENTITIES / 0.75f + 1.0f)); 792 793 /** 794 * Creates a UnicodeBlock with the given identifier name. 795 * This name must be the same as the block identifier. 
796 */ UnicodeBlock(String idName)797 private UnicodeBlock(String idName) { 798 super(idName); 799 map.put(idName, this); 800 } 801 802 // BEGIN Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229 803 // Add a (String, boolean) constructor for use by SURROGATES_AREA. UnicodeBlock(String idName, boolean isMap)804 private UnicodeBlock(String idName, boolean isMap) { 805 super(idName); 806 if (isMap) { 807 map.put(idName, this); 808 } 809 } 810 // END Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229 811 812 /** 813 * Creates a UnicodeBlock with the given identifier name and 814 * alias name. 815 */ UnicodeBlock(String idName, String alias)816 private UnicodeBlock(String idName, String alias) { 817 this(idName); 818 map.put(alias, this); 819 } 820 821 /** 822 * Creates a UnicodeBlock with the given identifier name and 823 * alias names. 824 */ UnicodeBlock(String idName, String... aliases)825 private UnicodeBlock(String idName, String... aliases) { 826 this(idName); 827 for (String alias : aliases) 828 map.put(alias, this); 829 } 830 831 /** 832 * Constant for the "Basic Latin" Unicode character block. 833 * @since 1.2 834 */ 835 public static final UnicodeBlock BASIC_LATIN = 836 new UnicodeBlock("BASIC_LATIN", 837 "BASIC LATIN", 838 "BASICLATIN"); 839 840 /** 841 * Constant for the "Latin-1 Supplement" Unicode character block. 842 * @since 1.2 843 */ 844 public static final UnicodeBlock LATIN_1_SUPPLEMENT = 845 new UnicodeBlock("LATIN_1_SUPPLEMENT", 846 "LATIN-1 SUPPLEMENT", 847 "LATIN-1SUPPLEMENT"); 848 849 /** 850 * Constant for the "Latin Extended-A" Unicode character block. 851 * @since 1.2 852 */ 853 public static final UnicodeBlock LATIN_EXTENDED_A = 854 new UnicodeBlock("LATIN_EXTENDED_A", 855 "LATIN EXTENDED-A", 856 "LATINEXTENDED-A"); 857 858 /** 859 * Constant for the "Latin Extended-B" Unicode character block. 
860 * @since 1.2 861 */ 862 public static final UnicodeBlock LATIN_EXTENDED_B = 863 new UnicodeBlock("LATIN_EXTENDED_B", 864 "LATIN EXTENDED-B", 865 "LATINEXTENDED-B"); 866 867 /** 868 * Constant for the "IPA Extensions" Unicode character block. 869 * @since 1.2 870 */ 871 public static final UnicodeBlock IPA_EXTENSIONS = 872 new UnicodeBlock("IPA_EXTENSIONS", 873 "IPA EXTENSIONS", 874 "IPAEXTENSIONS"); 875 876 /** 877 * Constant for the "Spacing Modifier Letters" Unicode character block. 878 * @since 1.2 879 */ 880 public static final UnicodeBlock SPACING_MODIFIER_LETTERS = 881 new UnicodeBlock("SPACING_MODIFIER_LETTERS", 882 "SPACING MODIFIER LETTERS", 883 "SPACINGMODIFIERLETTERS"); 884 885 /** 886 * Constant for the "Combining Diacritical Marks" Unicode character block. 887 * @since 1.2 888 */ 889 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = 890 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", 891 "COMBINING DIACRITICAL MARKS", 892 "COMBININGDIACRITICALMARKS"); 893 894 /** 895 * Constant for the "Greek and Coptic" Unicode character block. 896 * <p> 897 * This block was previously known as the "Greek" block. 898 * 899 * @since 1.2 900 */ 901 public static final UnicodeBlock GREEK = 902 new UnicodeBlock("GREEK", 903 "GREEK AND COPTIC", 904 "GREEKANDCOPTIC"); 905 906 /** 907 * Constant for the "Cyrillic" Unicode character block. 908 * @since 1.2 909 */ 910 public static final UnicodeBlock CYRILLIC = 911 new UnicodeBlock("CYRILLIC"); 912 913 /** 914 * Constant for the "Armenian" Unicode character block. 915 * @since 1.2 916 */ 917 public static final UnicodeBlock ARMENIAN = 918 new UnicodeBlock("ARMENIAN"); 919 920 /** 921 * Constant for the "Hebrew" Unicode character block. 922 * @since 1.2 923 */ 924 public static final UnicodeBlock HEBREW = 925 new UnicodeBlock("HEBREW"); 926 927 /** 928 * Constant for the "Arabic" Unicode character block. 
929 * @since 1.2 930 */ 931 public static final UnicodeBlock ARABIC = 932 new UnicodeBlock("ARABIC"); 933 934 /** 935 * Constant for the "Devanagari" Unicode character block. 936 * @since 1.2 937 */ 938 public static final UnicodeBlock DEVANAGARI = 939 new UnicodeBlock("DEVANAGARI"); 940 941 /** 942 * Constant for the "Bengali" Unicode character block. 943 * @since 1.2 944 */ 945 public static final UnicodeBlock BENGALI = 946 new UnicodeBlock("BENGALI"); 947 948 /** 949 * Constant for the "Gurmukhi" Unicode character block. 950 * @since 1.2 951 */ 952 public static final UnicodeBlock GURMUKHI = 953 new UnicodeBlock("GURMUKHI"); 954 955 /** 956 * Constant for the "Gujarati" Unicode character block. 957 * @since 1.2 958 */ 959 public static final UnicodeBlock GUJARATI = 960 new UnicodeBlock("GUJARATI"); 961 962 /** 963 * Constant for the "Oriya" Unicode character block. 964 * @since 1.2 965 */ 966 public static final UnicodeBlock ORIYA = 967 new UnicodeBlock("ORIYA"); 968 969 /** 970 * Constant for the "Tamil" Unicode character block. 971 * @since 1.2 972 */ 973 public static final UnicodeBlock TAMIL = 974 new UnicodeBlock("TAMIL"); 975 976 /** 977 * Constant for the "Telugu" Unicode character block. 978 * @since 1.2 979 */ 980 public static final UnicodeBlock TELUGU = 981 new UnicodeBlock("TELUGU"); 982 983 /** 984 * Constant for the "Kannada" Unicode character block. 985 * @since 1.2 986 */ 987 public static final UnicodeBlock KANNADA = 988 new UnicodeBlock("KANNADA"); 989 990 /** 991 * Constant for the "Malayalam" Unicode character block. 992 * @since 1.2 993 */ 994 public static final UnicodeBlock MALAYALAM = 995 new UnicodeBlock("MALAYALAM"); 996 997 /** 998 * Constant for the "Thai" Unicode character block. 999 * @since 1.2 1000 */ 1001 public static final UnicodeBlock THAI = 1002 new UnicodeBlock("THAI"); 1003 1004 /** 1005 * Constant for the "Lao" Unicode character block. 
1006 * @since 1.2 1007 */ 1008 public static final UnicodeBlock LAO = 1009 new UnicodeBlock("LAO"); 1010 1011 /** 1012 * Constant for the "Tibetan" Unicode character block. 1013 * @since 1.2 1014 */ 1015 public static final UnicodeBlock TIBETAN = 1016 new UnicodeBlock("TIBETAN"); 1017 1018 /** 1019 * Constant for the "Georgian" Unicode character block. 1020 * @since 1.2 1021 */ 1022 public static final UnicodeBlock GEORGIAN = 1023 new UnicodeBlock("GEORGIAN"); 1024 1025 /** 1026 * Constant for the "Hangul Jamo" Unicode character block. 1027 * @since 1.2 1028 */ 1029 public static final UnicodeBlock HANGUL_JAMO = 1030 new UnicodeBlock("HANGUL_JAMO", 1031 "HANGUL JAMO", 1032 "HANGULJAMO"); 1033 1034 /** 1035 * Constant for the "Latin Extended Additional" Unicode character block. 1036 * @since 1.2 1037 */ 1038 public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = 1039 new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", 1040 "LATIN EXTENDED ADDITIONAL", 1041 "LATINEXTENDEDADDITIONAL"); 1042 1043 /** 1044 * Constant for the "Greek Extended" Unicode character block. 1045 * @since 1.2 1046 */ 1047 public static final UnicodeBlock GREEK_EXTENDED = 1048 new UnicodeBlock("GREEK_EXTENDED", 1049 "GREEK EXTENDED", 1050 "GREEKEXTENDED"); 1051 1052 /** 1053 * Constant for the "General Punctuation" Unicode character block. 1054 * @since 1.2 1055 */ 1056 public static final UnicodeBlock GENERAL_PUNCTUATION = 1057 new UnicodeBlock("GENERAL_PUNCTUATION", 1058 "GENERAL PUNCTUATION", 1059 "GENERALPUNCTUATION"); 1060 1061 /** 1062 * Constant for the "Superscripts and Subscripts" Unicode character 1063 * block. 1064 * @since 1.2 1065 */ 1066 public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = 1067 new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", 1068 "SUPERSCRIPTS AND SUBSCRIPTS", 1069 "SUPERSCRIPTSANDSUBSCRIPTS"); 1070 1071 /** 1072 * Constant for the "Currency Symbols" Unicode character block. 
1073 * @since 1.2 1074 */ 1075 public static final UnicodeBlock CURRENCY_SYMBOLS = 1076 new UnicodeBlock("CURRENCY_SYMBOLS", 1077 "CURRENCY SYMBOLS", 1078 "CURRENCYSYMBOLS"); 1079 1080 /** 1081 * Constant for the "Combining Diacritical Marks for Symbols" Unicode 1082 * character block. 1083 * <p> 1084 * This block was previously known as "Combining Marks for Symbols". 1085 * @since 1.2 1086 */ 1087 public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = 1088 new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", 1089 "COMBINING DIACRITICAL MARKS FOR SYMBOLS", 1090 "COMBININGDIACRITICALMARKSFORSYMBOLS", 1091 "COMBINING MARKS FOR SYMBOLS", 1092 "COMBININGMARKSFORSYMBOLS"); 1093 1094 /** 1095 * Constant for the "Letterlike Symbols" Unicode character block. 1096 * @since 1.2 1097 */ 1098 public static final UnicodeBlock LETTERLIKE_SYMBOLS = 1099 new UnicodeBlock("LETTERLIKE_SYMBOLS", 1100 "LETTERLIKE SYMBOLS", 1101 "LETTERLIKESYMBOLS"); 1102 1103 /** 1104 * Constant for the "Number Forms" Unicode character block. 1105 * @since 1.2 1106 */ 1107 public static final UnicodeBlock NUMBER_FORMS = 1108 new UnicodeBlock("NUMBER_FORMS", 1109 "NUMBER FORMS", 1110 "NUMBERFORMS"); 1111 1112 /** 1113 * Constant for the "Arrows" Unicode character block. 1114 * @since 1.2 1115 */ 1116 public static final UnicodeBlock ARROWS = 1117 new UnicodeBlock("ARROWS"); 1118 1119 /** 1120 * Constant for the "Mathematical Operators" Unicode character block. 1121 * @since 1.2 1122 */ 1123 public static final UnicodeBlock MATHEMATICAL_OPERATORS = 1124 new UnicodeBlock("MATHEMATICAL_OPERATORS", 1125 "MATHEMATICAL OPERATORS", 1126 "MATHEMATICALOPERATORS"); 1127 1128 /** 1129 * Constant for the "Miscellaneous Technical" Unicode character block. 
1130 * @since 1.2 1131 */ 1132 public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = 1133 new UnicodeBlock("MISCELLANEOUS_TECHNICAL", 1134 "MISCELLANEOUS TECHNICAL", 1135 "MISCELLANEOUSTECHNICAL"); 1136 1137 /** 1138 * Constant for the "Control Pictures" Unicode character block. 1139 * @since 1.2 1140 */ 1141 public static final UnicodeBlock CONTROL_PICTURES = 1142 new UnicodeBlock("CONTROL_PICTURES", 1143 "CONTROL PICTURES", 1144 "CONTROLPICTURES"); 1145 1146 /** 1147 * Constant for the "Optical Character Recognition" Unicode character block. 1148 * @since 1.2 1149 */ 1150 public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = 1151 new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", 1152 "OPTICAL CHARACTER RECOGNITION", 1153 "OPTICALCHARACTERRECOGNITION"); 1154 1155 /** 1156 * Constant for the "Enclosed Alphanumerics" Unicode character block. 1157 * @since 1.2 1158 */ 1159 public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = 1160 new UnicodeBlock("ENCLOSED_ALPHANUMERICS", 1161 "ENCLOSED ALPHANUMERICS", 1162 "ENCLOSEDALPHANUMERICS"); 1163 1164 /** 1165 * Constant for the "Box Drawing" Unicode character block. 1166 * @since 1.2 1167 */ 1168 public static final UnicodeBlock BOX_DRAWING = 1169 new UnicodeBlock("BOX_DRAWING", 1170 "BOX DRAWING", 1171 "BOXDRAWING"); 1172 1173 /** 1174 * Constant for the "Block Elements" Unicode character block. 1175 * @since 1.2 1176 */ 1177 public static final UnicodeBlock BLOCK_ELEMENTS = 1178 new UnicodeBlock("BLOCK_ELEMENTS", 1179 "BLOCK ELEMENTS", 1180 "BLOCKELEMENTS"); 1181 1182 /** 1183 * Constant for the "Geometric Shapes" Unicode character block. 1184 * @since 1.2 1185 */ 1186 public static final UnicodeBlock GEOMETRIC_SHAPES = 1187 new UnicodeBlock("GEOMETRIC_SHAPES", 1188 "GEOMETRIC SHAPES", 1189 "GEOMETRICSHAPES"); 1190 1191 /** 1192 * Constant for the "Miscellaneous Symbols" Unicode character block. 
1193 * @since 1.2 1194 */ 1195 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = 1196 new UnicodeBlock("MISCELLANEOUS_SYMBOLS", 1197 "MISCELLANEOUS SYMBOLS", 1198 "MISCELLANEOUSSYMBOLS"); 1199 1200 /** 1201 * Constant for the "Dingbats" Unicode character block. 1202 * @since 1.2 1203 */ 1204 public static final UnicodeBlock DINGBATS = 1205 new UnicodeBlock("DINGBATS"); 1206 1207 /** 1208 * Constant for the "CJK Symbols and Punctuation" Unicode character block. 1209 * @since 1.2 1210 */ 1211 public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = 1212 new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", 1213 "CJK SYMBOLS AND PUNCTUATION", 1214 "CJKSYMBOLSANDPUNCTUATION"); 1215 1216 /** 1217 * Constant for the "Hiragana" Unicode character block. 1218 * @since 1.2 1219 */ 1220 public static final UnicodeBlock HIRAGANA = 1221 new UnicodeBlock("HIRAGANA"); 1222 1223 /** 1224 * Constant for the "Katakana" Unicode character block. 1225 * @since 1.2 1226 */ 1227 public static final UnicodeBlock KATAKANA = 1228 new UnicodeBlock("KATAKANA"); 1229 1230 /** 1231 * Constant for the "Bopomofo" Unicode character block. 1232 * @since 1.2 1233 */ 1234 public static final UnicodeBlock BOPOMOFO = 1235 new UnicodeBlock("BOPOMOFO"); 1236 1237 /** 1238 * Constant for the "Hangul Compatibility Jamo" Unicode character block. 1239 * @since 1.2 1240 */ 1241 public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = 1242 new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", 1243 "HANGUL COMPATIBILITY JAMO", 1244 "HANGULCOMPATIBILITYJAMO"); 1245 1246 /** 1247 * Constant for the "Kanbun" Unicode character block. 1248 * @since 1.2 1249 */ 1250 public static final UnicodeBlock KANBUN = 1251 new UnicodeBlock("KANBUN"); 1252 1253 /** 1254 * Constant for the "Enclosed CJK Letters and Months" Unicode character block. 
1255 * @since 1.2 1256 */ 1257 public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = 1258 new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS", 1259 "ENCLOSED CJK LETTERS AND MONTHS", 1260 "ENCLOSEDCJKLETTERSANDMONTHS"); 1261 1262 /** 1263 * Constant for the "CJK Compatibility" Unicode character block. 1264 * @since 1.2 1265 */ 1266 public static final UnicodeBlock CJK_COMPATIBILITY = 1267 new UnicodeBlock("CJK_COMPATIBILITY", 1268 "CJK COMPATIBILITY", 1269 "CJKCOMPATIBILITY"); 1270 1271 /** 1272 * Constant for the "CJK Unified Ideographs" Unicode character block. 1273 * @since 1.2 1274 */ 1275 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = 1276 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", 1277 "CJK UNIFIED IDEOGRAPHS", 1278 "CJKUNIFIEDIDEOGRAPHS"); 1279 1280 /** 1281 * Constant for the "Hangul Syllables" Unicode character block. 1282 * @since 1.2 1283 */ 1284 public static final UnicodeBlock HANGUL_SYLLABLES = 1285 new UnicodeBlock("HANGUL_SYLLABLES", 1286 "HANGUL SYLLABLES", 1287 "HANGULSYLLABLES"); 1288 1289 /** 1290 * Constant for the "Private Use Area" Unicode character block. 1291 * @since 1.2 1292 */ 1293 public static final UnicodeBlock PRIVATE_USE_AREA = 1294 new UnicodeBlock("PRIVATE_USE_AREA", 1295 "PRIVATE USE AREA", 1296 "PRIVATEUSEAREA"); 1297 1298 /** 1299 * Constant for the "CJK Compatibility Ideographs" Unicode character 1300 * block. 1301 * @since 1.2 1302 */ 1303 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = 1304 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", 1305 "CJK COMPATIBILITY IDEOGRAPHS", 1306 "CJKCOMPATIBILITYIDEOGRAPHS"); 1307 1308 /** 1309 * Constant for the "Alphabetic Presentation Forms" Unicode character block. 
1310 * @since 1.2 1311 */ 1312 public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = 1313 new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", 1314 "ALPHABETIC PRESENTATION FORMS", 1315 "ALPHABETICPRESENTATIONFORMS"); 1316 1317 /** 1318 * Constant for the "Arabic Presentation Forms-A" Unicode character 1319 * block. 1320 * @since 1.2 1321 */ 1322 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = 1323 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", 1324 "ARABIC PRESENTATION FORMS-A", 1325 "ARABICPRESENTATIONFORMS-A"); 1326 1327 /** 1328 * Constant for the "Combining Half Marks" Unicode character block. 1329 * @since 1.2 1330 */ 1331 public static final UnicodeBlock COMBINING_HALF_MARKS = 1332 new UnicodeBlock("COMBINING_HALF_MARKS", 1333 "COMBINING HALF MARKS", 1334 "COMBININGHALFMARKS"); 1335 1336 /** 1337 * Constant for the "CJK Compatibility Forms" Unicode character block. 1338 * @since 1.2 1339 */ 1340 public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = 1341 new UnicodeBlock("CJK_COMPATIBILITY_FORMS", 1342 "CJK COMPATIBILITY FORMS", 1343 "CJKCOMPATIBILITYFORMS"); 1344 1345 /** 1346 * Constant for the "Small Form Variants" Unicode character block. 1347 * @since 1.2 1348 */ 1349 public static final UnicodeBlock SMALL_FORM_VARIANTS = 1350 new UnicodeBlock("SMALL_FORM_VARIANTS", 1351 "SMALL FORM VARIANTS", 1352 "SMALLFORMVARIANTS"); 1353 1354 /** 1355 * Constant for the "Arabic Presentation Forms-B" Unicode character block. 1356 * @since 1.2 1357 */ 1358 public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = 1359 new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", 1360 "ARABIC PRESENTATION FORMS-B", 1361 "ARABICPRESENTATIONFORMS-B"); 1362 1363 /** 1364 * Constant for the "Halfwidth and Fullwidth Forms" Unicode character 1365 * block. 
1366 * @since 1.2 1367 */ 1368 public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = 1369 new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", 1370 "HALFWIDTH AND FULLWIDTH FORMS", 1371 "HALFWIDTHANDFULLWIDTHFORMS"); 1372 1373 /** 1374 * Constant for the "Specials" Unicode character block. 1375 * @since 1.2 1376 */ 1377 public static final UnicodeBlock SPECIALS = 1378 new UnicodeBlock("SPECIALS"); 1379 1380 /** 1381 * @deprecated 1382 * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES}, 1383 * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}. 1384 * These constants match the block definitions of the Unicode Standard. 1385 * The {@link #of(char)} and {@link #of(int)} methods return the 1386 * standard constants. 1387 */ 1388 @Deprecated(since="1.5") 1389 public static final UnicodeBlock SURROGATES_AREA = 1390 // Android-changed: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229 1391 // new UnicodeBlock("SURROGATES_AREA"); 1392 new UnicodeBlock("SURROGATES_AREA", false); 1393 1394 /** 1395 * Constant for the "Syriac" Unicode character block. 1396 * @since 1.4 1397 */ 1398 public static final UnicodeBlock SYRIAC = 1399 new UnicodeBlock("SYRIAC"); 1400 1401 /** 1402 * Constant for the "Thaana" Unicode character block. 1403 * @since 1.4 1404 */ 1405 public static final UnicodeBlock THAANA = 1406 new UnicodeBlock("THAANA"); 1407 1408 /** 1409 * Constant for the "Sinhala" Unicode character block. 1410 * @since 1.4 1411 */ 1412 public static final UnicodeBlock SINHALA = 1413 new UnicodeBlock("SINHALA"); 1414 1415 /** 1416 * Constant for the "Myanmar" Unicode character block. 1417 * @since 1.4 1418 */ 1419 public static final UnicodeBlock MYANMAR = 1420 new UnicodeBlock("MYANMAR"); 1421 1422 /** 1423 * Constant for the "Ethiopic" Unicode character block. 
1424 * @since 1.4 1425 */ 1426 public static final UnicodeBlock ETHIOPIC = 1427 new UnicodeBlock("ETHIOPIC"); 1428 1429 /** 1430 * Constant for the "Cherokee" Unicode character block. 1431 * @since 1.4 1432 */ 1433 public static final UnicodeBlock CHEROKEE = 1434 new UnicodeBlock("CHEROKEE"); 1435 1436 /** 1437 * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block. 1438 * @since 1.4 1439 */ 1440 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 1441 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS", 1442 "UNIFIED CANADIAN ABORIGINAL SYLLABICS", 1443 "UNIFIEDCANADIANABORIGINALSYLLABICS"); 1444 1445 /** 1446 * Constant for the "Ogham" Unicode character block. 1447 * @since 1.4 1448 */ 1449 public static final UnicodeBlock OGHAM = 1450 new UnicodeBlock("OGHAM"); 1451 1452 /** 1453 * Constant for the "Runic" Unicode character block. 1454 * @since 1.4 1455 */ 1456 public static final UnicodeBlock RUNIC = 1457 new UnicodeBlock("RUNIC"); 1458 1459 /** 1460 * Constant for the "Khmer" Unicode character block. 1461 * @since 1.4 1462 */ 1463 public static final UnicodeBlock KHMER = 1464 new UnicodeBlock("KHMER"); 1465 1466 /** 1467 * Constant for the "Mongolian" Unicode character block. 1468 * @since 1.4 1469 */ 1470 public static final UnicodeBlock MONGOLIAN = 1471 new UnicodeBlock("MONGOLIAN"); 1472 1473 /** 1474 * Constant for the "Braille Patterns" Unicode character block. 1475 * @since 1.4 1476 */ 1477 public static final UnicodeBlock BRAILLE_PATTERNS = 1478 new UnicodeBlock("BRAILLE_PATTERNS", 1479 "BRAILLE PATTERNS", 1480 "BRAILLEPATTERNS"); 1481 1482 /** 1483 * Constant for the "CJK Radicals Supplement" Unicode character block. 1484 * @since 1.4 1485 */ 1486 public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = 1487 new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", 1488 "CJK RADICALS SUPPLEMENT", 1489 "CJKRADICALSSUPPLEMENT"); 1490 1491 /** 1492 * Constant for the "Kangxi Radicals" Unicode character block. 
1493 * @since 1.4 1494 */ 1495 public static final UnicodeBlock KANGXI_RADICALS = 1496 new UnicodeBlock("KANGXI_RADICALS", 1497 "KANGXI RADICALS", 1498 "KANGXIRADICALS"); 1499 1500 /** 1501 * Constant for the "Ideographic Description Characters" Unicode character block. 1502 * @since 1.4 1503 */ 1504 public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 1505 new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS", 1506 "IDEOGRAPHIC DESCRIPTION CHARACTERS", 1507 "IDEOGRAPHICDESCRIPTIONCHARACTERS"); 1508 1509 /** 1510 * Constant for the "Bopomofo Extended" Unicode character block. 1511 * @since 1.4 1512 */ 1513 public static final UnicodeBlock BOPOMOFO_EXTENDED = 1514 new UnicodeBlock("BOPOMOFO_EXTENDED", 1515 "BOPOMOFO EXTENDED", 1516 "BOPOMOFOEXTENDED"); 1517 1518 /** 1519 * Constant for the "CJK Unified Ideographs Extension A" Unicode character block. 1520 * @since 1.4 1521 */ 1522 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 1523 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A", 1524 "CJK UNIFIED IDEOGRAPHS EXTENSION A", 1525 "CJKUNIFIEDIDEOGRAPHSEXTENSIONA"); 1526 1527 /** 1528 * Constant for the "Yi Syllables" Unicode character block. 1529 * @since 1.4 1530 */ 1531 public static final UnicodeBlock YI_SYLLABLES = 1532 new UnicodeBlock("YI_SYLLABLES", 1533 "YI SYLLABLES", 1534 "YISYLLABLES"); 1535 1536 /** 1537 * Constant for the "Yi Radicals" Unicode character block. 1538 * @since 1.4 1539 */ 1540 public static final UnicodeBlock YI_RADICALS = 1541 new UnicodeBlock("YI_RADICALS", 1542 "YI RADICALS", 1543 "YIRADICALS"); 1544 1545 /** 1546 * Constant for the "Cyrillic Supplement" Unicode character block. 1547 * This block was previously known as the "Cyrillic Supplementary" block. 
1548 * @since 1.5 1549 */ 1550 public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = 1551 new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", 1552 "CYRILLIC SUPPLEMENTARY", 1553 "CYRILLICSUPPLEMENTARY", 1554 "CYRILLIC SUPPLEMENT", 1555 "CYRILLICSUPPLEMENT"); 1556 1557 /** 1558 * Constant for the "Tagalog" Unicode character block. 1559 * @since 1.5 1560 */ 1561 public static final UnicodeBlock TAGALOG = 1562 new UnicodeBlock("TAGALOG"); 1563 1564 /** 1565 * Constant for the "Hanunoo" Unicode character block. 1566 * @since 1.5 1567 */ 1568 public static final UnicodeBlock HANUNOO = 1569 new UnicodeBlock("HANUNOO"); 1570 1571 /** 1572 * Constant for the "Buhid" Unicode character block. 1573 * @since 1.5 1574 */ 1575 public static final UnicodeBlock BUHID = 1576 new UnicodeBlock("BUHID"); 1577 1578 /** 1579 * Constant for the "Tagbanwa" Unicode character block. 1580 * @since 1.5 1581 */ 1582 public static final UnicodeBlock TAGBANWA = 1583 new UnicodeBlock("TAGBANWA"); 1584 1585 /** 1586 * Constant for the "Limbu" Unicode character block. 1587 * @since 1.5 1588 */ 1589 public static final UnicodeBlock LIMBU = 1590 new UnicodeBlock("LIMBU"); 1591 1592 /** 1593 * Constant for the "Tai Le" Unicode character block. 1594 * @since 1.5 1595 */ 1596 public static final UnicodeBlock TAI_LE = 1597 new UnicodeBlock("TAI_LE", 1598 "TAI LE", 1599 "TAILE"); 1600 1601 /** 1602 * Constant for the "Khmer Symbols" Unicode character block. 1603 * @since 1.5 1604 */ 1605 public static final UnicodeBlock KHMER_SYMBOLS = 1606 new UnicodeBlock("KHMER_SYMBOLS", 1607 "KHMER SYMBOLS", 1608 "KHMERSYMBOLS"); 1609 1610 /** 1611 * Constant for the "Phonetic Extensions" Unicode character block. 1612 * @since 1.5 1613 */ 1614 public static final UnicodeBlock PHONETIC_EXTENSIONS = 1615 new UnicodeBlock("PHONETIC_EXTENSIONS", 1616 "PHONETIC EXTENSIONS", 1617 "PHONETICEXTENSIONS"); 1618 1619 /** 1620 * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block. 
1621 * @since 1.5 1622 */ 1623 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 1624 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A", 1625 "MISCELLANEOUS MATHEMATICAL SYMBOLS-A", 1626 "MISCELLANEOUSMATHEMATICALSYMBOLS-A"); 1627 1628 /** 1629 * Constant for the "Supplemental Arrows-A" Unicode character block. 1630 * @since 1.5 1631 */ 1632 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = 1633 new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", 1634 "SUPPLEMENTAL ARROWS-A", 1635 "SUPPLEMENTALARROWS-A"); 1636 1637 /** 1638 * Constant for the "Supplemental Arrows-B" Unicode character block. 1639 * @since 1.5 1640 */ 1641 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = 1642 new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", 1643 "SUPPLEMENTAL ARROWS-B", 1644 "SUPPLEMENTALARROWS-B"); 1645 1646 /** 1647 * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode 1648 * character block. 1649 * @since 1.5 1650 */ 1651 public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 1652 new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B", 1653 "MISCELLANEOUS MATHEMATICAL SYMBOLS-B", 1654 "MISCELLANEOUSMATHEMATICALSYMBOLS-B"); 1655 1656 /** 1657 * Constant for the "Supplemental Mathematical Operators" Unicode 1658 * character block. 1659 * @since 1.5 1660 */ 1661 public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 1662 new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS", 1663 "SUPPLEMENTAL MATHEMATICAL OPERATORS", 1664 "SUPPLEMENTALMATHEMATICALOPERATORS"); 1665 1666 /** 1667 * Constant for the "Miscellaneous Symbols and Arrows" Unicode character 1668 * block. 1669 * @since 1.5 1670 */ 1671 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = 1672 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS", 1673 "MISCELLANEOUS SYMBOLS AND ARROWS", 1674 "MISCELLANEOUSSYMBOLSANDARROWS"); 1675 1676 /** 1677 * Constant for the "Katakana Phonetic Extensions" Unicode character 1678 * block. 
1679 * @since 1.5 1680 */ 1681 public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = 1682 new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", 1683 "KATAKANA PHONETIC EXTENSIONS", 1684 "KATAKANAPHONETICEXTENSIONS"); 1685 1686 /** 1687 * Constant for the "Yijing Hexagram Symbols" Unicode character block. 1688 * @since 1.5 1689 */ 1690 public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = 1691 new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", 1692 "YIJING HEXAGRAM SYMBOLS", 1693 "YIJINGHEXAGRAMSYMBOLS"); 1694 1695 /** 1696 * Constant for the "Variation Selectors" Unicode character block. 1697 * @since 1.5 1698 */ 1699 public static final UnicodeBlock VARIATION_SELECTORS = 1700 new UnicodeBlock("VARIATION_SELECTORS", 1701 "VARIATION SELECTORS", 1702 "VARIATIONSELECTORS"); 1703 1704 /** 1705 * Constant for the "Linear B Syllabary" Unicode character block. 1706 * @since 1.5 1707 */ 1708 public static final UnicodeBlock LINEAR_B_SYLLABARY = 1709 new UnicodeBlock("LINEAR_B_SYLLABARY", 1710 "LINEAR B SYLLABARY", 1711 "LINEARBSYLLABARY"); 1712 1713 /** 1714 * Constant for the "Linear B Ideograms" Unicode character block. 1715 * @since 1.5 1716 */ 1717 public static final UnicodeBlock LINEAR_B_IDEOGRAMS = 1718 new UnicodeBlock("LINEAR_B_IDEOGRAMS", 1719 "LINEAR B IDEOGRAMS", 1720 "LINEARBIDEOGRAMS"); 1721 1722 /** 1723 * Constant for the "Aegean Numbers" Unicode character block. 1724 * @since 1.5 1725 */ 1726 public static final UnicodeBlock AEGEAN_NUMBERS = 1727 new UnicodeBlock("AEGEAN_NUMBERS", 1728 "AEGEAN NUMBERS", 1729 "AEGEANNUMBERS"); 1730 1731 /** 1732 * Constant for the "Old Italic" Unicode character block. 1733 * @since 1.5 1734 */ 1735 public static final UnicodeBlock OLD_ITALIC = 1736 new UnicodeBlock("OLD_ITALIC", 1737 "OLD ITALIC", 1738 "OLDITALIC"); 1739 1740 /** 1741 * Constant for the "Gothic" Unicode character block. 
1742 * @since 1.5 1743 */ 1744 public static final UnicodeBlock GOTHIC = 1745 new UnicodeBlock("GOTHIC"); 1746 1747 /** 1748 * Constant for the "Ugaritic" Unicode character block. 1749 * @since 1.5 1750 */ 1751 public static final UnicodeBlock UGARITIC = 1752 new UnicodeBlock("UGARITIC"); 1753 1754 /** 1755 * Constant for the "Deseret" Unicode character block. 1756 * @since 1.5 1757 */ 1758 public static final UnicodeBlock DESERET = 1759 new UnicodeBlock("DESERET"); 1760 1761 /** 1762 * Constant for the "Shavian" Unicode character block. 1763 * @since 1.5 1764 */ 1765 public static final UnicodeBlock SHAVIAN = 1766 new UnicodeBlock("SHAVIAN"); 1767 1768 /** 1769 * Constant for the "Osmanya" Unicode character block. 1770 * @since 1.5 1771 */ 1772 public static final UnicodeBlock OSMANYA = 1773 new UnicodeBlock("OSMANYA"); 1774 1775 /** 1776 * Constant for the "Cypriot Syllabary" Unicode character block. 1777 * @since 1.5 1778 */ 1779 public static final UnicodeBlock CYPRIOT_SYLLABARY = 1780 new UnicodeBlock("CYPRIOT_SYLLABARY", 1781 "CYPRIOT SYLLABARY", 1782 "CYPRIOTSYLLABARY"); 1783 1784 /** 1785 * Constant for the "Byzantine Musical Symbols" Unicode character block. 1786 * @since 1.5 1787 */ 1788 public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = 1789 new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", 1790 "BYZANTINE MUSICAL SYMBOLS", 1791 "BYZANTINEMUSICALSYMBOLS"); 1792 1793 /** 1794 * Constant for the "Musical Symbols" Unicode character block. 1795 * @since 1.5 1796 */ 1797 public static final UnicodeBlock MUSICAL_SYMBOLS = 1798 new UnicodeBlock("MUSICAL_SYMBOLS", 1799 "MUSICAL SYMBOLS", 1800 "MUSICALSYMBOLS"); 1801 1802 /** 1803 * Constant for the "Tai Xuan Jing Symbols" Unicode character block. 
1804 * @since 1.5 1805 */ 1806 public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = 1807 new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", 1808 "TAI XUAN JING SYMBOLS", 1809 "TAIXUANJINGSYMBOLS"); 1810 1811 /** 1812 * Constant for the "Mathematical Alphanumeric Symbols" Unicode 1813 * character block. 1814 * @since 1.5 1815 */ 1816 public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 1817 new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS", 1818 "MATHEMATICAL ALPHANUMERIC SYMBOLS", 1819 "MATHEMATICALALPHANUMERICSYMBOLS"); 1820 1821 /** 1822 * Constant for the "CJK Unified Ideographs Extension B" Unicode 1823 * character block. 1824 * @since 1.5 1825 */ 1826 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 1827 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B", 1828 "CJK UNIFIED IDEOGRAPHS EXTENSION B", 1829 "CJKUNIFIEDIDEOGRAPHSEXTENSIONB"); 1830 1831 /** 1832 * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block. 1833 * @since 1.5 1834 */ 1835 public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 1836 new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT", 1837 "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT", 1838 "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT"); 1839 1840 /** 1841 * Constant for the "Tags" Unicode character block. 1842 * @since 1.5 1843 */ 1844 public static final UnicodeBlock TAGS = 1845 new UnicodeBlock("TAGS"); 1846 1847 /** 1848 * Constant for the "Variation Selectors Supplement" Unicode character 1849 * block. 1850 * @since 1.5 1851 */ 1852 public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = 1853 new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", 1854 "VARIATION SELECTORS SUPPLEMENT", 1855 "VARIATIONSELECTORSSUPPLEMENT"); 1856 1857 /** 1858 * Constant for the "Supplementary Private Use Area-A" Unicode character 1859 * block. 
1860 * @since 1.5 1861 */ 1862 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = 1863 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A", 1864 "SUPPLEMENTARY PRIVATE USE AREA-A", 1865 "SUPPLEMENTARYPRIVATEUSEAREA-A"); 1866 1867 /** 1868 * Constant for the "Supplementary Private Use Area-B" Unicode character 1869 * block. 1870 * @since 1.5 1871 */ 1872 public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = 1873 new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B", 1874 "SUPPLEMENTARY PRIVATE USE AREA-B", 1875 "SUPPLEMENTARYPRIVATEUSEAREA-B"); 1876 1877 /** 1878 * Constant for the "High Surrogates" Unicode character block. 1879 * This block represents codepoint values in the high surrogate 1880 * range: U+D800 through U+DB7F 1881 * 1882 * @since 1.5 1883 */ 1884 public static final UnicodeBlock HIGH_SURROGATES = 1885 new UnicodeBlock("HIGH_SURROGATES", 1886 "HIGH SURROGATES", 1887 "HIGHSURROGATES"); 1888 1889 /** 1890 * Constant for the "High Private Use Surrogates" Unicode character 1891 * block. 1892 * This block represents codepoint values in the private use high 1893 * surrogate range: U+DB80 through U+DBFF 1894 * 1895 * @since 1.5 1896 */ 1897 public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = 1898 new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", 1899 "HIGH PRIVATE USE SURROGATES", 1900 "HIGHPRIVATEUSESURROGATES"); 1901 1902 /** 1903 * Constant for the "Low Surrogates" Unicode character block. 1904 * This block represents codepoint values in the low surrogate 1905 * range: U+DC00 through U+DFFF 1906 * 1907 * @since 1.5 1908 */ 1909 public static final UnicodeBlock LOW_SURROGATES = 1910 new UnicodeBlock("LOW_SURROGATES", 1911 "LOW SURROGATES", 1912 "LOWSURROGATES"); 1913 1914 /** 1915 * Constant for the "Arabic Supplement" Unicode character block. 
1916 * @since 1.7 1917 */ 1918 public static final UnicodeBlock ARABIC_SUPPLEMENT = 1919 new UnicodeBlock("ARABIC_SUPPLEMENT", 1920 "ARABIC SUPPLEMENT", 1921 "ARABICSUPPLEMENT"); 1922 1923 /** 1924 * Constant for the "NKo" Unicode character block. 1925 * @since 1.7 1926 */ 1927 public static final UnicodeBlock NKO = 1928 new UnicodeBlock("NKO"); 1929 1930 /** 1931 * Constant for the "Samaritan" Unicode character block. 1932 * @since 1.7 1933 */ 1934 public static final UnicodeBlock SAMARITAN = 1935 new UnicodeBlock("SAMARITAN"); 1936 1937 /** 1938 * Constant for the "Mandaic" Unicode character block. 1939 * @since 1.7 1940 */ 1941 public static final UnicodeBlock MANDAIC = 1942 new UnicodeBlock("MANDAIC"); 1943 1944 /** 1945 * Constant for the "Ethiopic Supplement" Unicode character block. 1946 * @since 1.7 1947 */ 1948 public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = 1949 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", 1950 "ETHIOPIC SUPPLEMENT", 1951 "ETHIOPICSUPPLEMENT"); 1952 1953 /** 1954 * Constant for the "Unified Canadian Aboriginal Syllabics Extended" 1955 * Unicode character block. 1956 * @since 1.7 1957 */ 1958 public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 1959 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED", 1960 "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED", 1961 "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED"); 1962 1963 /** 1964 * Constant for the "New Tai Lue" Unicode character block. 1965 * @since 1.7 1966 */ 1967 public static final UnicodeBlock NEW_TAI_LUE = 1968 new UnicodeBlock("NEW_TAI_LUE", 1969 "NEW TAI LUE", 1970 "NEWTAILUE"); 1971 1972 /** 1973 * Constant for the "Buginese" Unicode character block. 1974 * @since 1.7 1975 */ 1976 public static final UnicodeBlock BUGINESE = 1977 new UnicodeBlock("BUGINESE"); 1978 1979 /** 1980 * Constant for the "Tai Tham" Unicode character block. 
1981 * @since 1.7 1982 */ 1983 public static final UnicodeBlock TAI_THAM = 1984 new UnicodeBlock("TAI_THAM", 1985 "TAI THAM", 1986 "TAITHAM"); 1987 1988 /** 1989 * Constant for the "Balinese" Unicode character block. 1990 * @since 1.7 1991 */ 1992 public static final UnicodeBlock BALINESE = 1993 new UnicodeBlock("BALINESE"); 1994 1995 /** 1996 * Constant for the "Sundanese" Unicode character block. 1997 * @since 1.7 1998 */ 1999 public static final UnicodeBlock SUNDANESE = 2000 new UnicodeBlock("SUNDANESE"); 2001 2002 /** 2003 * Constant for the "Batak" Unicode character block. 2004 * @since 1.7 2005 */ 2006 public static final UnicodeBlock BATAK = 2007 new UnicodeBlock("BATAK"); 2008 2009 /** 2010 * Constant for the "Lepcha" Unicode character block. 2011 * @since 1.7 2012 */ 2013 public static final UnicodeBlock LEPCHA = 2014 new UnicodeBlock("LEPCHA"); 2015 2016 /** 2017 * Constant for the "Ol Chiki" Unicode character block. 2018 * @since 1.7 2019 */ 2020 public static final UnicodeBlock OL_CHIKI = 2021 new UnicodeBlock("OL_CHIKI", 2022 "OL CHIKI", 2023 "OLCHIKI"); 2024 2025 /** 2026 * Constant for the "Vedic Extensions" Unicode character block. 2027 * @since 1.7 2028 */ 2029 public static final UnicodeBlock VEDIC_EXTENSIONS = 2030 new UnicodeBlock("VEDIC_EXTENSIONS", 2031 "VEDIC EXTENSIONS", 2032 "VEDICEXTENSIONS"); 2033 2034 /** 2035 * Constant for the "Phonetic Extensions Supplement" Unicode character 2036 * block. 2037 * @since 1.7 2038 */ 2039 public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = 2040 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT", 2041 "PHONETIC EXTENSIONS SUPPLEMENT", 2042 "PHONETICEXTENSIONSSUPPLEMENT"); 2043 2044 /** 2045 * Constant for the "Combining Diacritical Marks Supplement" Unicode 2046 * character block. 
2047 * @since 1.7 2048 */ 2049 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 2050 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT", 2051 "COMBINING DIACRITICAL MARKS SUPPLEMENT", 2052 "COMBININGDIACRITICALMARKSSUPPLEMENT"); 2053 2054 /** 2055 * Constant for the "Glagolitic" Unicode character block. 2056 * @since 1.7 2057 */ 2058 public static final UnicodeBlock GLAGOLITIC = 2059 new UnicodeBlock("GLAGOLITIC"); 2060 2061 /** 2062 * Constant for the "Latin Extended-C" Unicode character block. 2063 * @since 1.7 2064 */ 2065 public static final UnicodeBlock LATIN_EXTENDED_C = 2066 new UnicodeBlock("LATIN_EXTENDED_C", 2067 "LATIN EXTENDED-C", 2068 "LATINEXTENDED-C"); 2069 2070 /** 2071 * Constant for the "Coptic" Unicode character block. 2072 * @since 1.7 2073 */ 2074 public static final UnicodeBlock COPTIC = 2075 new UnicodeBlock("COPTIC"); 2076 2077 /** 2078 * Constant for the "Georgian Supplement" Unicode character block. 2079 * @since 1.7 2080 */ 2081 public static final UnicodeBlock GEORGIAN_SUPPLEMENT = 2082 new UnicodeBlock("GEORGIAN_SUPPLEMENT", 2083 "GEORGIAN SUPPLEMENT", 2084 "GEORGIANSUPPLEMENT"); 2085 2086 /** 2087 * Constant for the "Tifinagh" Unicode character block. 2088 * @since 1.7 2089 */ 2090 public static final UnicodeBlock TIFINAGH = 2091 new UnicodeBlock("TIFINAGH"); 2092 2093 /** 2094 * Constant for the "Ethiopic Extended" Unicode character block. 2095 * @since 1.7 2096 */ 2097 public static final UnicodeBlock ETHIOPIC_EXTENDED = 2098 new UnicodeBlock("ETHIOPIC_EXTENDED", 2099 "ETHIOPIC EXTENDED", 2100 "ETHIOPICEXTENDED"); 2101 2102 /** 2103 * Constant for the "Cyrillic Extended-A" Unicode character block. 2104 * @since 1.7 2105 */ 2106 public static final UnicodeBlock CYRILLIC_EXTENDED_A = 2107 new UnicodeBlock("CYRILLIC_EXTENDED_A", 2108 "CYRILLIC EXTENDED-A", 2109 "CYRILLICEXTENDED-A"); 2110 2111 /** 2112 * Constant for the "Supplemental Punctuation" Unicode character block. 
2113 * @since 1.7 2114 */ 2115 public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = 2116 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", 2117 "SUPPLEMENTAL PUNCTUATION", 2118 "SUPPLEMENTALPUNCTUATION"); 2119 2120 /** 2121 * Constant for the "CJK Strokes" Unicode character block. 2122 * @since 1.7 2123 */ 2124 public static final UnicodeBlock CJK_STROKES = 2125 new UnicodeBlock("CJK_STROKES", 2126 "CJK STROKES", 2127 "CJKSTROKES"); 2128 2129 /** 2130 * Constant for the "Lisu" Unicode character block. 2131 * @since 1.7 2132 */ 2133 public static final UnicodeBlock LISU = 2134 new UnicodeBlock("LISU"); 2135 2136 /** 2137 * Constant for the "Vai" Unicode character block. 2138 * @since 1.7 2139 */ 2140 public static final UnicodeBlock VAI = 2141 new UnicodeBlock("VAI"); 2142 2143 /** 2144 * Constant for the "Cyrillic Extended-B" Unicode character block. 2145 * @since 1.7 2146 */ 2147 public static final UnicodeBlock CYRILLIC_EXTENDED_B = 2148 new UnicodeBlock("CYRILLIC_EXTENDED_B", 2149 "CYRILLIC EXTENDED-B", 2150 "CYRILLICEXTENDED-B"); 2151 2152 /** 2153 * Constant for the "Bamum" Unicode character block. 2154 * @since 1.7 2155 */ 2156 public static final UnicodeBlock BAMUM = 2157 new UnicodeBlock("BAMUM"); 2158 2159 /** 2160 * Constant for the "Modifier Tone Letters" Unicode character block. 2161 * @since 1.7 2162 */ 2163 public static final UnicodeBlock MODIFIER_TONE_LETTERS = 2164 new UnicodeBlock("MODIFIER_TONE_LETTERS", 2165 "MODIFIER TONE LETTERS", 2166 "MODIFIERTONELETTERS"); 2167 2168 /** 2169 * Constant for the "Latin Extended-D" Unicode character block. 2170 * @since 1.7 2171 */ 2172 public static final UnicodeBlock LATIN_EXTENDED_D = 2173 new UnicodeBlock("LATIN_EXTENDED_D", 2174 "LATIN EXTENDED-D", 2175 "LATINEXTENDED-D"); 2176 2177 /** 2178 * Constant for the "Syloti Nagri" Unicode character block. 
2179 * @since 1.7 2180 */ 2181 public static final UnicodeBlock SYLOTI_NAGRI = 2182 new UnicodeBlock("SYLOTI_NAGRI", 2183 "SYLOTI NAGRI", 2184 "SYLOTINAGRI"); 2185 2186 /** 2187 * Constant for the "Common Indic Number Forms" Unicode character block. 2188 * @since 1.7 2189 */ 2190 public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = 2191 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", 2192 "COMMON INDIC NUMBER FORMS", 2193 "COMMONINDICNUMBERFORMS"); 2194 2195 /** 2196 * Constant for the "Phags-pa" Unicode character block. 2197 * @since 1.7 2198 */ 2199 public static final UnicodeBlock PHAGS_PA = 2200 new UnicodeBlock("PHAGS_PA", 2201 "PHAGS-PA"); 2202 2203 /** 2204 * Constant for the "Saurashtra" Unicode character block. 2205 * @since 1.7 2206 */ 2207 public static final UnicodeBlock SAURASHTRA = 2208 new UnicodeBlock("SAURASHTRA"); 2209 2210 /** 2211 * Constant for the "Devanagari Extended" Unicode character block. 2212 * @since 1.7 2213 */ 2214 public static final UnicodeBlock DEVANAGARI_EXTENDED = 2215 new UnicodeBlock("DEVANAGARI_EXTENDED", 2216 "DEVANAGARI EXTENDED", 2217 "DEVANAGARIEXTENDED"); 2218 2219 /** 2220 * Constant for the "Kayah Li" Unicode character block. 2221 * @since 1.7 2222 */ 2223 public static final UnicodeBlock KAYAH_LI = 2224 new UnicodeBlock("KAYAH_LI", 2225 "KAYAH LI", 2226 "KAYAHLI"); 2227 2228 /** 2229 * Constant for the "Rejang" Unicode character block. 2230 * @since 1.7 2231 */ 2232 public static final UnicodeBlock REJANG = 2233 new UnicodeBlock("REJANG"); 2234 2235 /** 2236 * Constant for the "Hangul Jamo Extended-A" Unicode character block. 2237 * @since 1.7 2238 */ 2239 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = 2240 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", 2241 "HANGUL JAMO EXTENDED-A", 2242 "HANGULJAMOEXTENDED-A"); 2243 2244 /** 2245 * Constant for the "Javanese" Unicode character block. 
2246 * @since 1.7 2247 */ 2248 public static final UnicodeBlock JAVANESE = 2249 new UnicodeBlock("JAVANESE"); 2250 2251 /** 2252 * Constant for the "Cham" Unicode character block. 2253 * @since 1.7 2254 */ 2255 public static final UnicodeBlock CHAM = 2256 new UnicodeBlock("CHAM"); 2257 2258 /** 2259 * Constant for the "Myanmar Extended-A" Unicode character block. 2260 * @since 1.7 2261 */ 2262 public static final UnicodeBlock MYANMAR_EXTENDED_A = 2263 new UnicodeBlock("MYANMAR_EXTENDED_A", 2264 "MYANMAR EXTENDED-A", 2265 "MYANMAREXTENDED-A"); 2266 2267 /** 2268 * Constant for the "Tai Viet" Unicode character block. 2269 * @since 1.7 2270 */ 2271 public static final UnicodeBlock TAI_VIET = 2272 new UnicodeBlock("TAI_VIET", 2273 "TAI VIET", 2274 "TAIVIET"); 2275 2276 /** 2277 * Constant for the "Ethiopic Extended-A" Unicode character block. 2278 * @since 1.7 2279 */ 2280 public static final UnicodeBlock ETHIOPIC_EXTENDED_A = 2281 new UnicodeBlock("ETHIOPIC_EXTENDED_A", 2282 "ETHIOPIC EXTENDED-A", 2283 "ETHIOPICEXTENDED-A"); 2284 2285 /** 2286 * Constant for the "Meetei Mayek" Unicode character block. 2287 * @since 1.7 2288 */ 2289 public static final UnicodeBlock MEETEI_MAYEK = 2290 new UnicodeBlock("MEETEI_MAYEK", 2291 "MEETEI MAYEK", 2292 "MEETEIMAYEK"); 2293 2294 /** 2295 * Constant for the "Hangul Jamo Extended-B" Unicode character block. 2296 * @since 1.7 2297 */ 2298 public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = 2299 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", 2300 "HANGUL JAMO EXTENDED-B", 2301 "HANGULJAMOEXTENDED-B"); 2302 2303 /** 2304 * Constant for the "Vertical Forms" Unicode character block. 2305 * @since 1.7 2306 */ 2307 public static final UnicodeBlock VERTICAL_FORMS = 2308 new UnicodeBlock("VERTICAL_FORMS", 2309 "VERTICAL FORMS", 2310 "VERTICALFORMS"); 2311 2312 /** 2313 * Constant for the "Ancient Greek Numbers" Unicode character block. 
2314 * @since 1.7 2315 */ 2316 public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = 2317 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", 2318 "ANCIENT GREEK NUMBERS", 2319 "ANCIENTGREEKNUMBERS"); 2320 2321 /** 2322 * Constant for the "Ancient Symbols" Unicode character block. 2323 * @since 1.7 2324 */ 2325 public static final UnicodeBlock ANCIENT_SYMBOLS = 2326 new UnicodeBlock("ANCIENT_SYMBOLS", 2327 "ANCIENT SYMBOLS", 2328 "ANCIENTSYMBOLS"); 2329 2330 /** 2331 * Constant for the "Phaistos Disc" Unicode character block. 2332 * @since 1.7 2333 */ 2334 public static final UnicodeBlock PHAISTOS_DISC = 2335 new UnicodeBlock("PHAISTOS_DISC", 2336 "PHAISTOS DISC", 2337 "PHAISTOSDISC"); 2338 2339 /** 2340 * Constant for the "Lycian" Unicode character block. 2341 * @since 1.7 2342 */ 2343 public static final UnicodeBlock LYCIAN = 2344 new UnicodeBlock("LYCIAN"); 2345 2346 /** 2347 * Constant for the "Carian" Unicode character block. 2348 * @since 1.7 2349 */ 2350 public static final UnicodeBlock CARIAN = 2351 new UnicodeBlock("CARIAN"); 2352 2353 /** 2354 * Constant for the "Old Persian" Unicode character block. 2355 * @since 1.7 2356 */ 2357 public static final UnicodeBlock OLD_PERSIAN = 2358 new UnicodeBlock("OLD_PERSIAN", 2359 "OLD PERSIAN", 2360 "OLDPERSIAN"); 2361 2362 /** 2363 * Constant for the "Imperial Aramaic" Unicode character block. 2364 * @since 1.7 2365 */ 2366 public static final UnicodeBlock IMPERIAL_ARAMAIC = 2367 new UnicodeBlock("IMPERIAL_ARAMAIC", 2368 "IMPERIAL ARAMAIC", 2369 "IMPERIALARAMAIC"); 2370 2371 /** 2372 * Constant for the "Phoenician" Unicode character block. 2373 * @since 1.7 2374 */ 2375 public static final UnicodeBlock PHOENICIAN = 2376 new UnicodeBlock("PHOENICIAN"); 2377 2378 /** 2379 * Constant for the "Lydian" Unicode character block. 2380 * @since 1.7 2381 */ 2382 public static final UnicodeBlock LYDIAN = 2383 new UnicodeBlock("LYDIAN"); 2384 2385 /** 2386 * Constant for the "Kharoshthi" Unicode character block. 
2387 * @since 1.7 2388 */ 2389 public static final UnicodeBlock KHAROSHTHI = 2390 new UnicodeBlock("KHAROSHTHI"); 2391 2392 /** 2393 * Constant for the "Old South Arabian" Unicode character block. 2394 * @since 1.7 2395 */ 2396 public static final UnicodeBlock OLD_SOUTH_ARABIAN = 2397 new UnicodeBlock("OLD_SOUTH_ARABIAN", 2398 "OLD SOUTH ARABIAN", 2399 "OLDSOUTHARABIAN"); 2400 2401 /** 2402 * Constant for the "Avestan" Unicode character block. 2403 * @since 1.7 2404 */ 2405 public static final UnicodeBlock AVESTAN = 2406 new UnicodeBlock("AVESTAN"); 2407 2408 /** 2409 * Constant for the "Inscriptional Parthian" Unicode character block. 2410 * @since 1.7 2411 */ 2412 public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = 2413 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", 2414 "INSCRIPTIONAL PARTHIAN", 2415 "INSCRIPTIONALPARTHIAN"); 2416 2417 /** 2418 * Constant for the "Inscriptional Pahlavi" Unicode character block. 2419 * @since 1.7 2420 */ 2421 public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = 2422 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", 2423 "INSCRIPTIONAL PAHLAVI", 2424 "INSCRIPTIONALPAHLAVI"); 2425 2426 /** 2427 * Constant for the "Old Turkic" Unicode character block. 2428 * @since 1.7 2429 */ 2430 public static final UnicodeBlock OLD_TURKIC = 2431 new UnicodeBlock("OLD_TURKIC", 2432 "OLD TURKIC", 2433 "OLDTURKIC"); 2434 2435 /** 2436 * Constant for the "Rumi Numeral Symbols" Unicode character block. 2437 * @since 1.7 2438 */ 2439 public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = 2440 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", 2441 "RUMI NUMERAL SYMBOLS", 2442 "RUMINUMERALSYMBOLS"); 2443 2444 /** 2445 * Constant for the "Brahmi" Unicode character block. 2446 * @since 1.7 2447 */ 2448 public static final UnicodeBlock BRAHMI = 2449 new UnicodeBlock("BRAHMI"); 2450 2451 /** 2452 * Constant for the "Kaithi" Unicode character block. 
2453 * @since 1.7 2454 */ 2455 public static final UnicodeBlock KAITHI = 2456 new UnicodeBlock("KAITHI"); 2457 2458 /** 2459 * Constant for the "Cuneiform" Unicode character block. 2460 * @since 1.7 2461 */ 2462 public static final UnicodeBlock CUNEIFORM = 2463 new UnicodeBlock("CUNEIFORM"); 2464 2465 /** 2466 * Constant for the "Cuneiform Numbers and Punctuation" Unicode 2467 * character block. 2468 * @since 1.7 2469 */ 2470 public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = 2471 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION", 2472 "CUNEIFORM NUMBERS AND PUNCTUATION", 2473 "CUNEIFORMNUMBERSANDPUNCTUATION"); 2474 2475 /** 2476 * Constant for the "Egyptian Hieroglyphs" Unicode character block. 2477 * @since 1.7 2478 */ 2479 public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = 2480 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", 2481 "EGYPTIAN HIEROGLYPHS", 2482 "EGYPTIANHIEROGLYPHS"); 2483 2484 /** 2485 * Constant for the "Bamum Supplement" Unicode character block. 2486 * @since 1.7 2487 */ 2488 public static final UnicodeBlock BAMUM_SUPPLEMENT = 2489 new UnicodeBlock("BAMUM_SUPPLEMENT", 2490 "BAMUM SUPPLEMENT", 2491 "BAMUMSUPPLEMENT"); 2492 2493 /** 2494 * Constant for the "Kana Supplement" Unicode character block. 2495 * @since 1.7 2496 */ 2497 public static final UnicodeBlock KANA_SUPPLEMENT = 2498 new UnicodeBlock("KANA_SUPPLEMENT", 2499 "KANA SUPPLEMENT", 2500 "KANASUPPLEMENT"); 2501 2502 /** 2503 * Constant for the "Ancient Greek Musical Notation" Unicode character 2504 * block. 2505 * @since 1.7 2506 */ 2507 public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = 2508 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION", 2509 "ANCIENT GREEK MUSICAL NOTATION", 2510 "ANCIENTGREEKMUSICALNOTATION"); 2511 2512 /** 2513 * Constant for the "Counting Rod Numerals" Unicode character block. 
2514 * @since 1.7 2515 */ 2516 public static final UnicodeBlock COUNTING_ROD_NUMERALS = 2517 new UnicodeBlock("COUNTING_ROD_NUMERALS", 2518 "COUNTING ROD NUMERALS", 2519 "COUNTINGRODNUMERALS"); 2520 2521 /** 2522 * Constant for the "Mahjong Tiles" Unicode character block. 2523 * @since 1.7 2524 */ 2525 public static final UnicodeBlock MAHJONG_TILES = 2526 new UnicodeBlock("MAHJONG_TILES", 2527 "MAHJONG TILES", 2528 "MAHJONGTILES"); 2529 2530 /** 2531 * Constant for the "Domino Tiles" Unicode character block. 2532 * @since 1.7 2533 */ 2534 public static final UnicodeBlock DOMINO_TILES = 2535 new UnicodeBlock("DOMINO_TILES", 2536 "DOMINO TILES", 2537 "DOMINOTILES"); 2538 2539 /** 2540 * Constant for the "Playing Cards" Unicode character block. 2541 * @since 1.7 2542 */ 2543 public static final UnicodeBlock PLAYING_CARDS = 2544 new UnicodeBlock("PLAYING_CARDS", 2545 "PLAYING CARDS", 2546 "PLAYINGCARDS"); 2547 2548 /** 2549 * Constant for the "Enclosed Alphanumeric Supplement" Unicode character 2550 * block. 2551 * @since 1.7 2552 */ 2553 public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 2554 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT", 2555 "ENCLOSED ALPHANUMERIC SUPPLEMENT", 2556 "ENCLOSEDALPHANUMERICSUPPLEMENT"); 2557 2558 /** 2559 * Constant for the "Enclosed Ideographic Supplement" Unicode character 2560 * block. 2561 * @since 1.7 2562 */ 2563 public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 2564 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT", 2565 "ENCLOSED IDEOGRAPHIC SUPPLEMENT", 2566 "ENCLOSEDIDEOGRAPHICSUPPLEMENT"); 2567 2568 /** 2569 * Constant for the "Miscellaneous Symbols And Pictographs" Unicode 2570 * character block. 
2571 * @since 1.7 2572 */ 2573 public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 2574 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS", 2575 "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS", 2576 "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS"); 2577 2578 /** 2579 * Constant for the "Emoticons" Unicode character block. 2580 * @since 1.7 2581 */ 2582 public static final UnicodeBlock EMOTICONS = 2583 new UnicodeBlock("EMOTICONS"); 2584 2585 /** 2586 * Constant for the "Transport And Map Symbols" Unicode character block. 2587 * @since 1.7 2588 */ 2589 public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = 2590 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", 2591 "TRANSPORT AND MAP SYMBOLS", 2592 "TRANSPORTANDMAPSYMBOLS"); 2593 2594 /** 2595 * Constant for the "Alchemical Symbols" Unicode character block. 2596 * @since 1.7 2597 */ 2598 public static final UnicodeBlock ALCHEMICAL_SYMBOLS = 2599 new UnicodeBlock("ALCHEMICAL_SYMBOLS", 2600 "ALCHEMICAL SYMBOLS", 2601 "ALCHEMICALSYMBOLS"); 2602 2603 /** 2604 * Constant for the "CJK Unified Ideographs Extension C" Unicode 2605 * character block. 2606 * @since 1.7 2607 */ 2608 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 2609 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C", 2610 "CJK UNIFIED IDEOGRAPHS EXTENSION C", 2611 "CJKUNIFIEDIDEOGRAPHSEXTENSIONC"); 2612 2613 /** 2614 * Constant for the "CJK Unified Ideographs Extension D" Unicode 2615 * character block. 2616 * @since 1.7 2617 */ 2618 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 2619 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D", 2620 "CJK UNIFIED IDEOGRAPHS EXTENSION D", 2621 "CJKUNIFIEDIDEOGRAPHSEXTENSIOND"); 2622 2623 /** 2624 * Constant for the "Arabic Extended-A" Unicode character block. 
2625 * @since 1.8 2626 */ 2627 public static final UnicodeBlock ARABIC_EXTENDED_A = 2628 new UnicodeBlock("ARABIC_EXTENDED_A", 2629 "ARABIC EXTENDED-A", 2630 "ARABICEXTENDED-A"); 2631 2632 /** 2633 * Constant for the "Sundanese Supplement" Unicode character block. 2634 * @since 1.8 2635 */ 2636 public static final UnicodeBlock SUNDANESE_SUPPLEMENT = 2637 new UnicodeBlock("SUNDANESE_SUPPLEMENT", 2638 "SUNDANESE SUPPLEMENT", 2639 "SUNDANESESUPPLEMENT"); 2640 2641 /** 2642 * Constant for the "Meetei Mayek Extensions" Unicode character block. 2643 * @since 1.8 2644 */ 2645 public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS = 2646 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", 2647 "MEETEI MAYEK EXTENSIONS", 2648 "MEETEIMAYEKEXTENSIONS"); 2649 2650 /** 2651 * Constant for the "Meroitic Hieroglyphs" Unicode character block. 2652 * @since 1.8 2653 */ 2654 public static final UnicodeBlock MEROITIC_HIEROGLYPHS = 2655 new UnicodeBlock("MEROITIC_HIEROGLYPHS", 2656 "MEROITIC HIEROGLYPHS", 2657 "MEROITICHIEROGLYPHS"); 2658 2659 /** 2660 * Constant for the "Meroitic Cursive" Unicode character block. 2661 * @since 1.8 2662 */ 2663 public static final UnicodeBlock MEROITIC_CURSIVE = 2664 new UnicodeBlock("MEROITIC_CURSIVE", 2665 "MEROITIC CURSIVE", 2666 "MEROITICCURSIVE"); 2667 2668 /** 2669 * Constant for the "Sora Sompeng" Unicode character block. 2670 * @since 1.8 2671 */ 2672 public static final UnicodeBlock SORA_SOMPENG = 2673 new UnicodeBlock("SORA_SOMPENG", 2674 "SORA SOMPENG", 2675 "SORASOMPENG"); 2676 2677 /** 2678 * Constant for the "Chakma" Unicode character block. 2679 * @since 1.8 2680 */ 2681 public static final UnicodeBlock CHAKMA = 2682 new UnicodeBlock("CHAKMA"); 2683 2684 /** 2685 * Constant for the "Sharada" Unicode character block. 2686 * @since 1.8 2687 */ 2688 public static final UnicodeBlock SHARADA = 2689 new UnicodeBlock("SHARADA"); 2690 2691 /** 2692 * Constant for the "Takri" Unicode character block. 
2693 * @since 1.8 2694 */ 2695 public static final UnicodeBlock TAKRI = 2696 new UnicodeBlock("TAKRI"); 2697 2698 /** 2699 * Constant for the "Miao" Unicode character block. 2700 * @since 1.8 2701 */ 2702 public static final UnicodeBlock MIAO = 2703 new UnicodeBlock("MIAO"); 2704 2705 /** 2706 * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode 2707 * character block. 2708 * @since 1.8 2709 */ 2710 public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 2711 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", 2712 "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS", 2713 "ARABICMATHEMATICALALPHABETICSYMBOLS"); 2714 2715 /** 2716 * Constant for the "Combining Diacritical Marks Extended" Unicode 2717 * character block. 2718 * @since 9 2719 */ 2720 public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED = 2721 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", 2722 "COMBINING DIACRITICAL MARKS EXTENDED", 2723 "COMBININGDIACRITICALMARKSEXTENDED"); 2724 2725 /** 2726 * Constant for the "Myanmar Extended-B" Unicode character block. 2727 * @since 9 2728 */ 2729 public static final UnicodeBlock MYANMAR_EXTENDED_B = 2730 new UnicodeBlock("MYANMAR_EXTENDED_B", 2731 "MYANMAR EXTENDED-B", 2732 "MYANMAREXTENDED-B"); 2733 2734 /** 2735 * Constant for the "Latin Extended-E" Unicode character block. 2736 * @since 9 2737 */ 2738 public static final UnicodeBlock LATIN_EXTENDED_E = 2739 new UnicodeBlock("LATIN_EXTENDED_E", 2740 "LATIN EXTENDED-E", 2741 "LATINEXTENDED-E"); 2742 2743 /** 2744 * Constant for the "Coptic Epact Numbers" Unicode character block. 2745 * @since 9 2746 */ 2747 public static final UnicodeBlock COPTIC_EPACT_NUMBERS = 2748 new UnicodeBlock("COPTIC_EPACT_NUMBERS", 2749 "COPTIC EPACT NUMBERS", 2750 "COPTICEPACTNUMBERS"); 2751 2752 /** 2753 * Constant for the "Old Permic" Unicode character block. 
2754 * @since 9 2755 */ 2756 public static final UnicodeBlock OLD_PERMIC = 2757 new UnicodeBlock("OLD_PERMIC", 2758 "OLD PERMIC", 2759 "OLDPERMIC"); 2760 2761 /** 2762 * Constant for the "Elbasan" Unicode character block. 2763 * @since 9 2764 */ 2765 public static final UnicodeBlock ELBASAN = 2766 new UnicodeBlock("ELBASAN"); 2767 2768 /** 2769 * Constant for the "Caucasian Albanian" Unicode character block. 2770 * @since 9 2771 */ 2772 public static final UnicodeBlock CAUCASIAN_ALBANIAN = 2773 new UnicodeBlock("CAUCASIAN_ALBANIAN", 2774 "CAUCASIAN ALBANIAN", 2775 "CAUCASIANALBANIAN"); 2776 2777 /** 2778 * Constant for the "Linear A" Unicode character block. 2779 * @since 9 2780 */ 2781 public static final UnicodeBlock LINEAR_A = 2782 new UnicodeBlock("LINEAR_A", 2783 "LINEAR A", 2784 "LINEARA"); 2785 2786 /** 2787 * Constant for the "Palmyrene" Unicode character block. 2788 * @since 9 2789 */ 2790 public static final UnicodeBlock PALMYRENE = 2791 new UnicodeBlock("PALMYRENE"); 2792 2793 /** 2794 * Constant for the "Nabataean" Unicode character block. 2795 * @since 9 2796 */ 2797 public static final UnicodeBlock NABATAEAN = 2798 new UnicodeBlock("NABATAEAN"); 2799 2800 /** 2801 * Constant for the "Old North Arabian" Unicode character block. 2802 * @since 9 2803 */ 2804 public static final UnicodeBlock OLD_NORTH_ARABIAN = 2805 new UnicodeBlock("OLD_NORTH_ARABIAN", 2806 "OLD NORTH ARABIAN", 2807 "OLDNORTHARABIAN"); 2808 2809 /** 2810 * Constant for the "Manichaean" Unicode character block. 2811 * @since 9 2812 */ 2813 public static final UnicodeBlock MANICHAEAN = 2814 new UnicodeBlock("MANICHAEAN"); 2815 2816 /** 2817 * Constant for the "Psalter Pahlavi" Unicode character block. 2818 * @since 9 2819 */ 2820 public static final UnicodeBlock PSALTER_PAHLAVI = 2821 new UnicodeBlock("PSALTER_PAHLAVI", 2822 "PSALTER PAHLAVI", 2823 "PSALTERPAHLAVI"); 2824 2825 /** 2826 * Constant for the "Mahajani" Unicode character block. 
2827 * @since 9 2828 */ 2829 public static final UnicodeBlock MAHAJANI = 2830 new UnicodeBlock("MAHAJANI"); 2831 2832 /** 2833 * Constant for the "Sinhala Archaic Numbers" Unicode character block. 2834 * @since 9 2835 */ 2836 public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS = 2837 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", 2838 "SINHALA ARCHAIC NUMBERS", 2839 "SINHALAARCHAICNUMBERS"); 2840 2841 /** 2842 * Constant for the "Khojki" Unicode character block. 2843 * @since 9 2844 */ 2845 public static final UnicodeBlock KHOJKI = 2846 new UnicodeBlock("KHOJKI"); 2847 2848 /** 2849 * Constant for the "Khudawadi" Unicode character block. 2850 * @since 9 2851 */ 2852 public static final UnicodeBlock KHUDAWADI = 2853 new UnicodeBlock("KHUDAWADI"); 2854 2855 /** 2856 * Constant for the "Grantha" Unicode character block. 2857 * @since 9 2858 */ 2859 public static final UnicodeBlock GRANTHA = 2860 new UnicodeBlock("GRANTHA"); 2861 2862 /** 2863 * Constant for the "Tirhuta" Unicode character block. 2864 * @since 9 2865 */ 2866 public static final UnicodeBlock TIRHUTA = 2867 new UnicodeBlock("TIRHUTA"); 2868 2869 /** 2870 * Constant for the "Siddham" Unicode character block. 2871 * @since 9 2872 */ 2873 public static final UnicodeBlock SIDDHAM = 2874 new UnicodeBlock("SIDDHAM"); 2875 2876 /** 2877 * Constant for the "Modi" Unicode character block. 2878 * @since 9 2879 */ 2880 public static final UnicodeBlock MODI = 2881 new UnicodeBlock("MODI"); 2882 2883 /** 2884 * Constant for the "Warang Citi" Unicode character block. 2885 * @since 9 2886 */ 2887 public static final UnicodeBlock WARANG_CITI = 2888 new UnicodeBlock("WARANG_CITI", 2889 "WARANG CITI", 2890 "WARANGCITI"); 2891 2892 /** 2893 * Constant for the "Pau Cin Hau" Unicode character block. 2894 * @since 9 2895 */ 2896 public static final UnicodeBlock PAU_CIN_HAU = 2897 new UnicodeBlock("PAU_CIN_HAU", 2898 "PAU CIN HAU", 2899 "PAUCINHAU"); 2900 2901 /** 2902 * Constant for the "Mro" Unicode character block. 
2903 * @since 9 2904 */ 2905 public static final UnicodeBlock MRO = 2906 new UnicodeBlock("MRO"); 2907 2908 /** 2909 * Constant for the "Bassa Vah" Unicode character block. 2910 * @since 9 2911 */ 2912 public static final UnicodeBlock BASSA_VAH = 2913 new UnicodeBlock("BASSA_VAH", 2914 "BASSA VAH", 2915 "BASSAVAH"); 2916 2917 /** 2918 * Constant for the "Pahawh Hmong" Unicode character block. 2919 * @since 9 2920 */ 2921 public static final UnicodeBlock PAHAWH_HMONG = 2922 new UnicodeBlock("PAHAWH_HMONG", 2923 "PAHAWH HMONG", 2924 "PAHAWHHMONG"); 2925 2926 /** 2927 * Constant for the "Duployan" Unicode character block. 2928 * @since 9 2929 */ 2930 public static final UnicodeBlock DUPLOYAN = 2931 new UnicodeBlock("DUPLOYAN"); 2932 2933 /** 2934 * Constant for the "Shorthand Format Controls" Unicode character block. 2935 * @since 9 2936 */ 2937 public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS = 2938 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", 2939 "SHORTHAND FORMAT CONTROLS", 2940 "SHORTHANDFORMATCONTROLS"); 2941 2942 /** 2943 * Constant for the "Mende Kikakui" Unicode character block. 2944 * @since 9 2945 */ 2946 public static final UnicodeBlock MENDE_KIKAKUI = 2947 new UnicodeBlock("MENDE_KIKAKUI", 2948 "MENDE KIKAKUI", 2949 "MENDEKIKAKUI"); 2950 2951 /** 2952 * Constant for the "Ornamental Dingbats" Unicode character block. 2953 * @since 9 2954 */ 2955 public static final UnicodeBlock ORNAMENTAL_DINGBATS = 2956 new UnicodeBlock("ORNAMENTAL_DINGBATS", 2957 "ORNAMENTAL DINGBATS", 2958 "ORNAMENTALDINGBATS"); 2959 2960 /** 2961 * Constant for the "Geometric Shapes Extended" Unicode character block. 2962 * @since 9 2963 */ 2964 public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED = 2965 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", 2966 "GEOMETRIC SHAPES EXTENDED", 2967 "GEOMETRICSHAPESEXTENDED"); 2968 2969 /** 2970 * Constant for the "Supplemental Arrows-C" Unicode character block. 
2971 * @since 9 2972 */ 2973 public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C = 2974 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", 2975 "SUPPLEMENTAL ARROWS-C", 2976 "SUPPLEMENTALARROWS-C"); 2977 2978 /** 2979 * Constant for the "Cherokee Supplement" Unicode character block. 2980 * @since 9 2981 */ 2982 public static final UnicodeBlock CHEROKEE_SUPPLEMENT = 2983 new UnicodeBlock("CHEROKEE_SUPPLEMENT", 2984 "CHEROKEE SUPPLEMENT", 2985 "CHEROKEESUPPLEMENT"); 2986 2987 /** 2988 * Constant for the "Hatran" Unicode character block. 2989 * @since 9 2990 */ 2991 public static final UnicodeBlock HATRAN = 2992 new UnicodeBlock("HATRAN"); 2993 2994 /** 2995 * Constant for the "Old Hungarian" Unicode character block. 2996 * @since 9 2997 */ 2998 public static final UnicodeBlock OLD_HUNGARIAN = 2999 new UnicodeBlock("OLD_HUNGARIAN", 3000 "OLD HUNGARIAN", 3001 "OLDHUNGARIAN"); 3002 3003 /** 3004 * Constant for the "Multani" Unicode character block. 3005 * @since 9 3006 */ 3007 public static final UnicodeBlock MULTANI = 3008 new UnicodeBlock("MULTANI"); 3009 3010 /** 3011 * Constant for the "Ahom" Unicode character block. 3012 * @since 9 3013 */ 3014 public static final UnicodeBlock AHOM = 3015 new UnicodeBlock("AHOM"); 3016 3017 /** 3018 * Constant for the "Early Dynastic Cuneiform" Unicode character block. 3019 * @since 9 3020 */ 3021 public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM = 3022 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", 3023 "EARLY DYNASTIC CUNEIFORM", 3024 "EARLYDYNASTICCUNEIFORM"); 3025 3026 /** 3027 * Constant for the "Anatolian Hieroglyphs" Unicode character block. 3028 * @since 9 3029 */ 3030 public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS = 3031 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", 3032 "ANATOLIAN HIEROGLYPHS", 3033 "ANATOLIANHIEROGLYPHS"); 3034 3035 /** 3036 * Constant for the "Sutton SignWriting" Unicode character block. 
3037 * @since 9 3038 */ 3039 public static final UnicodeBlock SUTTON_SIGNWRITING = 3040 new UnicodeBlock("SUTTON_SIGNWRITING", 3041 "SUTTON SIGNWRITING", 3042 "SUTTONSIGNWRITING"); 3043 3044 /** 3045 * Constant for the "Supplemental Symbols and Pictographs" Unicode 3046 * character block. 3047 * @since 9 3048 */ 3049 public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 3050 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS", 3051 "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS", 3052 "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS"); 3053 3054 /** 3055 * Constant for the "CJK Unified Ideographs Extension E" Unicode 3056 * character block. 3057 * @since 9 3058 */ 3059 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 3060 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E", 3061 "CJK UNIFIED IDEOGRAPHS EXTENSION E", 3062 "CJKUNIFIEDIDEOGRAPHSEXTENSIONE"); 3063 3064 /** 3065 * Constant for the "Syriac Supplement" Unicode 3066 * character block. 3067 * @since 11 3068 */ 3069 public static final UnicodeBlock SYRIAC_SUPPLEMENT = 3070 new UnicodeBlock("SYRIAC_SUPPLEMENT", 3071 "SYRIAC SUPPLEMENT", 3072 "SYRIACSUPPLEMENT"); 3073 3074 /** 3075 * Constant for the "Cyrillic Extended-C" Unicode 3076 * character block. 3077 * @since 11 3078 */ 3079 public static final UnicodeBlock CYRILLIC_EXTENDED_C = 3080 new UnicodeBlock("CYRILLIC_EXTENDED_C", 3081 "CYRILLIC EXTENDED-C", 3082 "CYRILLICEXTENDED-C"); 3083 3084 /** 3085 * Constant for the "Osage" Unicode 3086 * character block. 3087 * @since 11 3088 */ 3089 public static final UnicodeBlock OSAGE = 3090 new UnicodeBlock("OSAGE"); 3091 3092 /** 3093 * Constant for the "Newa" Unicode 3094 * character block. 3095 * @since 11 3096 */ 3097 public static final UnicodeBlock NEWA = 3098 new UnicodeBlock("NEWA"); 3099 3100 /** 3101 * Constant for the "Mongolian Supplement" Unicode 3102 * character block. 
3103 * @since 11 3104 */ 3105 public static final UnicodeBlock MONGOLIAN_SUPPLEMENT = 3106 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", 3107 "MONGOLIAN SUPPLEMENT", 3108 "MONGOLIANSUPPLEMENT"); 3109 3110 /** 3111 * Constant for the "Marchen" Unicode 3112 * character block. 3113 * @since 11 3114 */ 3115 public static final UnicodeBlock MARCHEN = 3116 new UnicodeBlock("MARCHEN"); 3117 3118 /** 3119 * Constant for the "Ideographic Symbols and Punctuation" Unicode 3120 * character block. 3121 * @since 11 3122 */ 3123 public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 3124 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", 3125 "IDEOGRAPHIC SYMBOLS AND PUNCTUATION", 3126 "IDEOGRAPHICSYMBOLSANDPUNCTUATION"); 3127 3128 /** 3129 * Constant for the "Tangut" Unicode 3130 * character block. 3131 * @since 11 3132 */ 3133 public static final UnicodeBlock TANGUT = 3134 new UnicodeBlock("TANGUT"); 3135 3136 /** 3137 * Constant for the "Tangut Components" Unicode 3138 * character block. 3139 * @since 11 3140 */ 3141 public static final UnicodeBlock TANGUT_COMPONENTS = 3142 new UnicodeBlock("TANGUT_COMPONENTS", 3143 "TANGUT COMPONENTS", 3144 "TANGUTCOMPONENTS"); 3145 3146 /** 3147 * Constant for the "Kana Extended-A" Unicode 3148 * character block. 3149 * @since 11 3150 */ 3151 public static final UnicodeBlock KANA_EXTENDED_A = 3152 new UnicodeBlock("KANA_EXTENDED_A", 3153 "KANA EXTENDED-A", 3154 "KANAEXTENDED-A"); 3155 /** 3156 * Constant for the "Glagolitic Supplement" Unicode 3157 * character block. 3158 * @since 11 3159 */ 3160 public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT = 3161 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", 3162 "GLAGOLITIC SUPPLEMENT", 3163 "GLAGOLITICSUPPLEMENT"); 3164 /** 3165 * Constant for the "Adlam" Unicode 3166 * character block. 3167 * @since 11 3168 */ 3169 public static final UnicodeBlock ADLAM = 3170 new UnicodeBlock("ADLAM"); 3171 3172 /** 3173 * Constant for the "Masaram Gondi" Unicode 3174 * character block. 
3175 * @since 11 3176 */ 3177 public static final UnicodeBlock MASARAM_GONDI = 3178 new UnicodeBlock("MASARAM_GONDI", 3179 "MASARAM GONDI", 3180 "MASARAMGONDI"); 3181 3182 /** 3183 * Constant for the "Zanabazar Square" Unicode 3184 * character block. 3185 * @since 11 3186 */ 3187 public static final UnicodeBlock ZANABAZAR_SQUARE = 3188 new UnicodeBlock("ZANABAZAR_SQUARE", 3189 "ZANABAZAR SQUARE", 3190 "ZANABAZARSQUARE"); 3191 3192 /** 3193 * Constant for the "Nushu" Unicode 3194 * character block. 3195 * @since 11 3196 */ 3197 public static final UnicodeBlock NUSHU = 3198 new UnicodeBlock("NUSHU"); 3199 3200 /** 3201 * Constant for the "Soyombo" Unicode 3202 * character block. 3203 * @since 11 3204 */ 3205 public static final UnicodeBlock SOYOMBO = 3206 new UnicodeBlock("SOYOMBO"); 3207 3208 /** 3209 * Constant for the "Bhaiksuki" Unicode 3210 * character block. 3211 * @since 11 3212 */ 3213 public static final UnicodeBlock BHAIKSUKI = 3214 new UnicodeBlock("BHAIKSUKI"); 3215 3216 /** 3217 * Constant for the "CJK Unified Ideographs Extension F" Unicode 3218 * character block. 3219 * @since 11 3220 */ 3221 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 3222 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", 3223 "CJK UNIFIED IDEOGRAPHS EXTENSION F", 3224 "CJKUNIFIEDIDEOGRAPHSEXTENSIONF"); 3225 /** 3226 * Constant for the "Georgian Extended" Unicode 3227 * character block. 3228 * @since 12 3229 */ 3230 public static final UnicodeBlock GEORGIAN_EXTENDED = 3231 new UnicodeBlock("GEORGIAN_EXTENDED", 3232 "GEORGIAN EXTENDED", 3233 "GEORGIANEXTENDED"); 3234 3235 /** 3236 * Constant for the "Hanifi Rohingya" Unicode 3237 * character block. 3238 * @since 12 3239 */ 3240 public static final UnicodeBlock HANIFI_ROHINGYA = 3241 new UnicodeBlock("HANIFI_ROHINGYA", 3242 "HANIFI ROHINGYA", 3243 "HANIFIROHINGYA"); 3244 3245 /** 3246 * Constant for the "Old Sogdian" Unicode 3247 * character block. 
3248 * @since 12 3249 */ 3250 public static final UnicodeBlock OLD_SOGDIAN = 3251 new UnicodeBlock("OLD_SOGDIAN", 3252 "OLD SOGDIAN", 3253 "OLDSOGDIAN"); 3254 3255 /** 3256 * Constant for the "Sogdian" Unicode 3257 * character block. 3258 * @since 12 3259 */ 3260 public static final UnicodeBlock SOGDIAN = 3261 new UnicodeBlock("SOGDIAN"); 3262 3263 /** 3264 * Constant for the "Dogra" Unicode 3265 * character block. 3266 * @since 12 3267 */ 3268 public static final UnicodeBlock DOGRA = 3269 new UnicodeBlock("DOGRA"); 3270 3271 /** 3272 * Constant for the "Gunjala Gondi" Unicode 3273 * character block. 3274 * @since 12 3275 */ 3276 public static final UnicodeBlock GUNJALA_GONDI = 3277 new UnicodeBlock("GUNJALA_GONDI", 3278 "GUNJALA GONDI", 3279 "GUNJALAGONDI"); 3280 3281 /** 3282 * Constant for the "Makasar" Unicode 3283 * character block. 3284 * @since 12 3285 */ 3286 public static final UnicodeBlock MAKASAR = 3287 new UnicodeBlock("MAKASAR"); 3288 3289 /** 3290 * Constant for the "Medefaidrin" Unicode 3291 * character block. 3292 * @since 12 3293 */ 3294 public static final UnicodeBlock MEDEFAIDRIN = 3295 new UnicodeBlock("MEDEFAIDRIN"); 3296 3297 /** 3298 * Constant for the "Mayan Numerals" Unicode 3299 * character block. 3300 * @since 12 3301 */ 3302 public static final UnicodeBlock MAYAN_NUMERALS = 3303 new UnicodeBlock("MAYAN_NUMERALS", 3304 "MAYAN NUMERALS", 3305 "MAYANNUMERALS"); 3306 3307 /** 3308 * Constant for the "Indic Siyaq Numbers" Unicode 3309 * character block. 3310 * @since 12 3311 */ 3312 public static final UnicodeBlock INDIC_SIYAQ_NUMBERS = 3313 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", 3314 "INDIC SIYAQ NUMBERS", 3315 "INDICSIYAQNUMBERS"); 3316 3317 /** 3318 * Constant for the "Chess Symbols" Unicode 3319 * character block. 
3320 * @since 12 3321 */ 3322 public static final UnicodeBlock CHESS_SYMBOLS = 3323 new UnicodeBlock("CHESS_SYMBOLS", 3324 "CHESS SYMBOLS", 3325 "CHESSSYMBOLS"); 3326 3327 /** 3328 * Constant for the "Elymaic" Unicode 3329 * character block. 3330 * @since 13 3331 */ 3332 public static final UnicodeBlock ELYMAIC = 3333 new UnicodeBlock("ELYMAIC"); 3334 3335 /** 3336 * Constant for the "Nandinagari" Unicode 3337 * character block. 3338 * @since 13 3339 */ 3340 public static final UnicodeBlock NANDINAGARI = 3341 new UnicodeBlock("NANDINAGARI"); 3342 3343 /** 3344 * Constant for the "Tamil Supplement" Unicode 3345 * character block. 3346 * @since 13 3347 */ 3348 public static final UnicodeBlock TAMIL_SUPPLEMENT = 3349 new UnicodeBlock("TAMIL_SUPPLEMENT", 3350 "TAMIL SUPPLEMENT", 3351 "TAMILSUPPLEMENT"); 3352 3353 /** 3354 * Constant for the "Egyptian Hieroglyph Format Controls" Unicode 3355 * character block. 3356 * @since 13 3357 */ 3358 public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 3359 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", 3360 "EGYPTIAN HIEROGLYPH FORMAT CONTROLS", 3361 "EGYPTIANHIEROGLYPHFORMATCONTROLS"); 3362 3363 /** 3364 * Constant for the "Small Kana Extension" Unicode 3365 * character block. 3366 * @since 13 3367 */ 3368 public static final UnicodeBlock SMALL_KANA_EXTENSION = 3369 new UnicodeBlock("SMALL_KANA_EXTENSION", 3370 "SMALL KANA EXTENSION", 3371 "SMALLKANAEXTENSION"); 3372 3373 /** 3374 * Constant for the "Nyiakeng Puachue Hmong" Unicode 3375 * character block. 3376 * @since 13 3377 */ 3378 public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG = 3379 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", 3380 "NYIAKENG PUACHUE HMONG", 3381 "NYIAKENGPUACHUEHMONG"); 3382 3383 /** 3384 * Constant for the "Wancho" Unicode 3385 * character block. 
3386 * @since 13 3387 */ 3388 public static final UnicodeBlock WANCHO = 3389 new UnicodeBlock("WANCHO"); 3390 3391 /** 3392 * Constant for the "Ottoman Siyaq Numbers" Unicode 3393 * character block. 3394 * @since 13 3395 */ 3396 public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS = 3397 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", 3398 "OTTOMAN SIYAQ NUMBERS", 3399 "OTTOMANSIYAQNUMBERS"); 3400 3401 /** 3402 * Constant for the "Symbols and Pictographs Extended-A" Unicode 3403 * character block. 3404 * @since 13 3405 */ 3406 public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 3407 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", 3408 "SYMBOLS AND PICTOGRAPHS EXTENDED-A", 3409 "SYMBOLSANDPICTOGRAPHSEXTENDED-A"); 3410 3411 /** 3412 * Constant for the "Yezidi" Unicode 3413 * character block. 3414 * @since 15 3415 */ 3416 public static final UnicodeBlock YEZIDI = 3417 new UnicodeBlock("YEZIDI"); 3418 3419 /** 3420 * Constant for the "Chorasmian" Unicode 3421 * character block. 3422 * @since 15 3423 */ 3424 public static final UnicodeBlock CHORASMIAN = 3425 new UnicodeBlock("CHORASMIAN"); 3426 3427 /** 3428 * Constant for the "Dives Akuru" Unicode 3429 * character block. 3430 * @since 15 3431 */ 3432 public static final UnicodeBlock DIVES_AKURU = 3433 new UnicodeBlock("DIVES_AKURU", 3434 "DIVES AKURU", 3435 "DIVESAKURU"); 3436 3437 /** 3438 * Constant for the "Lisu Supplement" Unicode 3439 * character block. 3440 * @since 15 3441 */ 3442 public static final UnicodeBlock LISU_SUPPLEMENT = 3443 new UnicodeBlock("LISU_SUPPLEMENT", 3444 "LISU SUPPLEMENT", 3445 "LISUSUPPLEMENT"); 3446 3447 /** 3448 * Constant for the "Khitan Small Script" Unicode 3449 * character block. 3450 * @since 15 3451 */ 3452 public static final UnicodeBlock KHITAN_SMALL_SCRIPT = 3453 new UnicodeBlock("KHITAN_SMALL_SCRIPT", 3454 "KHITAN SMALL SCRIPT", 3455 "KHITANSMALLSCRIPT"); 3456 3457 /** 3458 * Constant for the "Tangut Supplement" Unicode 3459 * character block. 
3460 * @since 15 3461 */ 3462 public static final UnicodeBlock TANGUT_SUPPLEMENT = 3463 new UnicodeBlock("TANGUT_SUPPLEMENT", 3464 "TANGUT SUPPLEMENT", 3465 "TANGUTSUPPLEMENT"); 3466 3467 /** 3468 * Constant for the "Symbols for Legacy Computing" Unicode 3469 * character block. 3470 * @since 15 3471 */ 3472 public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING = 3473 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", 3474 "SYMBOLS FOR LEGACY COMPUTING", 3475 "SYMBOLSFORLEGACYCOMPUTING"); 3476 3477 /** 3478 * Constant for the "CJK Unified Ideographs Extension G" Unicode 3479 * character block. 3480 * @since 15 3481 */ 3482 public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 3483 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G", 3484 "CJK UNIFIED IDEOGRAPHS EXTENSION G", 3485 "CJKUNIFIEDIDEOGRAPHSEXTENSIONG"); 3486 3487 private static final int[] blockStarts = { 3488 0x0000, // 0000..007F; Basic Latin 3489 0x0080, // 0080..00FF; Latin-1 Supplement 3490 0x0100, // 0100..017F; Latin Extended-A 3491 0x0180, // 0180..024F; Latin Extended-B 3492 0x0250, // 0250..02AF; IPA Extensions 3493 0x02B0, // 02B0..02FF; Spacing Modifier Letters 3494 0x0300, // 0300..036F; Combining Diacritical Marks 3495 0x0370, // 0370..03FF; Greek and Coptic 3496 0x0400, // 0400..04FF; Cyrillic 3497 0x0500, // 0500..052F; Cyrillic Supplement 3498 0x0530, // 0530..058F; Armenian 3499 0x0590, // 0590..05FF; Hebrew 3500 0x0600, // 0600..06FF; Arabic 3501 0x0700, // 0700..074F; Syriac 3502 0x0750, // 0750..077F; Arabic Supplement 3503 0x0780, // 0780..07BF; Thaana 3504 0x07C0, // 07C0..07FF; NKo 3505 0x0800, // 0800..083F; Samaritan 3506 0x0840, // 0840..085F; Mandaic 3507 0x0860, // 0860..086F; Syriac Supplement 3508 0x0870, // unassigned 3509 0x08A0, // 08A0..08FF; Arabic Extended-A 3510 0x0900, // 0900..097F; Devanagari 3511 0x0980, // 0980..09FF; Bengali 3512 0x0A00, // 0A00..0A7F; Gurmukhi 3513 0x0A80, // 0A80..0AFF; Gujarati 3514 0x0B00, // 0B00..0B7F; Oriya 3515 
0x0B80, // 0B80..0BFF; Tamil 3516 0x0C00, // 0C00..0C7F; Telugu 3517 0x0C80, // 0C80..0CFF; Kannada 3518 0x0D00, // 0D00..0D7F; Malayalam 3519 0x0D80, // 0D80..0DFF; Sinhala 3520 0x0E00, // 0E00..0E7F; Thai 3521 0x0E80, // 0E80..0EFF; Lao 3522 0x0F00, // 0F00..0FFF; Tibetan 3523 0x1000, // 1000..109F; Myanmar 3524 0x10A0, // 10A0..10FF; Georgian 3525 0x1100, // 1100..11FF; Hangul Jamo 3526 0x1200, // 1200..137F; Ethiopic 3527 0x1380, // 1380..139F; Ethiopic Supplement 3528 0x13A0, // 13A0..13FF; Cherokee 3529 0x1400, // 1400..167F; Unified Canadian Aboriginal Syllabics 3530 0x1680, // 1680..169F; Ogham 3531 0x16A0, // 16A0..16FF; Runic 3532 0x1700, // 1700..171F; Tagalog 3533 0x1720, // 1720..173F; Hanunoo 3534 0x1740, // 1740..175F; Buhid 3535 0x1760, // 1760..177F; Tagbanwa 3536 0x1780, // 1780..17FF; Khmer 3537 0x1800, // 1800..18AF; Mongolian 3538 0x18B0, // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended 3539 0x1900, // 1900..194F; Limbu 3540 0x1950, // 1950..197F; Tai Le 3541 0x1980, // 1980..19DF; New Tai Lue 3542 0x19E0, // 19E0..19FF; Khmer Symbols 3543 0x1A00, // 1A00..1A1F; Buginese 3544 0x1A20, // 1A20..1AAF; Tai Tham 3545 0x1AB0, // 1AB0..1AFF; Combining Diacritical Marks Extended 3546 0x1B00, // 1B00..1B7F; Balinese 3547 0x1B80, // 1B80..1BBF; Sundanese 3548 0x1BC0, // 1BC0..1BFF; Batak 3549 0x1C00, // 1C00..1C4F; Lepcha 3550 0x1C50, // 1C50..1C7F; Ol Chiki 3551 0x1C80, // 1C80..1C8F; Cyrillic Extended-C 3552 0x1C90, // 1C90..1CBF; Georgian Extended 3553 0x1CC0, // 1CC0..1CCF; Sundanese Supplement 3554 0x1CD0, // 1CD0..1CFF; Vedic Extensions 3555 0x1D00, // 1D00..1D7F; Phonetic Extensions 3556 0x1D80, // 1D80..1DBF; Phonetic Extensions Supplement 3557 0x1DC0, // 1DC0..1DFF; Combining Diacritical Marks Supplement 3558 0x1E00, // 1E00..1EFF; Latin Extended Additional 3559 0x1F00, // 1F00..1FFF; Greek Extended 3560 0x2000, // 2000..206F; General Punctuation 3561 0x2070, // 2070..209F; Superscripts and Subscripts 3562 0x20A0, // 20A0..20CF; 
Currency Symbols 3563 0x20D0, // 20D0..20FF; Combining Diacritical Marks for Symbols 3564 0x2100, // 2100..214F; Letterlike Symbols 3565 0x2150, // 2150..218F; Number Forms 3566 0x2190, // 2190..21FF; Arrows 3567 0x2200, // 2200..22FF; Mathematical Operators 3568 0x2300, // 2300..23FF; Miscellaneous Technical 3569 0x2400, // 2400..243F; Control Pictures 3570 0x2440, // 2440..245F; Optical Character Recognition 3571 0x2460, // 2460..24FF; Enclosed Alphanumerics 3572 0x2500, // 2500..257F; Box Drawing 3573 0x2580, // 2580..259F; Block Elements 3574 0x25A0, // 25A0..25FF; Geometric Shapes 3575 0x2600, // 2600..26FF; Miscellaneous Symbols 3576 0x2700, // 2700..27BF; Dingbats 3577 0x27C0, // 27C0..27EF; Miscellaneous Mathematical Symbols-A 3578 0x27F0, // 27F0..27FF; Supplemental Arrows-A 3579 0x2800, // 2800..28FF; Braille Patterns 3580 0x2900, // 2900..297F; Supplemental Arrows-B 3581 0x2980, // 2980..29FF; Miscellaneous Mathematical Symbols-B 3582 0x2A00, // 2A00..2AFF; Supplemental Mathematical Operators 3583 0x2B00, // 2B00..2BFF; Miscellaneous Symbols and Arrows 3584 0x2C00, // 2C00..2C5F; Glagolitic 3585 0x2C60, // 2C60..2C7F; Latin Extended-C 3586 0x2C80, // 2C80..2CFF; Coptic 3587 0x2D00, // 2D00..2D2F; Georgian Supplement 3588 0x2D30, // 2D30..2D7F; Tifinagh 3589 0x2D80, // 2D80..2DDF; Ethiopic Extended 3590 0x2DE0, // 2DE0..2DFF; Cyrillic Extended-A 3591 0x2E00, // 2E00..2E7F; Supplemental Punctuation 3592 0x2E80, // 2E80..2EFF; CJK Radicals Supplement 3593 0x2F00, // 2F00..2FDF; Kangxi Radicals 3594 0x2FE0, // unassigned 3595 0x2FF0, // 2FF0..2FFF; Ideographic Description Characters 3596 0x3000, // 3000..303F; CJK Symbols and Punctuation 3597 0x3040, // 3040..309F; Hiragana 3598 0x30A0, // 30A0..30FF; Katakana 3599 0x3100, // 3100..312F; Bopomofo 3600 0x3130, // 3130..318F; Hangul Compatibility Jamo 3601 0x3190, // 3190..319F; Kanbun 3602 0x31A0, // 31A0..31BF; Bopomofo Extended 3603 0x31C0, // 31C0..31EF; CJK Strokes 3604 0x31F0, // 31F0..31FF; Katakana 
Phonetic Extensions 3605 0x3200, // 3200..32FF; Enclosed CJK Letters and Months 3606 0x3300, // 3300..33FF; CJK Compatibility 3607 0x3400, // 3400..4DBF; CJK Unified Ideographs Extension A 3608 0x4DC0, // 4DC0..4DFF; Yijing Hexagram Symbols 3609 0x4E00, // 4E00..9FFF; CJK Unified Ideographs 3610 0xA000, // A000..A48F; Yi Syllables 3611 0xA490, // A490..A4CF; Yi Radicals 3612 0xA4D0, // A4D0..A4FF; Lisu 3613 0xA500, // A500..A63F; Vai 3614 0xA640, // A640..A69F; Cyrillic Extended-B 3615 0xA6A0, // A6A0..A6FF; Bamum 3616 0xA700, // A700..A71F; Modifier Tone Letters 3617 0xA720, // A720..A7FF; Latin Extended-D 3618 0xA800, // A800..A82F; Syloti Nagri 3619 0xA830, // A830..A83F; Common Indic Number Forms 3620 0xA840, // A840..A87F; Phags-pa 3621 0xA880, // A880..A8DF; Saurashtra 3622 0xA8E0, // A8E0..A8FF; Devanagari Extended 3623 0xA900, // A900..A92F; Kayah Li 3624 0xA930, // A930..A95F; Rejang 3625 0xA960, // A960..A97F; Hangul Jamo Extended-A 3626 0xA980, // A980..A9DF; Javanese 3627 0xA9E0, // A9E0..A9FF; Myanmar Extended-B 3628 0xAA00, // AA00..AA5F; Cham 3629 0xAA60, // AA60..AA7F; Myanmar Extended-A 3630 0xAA80, // AA80..AADF; Tai Viet 3631 0xAAE0, // AAE0..AAFF; Meetei Mayek Extensions 3632 0xAB00, // AB00..AB2F; Ethiopic Extended-A 3633 0xAB30, // AB30..AB6F; Latin Extended-E 3634 0xAB70, // AB70..ABBF; Cherokee Supplement 3635 0xABC0, // ABC0..ABFF; Meetei Mayek 3636 0xAC00, // AC00..D7AF; Hangul Syllables 3637 0xD7B0, // D7B0..D7FF; Hangul Jamo Extended-B 3638 0xD800, // D800..DB7F; High Surrogates 3639 0xDB80, // DB80..DBFF; High Private Use Surrogates 3640 0xDC00, // DC00..DFFF; Low Surrogates 3641 0xE000, // E000..F8FF; Private Use Area 3642 0xF900, // F900..FAFF; CJK Compatibility Ideographs 3643 0xFB00, // FB00..FB4F; Alphabetic Presentation Forms 3644 0xFB50, // FB50..FDFF; Arabic Presentation Forms-A 3645 0xFE00, // FE00..FE0F; Variation Selectors 3646 0xFE10, // FE10..FE1F; Vertical Forms 3647 0xFE20, // FE20..FE2F; Combining Half Marks 3648 0xFE30, 
// FE30..FE4F; CJK Compatibility Forms 3649 0xFE50, // FE50..FE6F; Small Form Variants 3650 0xFE70, // FE70..FEFF; Arabic Presentation Forms-B 3651 0xFF00, // FF00..FFEF; Halfwidth and Fullwidth Forms 3652 0xFFF0, // FFF0..FFFF; Specials 3653 0x10000, // 10000..1007F; Linear B Syllabary 3654 0x10080, // 10080..100FF; Linear B Ideograms 3655 0x10100, // 10100..1013F; Aegean Numbers 3656 0x10140, // 10140..1018F; Ancient Greek Numbers 3657 0x10190, // 10190..101CF; Ancient Symbols 3658 0x101D0, // 101D0..101FF; Phaistos Disc 3659 0x10200, // unassigned 3660 0x10280, // 10280..1029F; Lycian 3661 0x102A0, // 102A0..102DF; Carian 3662 0x102E0, // 102E0..102FF; Coptic Epact Numbers 3663 0x10300, // 10300..1032F; Old Italic 3664 0x10330, // 10330..1034F; Gothic 3665 0x10350, // 10350..1037F; Old Permic 3666 0x10380, // 10380..1039F; Ugaritic 3667 0x103A0, // 103A0..103DF; Old Persian 3668 0x103E0, // unassigned 3669 0x10400, // 10400..1044F; Deseret 3670 0x10450, // 10450..1047F; Shavian 3671 0x10480, // 10480..104AF; Osmanya 3672 0x104B0, // 104B0..104FF; Osage 3673 0x10500, // 10500..1052F; Elbasan 3674 0x10530, // 10530..1056F; Caucasian Albanian 3675 0x10570, // unassigned 3676 0x10600, // 10600..1077F; Linear A 3677 0x10780, // unassigned 3678 0x10800, // 10800..1083F; Cypriot Syllabary 3679 0x10840, // 10840..1085F; Imperial Aramaic 3680 0x10860, // 10860..1087F; Palmyrene 3681 0x10880, // 10880..108AF; Nabataean 3682 0x108B0, // unassigned 3683 0x108E0, // 108E0..108FF; Hatran 3684 0x10900, // 10900..1091F; Phoenician 3685 0x10920, // 10920..1093F; Lydian 3686 0x10940, // unassigned 3687 0x10980, // 10980..1099F; Meroitic Hieroglyphs 3688 0x109A0, // 109A0..109FF; Meroitic Cursive 3689 0x10A00, // 10A00..10A5F; Kharoshthi 3690 0x10A60, // 10A60..10A7F; Old South Arabian 3691 0x10A80, // 10A80..10A9F; Old North Arabian 3692 0x10AA0, // unassigned 3693 0x10AC0, // 10AC0..10AFF; Manichaean 3694 0x10B00, // 10B00..10B3F; Avestan 3695 0x10B40, // 10B40..10B5F; 
Inscriptional Parthian 3696 0x10B60, // 10B60..10B7F; Inscriptional Pahlavi 3697 0x10B80, // 10B80..10BAF; Psalter Pahlavi 3698 0x10BB0, // unassigned 3699 0x10C00, // 10C00..10C4F; Old Turkic 3700 0x10C50, // unassigned 3701 0x10C80, // 10C80..10CFF; Old Hungarian 3702 0x10D00, // 10D00..10D3F; Hanifi Rohingya 3703 0x10D40, // unassigned 3704 0x10E60, // 10E60..10E7F; Rumi Numeral Symbols 3705 0x10E80, // 10E80..10EBF; Yezidi 3706 0x10EC0, // unassigned 3707 0x10F00, // 10F00..10F2F; Old Sogdian 3708 0x10F30, // 10F30..10F6F; Sogdian 3709 0x10F70, // unassigned 3710 0x10FB0, // 10FB0..10FDF; Chorasmian 3711 0x10FE0, // 10FE0..10FFF; Elymaic 3712 0x11000, // 11000..1107F; Brahmi 3713 0x11080, // 11080..110CF; Kaithi 3714 0x110D0, // 110D0..110FF; Sora Sompeng 3715 0x11100, // 11100..1114F; Chakma 3716 0x11150, // 11150..1117F; Mahajani 3717 0x11180, // 11180..111DF; Sharada 3718 0x111E0, // 111E0..111FF; Sinhala Archaic Numbers 3719 0x11200, // 11200..1124F; Khojki 3720 0x11250, // unassigned 3721 0x11280, // 11280..112AF; Multani 3722 0x112B0, // 112B0..112FF; Khudawadi 3723 0x11300, // 11300..1137F; Grantha 3724 0x11380, // unassigned 3725 0x11400, // 11400..1147F; Newa 3726 0x11480, // 11480..114DF; Tirhuta 3727 0x114E0, // unassigned 3728 0x11580, // 11580..115FF; Siddham 3729 0x11600, // 11600..1165F; Modi 3730 0x11660, // 11660..1167F; Mongolian Supplement 3731 0x11680, // 11680..116CF; Takri 3732 0x116D0, // unassigned 3733 0x11700, // 11700..1173F; Ahom 3734 0x11740, // unassigned 3735 0x11800, // 11800..1184F; Dogra 3736 0x11850, // unassigned 3737 0x118A0, // 118A0..118FF; Warang Citi 3738 0x11900, // 11900..1195F; Dives Akuru 3739 0x11960, // unassigned 3740 0x119A0, // 119A0..119FF; Nandinagari 3741 0x11A00, // 11A00..11A4F; Zanabazar Square 3742 0x11A50, // 11A50..11AAF; Soyombo 3743 0x11AB0, // unassigned 3744 0x11AC0, // 11AC0..11AFF; Pau Cin Hau 3745 0x11B00, // unassigned 3746 0x11C00, // 11C00..11C6F; Bhaiksuki 3747 0x11C70, // 11C70..11CBF; 
Marchen 3748 0x11CC0, // unassigned 3749 0x11D00, // 11D00..11D5F; Masaram Gondi 3750 0x11D60, // 11D60..11DAF; Gunjala Gondi 3751 0x11DB0, // unassigned 3752 0x11EE0, // 11EE0..11EFF; Makasar 3753 0x11F00, // unassigned 3754 0x11FB0, // 11FB0..11FBF; Lisu Supplement 3755 0x11FC0, // 11FC0..11FFF; Tamil Supplement 3756 0x12000, // 12000..123FF; Cuneiform 3757 0x12400, // 12400..1247F; Cuneiform Numbers and Punctuation 3758 0x12480, // 12480..1254F; Early Dynastic Cuneiform 3759 0x12550, // unassigned 3760 0x13000, // 13000..1342F; Egyptian Hieroglyphs 3761 0x13430, // 13430..1343F; Egyptian Hieroglyph Format Controls 3762 0x13440, // unassigned 3763 0x14400, // 14400..1467F; Anatolian Hieroglyphs 3764 0x14680, // unassigned 3765 0x16800, // 16800..16A3F; Bamum Supplement 3766 0x16A40, // 16A40..16A6F; Mro 3767 0x16A70, // unassigned 3768 0x16AD0, // 16AD0..16AFF; Bassa Vah 3769 0x16B00, // 16B00..16B8F; Pahawh Hmong 3770 0x16B90, // unassigned 3771 0x16E40, // 16E40..16E9F; Medefaidrin 3772 0x16EA0, // unassigned 3773 0x16F00, // 16F00..16F9F; Miao 3774 0x16FA0, // unassigned 3775 0x16FE0, // 16FE0..16FFF; Ideographic Symbols and Punctuation 3776 0x17000, // 17000..187FF; Tangut 3777 0x18800, // 18800..18AFF; Tangut Components 3778 0x18B00, // 18B00..18CFF; Khitan Small Script 3779 0x18D00, // 18D00..18D8F; Tangut Supplement 3780 0x18D90, // unassigned 3781 0x1B000, // 1B000..1B0FF; Kana Supplement 3782 0x1B100, // 1B100..1B12F; Kana Extended-A 3783 0x1B130, // 1B130..1B16F; Small Kana Extension 3784 0x1B170, // 1B170..1B2FF; Nushu 3785 0x1B300, // unassigned 3786 0x1BC00, // 1BC00..1BC9F; Duployan 3787 0x1BCA0, // 1BCA0..1BCAF; Shorthand Format Controls 3788 0x1BCB0, // unassigned 3789 0x1D000, // 1D000..1D0FF; Byzantine Musical Symbols 3790 0x1D100, // 1D100..1D1FF; Musical Symbols 3791 0x1D200, // 1D200..1D24F; Ancient Greek Musical Notation 3792 0x1D250, // unassigned 3793 0x1D2E0, // 1D2E0..1D2FF; Mayan Numerals 3794 0x1D300, // 1D300..1D35F; Tai Xuan Jing 
Symbols 3795 0x1D360, // 1D360..1D37F; Counting Rod Numerals 3796 0x1D380, // unassigned 3797 0x1D400, // 1D400..1D7FF; Mathematical Alphanumeric Symbols 3798 0x1D800, // 1D800..1DAAF; Sutton SignWriting 3799 0x1DAB0, // unassigned 3800 0x1E000, // 1E000..1E02F; Glagolitic Supplement 3801 0x1E030, // unassigned 3802 0x1E100, // 1E100..1E14F; Nyiakeng Puachue Hmong 3803 0x1E150, // unassigned 3804 0x1E2C0, // 1E2C0..1E2FF; Wancho 3805 0x1E300, // unassigned 3806 0x1E800, // 1E800..1E8DF; Mende Kikakui 3807 0x1E8E0, // unassigned 3808 0x1E900, // 1E900..1E95F; Adlam 3809 0x1E960, // unassigned 3810 0x1EC70, // 1EC70..1ECBF; Indic Siyaq Numbers 3811 0x1ECC0, // unassigned 3812 0x1ED00, // 1ED00..1ED4F; Ottoman Siyaq Numbers 3813 0x1ED50, // unassigned 3814 0x1EE00, // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols 3815 0x1EF00, // unassigned 3816 0x1F000, // 1F000..1F02F; Mahjong Tiles 3817 0x1F030, // 1F030..1F09F; Domino Tiles 3818 0x1F0A0, // 1F0A0..1F0FF; Playing Cards 3819 0x1F100, // 1F100..1F1FF; Enclosed Alphanumeric Supplement 3820 0x1F200, // 1F200..1F2FF; Enclosed Ideographic Supplement 3821 0x1F300, // 1F300..1F5FF; Miscellaneous Symbols and Pictographs 3822 0x1F600, // 1F600..1F64F; Emoticons 3823 0x1F650, // 1F650..1F67F; Ornamental Dingbats 3824 0x1F680, // 1F680..1F6FF; Transport and Map Symbols 3825 0x1F700, // 1F700..1F77F; Alchemical Symbols 3826 0x1F780, // 1F780..1F7FF; Geometric Shapes Extended 3827 0x1F800, // 1F800..1F8FF; Supplemental Arrows-C 3828 0x1F900, // 1F900..1F9FF; Supplemental Symbols and Pictographs 3829 0x1FA00, // 1FA00..1FA6F; Chess Symbols 3830 0x1FA70, // 1FA70..1FAFF; Symbols and Pictographs Extended-A 3831 0x1FB00, // 1FB00..1FBFF; Symbols for Legacy Computing 3832 0x1FC00, // unassigned 3833 0x20000, // 20000..2A6DF; CJK Unified Ideographs Extension B 3834 0x2A6E0, // unassigned 3835 0x2A700, // 2A700..2B73F; CJK Unified Ideographs Extension C 3836 0x2B740, // 2B740..2B81F; CJK Unified Ideographs Extension D 3837 
0x2B820, // 2B820..2CEAF; CJK Unified Ideographs Extension E 3838 0x2CEB0, // 2CEB0..2EBEF; CJK Unified Ideographs Extension F 3839 0x2EBF0, // unassigned 3840 0x2F800, // 2F800..2FA1F; CJK Compatibility Ideographs Supplement 3841 0x2FA20, // unassigned 3842 0x30000, // 30000..3134F; CJK Unified Ideographs Extension G 3843 0x31350, // unassigned 3844 0xE0000, // E0000..E007F; Tags 3845 0xE0080, // unassigned 3846 0xE0100, // E0100..E01EF; Variation Selectors Supplement 3847 0xE01F0, // unassigned 3848 0xF0000, // F0000..FFFFF; Supplementary Private Use Area-A 3849 0x100000, // 100000..10FFFF; Supplementary Private Use Area-B 3850 }; 3851 3852 private static final UnicodeBlock[] blocks = { 3853 BASIC_LATIN, 3854 LATIN_1_SUPPLEMENT, 3855 LATIN_EXTENDED_A, 3856 LATIN_EXTENDED_B, 3857 IPA_EXTENSIONS, 3858 SPACING_MODIFIER_LETTERS, 3859 COMBINING_DIACRITICAL_MARKS, 3860 GREEK, 3861 CYRILLIC, 3862 CYRILLIC_SUPPLEMENTARY, 3863 ARMENIAN, 3864 HEBREW, 3865 ARABIC, 3866 SYRIAC, 3867 ARABIC_SUPPLEMENT, 3868 THAANA, 3869 NKO, 3870 SAMARITAN, 3871 MANDAIC, 3872 SYRIAC_SUPPLEMENT, 3873 null, 3874 ARABIC_EXTENDED_A, 3875 DEVANAGARI, 3876 BENGALI, 3877 GURMUKHI, 3878 GUJARATI, 3879 ORIYA, 3880 TAMIL, 3881 TELUGU, 3882 KANNADA, 3883 MALAYALAM, 3884 SINHALA, 3885 THAI, 3886 LAO, 3887 TIBETAN, 3888 MYANMAR, 3889 GEORGIAN, 3890 HANGUL_JAMO, 3891 ETHIOPIC, 3892 ETHIOPIC_SUPPLEMENT, 3893 CHEROKEE, 3894 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 3895 OGHAM, 3896 RUNIC, 3897 TAGALOG, 3898 HANUNOO, 3899 BUHID, 3900 TAGBANWA, 3901 KHMER, 3902 MONGOLIAN, 3903 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 3904 LIMBU, 3905 TAI_LE, 3906 NEW_TAI_LUE, 3907 KHMER_SYMBOLS, 3908 BUGINESE, 3909 TAI_THAM, 3910 COMBINING_DIACRITICAL_MARKS_EXTENDED, 3911 BALINESE, 3912 SUNDANESE, 3913 BATAK, 3914 LEPCHA, 3915 OL_CHIKI, 3916 CYRILLIC_EXTENDED_C, 3917 GEORGIAN_EXTENDED, 3918 SUNDANESE_SUPPLEMENT, 3919 VEDIC_EXTENSIONS, 3920 PHONETIC_EXTENSIONS, 3921 PHONETIC_EXTENSIONS_SUPPLEMENT, 3922 
COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 3923 LATIN_EXTENDED_ADDITIONAL, 3924 GREEK_EXTENDED, 3925 GENERAL_PUNCTUATION, 3926 SUPERSCRIPTS_AND_SUBSCRIPTS, 3927 CURRENCY_SYMBOLS, 3928 COMBINING_MARKS_FOR_SYMBOLS, 3929 LETTERLIKE_SYMBOLS, 3930 NUMBER_FORMS, 3931 ARROWS, 3932 MATHEMATICAL_OPERATORS, 3933 MISCELLANEOUS_TECHNICAL, 3934 CONTROL_PICTURES, 3935 OPTICAL_CHARACTER_RECOGNITION, 3936 ENCLOSED_ALPHANUMERICS, 3937 BOX_DRAWING, 3938 BLOCK_ELEMENTS, 3939 GEOMETRIC_SHAPES, 3940 MISCELLANEOUS_SYMBOLS, 3941 DINGBATS, 3942 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 3943 SUPPLEMENTAL_ARROWS_A, 3944 BRAILLE_PATTERNS, 3945 SUPPLEMENTAL_ARROWS_B, 3946 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 3947 SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 3948 MISCELLANEOUS_SYMBOLS_AND_ARROWS, 3949 GLAGOLITIC, 3950 LATIN_EXTENDED_C, 3951 COPTIC, 3952 GEORGIAN_SUPPLEMENT, 3953 TIFINAGH, 3954 ETHIOPIC_EXTENDED, 3955 CYRILLIC_EXTENDED_A, 3956 SUPPLEMENTAL_PUNCTUATION, 3957 CJK_RADICALS_SUPPLEMENT, 3958 KANGXI_RADICALS, 3959 null, 3960 IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 3961 CJK_SYMBOLS_AND_PUNCTUATION, 3962 HIRAGANA, 3963 KATAKANA, 3964 BOPOMOFO, 3965 HANGUL_COMPATIBILITY_JAMO, 3966 KANBUN, 3967 BOPOMOFO_EXTENDED, 3968 CJK_STROKES, 3969 KATAKANA_PHONETIC_EXTENSIONS, 3970 ENCLOSED_CJK_LETTERS_AND_MONTHS, 3971 CJK_COMPATIBILITY, 3972 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 3973 YIJING_HEXAGRAM_SYMBOLS, 3974 CJK_UNIFIED_IDEOGRAPHS, 3975 YI_SYLLABLES, 3976 YI_RADICALS, 3977 LISU, 3978 VAI, 3979 CYRILLIC_EXTENDED_B, 3980 BAMUM, 3981 MODIFIER_TONE_LETTERS, 3982 LATIN_EXTENDED_D, 3983 SYLOTI_NAGRI, 3984 COMMON_INDIC_NUMBER_FORMS, 3985 PHAGS_PA, 3986 SAURASHTRA, 3987 DEVANAGARI_EXTENDED, 3988 KAYAH_LI, 3989 REJANG, 3990 HANGUL_JAMO_EXTENDED_A, 3991 JAVANESE, 3992 MYANMAR_EXTENDED_B, 3993 CHAM, 3994 MYANMAR_EXTENDED_A, 3995 TAI_VIET, 3996 MEETEI_MAYEK_EXTENSIONS, 3997 ETHIOPIC_EXTENDED_A, 3998 LATIN_EXTENDED_E, 3999 CHEROKEE_SUPPLEMENT, 4000 MEETEI_MAYEK, 4001 HANGUL_SYLLABLES, 4002 HANGUL_JAMO_EXTENDED_B, 4003 
HIGH_SURROGATES, 4004 HIGH_PRIVATE_USE_SURROGATES, 4005 LOW_SURROGATES, 4006 PRIVATE_USE_AREA, 4007 CJK_COMPATIBILITY_IDEOGRAPHS, 4008 ALPHABETIC_PRESENTATION_FORMS, 4009 ARABIC_PRESENTATION_FORMS_A, 4010 VARIATION_SELECTORS, 4011 VERTICAL_FORMS, 4012 COMBINING_HALF_MARKS, 4013 CJK_COMPATIBILITY_FORMS, 4014 SMALL_FORM_VARIANTS, 4015 ARABIC_PRESENTATION_FORMS_B, 4016 HALFWIDTH_AND_FULLWIDTH_FORMS, 4017 SPECIALS, 4018 LINEAR_B_SYLLABARY, 4019 LINEAR_B_IDEOGRAMS, 4020 AEGEAN_NUMBERS, 4021 ANCIENT_GREEK_NUMBERS, 4022 ANCIENT_SYMBOLS, 4023 PHAISTOS_DISC, 4024 null, 4025 LYCIAN, 4026 CARIAN, 4027 COPTIC_EPACT_NUMBERS, 4028 OLD_ITALIC, 4029 GOTHIC, 4030 OLD_PERMIC, 4031 UGARITIC, 4032 OLD_PERSIAN, 4033 null, 4034 DESERET, 4035 SHAVIAN, 4036 OSMANYA, 4037 OSAGE, 4038 ELBASAN, 4039 CAUCASIAN_ALBANIAN, 4040 null, 4041 LINEAR_A, 4042 null, 4043 CYPRIOT_SYLLABARY, 4044 IMPERIAL_ARAMAIC, 4045 PALMYRENE, 4046 NABATAEAN, 4047 null, 4048 HATRAN, 4049 PHOENICIAN, 4050 LYDIAN, 4051 null, 4052 MEROITIC_HIEROGLYPHS, 4053 MEROITIC_CURSIVE, 4054 KHAROSHTHI, 4055 OLD_SOUTH_ARABIAN, 4056 OLD_NORTH_ARABIAN, 4057 null, 4058 MANICHAEAN, 4059 AVESTAN, 4060 INSCRIPTIONAL_PARTHIAN, 4061 INSCRIPTIONAL_PAHLAVI, 4062 PSALTER_PAHLAVI, 4063 null, 4064 OLD_TURKIC, 4065 null, 4066 OLD_HUNGARIAN, 4067 HANIFI_ROHINGYA, 4068 null, 4069 RUMI_NUMERAL_SYMBOLS, 4070 YEZIDI, 4071 null, 4072 OLD_SOGDIAN, 4073 SOGDIAN, 4074 null, 4075 CHORASMIAN, 4076 ELYMAIC, 4077 BRAHMI, 4078 KAITHI, 4079 SORA_SOMPENG, 4080 CHAKMA, 4081 MAHAJANI, 4082 SHARADA, 4083 SINHALA_ARCHAIC_NUMBERS, 4084 KHOJKI, 4085 null, 4086 MULTANI, 4087 KHUDAWADI, 4088 GRANTHA, 4089 null, 4090 NEWA, 4091 TIRHUTA, 4092 null, 4093 SIDDHAM, 4094 MODI, 4095 MONGOLIAN_SUPPLEMENT, 4096 TAKRI, 4097 null, 4098 AHOM, 4099 null, 4100 DOGRA, 4101 null, 4102 WARANG_CITI, 4103 DIVES_AKURU, 4104 null, 4105 NANDINAGARI, 4106 ZANABAZAR_SQUARE, 4107 SOYOMBO, 4108 null, 4109 PAU_CIN_HAU, 4110 null, 4111 BHAIKSUKI, 4112 MARCHEN, 4113 null, 4114 MASARAM_GONDI, 4115 
GUNJALA_GONDI, 4116 null, 4117 MAKASAR, 4118 null, 4119 LISU_SUPPLEMENT, 4120 TAMIL_SUPPLEMENT, 4121 CUNEIFORM, 4122 CUNEIFORM_NUMBERS_AND_PUNCTUATION, 4123 EARLY_DYNASTIC_CUNEIFORM, 4124 null, 4125 EGYPTIAN_HIEROGLYPHS, 4126 EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS, 4127 null, 4128 ANATOLIAN_HIEROGLYPHS, 4129 null, 4130 BAMUM_SUPPLEMENT, 4131 MRO, 4132 null, 4133 BASSA_VAH, 4134 PAHAWH_HMONG, 4135 null, 4136 MEDEFAIDRIN, 4137 null, 4138 MIAO, 4139 null, 4140 IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION, 4141 TANGUT, 4142 TANGUT_COMPONENTS, 4143 KHITAN_SMALL_SCRIPT, 4144 TANGUT_SUPPLEMENT, 4145 null, 4146 KANA_SUPPLEMENT, 4147 KANA_EXTENDED_A, 4148 SMALL_KANA_EXTENSION, 4149 NUSHU, 4150 null, 4151 DUPLOYAN, 4152 SHORTHAND_FORMAT_CONTROLS, 4153 null, 4154 BYZANTINE_MUSICAL_SYMBOLS, 4155 MUSICAL_SYMBOLS, 4156 ANCIENT_GREEK_MUSICAL_NOTATION, 4157 null, 4158 MAYAN_NUMERALS, 4159 TAI_XUAN_JING_SYMBOLS, 4160 COUNTING_ROD_NUMERALS, 4161 null, 4162 MATHEMATICAL_ALPHANUMERIC_SYMBOLS, 4163 SUTTON_SIGNWRITING, 4164 null, 4165 GLAGOLITIC_SUPPLEMENT, 4166 null, 4167 NYIAKENG_PUACHUE_HMONG, 4168 null, 4169 WANCHO, 4170 null, 4171 MENDE_KIKAKUI, 4172 null, 4173 ADLAM, 4174 null, 4175 INDIC_SIYAQ_NUMBERS, 4176 null, 4177 OTTOMAN_SIYAQ_NUMBERS, 4178 null, 4179 ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS, 4180 null, 4181 MAHJONG_TILES, 4182 DOMINO_TILES, 4183 PLAYING_CARDS, 4184 ENCLOSED_ALPHANUMERIC_SUPPLEMENT, 4185 ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, 4186 MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, 4187 EMOTICONS, 4188 ORNAMENTAL_DINGBATS, 4189 TRANSPORT_AND_MAP_SYMBOLS, 4190 ALCHEMICAL_SYMBOLS, 4191 GEOMETRIC_SHAPES_EXTENDED, 4192 SUPPLEMENTAL_ARROWS_C, 4193 SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS, 4194 CHESS_SYMBOLS, 4195 SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A, 4196 SYMBOLS_FOR_LEGACY_COMPUTING, 4197 null, 4198 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, 4199 null, 4200 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, 4201 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, 4202 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E, 4203 
CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F, 4204 null, 4205 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, 4206 null, 4207 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G, 4208 null, 4209 TAGS, 4210 null, 4211 VARIATION_SELECTORS_SUPPLEMENT, 4212 null, 4213 SUPPLEMENTARY_PRIVATE_USE_AREA_A, 4214 SUPPLEMENTARY_PRIVATE_USE_AREA_B, 4215 }; 4216 4217 4218 /** 4219 * Returns the object representing the Unicode block containing the 4220 * given character, or {@code null} if the character is not a 4221 * member of a defined block. 4222 * 4223 * <p><b>Note:</b> This method cannot handle 4224 * <a href="Character.html#supplementary"> supplementary 4225 * characters</a>. To support all Unicode characters, including 4226 * supplementary characters, use the {@link #of(int)} method. 4227 * 4228 * @param c The character in question 4229 * @return The {@code UnicodeBlock} instance representing the 4230 * Unicode block of which this character is a member, or 4231 * {@code null} if the character is not a member of any 4232 * Unicode block 4233 */ of(char c)4234 public static UnicodeBlock of(char c) { 4235 return of((int)c); 4236 } 4237 4238 /** 4239 * Returns the object representing the Unicode block 4240 * containing the given character (Unicode code point), or 4241 * {@code null} if the character is not a member of a 4242 * defined block. 4243 * 4244 * @param codePoint the character (Unicode code point) in question. 4245 * @return The {@code UnicodeBlock} instance representing the 4246 * Unicode block of which this character is a member, or 4247 * {@code null} if the character is not a member of any 4248 * Unicode block 4249 * @throws IllegalArgumentException if the specified 4250 * {@code codePoint} is an invalid Unicode code point. 
4251 * @see Character#isValidCodePoint(int) 4252 * @since 1.5 4253 */ of(int codePoint)4254 public static UnicodeBlock of(int codePoint) { 4255 if (!isValidCodePoint(codePoint)) { 4256 throw new IllegalArgumentException( 4257 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 4258 } 4259 4260 int top, bottom, current; 4261 bottom = 0; 4262 top = blockStarts.length; 4263 current = top/2; 4264 4265 // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom] 4266 while (top - bottom > 1) { 4267 if (codePoint >= blockStarts[current]) { 4268 bottom = current; 4269 } else { 4270 top = current; 4271 } 4272 current = (top + bottom) / 2; 4273 } 4274 return blocks[current]; 4275 } 4276 4277 /** 4278 * Returns the UnicodeBlock with the given name. Block 4279 * names are determined by The Unicode Standard. The file 4280 * {@code Blocks-<version>.txt} defines blocks for a particular 4281 * version of the standard. The {@link Character} class specifies 4282 * the version of the standard that it supports. 4283 * <p> 4284 * This method accepts block names in the following forms: 4285 * <ol> 4286 * <li> Canonical block names as defined by the Unicode Standard. 4287 * For example, the standard defines a "Basic Latin" block. Therefore, this 4288 * method accepts "Basic Latin" as a valid block name. The documentation of 4289 * each UnicodeBlock provides the canonical name. 4290 * <li>Canonical block names with all spaces removed. For example, "BasicLatin" 4291 * is a valid block name for the "Basic Latin" block. 4292 * <li>The text representation of each constant UnicodeBlock identifier. 4293 * For example, this method will return the {@link #BASIC_LATIN} block if 4294 * provided with the "BASIC_LATIN" name. This form replaces all spaces and 4295 * hyphens in the canonical name with underscores. 4296 * </ol> 4297 * Finally, character case is ignored for all of the valid block name forms. 
4298 * For example, "BASIC_LATIN" and "basic_latin" are both valid block names. 4299 * The en_US locale's case mapping rules are used to provide case-insensitive 4300 * string comparisons for block name validation. 4301 * <p> 4302 * If the Unicode Standard changes block names, both the previous and 4303 * current names will be accepted. 4304 * 4305 * @param blockName A {@code UnicodeBlock} name. 4306 * @return The {@code UnicodeBlock} instance identified 4307 * by {@code blockName} 4308 * @throws IllegalArgumentException if {@code blockName} is an 4309 * invalid name 4310 * @throws NullPointerException if {@code blockName} is null 4311 * @since 1.5 4312 */ forName(String blockName)4313 public static final UnicodeBlock forName(String blockName) { 4314 UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US)); 4315 if (block == null) { 4316 throw new IllegalArgumentException("Not a valid block name: " 4317 + blockName); 4318 } 4319 return block; 4320 } 4321 } 4322 4323 4324 /** 4325 * A family of character subsets representing the character scripts 4326 * defined in the <a href="http://www.unicode.org/reports/tr24/"> 4327 * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode 4328 * character is assigned to a single Unicode script, either a specific 4329 * script, such as {@link Character.UnicodeScript#LATIN Latin}, or 4330 * one of the following three special values, 4331 * {@link Character.UnicodeScript#INHERITED Inherited}, 4332 * {@link Character.UnicodeScript#COMMON Common} or 4333 * {@link Character.UnicodeScript#UNKNOWN Unknown}. 4334 * 4335 * @since 1.7 4336 */ 4337 public static enum UnicodeScript { 4338 /** 4339 * Unicode script "Common". 4340 */ 4341 COMMON, 4342 4343 /** 4344 * Unicode script "Latin". 4345 */ 4346 LATIN, 4347 4348 /** 4349 * Unicode script "Greek". 4350 */ 4351 GREEK, 4352 4353 /** 4354 * Unicode script "Cyrillic". 4355 */ 4356 CYRILLIC, 4357 4358 /** 4359 * Unicode script "Armenian". 
4360 */ 4361 ARMENIAN, 4362 4363 /** 4364 * Unicode script "Hebrew". 4365 */ 4366 HEBREW, 4367 4368 /** 4369 * Unicode script "Arabic". 4370 */ 4371 ARABIC, 4372 4373 /** 4374 * Unicode script "Syriac". 4375 */ 4376 SYRIAC, 4377 4378 /** 4379 * Unicode script "Thaana". 4380 */ 4381 THAANA, 4382 4383 /** 4384 * Unicode script "Devanagari". 4385 */ 4386 DEVANAGARI, 4387 4388 /** 4389 * Unicode script "Bengali". 4390 */ 4391 BENGALI, 4392 4393 /** 4394 * Unicode script "Gurmukhi". 4395 */ 4396 GURMUKHI, 4397 4398 /** 4399 * Unicode script "Gujarati". 4400 */ 4401 GUJARATI, 4402 4403 /** 4404 * Unicode script "Oriya". 4405 */ 4406 ORIYA, 4407 4408 /** 4409 * Unicode script "Tamil". 4410 */ 4411 TAMIL, 4412 4413 /** 4414 * Unicode script "Telugu". 4415 */ 4416 TELUGU, 4417 4418 /** 4419 * Unicode script "Kannada". 4420 */ 4421 KANNADA, 4422 4423 /** 4424 * Unicode script "Malayalam". 4425 */ 4426 MALAYALAM, 4427 4428 /** 4429 * Unicode script "Sinhala". 4430 */ 4431 SINHALA, 4432 4433 /** 4434 * Unicode script "Thai". 4435 */ 4436 THAI, 4437 4438 /** 4439 * Unicode script "Lao". 4440 */ 4441 LAO, 4442 4443 /** 4444 * Unicode script "Tibetan". 4445 */ 4446 TIBETAN, 4447 4448 /** 4449 * Unicode script "Myanmar". 4450 */ 4451 MYANMAR, 4452 4453 /** 4454 * Unicode script "Georgian". 4455 */ 4456 GEORGIAN, 4457 4458 /** 4459 * Unicode script "Hangul". 4460 */ 4461 HANGUL, 4462 4463 /** 4464 * Unicode script "Ethiopic". 4465 */ 4466 ETHIOPIC, 4467 4468 /** 4469 * Unicode script "Cherokee". 4470 */ 4471 CHEROKEE, 4472 4473 /** 4474 * Unicode script "Canadian_Aboriginal". 4475 */ 4476 CANADIAN_ABORIGINAL, 4477 4478 /** 4479 * Unicode script "Ogham". 4480 */ 4481 OGHAM, 4482 4483 /** 4484 * Unicode script "Runic". 4485 */ 4486 RUNIC, 4487 4488 /** 4489 * Unicode script "Khmer". 4490 */ 4491 KHMER, 4492 4493 /** 4494 * Unicode script "Mongolian". 4495 */ 4496 MONGOLIAN, 4497 4498 /** 4499 * Unicode script "Hiragana". 
4500 */ 4501 HIRAGANA, 4502 4503 /** 4504 * Unicode script "Katakana". 4505 */ 4506 KATAKANA, 4507 4508 /** 4509 * Unicode script "Bopomofo". 4510 */ 4511 BOPOMOFO, 4512 4513 /** 4514 * Unicode script "Han". 4515 */ 4516 HAN, 4517 4518 /** 4519 * Unicode script "Yi". 4520 */ 4521 YI, 4522 4523 /** 4524 * Unicode script "Old_Italic". 4525 */ 4526 OLD_ITALIC, 4527 4528 /** 4529 * Unicode script "Gothic". 4530 */ 4531 GOTHIC, 4532 4533 /** 4534 * Unicode script "Deseret". 4535 */ 4536 DESERET, 4537 4538 /** 4539 * Unicode script "Inherited". 4540 */ 4541 INHERITED, 4542 4543 /** 4544 * Unicode script "Tagalog". 4545 */ 4546 TAGALOG, 4547 4548 /** 4549 * Unicode script "Hanunoo". 4550 */ 4551 HANUNOO, 4552 4553 /** 4554 * Unicode script "Buhid". 4555 */ 4556 BUHID, 4557 4558 /** 4559 * Unicode script "Tagbanwa". 4560 */ 4561 TAGBANWA, 4562 4563 /** 4564 * Unicode script "Limbu". 4565 */ 4566 LIMBU, 4567 4568 /** 4569 * Unicode script "Tai_Le". 4570 */ 4571 TAI_LE, 4572 4573 /** 4574 * Unicode script "Linear_B". 4575 */ 4576 LINEAR_B, 4577 4578 /** 4579 * Unicode script "Ugaritic". 4580 */ 4581 UGARITIC, 4582 4583 /** 4584 * Unicode script "Shavian". 4585 */ 4586 SHAVIAN, 4587 4588 /** 4589 * Unicode script "Osmanya". 4590 */ 4591 OSMANYA, 4592 4593 /** 4594 * Unicode script "Cypriot". 4595 */ 4596 CYPRIOT, 4597 4598 /** 4599 * Unicode script "Braille". 4600 */ 4601 BRAILLE, 4602 4603 /** 4604 * Unicode script "Buginese". 4605 */ 4606 BUGINESE, 4607 4608 /** 4609 * Unicode script "Coptic". 4610 */ 4611 COPTIC, 4612 4613 /** 4614 * Unicode script "New_Tai_Lue". 4615 */ 4616 NEW_TAI_LUE, 4617 4618 /** 4619 * Unicode script "Glagolitic". 4620 */ 4621 GLAGOLITIC, 4622 4623 /** 4624 * Unicode script "Tifinagh". 4625 */ 4626 TIFINAGH, 4627 4628 /** 4629 * Unicode script "Syloti_Nagri". 4630 */ 4631 SYLOTI_NAGRI, 4632 4633 /** 4634 * Unicode script "Old_Persian". 4635 */ 4636 OLD_PERSIAN, 4637 4638 /** 4639 * Unicode script "Kharoshthi". 
4640 */ 4641 KHAROSHTHI, 4642 4643 /** 4644 * Unicode script "Balinese". 4645 */ 4646 BALINESE, 4647 4648 /** 4649 * Unicode script "Cuneiform". 4650 */ 4651 CUNEIFORM, 4652 4653 /** 4654 * Unicode script "Phoenician". 4655 */ 4656 PHOENICIAN, 4657 4658 /** 4659 * Unicode script "Phags_Pa". 4660 */ 4661 PHAGS_PA, 4662 4663 /** 4664 * Unicode script "Nko". 4665 */ 4666 NKO, 4667 4668 /** 4669 * Unicode script "Sundanese". 4670 */ 4671 SUNDANESE, 4672 4673 /** 4674 * Unicode script "Batak". 4675 */ 4676 BATAK, 4677 4678 /** 4679 * Unicode script "Lepcha". 4680 */ 4681 LEPCHA, 4682 4683 /** 4684 * Unicode script "Ol_Chiki". 4685 */ 4686 OL_CHIKI, 4687 4688 /** 4689 * Unicode script "Vai". 4690 */ 4691 VAI, 4692 4693 /** 4694 * Unicode script "Saurashtra". 4695 */ 4696 SAURASHTRA, 4697 4698 /** 4699 * Unicode script "Kayah_Li". 4700 */ 4701 KAYAH_LI, 4702 4703 /** 4704 * Unicode script "Rejang". 4705 */ 4706 REJANG, 4707 4708 /** 4709 * Unicode script "Lycian". 4710 */ 4711 LYCIAN, 4712 4713 /** 4714 * Unicode script "Carian". 4715 */ 4716 CARIAN, 4717 4718 /** 4719 * Unicode script "Lydian". 4720 */ 4721 LYDIAN, 4722 4723 /** 4724 * Unicode script "Cham". 4725 */ 4726 CHAM, 4727 4728 /** 4729 * Unicode script "Tai_Tham". 4730 */ 4731 TAI_THAM, 4732 4733 /** 4734 * Unicode script "Tai_Viet". 4735 */ 4736 TAI_VIET, 4737 4738 /** 4739 * Unicode script "Avestan". 4740 */ 4741 AVESTAN, 4742 4743 /** 4744 * Unicode script "Egyptian_Hieroglyphs". 4745 */ 4746 EGYPTIAN_HIEROGLYPHS, 4747 4748 /** 4749 * Unicode script "Samaritan". 4750 */ 4751 SAMARITAN, 4752 4753 /** 4754 * Unicode script "Mandaic". 4755 */ 4756 MANDAIC, 4757 4758 /** 4759 * Unicode script "Lisu". 4760 */ 4761 LISU, 4762 4763 /** 4764 * Unicode script "Bamum". 4765 */ 4766 BAMUM, 4767 4768 /** 4769 * Unicode script "Javanese". 4770 */ 4771 JAVANESE, 4772 4773 /** 4774 * Unicode script "Meetei_Mayek". 4775 */ 4776 MEETEI_MAYEK, 4777 4778 /** 4779 * Unicode script "Imperial_Aramaic". 
4780 */ 4781 IMPERIAL_ARAMAIC, 4782 4783 /** 4784 * Unicode script "Old_South_Arabian". 4785 */ 4786 OLD_SOUTH_ARABIAN, 4787 4788 /** 4789 * Unicode script "Inscriptional_Parthian". 4790 */ 4791 INSCRIPTIONAL_PARTHIAN, 4792 4793 /** 4794 * Unicode script "Inscriptional_Pahlavi". 4795 */ 4796 INSCRIPTIONAL_PAHLAVI, 4797 4798 /** 4799 * Unicode script "Old_Turkic". 4800 */ 4801 OLD_TURKIC, 4802 4803 /** 4804 * Unicode script "Brahmi". 4805 */ 4806 BRAHMI, 4807 4808 /** 4809 * Unicode script "Kaithi". 4810 */ 4811 KAITHI, 4812 4813 /** 4814 * Unicode script "Meroitic Hieroglyphs". 4815 * @since 1.8 4816 */ 4817 MEROITIC_HIEROGLYPHS, 4818 4819 /** 4820 * Unicode script "Meroitic Cursive". 4821 * @since 1.8 4822 */ 4823 MEROITIC_CURSIVE, 4824 4825 /** 4826 * Unicode script "Sora Sompeng". 4827 * @since 1.8 4828 */ 4829 SORA_SOMPENG, 4830 4831 /** 4832 * Unicode script "Chakma". 4833 * @since 1.8 4834 */ 4835 CHAKMA, 4836 4837 /** 4838 * Unicode script "Sharada". 4839 * @since 1.8 4840 */ 4841 SHARADA, 4842 4843 /** 4844 * Unicode script "Takri". 4845 * @since 1.8 4846 */ 4847 TAKRI, 4848 4849 /** 4850 * Unicode script "Miao". 4851 * @since 1.8 4852 */ 4853 MIAO, 4854 4855 /** 4856 * Unicode script "Caucasian Albanian". 4857 * @since 9 4858 */ 4859 CAUCASIAN_ALBANIAN, 4860 4861 /** 4862 * Unicode script "Bassa Vah". 4863 * @since 9 4864 */ 4865 BASSA_VAH, 4866 4867 /** 4868 * Unicode script "Duployan". 4869 * @since 9 4870 */ 4871 DUPLOYAN, 4872 4873 /** 4874 * Unicode script "Elbasan". 4875 * @since 9 4876 */ 4877 ELBASAN, 4878 4879 /** 4880 * Unicode script "Grantha". 4881 * @since 9 4882 */ 4883 GRANTHA, 4884 4885 /** 4886 * Unicode script "Pahawh Hmong". 4887 * @since 9 4888 */ 4889 PAHAWH_HMONG, 4890 4891 /** 4892 * Unicode script "Khojki". 4893 * @since 9 4894 */ 4895 KHOJKI, 4896 4897 /** 4898 * Unicode script "Linear A". 4899 * @since 9 4900 */ 4901 LINEAR_A, 4902 4903 /** 4904 * Unicode script "Mahajani". 
4905 * @since 9 4906 */ 4907 MAHAJANI, 4908 4909 /** 4910 * Unicode script "Manichaean". 4911 * @since 9 4912 */ 4913 MANICHAEAN, 4914 4915 /** 4916 * Unicode script "Mende Kikakui". 4917 * @since 9 4918 */ 4919 MENDE_KIKAKUI, 4920 4921 /** 4922 * Unicode script "Modi". 4923 * @since 9 4924 */ 4925 MODI, 4926 4927 /** 4928 * Unicode script "Mro". 4929 * @since 9 4930 */ 4931 MRO, 4932 4933 /** 4934 * Unicode script "Old North Arabian". 4935 * @since 9 4936 */ 4937 OLD_NORTH_ARABIAN, 4938 4939 /** 4940 * Unicode script "Nabataean". 4941 * @since 9 4942 */ 4943 NABATAEAN, 4944 4945 /** 4946 * Unicode script "Palmyrene". 4947 * @since 9 4948 */ 4949 PALMYRENE, 4950 4951 /** 4952 * Unicode script "Pau Cin Hau". 4953 * @since 9 4954 */ 4955 PAU_CIN_HAU, 4956 4957 /** 4958 * Unicode script "Old Permic". 4959 * @since 9 4960 */ 4961 OLD_PERMIC, 4962 4963 /** 4964 * Unicode script "Psalter Pahlavi". 4965 * @since 9 4966 */ 4967 PSALTER_PAHLAVI, 4968 4969 /** 4970 * Unicode script "Siddham". 4971 * @since 9 4972 */ 4973 SIDDHAM, 4974 4975 /** 4976 * Unicode script "Khudawadi". 4977 * @since 9 4978 */ 4979 KHUDAWADI, 4980 4981 /** 4982 * Unicode script "Tirhuta". 4983 * @since 9 4984 */ 4985 TIRHUTA, 4986 4987 /** 4988 * Unicode script "Warang Citi". 4989 * @since 9 4990 */ 4991 WARANG_CITI, 4992 4993 /** 4994 * Unicode script "Ahom". 4995 * @since 9 4996 */ 4997 AHOM, 4998 4999 /** 5000 * Unicode script "Anatolian Hieroglyphs". 5001 * @since 9 5002 */ 5003 ANATOLIAN_HIEROGLYPHS, 5004 5005 /** 5006 * Unicode script "Hatran". 5007 * @since 9 5008 */ 5009 HATRAN, 5010 5011 /** 5012 * Unicode script "Multani". 5013 * @since 9 5014 */ 5015 MULTANI, 5016 5017 /** 5018 * Unicode script "Old Hungarian". 5019 * @since 9 5020 */ 5021 OLD_HUNGARIAN, 5022 5023 /** 5024 * Unicode script "SignWriting". 5025 * @since 9 5026 */ 5027 SIGNWRITING, 5028 5029 /** 5030 * Unicode script "Adlam". 5031 * @since 11 5032 */ 5033 ADLAM, 5034 5035 /** 5036 * Unicode script "Bhaiksuki". 
5037 * @since 11 5038 */ 5039 BHAIKSUKI, 5040 5041 /** 5042 * Unicode script "Marchen". 5043 * @since 11 5044 */ 5045 MARCHEN, 5046 5047 /** 5048 * Unicode script "Newa". 5049 * @since 11 5050 */ 5051 NEWA, 5052 5053 /** 5054 * Unicode script "Osage". 5055 * @since 11 5056 */ 5057 OSAGE, 5058 5059 /** 5060 * Unicode script "Tangut". 5061 * @since 11 5062 */ 5063 TANGUT, 5064 5065 /** 5066 * Unicode script "Masaram Gondi". 5067 * @since 11 5068 */ 5069 MASARAM_GONDI, 5070 5071 /** 5072 * Unicode script "Nushu". 5073 * @since 11 5074 */ 5075 NUSHU, 5076 5077 /** 5078 * Unicode script "Soyombo". 5079 * @since 11 5080 */ 5081 SOYOMBO, 5082 5083 /** 5084 * Unicode script "Zanabazar Square". 5085 * @since 11 5086 */ 5087 ZANABAZAR_SQUARE, 5088 5089 /** 5090 * Unicode script "Hanifi Rohingya". 5091 * @since 12 5092 */ 5093 HANIFI_ROHINGYA, 5094 5095 /** 5096 * Unicode script "Old Sogdian". 5097 * @since 12 5098 */ 5099 OLD_SOGDIAN, 5100 5101 /** 5102 * Unicode script "Sogdian". 5103 * @since 12 5104 */ 5105 SOGDIAN, 5106 5107 /** 5108 * Unicode script "Dogra". 5109 * @since 12 5110 */ 5111 DOGRA, 5112 5113 /** 5114 * Unicode script "Gunjala Gondi". 5115 * @since 12 5116 */ 5117 GUNJALA_GONDI, 5118 5119 /** 5120 * Unicode script "Makasar". 5121 * @since 12 5122 */ 5123 MAKASAR, 5124 5125 /** 5126 * Unicode script "Medefaidrin". 5127 * @since 12 5128 */ 5129 MEDEFAIDRIN, 5130 5131 /** 5132 * Unicode script "Elymaic". 5133 * @since 13 5134 */ 5135 ELYMAIC, 5136 5137 /** 5138 * Unicode script "Nandinagari". 5139 * @since 13 5140 */ 5141 NANDINAGARI, 5142 5143 /** 5144 * Unicode script "Nyiakeng Puachue Hmong". 5145 * @since 13 5146 */ 5147 NYIAKENG_PUACHUE_HMONG, 5148 5149 /** 5150 * Unicode script "Wancho". 5151 * @since 13 5152 */ 5153 WANCHO, 5154 5155 /** 5156 * Unicode script "Yezidi". 5157 * @since 15 5158 */ 5159 YEZIDI, 5160 5161 /** 5162 * Unicode script "Chorasmian". 5163 * @since 15 5164 */ 5165 CHORASMIAN, 5166 5167 /** 5168 * Unicode script "Dives Akuru". 
5169 * @since 15 5170 */ 5171 DIVES_AKURU, 5172 5173 /** 5174 * Unicode script "Khitan Small Script". 5175 * @since 15 5176 */ 5177 KHITAN_SMALL_SCRIPT, 5178 5179 /** 5180 * Unicode script "Unknown". 5181 */ 5182 UNKNOWN; 5183 5184 private static final int[] scriptStarts = { 5185 0x0000, // 0000..0040; COMMON 5186 0x0041, // 0041..005A; LATIN 5187 0x005B, // 005B..0060; COMMON 5188 0x0061, // 0061..007A; LATIN 5189 0x007B, // 007B..00A9; COMMON 5190 0x00AA, // 00AA ; LATIN 5191 0x00AB, // 00AB..00B9; COMMON 5192 0x00BA, // 00BA ; LATIN 5193 0x00BB, // 00BB..00BF; COMMON 5194 0x00C0, // 00C0..00D6; LATIN 5195 0x00D7, // 00D7 ; COMMON 5196 0x00D8, // 00D8..00F6; LATIN 5197 0x00F7, // 00F7 ; COMMON 5198 0x00F8, // 00F8..02B8; LATIN 5199 0x02B9, // 02B9..02DF; COMMON 5200 0x02E0, // 02E0..02E4; LATIN 5201 0x02E5, // 02E5..02E9; COMMON 5202 0x02EA, // 02EA..02EB; BOPOMOFO 5203 0x02EC, // 02EC..02FF; COMMON 5204 0x0300, // 0300..036F; INHERITED 5205 0x0370, // 0370..0373; GREEK 5206 0x0374, // 0374 ; COMMON 5207 0x0375, // 0375..0377; GREEK 5208 0x0378, // 0378..0379; UNKNOWN 5209 0x037A, // 037A..037D; GREEK 5210 0x037E, // 037E ; COMMON 5211 0x037F, // 037F ; GREEK 5212 0x0380, // 0380..0383; UNKNOWN 5213 0x0384, // 0384 ; GREEK 5214 0x0385, // 0385 ; COMMON 5215 0x0386, // 0386 ; GREEK 5216 0x0387, // 0387 ; COMMON 5217 0x0388, // 0388..038A; GREEK 5218 0x038B, // 038B ; UNKNOWN 5219 0x038C, // 038C ; GREEK 5220 0x038D, // 038D ; UNKNOWN 5221 0x038E, // 038E..03A1; GREEK 5222 0x03A2, // 03A2 ; UNKNOWN 5223 0x03A3, // 03A3..03E1; GREEK 5224 0x03E2, // 03E2..03EF; COPTIC 5225 0x03F0, // 03F0..03FF; GREEK 5226 0x0400, // 0400..0484; CYRILLIC 5227 0x0485, // 0485..0486; INHERITED 5228 0x0487, // 0487..052F; CYRILLIC 5229 0x0530, // 0530 ; UNKNOWN 5230 0x0531, // 0531..0556; ARMENIAN 5231 0x0557, // 0557..0558; UNKNOWN 5232 0x0559, // 0559..058A; ARMENIAN 5233 0x058B, // 058B..058C; UNKNOWN 5234 0x058D, // 058D..058F; ARMENIAN 5235 0x0590, // 0590 ; UNKNOWN 5236 0x0591, // 
0591..05C7; HEBREW 5237 0x05C8, // 05C8..05CF; UNKNOWN 5238 0x05D0, // 05D0..05EA; HEBREW 5239 0x05EB, // 05EB..05EE; UNKNOWN 5240 0x05EF, // 05EF..05F4; HEBREW 5241 0x05F5, // 05F5..05FF; UNKNOWN 5242 0x0600, // 0600..0604; ARABIC 5243 0x0605, // 0605 ; COMMON 5244 0x0606, // 0606..060B; ARABIC 5245 0x060C, // 060C ; COMMON 5246 0x060D, // 060D..061A; ARABIC 5247 0x061B, // 061B ; COMMON 5248 0x061C, // 061C ; ARABIC 5249 0x061D, // 061D ; UNKNOWN 5250 0x061E, // 061E ; ARABIC 5251 0x061F, // 061F ; COMMON 5252 0x0620, // 0620..063F; ARABIC 5253 0x0640, // 0640 ; COMMON 5254 0x0641, // 0641..064A; ARABIC 5255 0x064B, // 064B..0655; INHERITED 5256 0x0656, // 0656..066F; ARABIC 5257 0x0670, // 0670 ; INHERITED 5258 0x0671, // 0671..06DC; ARABIC 5259 0x06DD, // 06DD ; COMMON 5260 0x06DE, // 06DE..06FF; ARABIC 5261 0x0700, // 0700..070D; SYRIAC 5262 0x070E, // 070E ; UNKNOWN 5263 0x070F, // 070F..074A; SYRIAC 5264 0x074B, // 074B..074C; UNKNOWN 5265 0x074D, // 074D..074F; SYRIAC 5266 0x0750, // 0750..077F; ARABIC 5267 0x0780, // 0780..07B1; THAANA 5268 0x07B2, // 07B2..07BF; UNKNOWN 5269 0x07C0, // 07C0..07FA; NKO 5270 0x07FB, // 07FB..07FC; UNKNOWN 5271 0x07FD, // 07FD..07FF; NKO 5272 0x0800, // 0800..082D; SAMARITAN 5273 0x082E, // 082E..082F; UNKNOWN 5274 0x0830, // 0830..083E; SAMARITAN 5275 0x083F, // 083F ; UNKNOWN 5276 0x0840, // 0840..085B; MANDAIC 5277 0x085C, // 085C..085D; UNKNOWN 5278 0x085E, // 085E ; MANDAIC 5279 0x085F, // 085F ; UNKNOWN 5280 0x0860, // 0860..086A; SYRIAC 5281 0x086B, // 086B..089F; UNKNOWN 5282 0x08A0, // 08A0..08B4; ARABIC 5283 0x08B5, // 08B5 ; UNKNOWN 5284 0x08B6, // 08B6..08C7; ARABIC 5285 0x08C8, // 08C8..08D2; UNKNOWN 5286 0x08D3, // 08D3..08E1; ARABIC 5287 0x08E2, // 08E2 ; COMMON 5288 0x08E3, // 08E3..08FF; ARABIC 5289 0x0900, // 0900..0950; DEVANAGARI 5290 0x0951, // 0951..0954; INHERITED 5291 0x0955, // 0955..0963; DEVANAGARI 5292 0x0964, // 0964..0965; COMMON 5293 0x0966, // 0966..097F; DEVANAGARI 5294 0x0980, // 0980..0983; 
BENGALI 5295 0x0984, // 0984 ; UNKNOWN 5296 0x0985, // 0985..098C; BENGALI 5297 0x098D, // 098D..098E; UNKNOWN 5298 0x098F, // 098F..0990; BENGALI 5299 0x0991, // 0991..0992; UNKNOWN 5300 0x0993, // 0993..09A8; BENGALI 5301 0x09A9, // 09A9 ; UNKNOWN 5302 0x09AA, // 09AA..09B0; BENGALI 5303 0x09B1, // 09B1 ; UNKNOWN 5304 0x09B2, // 09B2 ; BENGALI 5305 0x09B3, // 09B3..09B5; UNKNOWN 5306 0x09B6, // 09B6..09B9; BENGALI 5307 0x09BA, // 09BA..09BB; UNKNOWN 5308 0x09BC, // 09BC..09C4; BENGALI 5309 0x09C5, // 09C5..09C6; UNKNOWN 5310 0x09C7, // 09C7..09C8; BENGALI 5311 0x09C9, // 09C9..09CA; UNKNOWN 5312 0x09CB, // 09CB..09CE; BENGALI 5313 0x09CF, // 09CF..09D6; UNKNOWN 5314 0x09D7, // 09D7 ; BENGALI 5315 0x09D8, // 09D8..09DB; UNKNOWN 5316 0x09DC, // 09DC..09DD; BENGALI 5317 0x09DE, // 09DE ; UNKNOWN 5318 0x09DF, // 09DF..09E3; BENGALI 5319 0x09E4, // 09E4..09E5; UNKNOWN 5320 0x09E6, // 09E6..09FE; BENGALI 5321 0x09FF, // 09FF..0A00; UNKNOWN 5322 0x0A01, // 0A01..0A03; GURMUKHI 5323 0x0A04, // 0A04 ; UNKNOWN 5324 0x0A05, // 0A05..0A0A; GURMUKHI 5325 0x0A0B, // 0A0B..0A0E; UNKNOWN 5326 0x0A0F, // 0A0F..0A10; GURMUKHI 5327 0x0A11, // 0A11..0A12; UNKNOWN 5328 0x0A13, // 0A13..0A28; GURMUKHI 5329 0x0A29, // 0A29 ; UNKNOWN 5330 0x0A2A, // 0A2A..0A30; GURMUKHI 5331 0x0A31, // 0A31 ; UNKNOWN 5332 0x0A32, // 0A32..0A33; GURMUKHI 5333 0x0A34, // 0A34 ; UNKNOWN 5334 0x0A35, // 0A35..0A36; GURMUKHI 5335 0x0A37, // 0A37 ; UNKNOWN 5336 0x0A38, // 0A38..0A39; GURMUKHI 5337 0x0A3A, // 0A3A..0A3B; UNKNOWN 5338 0x0A3C, // 0A3C ; GURMUKHI 5339 0x0A3D, // 0A3D ; UNKNOWN 5340 0x0A3E, // 0A3E..0A42; GURMUKHI 5341 0x0A43, // 0A43..0A46; UNKNOWN 5342 0x0A47, // 0A47..0A48; GURMUKHI 5343 0x0A49, // 0A49..0A4A; UNKNOWN 5344 0x0A4B, // 0A4B..0A4D; GURMUKHI 5345 0x0A4E, // 0A4E..0A50; UNKNOWN 5346 0x0A51, // 0A51 ; GURMUKHI 5347 0x0A52, // 0A52..0A58; UNKNOWN 5348 0x0A59, // 0A59..0A5C; GURMUKHI 5349 0x0A5D, // 0A5D ; UNKNOWN 5350 0x0A5E, // 0A5E ; GURMUKHI 5351 0x0A5F, // 0A5F..0A65; UNKNOWN 5352 
0x0A66, // 0A66..0A76; GURMUKHI 5353 0x0A77, // 0A77..0A80; UNKNOWN 5354 0x0A81, // 0A81..0A83; GUJARATI 5355 0x0A84, // 0A84 ; UNKNOWN 5356 0x0A85, // 0A85..0A8D; GUJARATI 5357 0x0A8E, // 0A8E ; UNKNOWN 5358 0x0A8F, // 0A8F..0A91; GUJARATI 5359 0x0A92, // 0A92 ; UNKNOWN 5360 0x0A93, // 0A93..0AA8; GUJARATI 5361 0x0AA9, // 0AA9 ; UNKNOWN 5362 0x0AAA, // 0AAA..0AB0; GUJARATI 5363 0x0AB1, // 0AB1 ; UNKNOWN 5364 0x0AB2, // 0AB2..0AB3; GUJARATI 5365 0x0AB4, // 0AB4 ; UNKNOWN 5366 0x0AB5, // 0AB5..0AB9; GUJARATI 5367 0x0ABA, // 0ABA..0ABB; UNKNOWN 5368 0x0ABC, // 0ABC..0AC5; GUJARATI 5369 0x0AC6, // 0AC6 ; UNKNOWN 5370 0x0AC7, // 0AC7..0AC9; GUJARATI 5371 0x0ACA, // 0ACA ; UNKNOWN 5372 0x0ACB, // 0ACB..0ACD; GUJARATI 5373 0x0ACE, // 0ACE..0ACF; UNKNOWN 5374 0x0AD0, // 0AD0 ; GUJARATI 5375 0x0AD1, // 0AD1..0ADF; UNKNOWN 5376 0x0AE0, // 0AE0..0AE3; GUJARATI 5377 0x0AE4, // 0AE4..0AE5; UNKNOWN 5378 0x0AE6, // 0AE6..0AF1; GUJARATI 5379 0x0AF2, // 0AF2..0AF8; UNKNOWN 5380 0x0AF9, // 0AF9..0AFF; GUJARATI 5381 0x0B00, // 0B00 ; UNKNOWN 5382 0x0B01, // 0B01..0B03; ORIYA 5383 0x0B04, // 0B04 ; UNKNOWN 5384 0x0B05, // 0B05..0B0C; ORIYA 5385 0x0B0D, // 0B0D..0B0E; UNKNOWN 5386 0x0B0F, // 0B0F..0B10; ORIYA 5387 0x0B11, // 0B11..0B12; UNKNOWN 5388 0x0B13, // 0B13..0B28; ORIYA 5389 0x0B29, // 0B29 ; UNKNOWN 5390 0x0B2A, // 0B2A..0B30; ORIYA 5391 0x0B31, // 0B31 ; UNKNOWN 5392 0x0B32, // 0B32..0B33; ORIYA 5393 0x0B34, // 0B34 ; UNKNOWN 5394 0x0B35, // 0B35..0B39; ORIYA 5395 0x0B3A, // 0B3A..0B3B; UNKNOWN 5396 0x0B3C, // 0B3C..0B44; ORIYA 5397 0x0B45, // 0B45..0B46; UNKNOWN 5398 0x0B47, // 0B47..0B48; ORIYA 5399 0x0B49, // 0B49..0B4A; UNKNOWN 5400 0x0B4B, // 0B4B..0B4D; ORIYA 5401 0x0B4E, // 0B4E..0B54; UNKNOWN 5402 0x0B55, // 0B55..0B57; ORIYA 5403 0x0B58, // 0B58..0B5B; UNKNOWN 5404 0x0B5C, // 0B5C..0B5D; ORIYA 5405 0x0B5E, // 0B5E ; UNKNOWN 5406 0x0B5F, // 0B5F..0B63; ORIYA 5407 0x0B64, // 0B64..0B65; UNKNOWN 5408 0x0B66, // 0B66..0B77; ORIYA 5409 0x0B78, // 0B78..0B81; UNKNOWN 5410 
0x0B82, // 0B82..0B83; TAMIL 5411 0x0B84, // 0B84 ; UNKNOWN 5412 0x0B85, // 0B85..0B8A; TAMIL 5413 0x0B8B, // 0B8B..0B8D; UNKNOWN 5414 0x0B8E, // 0B8E..0B90; TAMIL 5415 0x0B91, // 0B91 ; UNKNOWN 5416 0x0B92, // 0B92..0B95; TAMIL 5417 0x0B96, // 0B96..0B98; UNKNOWN 5418 0x0B99, // 0B99..0B9A; TAMIL 5419 0x0B9B, // 0B9B ; UNKNOWN 5420 0x0B9C, // 0B9C ; TAMIL 5421 0x0B9D, // 0B9D ; UNKNOWN 5422 0x0B9E, // 0B9E..0B9F; TAMIL 5423 0x0BA0, // 0BA0..0BA2; UNKNOWN 5424 0x0BA3, // 0BA3..0BA4; TAMIL 5425 0x0BA5, // 0BA5..0BA7; UNKNOWN 5426 0x0BA8, // 0BA8..0BAA; TAMIL 5427 0x0BAB, // 0BAB..0BAD; UNKNOWN 5428 0x0BAE, // 0BAE..0BB9; TAMIL 5429 0x0BBA, // 0BBA..0BBD; UNKNOWN 5430 0x0BBE, // 0BBE..0BC2; TAMIL 5431 0x0BC3, // 0BC3..0BC5; UNKNOWN 5432 0x0BC6, // 0BC6..0BC8; TAMIL 5433 0x0BC9, // 0BC9 ; UNKNOWN 5434 0x0BCA, // 0BCA..0BCD; TAMIL 5435 0x0BCE, // 0BCE..0BCF; UNKNOWN 5436 0x0BD0, // 0BD0 ; TAMIL 5437 0x0BD1, // 0BD1..0BD6; UNKNOWN 5438 0x0BD7, // 0BD7 ; TAMIL 5439 0x0BD8, // 0BD8..0BE5; UNKNOWN 5440 0x0BE6, // 0BE6..0BFA; TAMIL 5441 0x0BFB, // 0BFB..0BFF; UNKNOWN 5442 0x0C00, // 0C00..0C0C; TELUGU 5443 0x0C0D, // 0C0D ; UNKNOWN 5444 0x0C0E, // 0C0E..0C10; TELUGU 5445 0x0C11, // 0C11 ; UNKNOWN 5446 0x0C12, // 0C12..0C28; TELUGU 5447 0x0C29, // 0C29 ; UNKNOWN 5448 0x0C2A, // 0C2A..0C39; TELUGU 5449 0x0C3A, // 0C3A..0C3C; UNKNOWN 5450 0x0C3D, // 0C3D..0C44; TELUGU 5451 0x0C45, // 0C45 ; UNKNOWN 5452 0x0C46, // 0C46..0C48; TELUGU 5453 0x0C49, // 0C49 ; UNKNOWN 5454 0x0C4A, // 0C4A..0C4D; TELUGU 5455 0x0C4E, // 0C4E..0C54; UNKNOWN 5456 0x0C55, // 0C55..0C56; TELUGU 5457 0x0C57, // 0C57 ; UNKNOWN 5458 0x0C58, // 0C58..0C5A; TELUGU 5459 0x0C5B, // 0C5B..0C5F; UNKNOWN 5460 0x0C60, // 0C60..0C63; TELUGU 5461 0x0C64, // 0C64..0C65; UNKNOWN 5462 0x0C66, // 0C66..0C6F; TELUGU 5463 0x0C70, // 0C70..0C76; UNKNOWN 5464 0x0C77, // 0C77..0C7F; TELUGU 5465 0x0C80, // 0C80..0C8C; KANNADA 5466 0x0C8D, // 0C8D ; UNKNOWN 5467 0x0C8E, // 0C8E..0C90; KANNADA 5468 0x0C91, // 0C91 ; UNKNOWN 5469 
0x0C92, // 0C92..0CA8; KANNADA 5470 0x0CA9, // 0CA9 ; UNKNOWN 5471 0x0CAA, // 0CAA..0CB3; KANNADA 5472 0x0CB4, // 0CB4 ; UNKNOWN 5473 0x0CB5, // 0CB5..0CB9; KANNADA 5474 0x0CBA, // 0CBA..0CBB; UNKNOWN 5475 0x0CBC, // 0CBC..0CC4; KANNADA 5476 0x0CC5, // 0CC5 ; UNKNOWN 5477 0x0CC6, // 0CC6..0CC8; KANNADA 5478 0x0CC9, // 0CC9 ; UNKNOWN 5479 0x0CCA, // 0CCA..0CCD; KANNADA 5480 0x0CCE, // 0CCE..0CD4; UNKNOWN 5481 0x0CD5, // 0CD5..0CD6; KANNADA 5482 0x0CD7, // 0CD7..0CDD; UNKNOWN 5483 0x0CDE, // 0CDE ; KANNADA 5484 0x0CDF, // 0CDF ; UNKNOWN 5485 0x0CE0, // 0CE0..0CE3; KANNADA 5486 0x0CE4, // 0CE4..0CE5; UNKNOWN 5487 0x0CE6, // 0CE6..0CEF; KANNADA 5488 0x0CF0, // 0CF0 ; UNKNOWN 5489 0x0CF1, // 0CF1..0CF2; KANNADA 5490 0x0CF3, // 0CF3..0CFF; UNKNOWN 5491 0x0D00, // 0D00..0D0C; MALAYALAM 5492 0x0D0D, // 0D0D ; UNKNOWN 5493 0x0D0E, // 0D0E..0D10; MALAYALAM 5494 0x0D11, // 0D11 ; UNKNOWN 5495 0x0D12, // 0D12..0D44; MALAYALAM 5496 0x0D45, // 0D45 ; UNKNOWN 5497 0x0D46, // 0D46..0D48; MALAYALAM 5498 0x0D49, // 0D49 ; UNKNOWN 5499 0x0D4A, // 0D4A..0D4F; MALAYALAM 5500 0x0D50, // 0D50..0D53; UNKNOWN 5501 0x0D54, // 0D54..0D63; MALAYALAM 5502 0x0D64, // 0D64..0D65; UNKNOWN 5503 0x0D66, // 0D66..0D7F; MALAYALAM 5504 0x0D80, // 0D80 ; UNKNOWN 5505 0x0D81, // 0D81..0D83; SINHALA 5506 0x0D84, // 0D84 ; UNKNOWN 5507 0x0D85, // 0D85..0D96; SINHALA 5508 0x0D97, // 0D97..0D99; UNKNOWN 5509 0x0D9A, // 0D9A..0DB1; SINHALA 5510 0x0DB2, // 0DB2 ; UNKNOWN 5511 0x0DB3, // 0DB3..0DBB; SINHALA 5512 0x0DBC, // 0DBC ; UNKNOWN 5513 0x0DBD, // 0DBD ; SINHALA 5514 0x0DBE, // 0DBE..0DBF; UNKNOWN 5515 0x0DC0, // 0DC0..0DC6; SINHALA 5516 0x0DC7, // 0DC7..0DC9; UNKNOWN 5517 0x0DCA, // 0DCA ; SINHALA 5518 0x0DCB, // 0DCB..0DCE; UNKNOWN 5519 0x0DCF, // 0DCF..0DD4; SINHALA 5520 0x0DD5, // 0DD5 ; UNKNOWN 5521 0x0DD6, // 0DD6 ; SINHALA 5522 0x0DD7, // 0DD7 ; UNKNOWN 5523 0x0DD8, // 0DD8..0DDF; SINHALA 5524 0x0DE0, // 0DE0..0DE5; UNKNOWN 5525 0x0DE6, // 0DE6..0DEF; SINHALA 5526 0x0DF0, // 0DF0..0DF1; UNKNOWN 
5527 0x0DF2, // 0DF2..0DF4; SINHALA 5528 0x0DF5, // 0DF5..0E00; UNKNOWN 5529 0x0E01, // 0E01..0E3A; THAI 5530 0x0E3B, // 0E3B..0E3E; UNKNOWN 5531 0x0E3F, // 0E3F ; COMMON 5532 0x0E40, // 0E40..0E5B; THAI 5533 0x0E5C, // 0E5C..0E80; UNKNOWN 5534 0x0E81, // 0E81..0E82; LAO 5535 0x0E83, // 0E83 ; UNKNOWN 5536 0x0E84, // 0E84 ; LAO 5537 0x0E85, // 0E85 ; UNKNOWN 5538 0x0E86, // 0E86..0E8A; LAO 5539 0x0E8B, // 0E8B ; UNKNOWN 5540 0x0E8C, // 0E8C..0EA3; LAO 5541 0x0EA4, // 0EA4 ; UNKNOWN 5542 0x0EA5, // 0EA5 ; LAO 5543 0x0EA6, // 0EA6 ; UNKNOWN 5544 0x0EA7, // 0EA7..0EBD; LAO 5545 0x0EBE, // 0EBE..0EBF; UNKNOWN 5546 0x0EC0, // 0EC0..0EC4; LAO 5547 0x0EC5, // 0EC5 ; UNKNOWN 5548 0x0EC6, // 0EC6 ; LAO 5549 0x0EC7, // 0EC7 ; UNKNOWN 5550 0x0EC8, // 0EC8..0ECD; LAO 5551 0x0ECE, // 0ECE..0ECF; UNKNOWN 5552 0x0ED0, // 0ED0..0ED9; LAO 5553 0x0EDA, // 0EDA..0EDB; UNKNOWN 5554 0x0EDC, // 0EDC..0EDF; LAO 5555 0x0EE0, // 0EE0..0EFF; UNKNOWN 5556 0x0F00, // 0F00..0F47; TIBETAN 5557 0x0F48, // 0F48 ; UNKNOWN 5558 0x0F49, // 0F49..0F6C; TIBETAN 5559 0x0F6D, // 0F6D..0F70; UNKNOWN 5560 0x0F71, // 0F71..0F97; TIBETAN 5561 0x0F98, // 0F98 ; UNKNOWN 5562 0x0F99, // 0F99..0FBC; TIBETAN 5563 0x0FBD, // 0FBD ; UNKNOWN 5564 0x0FBE, // 0FBE..0FCC; TIBETAN 5565 0x0FCD, // 0FCD ; UNKNOWN 5566 0x0FCE, // 0FCE..0FD4; TIBETAN 5567 0x0FD5, // 0FD5..0FD8; COMMON 5568 0x0FD9, // 0FD9..0FDA; TIBETAN 5569 0x0FDB, // 0FDB..0FFF; UNKNOWN 5570 0x1000, // 1000..109F; MYANMAR 5571 0x10A0, // 10A0..10C5; GEORGIAN 5572 0x10C6, // 10C6 ; UNKNOWN 5573 0x10C7, // 10C7 ; GEORGIAN 5574 0x10C8, // 10C8..10CC; UNKNOWN 5575 0x10CD, // 10CD ; GEORGIAN 5576 0x10CE, // 10CE..10CF; UNKNOWN 5577 0x10D0, // 10D0..10FA; GEORGIAN 5578 0x10FB, // 10FB ; COMMON 5579 0x10FC, // 10FC..10FF; GEORGIAN 5580 0x1100, // 1100..11FF; HANGUL 5581 0x1200, // 1200..1248; ETHIOPIC 5582 0x1249, // 1249 ; UNKNOWN 5583 0x124A, // 124A..124D; ETHIOPIC 5584 0x124E, // 124E..124F; UNKNOWN 5585 0x1250, // 1250..1256; ETHIOPIC 5586 0x1257, // 1257 
; UNKNOWN 5587 0x1258, // 1258 ; ETHIOPIC 5588 0x1259, // 1259 ; UNKNOWN 5589 0x125A, // 125A..125D; ETHIOPIC 5590 0x125E, // 125E..125F; UNKNOWN 5591 0x1260, // 1260..1288; ETHIOPIC 5592 0x1289, // 1289 ; UNKNOWN 5593 0x128A, // 128A..128D; ETHIOPIC 5594 0x128E, // 128E..128F; UNKNOWN 5595 0x1290, // 1290..12B0; ETHIOPIC 5596 0x12B1, // 12B1 ; UNKNOWN 5597 0x12B2, // 12B2..12B5; ETHIOPIC 5598 0x12B6, // 12B6..12B7; UNKNOWN 5599 0x12B8, // 12B8..12BE; ETHIOPIC 5600 0x12BF, // 12BF ; UNKNOWN 5601 0x12C0, // 12C0 ; ETHIOPIC 5602 0x12C1, // 12C1 ; UNKNOWN 5603 0x12C2, // 12C2..12C5; ETHIOPIC 5604 0x12C6, // 12C6..12C7; UNKNOWN 5605 0x12C8, // 12C8..12D6; ETHIOPIC 5606 0x12D7, // 12D7 ; UNKNOWN 5607 0x12D8, // 12D8..1310; ETHIOPIC 5608 0x1311, // 1311 ; UNKNOWN 5609 0x1312, // 1312..1315; ETHIOPIC 5610 0x1316, // 1316..1317; UNKNOWN 5611 0x1318, // 1318..135A; ETHIOPIC 5612 0x135B, // 135B..135C; UNKNOWN 5613 0x135D, // 135D..137C; ETHIOPIC 5614 0x137D, // 137D..137F; UNKNOWN 5615 0x1380, // 1380..1399; ETHIOPIC 5616 0x139A, // 139A..139F; UNKNOWN 5617 0x13A0, // 13A0..13F5; CHEROKEE 5618 0x13F6, // 13F6..13F7; UNKNOWN 5619 0x13F8, // 13F8..13FD; CHEROKEE 5620 0x13FE, // 13FE..13FF; UNKNOWN 5621 0x1400, // 1400..167F; CANADIAN_ABORIGINAL 5622 0x1680, // 1680..169C; OGHAM 5623 0x169D, // 169D..169F; UNKNOWN 5624 0x16A0, // 16A0..16EA; RUNIC 5625 0x16EB, // 16EB..16ED; COMMON 5626 0x16EE, // 16EE..16F8; RUNIC 5627 0x16F9, // 16F9..16FF; UNKNOWN 5628 0x1700, // 1700..170C; TAGALOG 5629 0x170D, // 170D ; UNKNOWN 5630 0x170E, // 170E..1714; TAGALOG 5631 0x1715, // 1715..171F; UNKNOWN 5632 0x1720, // 1720..1734; HANUNOO 5633 0x1735, // 1735..1736; COMMON 5634 0x1737, // 1737..173F; UNKNOWN 5635 0x1740, // 1740..1753; BUHID 5636 0x1754, // 1754..175F; UNKNOWN 5637 0x1760, // 1760..176C; TAGBANWA 5638 0x176D, // 176D ; UNKNOWN 5639 0x176E, // 176E..1770; TAGBANWA 5640 0x1771, // 1771 ; UNKNOWN 5641 0x1772, // 1772..1773; TAGBANWA 5642 0x1774, // 1774..177F; UNKNOWN 5643 
0x1780, // 1780..17DD; KHMER 5644 0x17DE, // 17DE..17DF; UNKNOWN 5645 0x17E0, // 17E0..17E9; KHMER 5646 0x17EA, // 17EA..17EF; UNKNOWN 5647 0x17F0, // 17F0..17F9; KHMER 5648 0x17FA, // 17FA..17FF; UNKNOWN 5649 0x1800, // 1800..1801; MONGOLIAN 5650 0x1802, // 1802..1803; COMMON 5651 0x1804, // 1804 ; MONGOLIAN 5652 0x1805, // 1805 ; COMMON 5653 0x1806, // 1806..180E; MONGOLIAN 5654 0x180F, // 180F ; UNKNOWN 5655 0x1810, // 1810..1819; MONGOLIAN 5656 0x181A, // 181A..181F; UNKNOWN 5657 0x1820, // 1820..1878; MONGOLIAN 5658 0x1879, // 1879..187F; UNKNOWN 5659 0x1880, // 1880..18AA; MONGOLIAN 5660 0x18AB, // 18AB..18AF; UNKNOWN 5661 0x18B0, // 18B0..18F5; CANADIAN_ABORIGINAL 5662 0x18F6, // 18F6..18FF; UNKNOWN 5663 0x1900, // 1900..191E; LIMBU 5664 0x191F, // 191F ; UNKNOWN 5665 0x1920, // 1920..192B; LIMBU 5666 0x192C, // 192C..192F; UNKNOWN 5667 0x1930, // 1930..193B; LIMBU 5668 0x193C, // 193C..193F; UNKNOWN 5669 0x1940, // 1940 ; LIMBU 5670 0x1941, // 1941..1943; UNKNOWN 5671 0x1944, // 1944..194F; LIMBU 5672 0x1950, // 1950..196D; TAI_LE 5673 0x196E, // 196E..196F; UNKNOWN 5674 0x1970, // 1970..1974; TAI_LE 5675 0x1975, // 1975..197F; UNKNOWN 5676 0x1980, // 1980..19AB; NEW_TAI_LUE 5677 0x19AC, // 19AC..19AF; UNKNOWN 5678 0x19B0, // 19B0..19C9; NEW_TAI_LUE 5679 0x19CA, // 19CA..19CF; UNKNOWN 5680 0x19D0, // 19D0..19DA; NEW_TAI_LUE 5681 0x19DB, // 19DB..19DD; UNKNOWN 5682 0x19DE, // 19DE..19DF; NEW_TAI_LUE 5683 0x19E0, // 19E0..19FF; KHMER 5684 0x1A00, // 1A00..1A1B; BUGINESE 5685 0x1A1C, // 1A1C..1A1D; UNKNOWN 5686 0x1A1E, // 1A1E..1A1F; BUGINESE 5687 0x1A20, // 1A20..1A5E; TAI_THAM 5688 0x1A5F, // 1A5F ; UNKNOWN 5689 0x1A60, // 1A60..1A7C; TAI_THAM 5690 0x1A7D, // 1A7D..1A7E; UNKNOWN 5691 0x1A7F, // 1A7F..1A89; TAI_THAM 5692 0x1A8A, // 1A8A..1A8F; UNKNOWN 5693 0x1A90, // 1A90..1A99; TAI_THAM 5694 0x1A9A, // 1A9A..1A9F; UNKNOWN 5695 0x1AA0, // 1AA0..1AAD; TAI_THAM 5696 0x1AAE, // 1AAE..1AAF; UNKNOWN 5697 0x1AB0, // 1AB0..1AC0; INHERITED 5698 0x1AC1, // 1AC1..1AFF; 
UNKNOWN 5699 0x1B00, // 1B00..1B4B; BALINESE 5700 0x1B4C, // 1B4C..1B4F; UNKNOWN 5701 0x1B50, // 1B50..1B7C; BALINESE 5702 0x1B7D, // 1B7D..1B7F; UNKNOWN 5703 0x1B80, // 1B80..1BBF; SUNDANESE 5704 0x1BC0, // 1BC0..1BF3; BATAK 5705 0x1BF4, // 1BF4..1BFB; UNKNOWN 5706 0x1BFC, // 1BFC..1BFF; BATAK 5707 0x1C00, // 1C00..1C37; LEPCHA 5708 0x1C38, // 1C38..1C3A; UNKNOWN 5709 0x1C3B, // 1C3B..1C49; LEPCHA 5710 0x1C4A, // 1C4A..1C4C; UNKNOWN 5711 0x1C4D, // 1C4D..1C4F; LEPCHA 5712 0x1C50, // 1C50..1C7F; OL_CHIKI 5713 0x1C80, // 1C80..1C88; CYRILLIC 5714 0x1C89, // 1C89..1C8F; UNKNOWN 5715 0x1C90, // 1C90..1CBA; GEORGIAN 5716 0x1CBB, // 1CBB..1CBC; UNKNOWN 5717 0x1CBD, // 1CBD..1CBF; GEORGIAN 5718 0x1CC0, // 1CC0..1CC7; SUNDANESE 5719 0x1CC8, // 1CC8..1CCF; UNKNOWN 5720 0x1CD0, // 1CD0..1CD2; INHERITED 5721 0x1CD3, // 1CD3 ; COMMON 5722 0x1CD4, // 1CD4..1CE0; INHERITED 5723 0x1CE1, // 1CE1 ; COMMON 5724 0x1CE2, // 1CE2..1CE8; INHERITED 5725 0x1CE9, // 1CE9..1CEC; COMMON 5726 0x1CED, // 1CED ; INHERITED 5727 0x1CEE, // 1CEE..1CF3; COMMON 5728 0x1CF4, // 1CF4 ; INHERITED 5729 0x1CF5, // 1CF5..1CF7; COMMON 5730 0x1CF8, // 1CF8..1CF9; INHERITED 5731 0x1CFA, // 1CFA ; COMMON 5732 0x1CFB, // 1CFB..1CFF; UNKNOWN 5733 0x1D00, // 1D00..1D25; LATIN 5734 0x1D26, // 1D26..1D2A; GREEK 5735 0x1D2B, // 1D2B ; CYRILLIC 5736 0x1D2C, // 1D2C..1D5C; LATIN 5737 0x1D5D, // 1D5D..1D61; GREEK 5738 0x1D62, // 1D62..1D65; LATIN 5739 0x1D66, // 1D66..1D6A; GREEK 5740 0x1D6B, // 1D6B..1D77; LATIN 5741 0x1D78, // 1D78 ; CYRILLIC 5742 0x1D79, // 1D79..1DBE; LATIN 5743 0x1DBF, // 1DBF ; GREEK 5744 0x1DC0, // 1DC0..1DF9; INHERITED 5745 0x1DFA, // 1DFA ; UNKNOWN 5746 0x1DFB, // 1DFB..1DFF; INHERITED 5747 0x1E00, // 1E00..1EFF; LATIN 5748 0x1F00, // 1F00..1F15; GREEK 5749 0x1F16, // 1F16..1F17; UNKNOWN 5750 0x1F18, // 1F18..1F1D; GREEK 5751 0x1F1E, // 1F1E..1F1F; UNKNOWN 5752 0x1F20, // 1F20..1F45; GREEK 5753 0x1F46, // 1F46..1F47; UNKNOWN 5754 0x1F48, // 1F48..1F4D; GREEK 5755 0x1F4E, // 1F4E..1F4F; 
UNKNOWN 5756 0x1F50, // 1F50..1F57; GREEK 5757 0x1F58, // 1F58 ; UNKNOWN 5758 0x1F59, // 1F59 ; GREEK 5759 0x1F5A, // 1F5A ; UNKNOWN 5760 0x1F5B, // 1F5B ; GREEK 5761 0x1F5C, // 1F5C ; UNKNOWN 5762 0x1F5D, // 1F5D ; GREEK 5763 0x1F5E, // 1F5E ; UNKNOWN 5764 0x1F5F, // 1F5F..1F7D; GREEK 5765 0x1F7E, // 1F7E..1F7F; UNKNOWN 5766 0x1F80, // 1F80..1FB4; GREEK 5767 0x1FB5, // 1FB5 ; UNKNOWN 5768 0x1FB6, // 1FB6..1FC4; GREEK 5769 0x1FC5, // 1FC5 ; UNKNOWN 5770 0x1FC6, // 1FC6..1FD3; GREEK 5771 0x1FD4, // 1FD4..1FD5; UNKNOWN 5772 0x1FD6, // 1FD6..1FDB; GREEK 5773 0x1FDC, // 1FDC ; UNKNOWN 5774 0x1FDD, // 1FDD..1FEF; GREEK 5775 0x1FF0, // 1FF0..1FF1; UNKNOWN 5776 0x1FF2, // 1FF2..1FF4; GREEK 5777 0x1FF5, // 1FF5 ; UNKNOWN 5778 0x1FF6, // 1FF6..1FFE; GREEK 5779 0x1FFF, // 1FFF ; UNKNOWN 5780 0x2000, // 2000..200B; COMMON 5781 0x200C, // 200C..200D; INHERITED 5782 0x200E, // 200E..2064; COMMON 5783 0x2065, // 2065 ; UNKNOWN 5784 0x2066, // 2066..2070; COMMON 5785 0x2071, // 2071 ; LATIN 5786 0x2072, // 2072..2073; UNKNOWN 5787 0x2074, // 2074..207E; COMMON 5788 0x207F, // 207F ; LATIN 5789 0x2080, // 2080..208E; COMMON 5790 0x208F, // 208F ; UNKNOWN 5791 0x2090, // 2090..209C; LATIN 5792 0x209D, // 209D..209F; UNKNOWN 5793 0x20A0, // 20A0..20BF; COMMON 5794 0x20C0, // 20C0..20CF; UNKNOWN 5795 0x20D0, // 20D0..20F0; INHERITED 5796 0x20F1, // 20F1..20FF; UNKNOWN 5797 0x2100, // 2100..2125; COMMON 5798 0x2126, // 2126 ; GREEK 5799 0x2127, // 2127..2129; COMMON 5800 0x212A, // 212A..212B; LATIN 5801 0x212C, // 212C..2131; COMMON 5802 0x2132, // 2132 ; LATIN 5803 0x2133, // 2133..214D; COMMON 5804 0x214E, // 214E ; LATIN 5805 0x214F, // 214F..215F; COMMON 5806 0x2160, // 2160..2188; LATIN 5807 0x2189, // 2189..218B; COMMON 5808 0x218C, // 218C..218F; UNKNOWN 5809 0x2190, // 2190..2426; COMMON 5810 0x2427, // 2427..243F; UNKNOWN 5811 0x2440, // 2440..244A; COMMON 5812 0x244B, // 244B..245F; UNKNOWN 5813 0x2460, // 2460..27FF; COMMON 5814 0x2800, // 2800..28FF; BRAILLE 5815 0x2900, 
// 2900..2B73; COMMON 5816 0x2B74, // 2B74..2B75; UNKNOWN 5817 0x2B76, // 2B76..2B95; COMMON 5818 0x2B96, // 2B96 ; UNKNOWN 5819 0x2B97, // 2B97..2BFF; COMMON 5820 0x2C00, // 2C00..2C2E; GLAGOLITIC 5821 0x2C2F, // 2C2F ; UNKNOWN 5822 0x2C30, // 2C30..2C5E; GLAGOLITIC 5823 0x2C5F, // 2C5F ; UNKNOWN 5824 0x2C60, // 2C60..2C7F; LATIN 5825 0x2C80, // 2C80..2CF3; COPTIC 5826 0x2CF4, // 2CF4..2CF8; UNKNOWN 5827 0x2CF9, // 2CF9..2CFF; COPTIC 5828 0x2D00, // 2D00..2D25; GEORGIAN 5829 0x2D26, // 2D26 ; UNKNOWN 5830 0x2D27, // 2D27 ; GEORGIAN 5831 0x2D28, // 2D28..2D2C; UNKNOWN 5832 0x2D2D, // 2D2D ; GEORGIAN 5833 0x2D2E, // 2D2E..2D2F; UNKNOWN 5834 0x2D30, // 2D30..2D67; TIFINAGH 5835 0x2D68, // 2D68..2D6E; UNKNOWN 5836 0x2D6F, // 2D6F..2D70; TIFINAGH 5837 0x2D71, // 2D71..2D7E; UNKNOWN 5838 0x2D7F, // 2D7F ; TIFINAGH 5839 0x2D80, // 2D80..2D96; ETHIOPIC 5840 0x2D97, // 2D97..2D9F; UNKNOWN 5841 0x2DA0, // 2DA0..2DA6; ETHIOPIC 5842 0x2DA7, // 2DA7 ; UNKNOWN 5843 0x2DA8, // 2DA8..2DAE; ETHIOPIC 5844 0x2DAF, // 2DAF ; UNKNOWN 5845 0x2DB0, // 2DB0..2DB6; ETHIOPIC 5846 0x2DB7, // 2DB7 ; UNKNOWN 5847 0x2DB8, // 2DB8..2DBE; ETHIOPIC 5848 0x2DBF, // 2DBF ; UNKNOWN 5849 0x2DC0, // 2DC0..2DC6; ETHIOPIC 5850 0x2DC7, // 2DC7 ; UNKNOWN 5851 0x2DC8, // 2DC8..2DCE; ETHIOPIC 5852 0x2DCF, // 2DCF ; UNKNOWN 5853 0x2DD0, // 2DD0..2DD6; ETHIOPIC 5854 0x2DD7, // 2DD7 ; UNKNOWN 5855 0x2DD8, // 2DD8..2DDE; ETHIOPIC 5856 0x2DDF, // 2DDF ; UNKNOWN 5857 0x2DE0, // 2DE0..2DFF; CYRILLIC 5858 0x2E00, // 2E00..2E52; COMMON 5859 0x2E53, // 2E53..2E7F; UNKNOWN 5860 0x2E80, // 2E80..2E99; HAN 5861 0x2E9A, // 2E9A ; UNKNOWN 5862 0x2E9B, // 2E9B..2EF3; HAN 5863 0x2EF4, // 2EF4..2EFF; UNKNOWN 5864 0x2F00, // 2F00..2FD5; HAN 5865 0x2FD6, // 2FD6..2FEF; UNKNOWN 5866 0x2FF0, // 2FF0..2FFB; COMMON 5867 0x2FFC, // 2FFC..2FFF; UNKNOWN 5868 0x3000, // 3000..3004; COMMON 5869 0x3005, // 3005 ; HAN 5870 0x3006, // 3006 ; COMMON 5871 0x3007, // 3007 ; HAN 5872 0x3008, // 3008..3020; COMMON 5873 0x3021, // 3021..3029; 
HAN 5874 0x302A, // 302A..302D; INHERITED 5875 0x302E, // 302E..302F; HANGUL 5876 0x3030, // 3030..3037; COMMON 5877 0x3038, // 3038..303B; HAN 5878 0x303C, // 303C..303F; COMMON 5879 0x3040, // 3040 ; UNKNOWN 5880 0x3041, // 3041..3096; HIRAGANA 5881 0x3097, // 3097..3098; UNKNOWN 5882 0x3099, // 3099..309A; INHERITED 5883 0x309B, // 309B..309C; COMMON 5884 0x309D, // 309D..309F; HIRAGANA 5885 0x30A0, // 30A0 ; COMMON 5886 0x30A1, // 30A1..30FA; KATAKANA 5887 0x30FB, // 30FB..30FC; COMMON 5888 0x30FD, // 30FD..30FF; KATAKANA 5889 0x3100, // 3100..3104; UNKNOWN 5890 0x3105, // 3105..312F; BOPOMOFO 5891 0x3130, // 3130 ; UNKNOWN 5892 0x3131, // 3131..318E; HANGUL 5893 0x318F, // 318F ; UNKNOWN 5894 0x3190, // 3190..319F; COMMON 5895 0x31A0, // 31A0..31BF; BOPOMOFO 5896 0x31C0, // 31C0..31E3; COMMON 5897 0x31E4, // 31E4..31EF; UNKNOWN 5898 0x31F0, // 31F0..31FF; KATAKANA 5899 0x3200, // 3200..321E; HANGUL 5900 0x321F, // 321F ; UNKNOWN 5901 0x3220, // 3220..325F; COMMON 5902 0x3260, // 3260..327E; HANGUL 5903 0x327F, // 327F..32CF; COMMON 5904 0x32D0, // 32D0..32FE; KATAKANA 5905 0x32FF, // 32FF ; COMMON 5906 0x3300, // 3300..3357; KATAKANA 5907 0x3358, // 3358..33FF; COMMON 5908 0x3400, // 3400..4DBF; HAN 5909 0x4DC0, // 4DC0..4DFF; COMMON 5910 0x4E00, // 4E00..9FFC; HAN 5911 0x9FFD, // 9FFD..9FFF; UNKNOWN 5912 0xA000, // A000..A48C; YI 5913 0xA48D, // A48D..A48F; UNKNOWN 5914 0xA490, // A490..A4C6; YI 5915 0xA4C7, // A4C7..A4CF; UNKNOWN 5916 0xA4D0, // A4D0..A4FF; LISU 5917 0xA500, // A500..A62B; VAI 5918 0xA62C, // A62C..A63F; UNKNOWN 5919 0xA640, // A640..A69F; CYRILLIC 5920 0xA6A0, // A6A0..A6F7; BAMUM 5921 0xA6F8, // A6F8..A6FF; UNKNOWN 5922 0xA700, // A700..A721; COMMON 5923 0xA722, // A722..A787; LATIN 5924 0xA788, // A788..A78A; COMMON 5925 0xA78B, // A78B..A7BF; LATIN 5926 0xA7C0, // A7C0..A7C1; UNKNOWN 5927 0xA7C2, // A7C2..A7CA; LATIN 5928 0xA7CB, // A7CB..A7F4; UNKNOWN 5929 0xA7F5, // A7F5..A7FF; LATIN 5930 0xA800, // A800..A82C; SYLOTI_NAGRI 5931 
0xA82D, // A82D..A82F; UNKNOWN 5932 0xA830, // A830..A839; COMMON 5933 0xA83A, // A83A..A83F; UNKNOWN 5934 0xA840, // A840..A877; PHAGS_PA 5935 0xA878, // A878..A87F; UNKNOWN 5936 0xA880, // A880..A8C5; SAURASHTRA 5937 0xA8C6, // A8C6..A8CD; UNKNOWN 5938 0xA8CE, // A8CE..A8D9; SAURASHTRA 5939 0xA8DA, // A8DA..A8DF; UNKNOWN 5940 0xA8E0, // A8E0..A8FF; DEVANAGARI 5941 0xA900, // A900..A92D; KAYAH_LI 5942 0xA92E, // A92E ; COMMON 5943 0xA92F, // A92F ; KAYAH_LI 5944 0xA930, // A930..A953; REJANG 5945 0xA954, // A954..A95E; UNKNOWN 5946 0xA95F, // A95F ; REJANG 5947 0xA960, // A960..A97C; HANGUL 5948 0xA97D, // A97D..A97F; UNKNOWN 5949 0xA980, // A980..A9CD; JAVANESE 5950 0xA9CE, // A9CE ; UNKNOWN 5951 0xA9CF, // A9CF ; COMMON 5952 0xA9D0, // A9D0..A9D9; JAVANESE 5953 0xA9DA, // A9DA..A9DD; UNKNOWN 5954 0xA9DE, // A9DE..A9DF; JAVANESE 5955 0xA9E0, // A9E0..A9FE; MYANMAR 5956 0xA9FF, // A9FF ; UNKNOWN 5957 0xAA00, // AA00..AA36; CHAM 5958 0xAA37, // AA37..AA3F; UNKNOWN 5959 0xAA40, // AA40..AA4D; CHAM 5960 0xAA4E, // AA4E..AA4F; UNKNOWN 5961 0xAA50, // AA50..AA59; CHAM 5962 0xAA5A, // AA5A..AA5B; UNKNOWN 5963 0xAA5C, // AA5C..AA5F; CHAM 5964 0xAA60, // AA60..AA7F; MYANMAR 5965 0xAA80, // AA80..AAC2; TAI_VIET 5966 0xAAC3, // AAC3..AADA; UNKNOWN 5967 0xAADB, // AADB..AADF; TAI_VIET 5968 0xAAE0, // AAE0..AAF6; MEETEI_MAYEK 5969 0xAAF7, // AAF7..AB00; UNKNOWN 5970 0xAB01, // AB01..AB06; ETHIOPIC 5971 0xAB07, // AB07..AB08; UNKNOWN 5972 0xAB09, // AB09..AB0E; ETHIOPIC 5973 0xAB0F, // AB0F..AB10; UNKNOWN 5974 0xAB11, // AB11..AB16; ETHIOPIC 5975 0xAB17, // AB17..AB1F; UNKNOWN 5976 0xAB20, // AB20..AB26; ETHIOPIC 5977 0xAB27, // AB27 ; UNKNOWN 5978 0xAB28, // AB28..AB2E; ETHIOPIC 5979 0xAB2F, // AB2F ; UNKNOWN 5980 0xAB30, // AB30..AB5A; LATIN 5981 0xAB5B, // AB5B ; COMMON 5982 0xAB5C, // AB5C..AB64; LATIN 5983 0xAB65, // AB65 ; GREEK 5984 0xAB66, // AB66..AB69; LATIN 5985 0xAB6A, // AB6A..AB6B; COMMON 5986 0xAB6C, // AB6C..AB6F; UNKNOWN 5987 0xAB70, // AB70..ABBF; CHEROKEE 
5988 0xABC0, // ABC0..ABED; MEETEI_MAYEK 5989 0xABEE, // ABEE..ABEF; UNKNOWN 5990 0xABF0, // ABF0..ABF9; MEETEI_MAYEK 5991 0xABFA, // ABFA..ABFF; UNKNOWN 5992 0xAC00, // AC00..D7A3; HANGUL 5993 0xD7A4, // D7A4..D7AF; UNKNOWN 5994 0xD7B0, // D7B0..D7C6; HANGUL 5995 0xD7C7, // D7C7..D7CA; UNKNOWN 5996 0xD7CB, // D7CB..D7FB; HANGUL 5997 0xD7FC, // D7FC..F8FF; UNKNOWN 5998 0xF900, // F900..FA6D; HAN 5999 0xFA6E, // FA6E..FA6F; UNKNOWN 6000 0xFA70, // FA70..FAD9; HAN 6001 0xFADA, // FADA..FAFF; UNKNOWN 6002 0xFB00, // FB00..FB06; LATIN 6003 0xFB07, // FB07..FB12; UNKNOWN 6004 0xFB13, // FB13..FB17; ARMENIAN 6005 0xFB18, // FB18..FB1C; UNKNOWN 6006 0xFB1D, // FB1D..FB36; HEBREW 6007 0xFB37, // FB37 ; UNKNOWN 6008 0xFB38, // FB38..FB3C; HEBREW 6009 0xFB3D, // FB3D ; UNKNOWN 6010 0xFB3E, // FB3E ; HEBREW 6011 0xFB3F, // FB3F ; UNKNOWN 6012 0xFB40, // FB40..FB41; HEBREW 6013 0xFB42, // FB42 ; UNKNOWN 6014 0xFB43, // FB43..FB44; HEBREW 6015 0xFB45, // FB45 ; UNKNOWN 6016 0xFB46, // FB46..FB4F; HEBREW 6017 0xFB50, // FB50..FBC1; ARABIC 6018 0xFBC2, // FBC2..FBD2; UNKNOWN 6019 0xFBD3, // FBD3..FD3D; ARABIC 6020 0xFD3E, // FD3E..FD3F; COMMON 6021 0xFD40, // FD40..FD4F; UNKNOWN 6022 0xFD50, // FD50..FD8F; ARABIC 6023 0xFD90, // FD90..FD91; UNKNOWN 6024 0xFD92, // FD92..FDC7; ARABIC 6025 0xFDC8, // FDC8..FDEF; UNKNOWN 6026 0xFDF0, // FDF0..FDFD; ARABIC 6027 0xFDFE, // FDFE..FDFF; UNKNOWN 6028 0xFE00, // FE00..FE0F; INHERITED 6029 0xFE10, // FE10..FE19; COMMON 6030 0xFE1A, // FE1A..FE1F; UNKNOWN 6031 0xFE20, // FE20..FE2D; INHERITED 6032 0xFE2E, // FE2E..FE2F; CYRILLIC 6033 0xFE30, // FE30..FE52; COMMON 6034 0xFE53, // FE53 ; UNKNOWN 6035 0xFE54, // FE54..FE66; COMMON 6036 0xFE67, // FE67 ; UNKNOWN 6037 0xFE68, // FE68..FE6B; COMMON 6038 0xFE6C, // FE6C..FE6F; UNKNOWN 6039 0xFE70, // FE70..FE74; ARABIC 6040 0xFE75, // FE75 ; UNKNOWN 6041 0xFE76, // FE76..FEFC; ARABIC 6042 0xFEFD, // FEFD..FEFE; UNKNOWN 6043 0xFEFF, // FEFF ; COMMON 6044 0xFF00, // FF00 ; UNKNOWN 6045 0xFF01, // 
FF01..FF20; COMMON 6046 0xFF21, // FF21..FF3A; LATIN 6047 0xFF3B, // FF3B..FF40; COMMON 6048 0xFF41, // FF41..FF5A; LATIN 6049 0xFF5B, // FF5B..FF65; COMMON 6050 0xFF66, // FF66..FF6F; KATAKANA 6051 0xFF70, // FF70 ; COMMON 6052 0xFF71, // FF71..FF9D; KATAKANA 6053 0xFF9E, // FF9E..FF9F; COMMON 6054 0xFFA0, // FFA0..FFBE; HANGUL 6055 0xFFBF, // FFBF..FFC1; UNKNOWN 6056 0xFFC2, // FFC2..FFC7; HANGUL 6057 0xFFC8, // FFC8..FFC9; UNKNOWN 6058 0xFFCA, // FFCA..FFCF; HANGUL 6059 0xFFD0, // FFD0..FFD1; UNKNOWN 6060 0xFFD2, // FFD2..FFD7; HANGUL 6061 0xFFD8, // FFD8..FFD9; UNKNOWN 6062 0xFFDA, // FFDA..FFDC; HANGUL 6063 0xFFDD, // FFDD..FFDF; UNKNOWN 6064 0xFFE0, // FFE0..FFE6; COMMON 6065 0xFFE7, // FFE7 ; UNKNOWN 6066 0xFFE8, // FFE8..FFEE; COMMON 6067 0xFFEF, // FFEF..FFF8; UNKNOWN 6068 0xFFF9, // FFF9..FFFD; COMMON 6069 0xFFFE, // FFFE..FFFF; UNKNOWN 6070 0x10000, // 10000..1000B; LINEAR_B 6071 0x1000C, // 1000C ; UNKNOWN 6072 0x1000D, // 1000D..10026; LINEAR_B 6073 0x10027, // 10027 ; UNKNOWN 6074 0x10028, // 10028..1003A; LINEAR_B 6075 0x1003B, // 1003B ; UNKNOWN 6076 0x1003C, // 1003C..1003D; LINEAR_B 6077 0x1003E, // 1003E ; UNKNOWN 6078 0x1003F, // 1003F..1004D; LINEAR_B 6079 0x1004E, // 1004E..1004F; UNKNOWN 6080 0x10050, // 10050..1005D; LINEAR_B 6081 0x1005E, // 1005E..1007F; UNKNOWN 6082 0x10080, // 10080..100FA; LINEAR_B 6083 0x100FB, // 100FB..100FF; UNKNOWN 6084 0x10100, // 10100..10102; COMMON 6085 0x10103, // 10103..10106; UNKNOWN 6086 0x10107, // 10107..10133; COMMON 6087 0x10134, // 10134..10136; UNKNOWN 6088 0x10137, // 10137..1013F; COMMON 6089 0x10140, // 10140..1018E; GREEK 6090 0x1018F, // 1018F ; UNKNOWN 6091 0x10190, // 10190..1019C; COMMON 6092 0x1019D, // 1019D..1019F; UNKNOWN 6093 0x101A0, // 101A0 ; GREEK 6094 0x101A1, // 101A1..101CF; UNKNOWN 6095 0x101D0, // 101D0..101FC; COMMON 6096 0x101FD, // 101FD ; INHERITED 6097 0x101FE, // 101FE..1027F; UNKNOWN 6098 0x10280, // 10280..1029C; LYCIAN 6099 0x1029D, // 1029D..1029F; UNKNOWN 6100 0x102A0, 
// 102A0..102D0; CARIAN 6101 0x102D1, // 102D1..102DF; UNKNOWN 6102 0x102E0, // 102E0 ; INHERITED 6103 0x102E1, // 102E1..102FB; COMMON 6104 0x102FC, // 102FC..102FF; UNKNOWN 6105 0x10300, // 10300..10323; OLD_ITALIC 6106 0x10324, // 10324..1032C; UNKNOWN 6107 0x1032D, // 1032D..1032F; OLD_ITALIC 6108 0x10330, // 10330..1034A; GOTHIC 6109 0x1034B, // 1034B..1034F; UNKNOWN 6110 0x10350, // 10350..1037A; OLD_PERMIC 6111 0x1037B, // 1037B..1037F; UNKNOWN 6112 0x10380, // 10380..1039D; UGARITIC 6113 0x1039E, // 1039E ; UNKNOWN 6114 0x1039F, // 1039F ; UGARITIC 6115 0x103A0, // 103A0..103C3; OLD_PERSIAN 6116 0x103C4, // 103C4..103C7; UNKNOWN 6117 0x103C8, // 103C8..103D5; OLD_PERSIAN 6118 0x103D6, // 103D6..103FF; UNKNOWN 6119 0x10400, // 10400..1044F; DESERET 6120 0x10450, // 10450..1047F; SHAVIAN 6121 0x10480, // 10480..1049D; OSMANYA 6122 0x1049E, // 1049E..1049F; UNKNOWN 6123 0x104A0, // 104A0..104A9; OSMANYA 6124 0x104AA, // 104AA..104AF; UNKNOWN 6125 0x104B0, // 104B0..104D3; OSAGE 6126 0x104D4, // 104D4..104D7; UNKNOWN 6127 0x104D8, // 104D8..104FB; OSAGE 6128 0x104FC, // 104FC..104FF; UNKNOWN 6129 0x10500, // 10500..10527; ELBASAN 6130 0x10528, // 10528..1052F; UNKNOWN 6131 0x10530, // 10530..10563; CAUCASIAN_ALBANIAN 6132 0x10564, // 10564..1056E; UNKNOWN 6133 0x1056F, // 1056F ; CAUCASIAN_ALBANIAN 6134 0x10570, // 10570..105FF; UNKNOWN 6135 0x10600, // 10600..10736; LINEAR_A 6136 0x10737, // 10737..1073F; UNKNOWN 6137 0x10740, // 10740..10755; LINEAR_A 6138 0x10756, // 10756..1075F; UNKNOWN 6139 0x10760, // 10760..10767; LINEAR_A 6140 0x10768, // 10768..107FF; UNKNOWN 6141 0x10800, // 10800..10805; CYPRIOT 6142 0x10806, // 10806..10807; UNKNOWN 6143 0x10808, // 10808 ; CYPRIOT 6144 0x10809, // 10809 ; UNKNOWN 6145 0x1080A, // 1080A..10835; CYPRIOT 6146 0x10836, // 10836 ; UNKNOWN 6147 0x10837, // 10837..10838; CYPRIOT 6148 0x10839, // 10839..1083B; UNKNOWN 6149 0x1083C, // 1083C ; CYPRIOT 6150 0x1083D, // 1083D..1083E; UNKNOWN 6151 0x1083F, // 1083F ; CYPRIOT 
6152 0x10840, // 10840..10855; IMPERIAL_ARAMAIC 6153 0x10856, // 10856 ; UNKNOWN 6154 0x10857, // 10857..1085F; IMPERIAL_ARAMAIC 6155 0x10860, // 10860..1087F; PALMYRENE 6156 0x10880, // 10880..1089E; NABATAEAN 6157 0x1089F, // 1089F..108A6; UNKNOWN 6158 0x108A7, // 108A7..108AF; NABATAEAN 6159 0x108B0, // 108B0..108DF; UNKNOWN 6160 0x108E0, // 108E0..108F2; HATRAN 6161 0x108F3, // 108F3 ; UNKNOWN 6162 0x108F4, // 108F4..108F5; HATRAN 6163 0x108F6, // 108F6..108FA; UNKNOWN 6164 0x108FB, // 108FB..108FF; HATRAN 6165 0x10900, // 10900..1091B; PHOENICIAN 6166 0x1091C, // 1091C..1091E; UNKNOWN 6167 0x1091F, // 1091F ; PHOENICIAN 6168 0x10920, // 10920..10939; LYDIAN 6169 0x1093A, // 1093A..1093E; UNKNOWN 6170 0x1093F, // 1093F ; LYDIAN 6171 0x10940, // 10940..1097F; UNKNOWN 6172 0x10980, // 10980..1099F; MEROITIC_HIEROGLYPHS 6173 0x109A0, // 109A0..109B7; MEROITIC_CURSIVE 6174 0x109B8, // 109B8..109BB; UNKNOWN 6175 0x109BC, // 109BC..109CF; MEROITIC_CURSIVE 6176 0x109D0, // 109D0..109D1; UNKNOWN 6177 0x109D2, // 109D2..109FF; MEROITIC_CURSIVE 6178 0x10A00, // 10A00..10A03; KHAROSHTHI 6179 0x10A04, // 10A04 ; UNKNOWN 6180 0x10A05, // 10A05..10A06; KHAROSHTHI 6181 0x10A07, // 10A07..10A0B; UNKNOWN 6182 0x10A0C, // 10A0C..10A13; KHAROSHTHI 6183 0x10A14, // 10A14 ; UNKNOWN 6184 0x10A15, // 10A15..10A17; KHAROSHTHI 6185 0x10A18, // 10A18 ; UNKNOWN 6186 0x10A19, // 10A19..10A35; KHAROSHTHI 6187 0x10A36, // 10A36..10A37; UNKNOWN 6188 0x10A38, // 10A38..10A3A; KHAROSHTHI 6189 0x10A3B, // 10A3B..10A3E; UNKNOWN 6190 0x10A3F, // 10A3F..10A48; KHAROSHTHI 6191 0x10A49, // 10A49..10A4F; UNKNOWN 6192 0x10A50, // 10A50..10A58; KHAROSHTHI 6193 0x10A59, // 10A59..10A5F; UNKNOWN 6194 0x10A60, // 10A60..10A7F; OLD_SOUTH_ARABIAN 6195 0x10A80, // 10A80..10A9F; OLD_NORTH_ARABIAN 6196 0x10AA0, // 10AA0..10ABF; UNKNOWN 6197 0x10AC0, // 10AC0..10AE6; MANICHAEAN 6198 0x10AE7, // 10AE7..10AEA; UNKNOWN 6199 0x10AEB, // 10AEB..10AF6; MANICHAEAN 6200 0x10AF7, // 10AF7..10AFF; UNKNOWN 6201 0x10B00, 
// 10B00..10B35; AVESTAN 6202 0x10B36, // 10B36..10B38; UNKNOWN 6203 0x10B39, // 10B39..10B3F; AVESTAN 6204 0x10B40, // 10B40..10B55; INSCRIPTIONAL_PARTHIAN 6205 0x10B56, // 10B56..10B57; UNKNOWN 6206 0x10B58, // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN 6207 0x10B60, // 10B60..10B72; INSCRIPTIONAL_PAHLAVI 6208 0x10B73, // 10B73..10B77; UNKNOWN 6209 0x10B78, // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI 6210 0x10B80, // 10B80..10B91; PSALTER_PAHLAVI 6211 0x10B92, // 10B92..10B98; UNKNOWN 6212 0x10B99, // 10B99..10B9C; PSALTER_PAHLAVI 6213 0x10B9D, // 10B9D..10BA8; UNKNOWN 6214 0x10BA9, // 10BA9..10BAF; PSALTER_PAHLAVI 6215 0x10BB0, // 10BB0..10BFF; UNKNOWN 6216 0x10C00, // 10C00..10C48; OLD_TURKIC 6217 0x10C49, // 10C49..10C7F; UNKNOWN 6218 0x10C80, // 10C80..10CB2; OLD_HUNGARIAN 6219 0x10CB3, // 10CB3..10CBF; UNKNOWN 6220 0x10CC0, // 10CC0..10CF2; OLD_HUNGARIAN 6221 0x10CF3, // 10CF3..10CF9; UNKNOWN 6222 0x10CFA, // 10CFA..10CFF; OLD_HUNGARIAN 6223 0x10D00, // 10D00..10D27; HANIFI_ROHINGYA 6224 0x10D28, // 10D28..10D2F; UNKNOWN 6225 0x10D30, // 10D30..10D39; HANIFI_ROHINGYA 6226 0x10D3A, // 10D3A..10E5F; UNKNOWN 6227 0x10E60, // 10E60..10E7E; ARABIC 6228 0x10E7F, // 10E7F ; UNKNOWN 6229 0x10E80, // 10E80..10EA9; YEZIDI 6230 0x10EAA, // 10EAA ; UNKNOWN 6231 0x10EAB, // 10EAB..10EAD; YEZIDI 6232 0x10EAE, // 10EAE..10EAF; UNKNOWN 6233 0x10EB0, // 10EB0..10EB1; YEZIDI 6234 0x10EB2, // 10EB2..10EFF; UNKNOWN 6235 0x10F00, // 10F00..10F27; OLD_SOGDIAN 6236 0x10F28, // 10F28..10F2F; UNKNOWN 6237 0x10F30, // 10F30..10F59; SOGDIAN 6238 0x10F5A, // 10F5A..10FAF; UNKNOWN 6239 0x10FB0, // 10FB0..10FCB; CHORASMIAN 6240 0x10FCC, // 10FCC..10FDF; UNKNOWN 6241 0x10FE0, // 10FE0..10FF6; ELYMAIC 6242 0x10FF7, // 10FF7..10FFF; UNKNOWN 6243 0x11000, // 11000..1104D; BRAHMI 6244 0x1104E, // 1104E..11051; UNKNOWN 6245 0x11052, // 11052..1106F; BRAHMI 6246 0x11070, // 11070..1107E; UNKNOWN 6247 0x1107F, // 1107F ; BRAHMI 6248 0x11080, // 11080..110C1; KAITHI 6249 0x110C2, // 110C2..110CC; UNKNOWN 
6250 0x110CD, // 110CD ; KAITHI 6251 0x110CE, // 110CE..110CF; UNKNOWN 6252 0x110D0, // 110D0..110E8; SORA_SOMPENG 6253 0x110E9, // 110E9..110EF; UNKNOWN 6254 0x110F0, // 110F0..110F9; SORA_SOMPENG 6255 0x110FA, // 110FA..110FF; UNKNOWN 6256 0x11100, // 11100..11134; CHAKMA 6257 0x11135, // 11135 ; UNKNOWN 6258 0x11136, // 11136..11147; CHAKMA 6259 0x11148, // 11148..1114F; UNKNOWN 6260 0x11150, // 11150..11176; MAHAJANI 6261 0x11177, // 11177..1117F; UNKNOWN 6262 0x11180, // 11180..111DF; SHARADA 6263 0x111E0, // 111E0 ; UNKNOWN 6264 0x111E1, // 111E1..111F4; SINHALA 6265 0x111F5, // 111F5..111FF; UNKNOWN 6266 0x11200, // 11200..11211; KHOJKI 6267 0x11212, // 11212 ; UNKNOWN 6268 0x11213, // 11213..1123E; KHOJKI 6269 0x1123F, // 1123F..1127F; UNKNOWN 6270 0x11280, // 11280..11286; MULTANI 6271 0x11287, // 11287 ; UNKNOWN 6272 0x11288, // 11288 ; MULTANI 6273 0x11289, // 11289 ; UNKNOWN 6274 0x1128A, // 1128A..1128D; MULTANI 6275 0x1128E, // 1128E ; UNKNOWN 6276 0x1128F, // 1128F..1129D; MULTANI 6277 0x1129E, // 1129E ; UNKNOWN 6278 0x1129F, // 1129F..112A9; MULTANI 6279 0x112AA, // 112AA..112AF; UNKNOWN 6280 0x112B0, // 112B0..112EA; KHUDAWADI 6281 0x112EB, // 112EB..112EF; UNKNOWN 6282 0x112F0, // 112F0..112F9; KHUDAWADI 6283 0x112FA, // 112FA..112FF; UNKNOWN 6284 0x11300, // 11300..11303; GRANTHA 6285 0x11304, // 11304 ; UNKNOWN 6286 0x11305, // 11305..1130C; GRANTHA 6287 0x1130D, // 1130D..1130E; UNKNOWN 6288 0x1130F, // 1130F..11310; GRANTHA 6289 0x11311, // 11311..11312; UNKNOWN 6290 0x11313, // 11313..11328; GRANTHA 6291 0x11329, // 11329 ; UNKNOWN 6292 0x1132A, // 1132A..11330; GRANTHA 6293 0x11331, // 11331 ; UNKNOWN 6294 0x11332, // 11332..11333; GRANTHA 6295 0x11334, // 11334 ; UNKNOWN 6296 0x11335, // 11335..11339; GRANTHA 6297 0x1133A, // 1133A ; UNKNOWN 6298 0x1133B, // 1133B ; INHERITED 6299 0x1133C, // 1133C..11344; GRANTHA 6300 0x11345, // 11345..11346; UNKNOWN 6301 0x11347, // 11347..11348; GRANTHA 6302 0x11349, // 11349..1134A; UNKNOWN 6303 
0x1134B, // 1134B..1134D; GRANTHA 6304 0x1134E, // 1134E..1134F; UNKNOWN 6305 0x11350, // 11350 ; GRANTHA 6306 0x11351, // 11351..11356; UNKNOWN 6307 0x11357, // 11357 ; GRANTHA 6308 0x11358, // 11358..1135C; UNKNOWN 6309 0x1135D, // 1135D..11363; GRANTHA 6310 0x11364, // 11364..11365; UNKNOWN 6311 0x11366, // 11366..1136C; GRANTHA 6312 0x1136D, // 1136D..1136F; UNKNOWN 6313 0x11370, // 11370..11374; GRANTHA 6314 0x11375, // 11375..113FF; UNKNOWN 6315 0x11400, // 11400..1145B; NEWA 6316 0x1145C, // 1145C ; UNKNOWN 6317 0x1145D, // 1145D..11461; NEWA 6318 0x11462, // 11462..1147F; UNKNOWN 6319 0x11480, // 11480..114C7; TIRHUTA 6320 0x114C8, // 114C8..114CF; UNKNOWN 6321 0x114D0, // 114D0..114D9; TIRHUTA 6322 0x114DA, // 114DA..1157F; UNKNOWN 6323 0x11580, // 11580..115B5; SIDDHAM 6324 0x115B6, // 115B6..115B7; UNKNOWN 6325 0x115B8, // 115B8..115DD; SIDDHAM 6326 0x115DE, // 115DE..115FF; UNKNOWN 6327 0x11600, // 11600..11644; MODI 6328 0x11645, // 11645..1164F; UNKNOWN 6329 0x11650, // 11650..11659; MODI 6330 0x1165A, // 1165A..1165F; UNKNOWN 6331 0x11660, // 11660..1166C; MONGOLIAN 6332 0x1166D, // 1166D..1167F; UNKNOWN 6333 0x11680, // 11680..116B8; TAKRI 6334 0x116B9, // 116B9..116BF; UNKNOWN 6335 0x116C0, // 116C0..116C9; TAKRI 6336 0x116CA, // 116CA..116FF; UNKNOWN 6337 0x11700, // 11700..1171A; AHOM 6338 0x1171B, // 1171B..1171C; UNKNOWN 6339 0x1171D, // 1171D..1172B; AHOM 6340 0x1172C, // 1172C..1172F; UNKNOWN 6341 0x11730, // 11730..1173F; AHOM 6342 0x11740, // 11740..117FF; UNKNOWN 6343 0x11800, // 11800..1183B; DOGRA 6344 0x1183C, // 1183C..1189F; UNKNOWN 6345 0x118A0, // 118A0..118F2; WARANG_CITI 6346 0x118F3, // 118F3..118FE; UNKNOWN 6347 0x118FF, // 118FF ; WARANG_CITI 6348 0x11900, // 11900..11906; DIVES_AKURU 6349 0x11907, // 11907..11908; UNKNOWN 6350 0x11909, // 11909 ; DIVES_AKURU 6351 0x1190A, // 1190A..1190B; UNKNOWN 6352 0x1190C, // 1190C..11913; DIVES_AKURU 6353 0x11914, // 11914 ; UNKNOWN 6354 0x11915, // 11915..11916; DIVES_AKURU 6355 0x11917, 
// 11917 ; UNKNOWN 6356 0x11918, // 11918..11935; DIVES_AKURU 6357 0x11936, // 11936 ; UNKNOWN 6358 0x11937, // 11937..11938; DIVES_AKURU 6359 0x11939, // 11939..1193A; UNKNOWN 6360 0x1193B, // 1193B..11946; DIVES_AKURU 6361 0x11947, // 11947..1194F; UNKNOWN 6362 0x11950, // 11950..11959; DIVES_AKURU 6363 0x1195A, // 1195A..1199F; UNKNOWN 6364 0x119A0, // 119A0..119A7; NANDINAGARI 6365 0x119A8, // 119A8..119A9; UNKNOWN 6366 0x119AA, // 119AA..119D7; NANDINAGARI 6367 0x119D8, // 119D8..119D9; UNKNOWN 6368 0x119DA, // 119DA..119E4; NANDINAGARI 6369 0x119E5, // 119E5..119FF; UNKNOWN 6370 0x11A00, // 11A00..11A47; ZANABAZAR_SQUARE 6371 0x11A48, // 11A48..11A4F; UNKNOWN 6372 0x11A50, // 11A50..11AA2; SOYOMBO 6373 0x11AA3, // 11AA3..11ABF; UNKNOWN 6374 0x11AC0, // 11AC0..11AF8; PAU_CIN_HAU 6375 0x11AF9, // 11AF9..11BFF; UNKNOWN 6376 0x11C00, // 11C00..11C08; BHAIKSUKI 6377 0x11C09, // 11C09 ; UNKNOWN 6378 0x11C0A, // 11C0A..11C36; BHAIKSUKI 6379 0x11C37, // 11C37 ; UNKNOWN 6380 0x11C38, // 11C38..11C45; BHAIKSUKI 6381 0x11C46, // 11C46..11C4F; UNKNOWN 6382 0x11C50, // 11C50..11C6C; BHAIKSUKI 6383 0x11C6D, // 11C6D..11C6F; UNKNOWN 6384 0x11C70, // 11C70..11C8F; MARCHEN 6385 0x11C90, // 11C90..11C91; UNKNOWN 6386 0x11C92, // 11C92..11CA7; MARCHEN 6387 0x11CA8, // 11CA8 ; UNKNOWN 6388 0x11CA9, // 11CA9..11CB6; MARCHEN 6389 0x11CB7, // 11CB7..11CFF; UNKNOWN 6390 0x11D00, // 11D00..11D06; MASARAM_GONDI 6391 0x11D07, // 11D07 ; UNKNOWN 6392 0x11D08, // 11D08..11D09; MASARAM_GONDI 6393 0x11D0A, // 11D0A ; UNKNOWN 6394 0x11D0B, // 11D0B..11D36; MASARAM_GONDI 6395 0x11D37, // 11D37..11D39; UNKNOWN 6396 0x11D3A, // 11D3A ; MASARAM_GONDI 6397 0x11D3B, // 11D3B ; UNKNOWN 6398 0x11D3C, // 11D3C..11D3D; MASARAM_GONDI 6399 0x11D3E, // 11D3E ; UNKNOWN 6400 0x11D3F, // 11D3F..11D47; MASARAM_GONDI 6401 0x11D48, // 11D48..11D4F; UNKNOWN 6402 0x11D50, // 11D50..11D59; MASARAM_GONDI 6403 0x11D5A, // 11D5A..11D5F; UNKNOWN 6404 0x11D60, // 11D60..11D65; GUNJALA_GONDI 6405 0x11D66, // 11D66 ; 
UNKNOWN 6406 0x11D67, // 11D67..11D68; GUNJALA_GONDI 6407 0x11D69, // 11D69 ; UNKNOWN 6408 0x11D6A, // 11D6A..11D8E; GUNJALA_GONDI 6409 0x11D8F, // 11D8F ; UNKNOWN 6410 0x11D90, // 11D90..11D91; GUNJALA_GONDI 6411 0x11D92, // 11D92 ; UNKNOWN 6412 0x11D93, // 11D93..11D98; GUNJALA_GONDI 6413 0x11D99, // 11D99..11D9F; UNKNOWN 6414 0x11DA0, // 11DA0..11DA9; GUNJALA_GONDI 6415 0x11DAA, // 11DAA..11EDF; UNKNOWN 6416 0x11EE0, // 11EE0..11EF8; MAKASAR 6417 0x11EF9, // 11EF9..11FAF; UNKNOWN 6418 0x11FB0, // 11FB0 ; LISU 6419 0x11FB1, // 11FB1..11FBF; UNKNOWN 6420 0x11FC0, // 11FC0..11FF1; TAMIL 6421 0x11FF2, // 11FF2..11FFE; UNKNOWN 6422 0x11FFF, // 11FFF ; TAMIL 6423 0x12000, // 12000..12399; CUNEIFORM 6424 0x1239A, // 1239A..123FF; UNKNOWN 6425 0x12400, // 12400..1246E; CUNEIFORM 6426 0x1246F, // 1246F ; UNKNOWN 6427 0x12470, // 12470..12474; CUNEIFORM 6428 0x12475, // 12475..1247F; UNKNOWN 6429 0x12480, // 12480..12543; CUNEIFORM 6430 0x12544, // 12544..12FFF; UNKNOWN 6431 0x13000, // 13000..1342E; EGYPTIAN_HIEROGLYPHS 6432 0x1342F, // 1342F ; UNKNOWN 6433 0x13430, // 13430..13438; EGYPTIAN_HIEROGLYPHS 6434 0x13439, // 13439..143FF; UNKNOWN 6435 0x14400, // 14400..14646; ANATOLIAN_HIEROGLYPHS 6436 0x14647, // 14647..167FF; UNKNOWN 6437 0x16800, // 16800..16A38; BAMUM 6438 0x16A39, // 16A39..16A3F; UNKNOWN 6439 0x16A40, // 16A40..16A5E; MRO 6440 0x16A5F, // 16A5F ; UNKNOWN 6441 0x16A60, // 16A60..16A69; MRO 6442 0x16A6A, // 16A6A..16A6D; UNKNOWN 6443 0x16A6E, // 16A6E..16A6F; MRO 6444 0x16A70, // 16A70..16ACF; UNKNOWN 6445 0x16AD0, // 16AD0..16AED; BASSA_VAH 6446 0x16AEE, // 16AEE..16AEF; UNKNOWN 6447 0x16AF0, // 16AF0..16AF5; BASSA_VAH 6448 0x16AF6, // 16AF6..16AFF; UNKNOWN 6449 0x16B00, // 16B00..16B45; PAHAWH_HMONG 6450 0x16B46, // 16B46..16B4F; UNKNOWN 6451 0x16B50, // 16B50..16B59; PAHAWH_HMONG 6452 0x16B5A, // 16B5A ; UNKNOWN 6453 0x16B5B, // 16B5B..16B61; PAHAWH_HMONG 6454 0x16B62, // 16B62 ; UNKNOWN 6455 0x16B63, // 16B63..16B77; PAHAWH_HMONG 6456 0x16B78, // 
16B78..16B7C; UNKNOWN 6457 0x16B7D, // 16B7D..16B8F; PAHAWH_HMONG 6458 0x16B90, // 16B90..16E3F; UNKNOWN 6459 0x16E40, // 16E40..16E9A; MEDEFAIDRIN 6460 0x16E9B, // 16E9B..16EFF; UNKNOWN 6461 0x16F00, // 16F00..16F4A; MIAO 6462 0x16F4B, // 16F4B..16F4E; UNKNOWN 6463 0x16F4F, // 16F4F..16F87; MIAO 6464 0x16F88, // 16F88..16F8E; UNKNOWN 6465 0x16F8F, // 16F8F..16F9F; MIAO 6466 0x16FA0, // 16FA0..16FDF; UNKNOWN 6467 0x16FE0, // 16FE0 ; TANGUT 6468 0x16FE1, // 16FE1 ; NUSHU 6469 0x16FE2, // 16FE2..16FE3; COMMON 6470 0x16FE4, // 16FE4 ; KHITAN_SMALL_SCRIPT 6471 0x16FE5, // 16FE5..16FEF; UNKNOWN 6472 0x16FF0, // 16FF0..16FF1; HAN 6473 0x16FF2, // 16FF2..16FFF; UNKNOWN 6474 0x17000, // 17000..187F7; TANGUT 6475 0x187F8, // 187F8..187FF; UNKNOWN 6476 0x18800, // 18800..18AFF; TANGUT 6477 0x18B00, // 18B00..18CD5; KHITAN_SMALL_SCRIPT 6478 0x18CD6, // 18CD6..18CFF; UNKNOWN 6479 0x18D00, // 18D00..18D08; TANGUT 6480 0x18D09, // 18D09..1AFFF; UNKNOWN 6481 0x1B000, // 1B000 ; KATAKANA 6482 0x1B001, // 1B001..1B11E; HIRAGANA 6483 0x1B11F, // 1B11F..1B14F; UNKNOWN 6484 0x1B150, // 1B150..1B152; HIRAGANA 6485 0x1B153, // 1B153..1B163; UNKNOWN 6486 0x1B164, // 1B164..1B167; KATAKANA 6487 0x1B168, // 1B168..1B16F; UNKNOWN 6488 0x1B170, // 1B170..1B2FB; NUSHU 6489 0x1B2FC, // 1B2FC..1BBFF; UNKNOWN 6490 0x1BC00, // 1BC00..1BC6A; DUPLOYAN 6491 0x1BC6B, // 1BC6B..1BC6F; UNKNOWN 6492 0x1BC70, // 1BC70..1BC7C; DUPLOYAN 6493 0x1BC7D, // 1BC7D..1BC7F; UNKNOWN 6494 0x1BC80, // 1BC80..1BC88; DUPLOYAN 6495 0x1BC89, // 1BC89..1BC8F; UNKNOWN 6496 0x1BC90, // 1BC90..1BC99; DUPLOYAN 6497 0x1BC9A, // 1BC9A..1BC9B; UNKNOWN 6498 0x1BC9C, // 1BC9C..1BC9F; DUPLOYAN 6499 0x1BCA0, // 1BCA0..1BCA3; COMMON 6500 0x1BCA4, // 1BCA4..1CFFF; UNKNOWN 6501 0x1D000, // 1D000..1D0F5; COMMON 6502 0x1D0F6, // 1D0F6..1D0FF; UNKNOWN 6503 0x1D100, // 1D100..1D126; COMMON 6504 0x1D127, // 1D127..1D128; UNKNOWN 6505 0x1D129, // 1D129..1D166; COMMON 6506 0x1D167, // 1D167..1D169; INHERITED 6507 0x1D16A, // 1D16A..1D17A; 
COMMON 6508 0x1D17B, // 1D17B..1D182; INHERITED 6509 0x1D183, // 1D183..1D184; COMMON 6510 0x1D185, // 1D185..1D18B; INHERITED 6511 0x1D18C, // 1D18C..1D1A9; COMMON 6512 0x1D1AA, // 1D1AA..1D1AD; INHERITED 6513 0x1D1AE, // 1D1AE..1D1E8; COMMON 6514 0x1D1E9, // 1D1E9..1D1FF; UNKNOWN 6515 0x1D200, // 1D200..1D245; GREEK 6516 0x1D246, // 1D246..1D2DF; UNKNOWN 6517 0x1D2E0, // 1D2E0..1D2F3; COMMON 6518 0x1D2F4, // 1D2F4..1D2FF; UNKNOWN 6519 0x1D300, // 1D300..1D356; COMMON 6520 0x1D357, // 1D357..1D35F; UNKNOWN 6521 0x1D360, // 1D360..1D378; COMMON 6522 0x1D379, // 1D379..1D3FF; UNKNOWN 6523 0x1D400, // 1D400..1D454; COMMON 6524 0x1D455, // 1D455 ; UNKNOWN 6525 0x1D456, // 1D456..1D49C; COMMON 6526 0x1D49D, // 1D49D ; UNKNOWN 6527 0x1D49E, // 1D49E..1D49F; COMMON 6528 0x1D4A0, // 1D4A0..1D4A1; UNKNOWN 6529 0x1D4A2, // 1D4A2 ; COMMON 6530 0x1D4A3, // 1D4A3..1D4A4; UNKNOWN 6531 0x1D4A5, // 1D4A5..1D4A6; COMMON 6532 0x1D4A7, // 1D4A7..1D4A8; UNKNOWN 6533 0x1D4A9, // 1D4A9..1D4AC; COMMON 6534 0x1D4AD, // 1D4AD ; UNKNOWN 6535 0x1D4AE, // 1D4AE..1D4B9; COMMON 6536 0x1D4BA, // 1D4BA ; UNKNOWN 6537 0x1D4BB, // 1D4BB ; COMMON 6538 0x1D4BC, // 1D4BC ; UNKNOWN 6539 0x1D4BD, // 1D4BD..1D4C3; COMMON 6540 0x1D4C4, // 1D4C4 ; UNKNOWN 6541 0x1D4C5, // 1D4C5..1D505; COMMON 6542 0x1D506, // 1D506 ; UNKNOWN 6543 0x1D507, // 1D507..1D50A; COMMON 6544 0x1D50B, // 1D50B..1D50C; UNKNOWN 6545 0x1D50D, // 1D50D..1D514; COMMON 6546 0x1D515, // 1D515 ; UNKNOWN 6547 0x1D516, // 1D516..1D51C; COMMON 6548 0x1D51D, // 1D51D ; UNKNOWN 6549 0x1D51E, // 1D51E..1D539; COMMON 6550 0x1D53A, // 1D53A ; UNKNOWN 6551 0x1D53B, // 1D53B..1D53E; COMMON 6552 0x1D53F, // 1D53F ; UNKNOWN 6553 0x1D540, // 1D540..1D544; COMMON 6554 0x1D545, // 1D545 ; UNKNOWN 6555 0x1D546, // 1D546 ; COMMON 6556 0x1D547, // 1D547..1D549; UNKNOWN 6557 0x1D54A, // 1D54A..1D550; COMMON 6558 0x1D551, // 1D551 ; UNKNOWN 6559 0x1D552, // 1D552..1D6A5; COMMON 6560 0x1D6A6, // 1D6A6..1D6A7; UNKNOWN 6561 0x1D6A8, // 1D6A8..1D7CB; COMMON 6562 
0x1D7CC, // 1D7CC..1D7CD; UNKNOWN 6563 0x1D7CE, // 1D7CE..1D7FF; COMMON 6564 0x1D800, // 1D800..1DA8B; SIGNWRITING 6565 0x1DA8C, // 1DA8C..1DA9A; UNKNOWN 6566 0x1DA9B, // 1DA9B..1DA9F; SIGNWRITING 6567 0x1DAA0, // 1DAA0 ; UNKNOWN 6568 0x1DAA1, // 1DAA1..1DAAF; SIGNWRITING 6569 0x1DAB0, // 1DAB0..1DFFF; UNKNOWN 6570 0x1E000, // 1E000..1E006; GLAGOLITIC 6571 0x1E007, // 1E007 ; UNKNOWN 6572 0x1E008, // 1E008..1E018; GLAGOLITIC 6573 0x1E019, // 1E019..1E01A; UNKNOWN 6574 0x1E01B, // 1E01B..1E021; GLAGOLITIC 6575 0x1E022, // 1E022 ; UNKNOWN 6576 0x1E023, // 1E023..1E024; GLAGOLITIC 6577 0x1E025, // 1E025 ; UNKNOWN 6578 0x1E026, // 1E026..1E02A; GLAGOLITIC 6579 0x1E02B, // 1E02B..1E0FF; UNKNOWN 6580 0x1E100, // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG 6581 0x1E12D, // 1E12D..1E12F; UNKNOWN 6582 0x1E130, // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG 6583 0x1E13E, // 1E13E..1E13F; UNKNOWN 6584 0x1E140, // 1E140..1E149; NYIAKENG_PUACHUE_HMONG 6585 0x1E14A, // 1E14A..1E14D; UNKNOWN 6586 0x1E14E, // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG 6587 0x1E150, // 1E150..1E2BF; UNKNOWN 6588 0x1E2C0, // 1E2C0..1E2F9; WANCHO 6589 0x1E2FA, // 1E2FA..1E2FE; UNKNOWN 6590 0x1E2FF, // 1E2FF ; WANCHO 6591 0x1E300, // 1E300..1E7FF; UNKNOWN 6592 0x1E800, // 1E800..1E8C4; MENDE_KIKAKUI 6593 0x1E8C5, // 1E8C5..1E8C6; UNKNOWN 6594 0x1E8C7, // 1E8C7..1E8D6; MENDE_KIKAKUI 6595 0x1E8D7, // 1E8D7..1E8FF; UNKNOWN 6596 0x1E900, // 1E900..1E94B; ADLAM 6597 0x1E94C, // 1E94C..1E94F; UNKNOWN 6598 0x1E950, // 1E950..1E959; ADLAM 6599 0x1E95A, // 1E95A..1E95D; UNKNOWN 6600 0x1E95E, // 1E95E..1E95F; ADLAM 6601 0x1E960, // 1E960..1EC70; UNKNOWN 6602 0x1EC71, // 1EC71..1ECB4; COMMON 6603 0x1ECB5, // 1ECB5..1ED00; UNKNOWN 6604 0x1ED01, // 1ED01..1ED3D; COMMON 6605 0x1ED3E, // 1ED3E..1EDFF; UNKNOWN 6606 0x1EE00, // 1EE00..1EE03; ARABIC 6607 0x1EE04, // 1EE04 ; UNKNOWN 6608 0x1EE05, // 1EE05..1EE1F; ARABIC 6609 0x1EE20, // 1EE20 ; UNKNOWN 6610 0x1EE21, // 1EE21..1EE22; ARABIC 6611 0x1EE23, // 1EE23 ; UNKNOWN 6612 0x1EE24, // 
1EE24 ; ARABIC 6613 0x1EE25, // 1EE25..1EE26; UNKNOWN 6614 0x1EE27, // 1EE27 ; ARABIC 6615 0x1EE28, // 1EE28 ; UNKNOWN 6616 0x1EE29, // 1EE29..1EE32; ARABIC 6617 0x1EE33, // 1EE33 ; UNKNOWN 6618 0x1EE34, // 1EE34..1EE37; ARABIC 6619 0x1EE38, // 1EE38 ; UNKNOWN 6620 0x1EE39, // 1EE39 ; ARABIC 6621 0x1EE3A, // 1EE3A ; UNKNOWN 6622 0x1EE3B, // 1EE3B ; ARABIC 6623 0x1EE3C, // 1EE3C..1EE41; UNKNOWN 6624 0x1EE42, // 1EE42 ; ARABIC 6625 0x1EE43, // 1EE43..1EE46; UNKNOWN 6626 0x1EE47, // 1EE47 ; ARABIC 6627 0x1EE48, // 1EE48 ; UNKNOWN 6628 0x1EE49, // 1EE49 ; ARABIC 6629 0x1EE4A, // 1EE4A ; UNKNOWN 6630 0x1EE4B, // 1EE4B ; ARABIC 6631 0x1EE4C, // 1EE4C ; UNKNOWN 6632 0x1EE4D, // 1EE4D..1EE4F; ARABIC 6633 0x1EE50, // 1EE50 ; UNKNOWN 6634 0x1EE51, // 1EE51..1EE52; ARABIC 6635 0x1EE53, // 1EE53 ; UNKNOWN 6636 0x1EE54, // 1EE54 ; ARABIC 6637 0x1EE55, // 1EE55..1EE56; UNKNOWN 6638 0x1EE57, // 1EE57 ; ARABIC 6639 0x1EE58, // 1EE58 ; UNKNOWN 6640 0x1EE59, // 1EE59 ; ARABIC 6641 0x1EE5A, // 1EE5A ; UNKNOWN 6642 0x1EE5B, // 1EE5B ; ARABIC 6643 0x1EE5C, // 1EE5C ; UNKNOWN 6644 0x1EE5D, // 1EE5D ; ARABIC 6645 0x1EE5E, // 1EE5E ; UNKNOWN 6646 0x1EE5F, // 1EE5F ; ARABIC 6647 0x1EE60, // 1EE60 ; UNKNOWN 6648 0x1EE61, // 1EE61..1EE62; ARABIC 6649 0x1EE63, // 1EE63 ; UNKNOWN 6650 0x1EE64, // 1EE64 ; ARABIC 6651 0x1EE65, // 1EE65..1EE66; UNKNOWN 6652 0x1EE67, // 1EE67..1EE6A; ARABIC 6653 0x1EE6B, // 1EE6B ; UNKNOWN 6654 0x1EE6C, // 1EE6C..1EE72; ARABIC 6655 0x1EE73, // 1EE73 ; UNKNOWN 6656 0x1EE74, // 1EE74..1EE77; ARABIC 6657 0x1EE78, // 1EE78 ; UNKNOWN 6658 0x1EE79, // 1EE79..1EE7C; ARABIC 6659 0x1EE7D, // 1EE7D ; UNKNOWN 6660 0x1EE7E, // 1EE7E ; ARABIC 6661 0x1EE7F, // 1EE7F ; UNKNOWN 6662 0x1EE80, // 1EE80..1EE89; ARABIC 6663 0x1EE8A, // 1EE8A ; UNKNOWN 6664 0x1EE8B, // 1EE8B..1EE9B; ARABIC 6665 0x1EE9C, // 1EE9C..1EEA0; UNKNOWN 6666 0x1EEA1, // 1EEA1..1EEA3; ARABIC 6667 0x1EEA4, // 1EEA4 ; UNKNOWN 6668 0x1EEA5, // 1EEA5..1EEA9; ARABIC 6669 0x1EEAA, // 1EEAA ; UNKNOWN 6670 0x1EEAB, // 
1EEAB..1EEBB; ARABIC 6671 0x1EEBC, // 1EEBC..1EEEF; UNKNOWN 6672 0x1EEF0, // 1EEF0..1EEF1; ARABIC 6673 0x1EEF2, // 1EEF2..1EFFF; UNKNOWN 6674 0x1F000, // 1F000..1F02B; COMMON 6675 0x1F02C, // 1F02C..1F02F; UNKNOWN 6676 0x1F030, // 1F030..1F093; COMMON 6677 0x1F094, // 1F094..1F09F; UNKNOWN 6678 0x1F0A0, // 1F0A0..1F0AE; COMMON 6679 0x1F0AF, // 1F0AF..1F0B0; UNKNOWN 6680 0x1F0B1, // 1F0B1..1F0BF; COMMON 6681 0x1F0C0, // 1F0C0 ; UNKNOWN 6682 0x1F0C1, // 1F0C1..1F0CF; COMMON 6683 0x1F0D0, // 1F0D0 ; UNKNOWN 6684 0x1F0D1, // 1F0D1..1F0F5; COMMON 6685 0x1F0F6, // 1F0F6..1F0FF; UNKNOWN 6686 0x1F100, // 1F100..1F1AD; COMMON 6687 0x1F1AE, // 1F1AE..1F1E5; UNKNOWN 6688 0x1F1E6, // 1F1E6..1F1FF; COMMON 6689 0x1F200, // 1F200 ; HIRAGANA 6690 0x1F201, // 1F201..1F202; COMMON 6691 0x1F203, // 1F203..1F20F; UNKNOWN 6692 0x1F210, // 1F210..1F23B; COMMON 6693 0x1F23C, // 1F23C..1F23F; UNKNOWN 6694 0x1F240, // 1F240..1F248; COMMON 6695 0x1F249, // 1F249..1F24F; UNKNOWN 6696 0x1F250, // 1F250..1F251; COMMON 6697 0x1F252, // 1F252..1F25F; UNKNOWN 6698 0x1F260, // 1F260..1F265; COMMON 6699 0x1F266, // 1F266..1F2FF; UNKNOWN 6700 0x1F300, // 1F300..1F6D7; COMMON 6701 0x1F6D8, // 1F6D8..1F6DF; UNKNOWN 6702 0x1F6E0, // 1F6E0..1F6EC; COMMON 6703 0x1F6ED, // 1F6ED..1F6EF; UNKNOWN 6704 0x1F6F0, // 1F6F0..1F6FC; COMMON 6705 0x1F6FD, // 1F6FD..1F6FF; UNKNOWN 6706 0x1F700, // 1F700..1F773; COMMON 6707 0x1F774, // 1F774..1F77F; UNKNOWN 6708 0x1F780, // 1F780..1F7D8; COMMON 6709 0x1F7D9, // 1F7D9..1F7DF; UNKNOWN 6710 0x1F7E0, // 1F7E0..1F7EB; COMMON 6711 0x1F7EC, // 1F7EC..1F7FF; UNKNOWN 6712 0x1F800, // 1F800..1F80B; COMMON 6713 0x1F80C, // 1F80C..1F80F; UNKNOWN 6714 0x1F810, // 1F810..1F847; COMMON 6715 0x1F848, // 1F848..1F84F; UNKNOWN 6716 0x1F850, // 1F850..1F859; COMMON 6717 0x1F85A, // 1F85A..1F85F; UNKNOWN 6718 0x1F860, // 1F860..1F887; COMMON 6719 0x1F888, // 1F888..1F88F; UNKNOWN 6720 0x1F890, // 1F890..1F8AD; COMMON 6721 0x1F8AE, // 1F8AE..1F8AF; UNKNOWN 6722 0x1F8B0, // 1F8B0..1F8B1; 
COMMON 6723 0x1F8B2, // 1F8B2..1F8FF; UNKNOWN 6724 0x1F900, // 1F900..1F978; COMMON 6725 0x1F979, // 1F979 ; UNKNOWN 6726 0x1F97A, // 1F97A..1F9CB; COMMON 6727 0x1F9CC, // 1F9CC ; UNKNOWN 6728 0x1F9CD, // 1F9CD..1FA53; COMMON 6729 0x1FA54, // 1FA54..1FA5F; UNKNOWN 6730 0x1FA60, // 1FA60..1FA6D; COMMON 6731 0x1FA6E, // 1FA6E..1FA6F; UNKNOWN 6732 0x1FA70, // 1FA70..1FA74; COMMON 6733 0x1FA75, // 1FA75..1FA77; UNKNOWN 6734 0x1FA78, // 1FA78..1FA7A; COMMON 6735 0x1FA7B, // 1FA7B..1FA7F; UNKNOWN 6736 0x1FA80, // 1FA80..1FA86; COMMON 6737 0x1FA87, // 1FA87..1FA8F; UNKNOWN 6738 0x1FA90, // 1FA90..1FAA8; COMMON 6739 0x1FAA9, // 1FAA9..1FAAF; UNKNOWN 6740 0x1FAB0, // 1FAB0..1FAB6; COMMON 6741 0x1FAB7, // 1FAB7..1FABF; UNKNOWN 6742 0x1FAC0, // 1FAC0..1FAC2; COMMON 6743 0x1FAC3, // 1FAC3..1FACF; UNKNOWN 6744 0x1FAD0, // 1FAD0..1FAD6; COMMON 6745 0x1FAD7, // 1FAD7..1FAFF; UNKNOWN 6746 0x1FB00, // 1FB00..1FB92; COMMON 6747 0x1FB93, // 1FB93 ; UNKNOWN 6748 0x1FB94, // 1FB94..1FBCA; COMMON 6749 0x1FBCB, // 1FBCB..1FBEF; UNKNOWN 6750 0x1FBF0, // 1FBF0..1FBF9; COMMON 6751 0x1FBFA, // 1FBFA..1FFFF; UNKNOWN 6752 0x20000, // 20000..2A6DD; HAN 6753 0x2A6DE, // 2A6DE..2A6FF; UNKNOWN 6754 0x2A700, // 2A700..2B734; HAN 6755 0x2B735, // 2B735..2B73F; UNKNOWN 6756 0x2B740, // 2B740..2B81D; HAN 6757 0x2B81E, // 2B81E..2B81F; UNKNOWN 6758 0x2B820, // 2B820..2CEA1; HAN 6759 0x2CEA2, // 2CEA2..2CEAF; UNKNOWN 6760 0x2CEB0, // 2CEB0..2EBE0; HAN 6761 0x2EBE1, // 2EBE1..2F7FF; UNKNOWN 6762 0x2F800, // 2F800..2FA1D; HAN 6763 0x2FA1E, // 2FA1E..2FFFF; UNKNOWN 6764 0x30000, // 30000..3134A; HAN 6765 0x3134B, // 3134B..E0000; UNKNOWN 6766 0xE0001, // E0001 ; COMMON 6767 0xE0002, // E0002..E001F; UNKNOWN 6768 0xE0020, // E0020..E007F; COMMON 6769 0xE0080, // E0080..E00FF; UNKNOWN 6770 0xE0100, // E0100..E01EF; INHERITED 6771 0xE01F0, // E01F0..10FFFF; UNKNOWN 6772 }; 6773 6774 private static final UnicodeScript[] scripts = { 6775 COMMON, // 0000..0040 6776 LATIN, // 0041..005A 6777 COMMON, // 005B..0060 
6778 LATIN, // 0061..007A 6779 COMMON, // 007B..00A9 6780 LATIN, // 00AA 6781 COMMON, // 00AB..00B9 6782 LATIN, // 00BA 6783 COMMON, // 00BB..00BF 6784 LATIN, // 00C0..00D6 6785 COMMON, // 00D7 6786 LATIN, // 00D8..00F6 6787 COMMON, // 00F7 6788 LATIN, // 00F8..02B8 6789 COMMON, // 02B9..02DF 6790 LATIN, // 02E0..02E4 6791 COMMON, // 02E5..02E9 6792 BOPOMOFO, // 02EA..02EB 6793 COMMON, // 02EC..02FF 6794 INHERITED, // 0300..036F 6795 GREEK, // 0370..0373 6796 COMMON, // 0374 6797 GREEK, // 0375..0377 6798 UNKNOWN, // 0378..0379 6799 GREEK, // 037A..037D 6800 COMMON, // 037E 6801 GREEK, // 037F 6802 UNKNOWN, // 0380..0383 6803 GREEK, // 0384 6804 COMMON, // 0385 6805 GREEK, // 0386 6806 COMMON, // 0387 6807 GREEK, // 0388..038A 6808 UNKNOWN, // 038B 6809 GREEK, // 038C 6810 UNKNOWN, // 038D 6811 GREEK, // 038E..03A1 6812 UNKNOWN, // 03A2 6813 GREEK, // 03A3..03E1 6814 COPTIC, // 03E2..03EF 6815 GREEK, // 03F0..03FF 6816 CYRILLIC, // 0400..0484 6817 INHERITED, // 0485..0486 6818 CYRILLIC, // 0487..052F 6819 UNKNOWN, // 0530 6820 ARMENIAN, // 0531..0556 6821 UNKNOWN, // 0557..0558 6822 ARMENIAN, // 0559..058A 6823 UNKNOWN, // 058B..058C 6824 ARMENIAN, // 058D..058F 6825 UNKNOWN, // 0590 6826 HEBREW, // 0591..05C7 6827 UNKNOWN, // 05C8..05CF 6828 HEBREW, // 05D0..05EA 6829 UNKNOWN, // 05EB..05EE 6830 HEBREW, // 05EF..05F4 6831 UNKNOWN, // 05F5..05FF 6832 ARABIC, // 0600..0604 6833 COMMON, // 0605 6834 ARABIC, // 0606..060B 6835 COMMON, // 060C 6836 ARABIC, // 060D..061A 6837 COMMON, // 061B 6838 ARABIC, // 061C 6839 UNKNOWN, // 061D 6840 ARABIC, // 061E 6841 COMMON, // 061F 6842 ARABIC, // 0620..063F 6843 COMMON, // 0640 6844 ARABIC, // 0641..064A 6845 INHERITED, // 064B..0655 6846 ARABIC, // 0656..066F 6847 INHERITED, // 0670 6848 ARABIC, // 0671..06DC 6849 COMMON, // 06DD 6850 ARABIC, // 06DE..06FF 6851 SYRIAC, // 0700..070D 6852 UNKNOWN, // 070E 6853 SYRIAC, // 070F..074A 6854 UNKNOWN, // 074B..074C 6855 SYRIAC, // 074D..074F 6856 ARABIC, // 0750..077F 6857 THAANA, 
// 0780..07B1 6858 UNKNOWN, // 07B2..07BF 6859 NKO, // 07C0..07FA 6860 UNKNOWN, // 07FB..07FC 6861 NKO, // 07FD..07FF 6862 SAMARITAN, // 0800..082D 6863 UNKNOWN, // 082E..082F 6864 SAMARITAN, // 0830..083E 6865 UNKNOWN, // 083F 6866 MANDAIC, // 0840..085B 6867 UNKNOWN, // 085C..085D 6868 MANDAIC, // 085E 6869 UNKNOWN, // 085F 6870 SYRIAC, // 0860..086A 6871 UNKNOWN, // 086B..089F 6872 ARABIC, // 08A0..08B4 6873 UNKNOWN, // 08B5 6874 ARABIC, // 08B6..08C7 6875 UNKNOWN, // 08C8..08D2 6876 ARABIC, // 08D3..08E1 6877 COMMON, // 08E2 6878 ARABIC, // 08E3..08FF 6879 DEVANAGARI, // 0900..0950 6880 INHERITED, // 0951..0954 6881 DEVANAGARI, // 0955..0963 6882 COMMON, // 0964..0965 6883 DEVANAGARI, // 0966..097F 6884 BENGALI, // 0980..0983 6885 UNKNOWN, // 0984 6886 BENGALI, // 0985..098C 6887 UNKNOWN, // 098D..098E 6888 BENGALI, // 098F..0990 6889 UNKNOWN, // 0991..0992 6890 BENGALI, // 0993..09A8 6891 UNKNOWN, // 09A9 6892 BENGALI, // 09AA..09B0 6893 UNKNOWN, // 09B1 6894 BENGALI, // 09B2 6895 UNKNOWN, // 09B3..09B5 6896 BENGALI, // 09B6..09B9 6897 UNKNOWN, // 09BA..09BB 6898 BENGALI, // 09BC..09C4 6899 UNKNOWN, // 09C5..09C6 6900 BENGALI, // 09C7..09C8 6901 UNKNOWN, // 09C9..09CA 6902 BENGALI, // 09CB..09CE 6903 UNKNOWN, // 09CF..09D6 6904 BENGALI, // 09D7 6905 UNKNOWN, // 09D8..09DB 6906 BENGALI, // 09DC..09DD 6907 UNKNOWN, // 09DE 6908 BENGALI, // 09DF..09E3 6909 UNKNOWN, // 09E4..09E5 6910 BENGALI, // 09E6..09FE 6911 UNKNOWN, // 09FF..0A00 6912 GURMUKHI, // 0A01..0A03 6913 UNKNOWN, // 0A04 6914 GURMUKHI, // 0A05..0A0A 6915 UNKNOWN, // 0A0B..0A0E 6916 GURMUKHI, // 0A0F..0A10 6917 UNKNOWN, // 0A11..0A12 6918 GURMUKHI, // 0A13..0A28 6919 UNKNOWN, // 0A29 6920 GURMUKHI, // 0A2A..0A30 6921 UNKNOWN, // 0A31 6922 GURMUKHI, // 0A32..0A33 6923 UNKNOWN, // 0A34 6924 GURMUKHI, // 0A35..0A36 6925 UNKNOWN, // 0A37 6926 GURMUKHI, // 0A38..0A39 6927 UNKNOWN, // 0A3A..0A3B 6928 GURMUKHI, // 0A3C 6929 UNKNOWN, // 0A3D 6930 GURMUKHI, // 0A3E..0A42 6931 UNKNOWN, // 0A43..0A46 6932 
GURMUKHI, // 0A47..0A48 6933 UNKNOWN, // 0A49..0A4A 6934 GURMUKHI, // 0A4B..0A4D 6935 UNKNOWN, // 0A4E..0A50 6936 GURMUKHI, // 0A51 6937 UNKNOWN, // 0A52..0A58 6938 GURMUKHI, // 0A59..0A5C 6939 UNKNOWN, // 0A5D 6940 GURMUKHI, // 0A5E 6941 UNKNOWN, // 0A5F..0A65 6942 GURMUKHI, // 0A66..0A76 6943 UNKNOWN, // 0A77..0A80 6944 GUJARATI, // 0A81..0A83 6945 UNKNOWN, // 0A84 6946 GUJARATI, // 0A85..0A8D 6947 UNKNOWN, // 0A8E 6948 GUJARATI, // 0A8F..0A91 6949 UNKNOWN, // 0A92 6950 GUJARATI, // 0A93..0AA8 6951 UNKNOWN, // 0AA9 6952 GUJARATI, // 0AAA..0AB0 6953 UNKNOWN, // 0AB1 6954 GUJARATI, // 0AB2..0AB3 6955 UNKNOWN, // 0AB4 6956 GUJARATI, // 0AB5..0AB9 6957 UNKNOWN, // 0ABA..0ABB 6958 GUJARATI, // 0ABC..0AC5 6959 UNKNOWN, // 0AC6 6960 GUJARATI, // 0AC7..0AC9 6961 UNKNOWN, // 0ACA 6962 GUJARATI, // 0ACB..0ACD 6963 UNKNOWN, // 0ACE..0ACF 6964 GUJARATI, // 0AD0 6965 UNKNOWN, // 0AD1..0ADF 6966 GUJARATI, // 0AE0..0AE3 6967 UNKNOWN, // 0AE4..0AE5 6968 GUJARATI, // 0AE6..0AF1 6969 UNKNOWN, // 0AF2..0AF8 6970 GUJARATI, // 0AF9..0AFF 6971 UNKNOWN, // 0B00 6972 ORIYA, // 0B01..0B03 6973 UNKNOWN, // 0B04 6974 ORIYA, // 0B05..0B0C 6975 UNKNOWN, // 0B0D..0B0E 6976 ORIYA, // 0B0F..0B10 6977 UNKNOWN, // 0B11..0B12 6978 ORIYA, // 0B13..0B28 6979 UNKNOWN, // 0B29 6980 ORIYA, // 0B2A..0B30 6981 UNKNOWN, // 0B31 6982 ORIYA, // 0B32..0B33 6983 UNKNOWN, // 0B34 6984 ORIYA, // 0B35..0B39 6985 UNKNOWN, // 0B3A..0B3B 6986 ORIYA, // 0B3C..0B44 6987 UNKNOWN, // 0B45..0B46 6988 ORIYA, // 0B47..0B48 6989 UNKNOWN, // 0B49..0B4A 6990 ORIYA, // 0B4B..0B4D 6991 UNKNOWN, // 0B4E..0B54 6992 ORIYA, // 0B55..0B57 6993 UNKNOWN, // 0B58..0B5B 6994 ORIYA, // 0B5C..0B5D 6995 UNKNOWN, // 0B5E 6996 ORIYA, // 0B5F..0B63 6997 UNKNOWN, // 0B64..0B65 6998 ORIYA, // 0B66..0B77 6999 UNKNOWN, // 0B78..0B81 7000 TAMIL, // 0B82..0B83 7001 UNKNOWN, // 0B84 7002 TAMIL, // 0B85..0B8A 7003 UNKNOWN, // 0B8B..0B8D 7004 TAMIL, // 0B8E..0B90 7005 UNKNOWN, // 0B91 7006 TAMIL, // 0B92..0B95 7007 UNKNOWN, // 0B96..0B98 7008 TAMIL, 
// 0B99..0B9A 7009 UNKNOWN, // 0B9B 7010 TAMIL, // 0B9C 7011 UNKNOWN, // 0B9D 7012 TAMIL, // 0B9E..0B9F 7013 UNKNOWN, // 0BA0..0BA2 7014 TAMIL, // 0BA3..0BA4 7015 UNKNOWN, // 0BA5..0BA7 7016 TAMIL, // 0BA8..0BAA 7017 UNKNOWN, // 0BAB..0BAD 7018 TAMIL, // 0BAE..0BB9 7019 UNKNOWN, // 0BBA..0BBD 7020 TAMIL, // 0BBE..0BC2 7021 UNKNOWN, // 0BC3..0BC5 7022 TAMIL, // 0BC6..0BC8 7023 UNKNOWN, // 0BC9 7024 TAMIL, // 0BCA..0BCD 7025 UNKNOWN, // 0BCE..0BCF 7026 TAMIL, // 0BD0 7027 UNKNOWN, // 0BD1..0BD6 7028 TAMIL, // 0BD7 7029 UNKNOWN, // 0BD8..0BE5 7030 TAMIL, // 0BE6..0BFA 7031 UNKNOWN, // 0BFB..0BFF 7032 TELUGU, // 0C00..0C0C 7033 UNKNOWN, // 0C0D 7034 TELUGU, // 0C0E..0C10 7035 UNKNOWN, // 0C11 7036 TELUGU, // 0C12..0C28 7037 UNKNOWN, // 0C29 7038 TELUGU, // 0C2A..0C39 7039 UNKNOWN, // 0C3A..0C3C 7040 TELUGU, // 0C3D..0C44 7041 UNKNOWN, // 0C45 7042 TELUGU, // 0C46..0C48 7043 UNKNOWN, // 0C49 7044 TELUGU, // 0C4A..0C4D 7045 UNKNOWN, // 0C4E..0C54 7046 TELUGU, // 0C55..0C56 7047 UNKNOWN, // 0C57 7048 TELUGU, // 0C58..0C5A 7049 UNKNOWN, // 0C5B..0C5F 7050 TELUGU, // 0C60..0C63 7051 UNKNOWN, // 0C64..0C65 7052 TELUGU, // 0C66..0C6F 7053 UNKNOWN, // 0C70..0C76 7054 TELUGU, // 0C77..0C7F 7055 KANNADA, // 0C80..0C8C 7056 UNKNOWN, // 0C8D 7057 KANNADA, // 0C8E..0C90 7058 UNKNOWN, // 0C91 7059 KANNADA, // 0C92..0CA8 7060 UNKNOWN, // 0CA9 7061 KANNADA, // 0CAA..0CB3 7062 UNKNOWN, // 0CB4 7063 KANNADA, // 0CB5..0CB9 7064 UNKNOWN, // 0CBA..0CBB 7065 KANNADA, // 0CBC..0CC4 7066 UNKNOWN, // 0CC5 7067 KANNADA, // 0CC6..0CC8 7068 UNKNOWN, // 0CC9 7069 KANNADA, // 0CCA..0CCD 7070 UNKNOWN, // 0CCE..0CD4 7071 KANNADA, // 0CD5..0CD6 7072 UNKNOWN, // 0CD7..0CDD 7073 KANNADA, // 0CDE 7074 UNKNOWN, // 0CDF 7075 KANNADA, // 0CE0..0CE3 7076 UNKNOWN, // 0CE4..0CE5 7077 KANNADA, // 0CE6..0CEF 7078 UNKNOWN, // 0CF0 7079 KANNADA, // 0CF1..0CF2 7080 UNKNOWN, // 0CF3..0CFF 7081 MALAYALAM, // 0D00..0D0C 7082 UNKNOWN, // 0D0D 7083 MALAYALAM, // 0D0E..0D10 7084 UNKNOWN, // 0D11 7085 MALAYALAM, // 
0D12..0D44 7086 UNKNOWN, // 0D45 7087 MALAYALAM, // 0D46..0D48 7088 UNKNOWN, // 0D49 7089 MALAYALAM, // 0D4A..0D4F 7090 UNKNOWN, // 0D50..0D53 7091 MALAYALAM, // 0D54..0D63 7092 UNKNOWN, // 0D64..0D65 7093 MALAYALAM, // 0D66..0D7F 7094 UNKNOWN, // 0D80 7095 SINHALA, // 0D81..0D83 7096 UNKNOWN, // 0D84 7097 SINHALA, // 0D85..0D96 7098 UNKNOWN, // 0D97..0D99 7099 SINHALA, // 0D9A..0DB1 7100 UNKNOWN, // 0DB2 7101 SINHALA, // 0DB3..0DBB 7102 UNKNOWN, // 0DBC 7103 SINHALA, // 0DBD 7104 UNKNOWN, // 0DBE..0DBF 7105 SINHALA, // 0DC0..0DC6 7106 UNKNOWN, // 0DC7..0DC9 7107 SINHALA, // 0DCA 7108 UNKNOWN, // 0DCB..0DCE 7109 SINHALA, // 0DCF..0DD4 7110 UNKNOWN, // 0DD5 7111 SINHALA, // 0DD6 7112 UNKNOWN, // 0DD7 7113 SINHALA, // 0DD8..0DDF 7114 UNKNOWN, // 0DE0..0DE5 7115 SINHALA, // 0DE6..0DEF 7116 UNKNOWN, // 0DF0..0DF1 7117 SINHALA, // 0DF2..0DF4 7118 UNKNOWN, // 0DF5..0E00 7119 THAI, // 0E01..0E3A 7120 UNKNOWN, // 0E3B..0E3E 7121 COMMON, // 0E3F 7122 THAI, // 0E40..0E5B 7123 UNKNOWN, // 0E5C..0E80 7124 LAO, // 0E81..0E82 7125 UNKNOWN, // 0E83 7126 LAO, // 0E84 7127 UNKNOWN, // 0E85 7128 LAO, // 0E86..0E8A 7129 UNKNOWN, // 0E8B 7130 LAO, // 0E8C..0EA3 7131 UNKNOWN, // 0EA4 7132 LAO, // 0EA5 7133 UNKNOWN, // 0EA6 7134 LAO, // 0EA7..0EBD 7135 UNKNOWN, // 0EBE..0EBF 7136 LAO, // 0EC0..0EC4 7137 UNKNOWN, // 0EC5 7138 LAO, // 0EC6 7139 UNKNOWN, // 0EC7 7140 LAO, // 0EC8..0ECD 7141 UNKNOWN, // 0ECE..0ECF 7142 LAO, // 0ED0..0ED9 7143 UNKNOWN, // 0EDA..0EDB 7144 LAO, // 0EDC..0EDF 7145 UNKNOWN, // 0EE0..0EFF 7146 TIBETAN, // 0F00..0F47 7147 UNKNOWN, // 0F48 7148 TIBETAN, // 0F49..0F6C 7149 UNKNOWN, // 0F6D..0F70 7150 TIBETAN, // 0F71..0F97 7151 UNKNOWN, // 0F98 7152 TIBETAN, // 0F99..0FBC 7153 UNKNOWN, // 0FBD 7154 TIBETAN, // 0FBE..0FCC 7155 UNKNOWN, // 0FCD 7156 TIBETAN, // 0FCE..0FD4 7157 COMMON, // 0FD5..0FD8 7158 TIBETAN, // 0FD9..0FDA 7159 UNKNOWN, // 0FDB..0FFF 7160 MYANMAR, // 1000..109F 7161 GEORGIAN, // 10A0..10C5 7162 UNKNOWN, // 10C6 7163 GEORGIAN, // 10C7 7164 UNKNOWN, 
// 10C8..10CC 7165 GEORGIAN, // 10CD 7166 UNKNOWN, // 10CE..10CF 7167 GEORGIAN, // 10D0..10FA 7168 COMMON, // 10FB 7169 GEORGIAN, // 10FC..10FF 7170 HANGUL, // 1100..11FF 7171 ETHIOPIC, // 1200..1248 7172 UNKNOWN, // 1249 7173 ETHIOPIC, // 124A..124D 7174 UNKNOWN, // 124E..124F 7175 ETHIOPIC, // 1250..1256 7176 UNKNOWN, // 1257 7177 ETHIOPIC, // 1258 7178 UNKNOWN, // 1259 7179 ETHIOPIC, // 125A..125D 7180 UNKNOWN, // 125E..125F 7181 ETHIOPIC, // 1260..1288 7182 UNKNOWN, // 1289 7183 ETHIOPIC, // 128A..128D 7184 UNKNOWN, // 128E..128F 7185 ETHIOPIC, // 1290..12B0 7186 UNKNOWN, // 12B1 7187 ETHIOPIC, // 12B2..12B5 7188 UNKNOWN, // 12B6..12B7 7189 ETHIOPIC, // 12B8..12BE 7190 UNKNOWN, // 12BF 7191 ETHIOPIC, // 12C0 7192 UNKNOWN, // 12C1 7193 ETHIOPIC, // 12C2..12C5 7194 UNKNOWN, // 12C6..12C7 7195 ETHIOPIC, // 12C8..12D6 7196 UNKNOWN, // 12D7 7197 ETHIOPIC, // 12D8..1310 7198 UNKNOWN, // 1311 7199 ETHIOPIC, // 1312..1315 7200 UNKNOWN, // 1316..1317 7201 ETHIOPIC, // 1318..135A 7202 UNKNOWN, // 135B..135C 7203 ETHIOPIC, // 135D..137C 7204 UNKNOWN, // 137D..137F 7205 ETHIOPIC, // 1380..1399 7206 UNKNOWN, // 139A..139F 7207 CHEROKEE, // 13A0..13F5 7208 UNKNOWN, // 13F6..13F7 7209 CHEROKEE, // 13F8..13FD 7210 UNKNOWN, // 13FE..13FF 7211 CANADIAN_ABORIGINAL, // 1400..167F 7212 OGHAM, // 1680..169C 7213 UNKNOWN, // 169D..169F 7214 RUNIC, // 16A0..16EA 7215 COMMON, // 16EB..16ED 7216 RUNIC, // 16EE..16F8 7217 UNKNOWN, // 16F9..16FF 7218 TAGALOG, // 1700..170C 7219 UNKNOWN, // 170D 7220 TAGALOG, // 170E..1714 7221 UNKNOWN, // 1715..171F 7222 HANUNOO, // 1720..1734 7223 COMMON, // 1735..1736 7224 UNKNOWN, // 1737..173F 7225 BUHID, // 1740..1753 7226 UNKNOWN, // 1754..175F 7227 TAGBANWA, // 1760..176C 7228 UNKNOWN, // 176D 7229 TAGBANWA, // 176E..1770 7230 UNKNOWN, // 1771 7231 TAGBANWA, // 1772..1773 7232 UNKNOWN, // 1774..177F 7233 KHMER, // 1780..17DD 7234 UNKNOWN, // 17DE..17DF 7235 KHMER, // 17E0..17E9 7236 UNKNOWN, // 17EA..17EF 7237 KHMER, // 17F0..17F9 7238 UNKNOWN, // 
17FA..17FF 7239 MONGOLIAN, // 1800..1801 7240 COMMON, // 1802..1803 7241 MONGOLIAN, // 1804 7242 COMMON, // 1805 7243 MONGOLIAN, // 1806..180E 7244 UNKNOWN, // 180F 7245 MONGOLIAN, // 1810..1819 7246 UNKNOWN, // 181A..181F 7247 MONGOLIAN, // 1820..1878 7248 UNKNOWN, // 1879..187F 7249 MONGOLIAN, // 1880..18AA 7250 UNKNOWN, // 18AB..18AF 7251 CANADIAN_ABORIGINAL, // 18B0..18F5 7252 UNKNOWN, // 18F6..18FF 7253 LIMBU, // 1900..191E 7254 UNKNOWN, // 191F 7255 LIMBU, // 1920..192B 7256 UNKNOWN, // 192C..192F 7257 LIMBU, // 1930..193B 7258 UNKNOWN, // 193C..193F 7259 LIMBU, // 1940 7260 UNKNOWN, // 1941..1943 7261 LIMBU, // 1944..194F 7262 TAI_LE, // 1950..196D 7263 UNKNOWN, // 196E..196F 7264 TAI_LE, // 1970..1974 7265 UNKNOWN, // 1975..197F 7266 NEW_TAI_LUE, // 1980..19AB 7267 UNKNOWN, // 19AC..19AF 7268 NEW_TAI_LUE, // 19B0..19C9 7269 UNKNOWN, // 19CA..19CF 7270 NEW_TAI_LUE, // 19D0..19DA 7271 UNKNOWN, // 19DB..19DD 7272 NEW_TAI_LUE, // 19DE..19DF 7273 KHMER, // 19E0..19FF 7274 BUGINESE, // 1A00..1A1B 7275 UNKNOWN, // 1A1C..1A1D 7276 BUGINESE, // 1A1E..1A1F 7277 TAI_THAM, // 1A20..1A5E 7278 UNKNOWN, // 1A5F 7279 TAI_THAM, // 1A60..1A7C 7280 UNKNOWN, // 1A7D..1A7E 7281 TAI_THAM, // 1A7F..1A89 7282 UNKNOWN, // 1A8A..1A8F 7283 TAI_THAM, // 1A90..1A99 7284 UNKNOWN, // 1A9A..1A9F 7285 TAI_THAM, // 1AA0..1AAD 7286 UNKNOWN, // 1AAE..1AAF 7287 INHERITED, // 1AB0..1AC0 7288 UNKNOWN, // 1AC1..1AFF 7289 BALINESE, // 1B00..1B4B 7290 UNKNOWN, // 1B4C..1B4F 7291 BALINESE, // 1B50..1B7C 7292 UNKNOWN, // 1B7D..1B7F 7293 SUNDANESE, // 1B80..1BBF 7294 BATAK, // 1BC0..1BF3 7295 UNKNOWN, // 1BF4..1BFB 7296 BATAK, // 1BFC..1BFF 7297 LEPCHA, // 1C00..1C37 7298 UNKNOWN, // 1C38..1C3A 7299 LEPCHA, // 1C3B..1C49 7300 UNKNOWN, // 1C4A..1C4C 7301 LEPCHA, // 1C4D..1C4F 7302 OL_CHIKI, // 1C50..1C7F 7303 CYRILLIC, // 1C80..1C88 7304 UNKNOWN, // 1C89..1C8F 7305 GEORGIAN, // 1C90..1CBA 7306 UNKNOWN, // 1CBB..1CBC 7307 GEORGIAN, // 1CBD..1CBF 7308 SUNDANESE, // 1CC0..1CC7 7309 UNKNOWN, // 1CC8..1CCF 
7310 INHERITED, // 1CD0..1CD2 7311 COMMON, // 1CD3 7312 INHERITED, // 1CD4..1CE0 7313 COMMON, // 1CE1 7314 INHERITED, // 1CE2..1CE8 7315 COMMON, // 1CE9..1CEC 7316 INHERITED, // 1CED 7317 COMMON, // 1CEE..1CF3 7318 INHERITED, // 1CF4 7319 COMMON, // 1CF5..1CF7 7320 INHERITED, // 1CF8..1CF9 7321 COMMON, // 1CFA 7322 UNKNOWN, // 1CFB..1CFF 7323 LATIN, // 1D00..1D25 7324 GREEK, // 1D26..1D2A 7325 CYRILLIC, // 1D2B 7326 LATIN, // 1D2C..1D5C 7327 GREEK, // 1D5D..1D61 7328 LATIN, // 1D62..1D65 7329 GREEK, // 1D66..1D6A 7330 LATIN, // 1D6B..1D77 7331 CYRILLIC, // 1D78 7332 LATIN, // 1D79..1DBE 7333 GREEK, // 1DBF 7334 INHERITED, // 1DC0..1DF9 7335 UNKNOWN, // 1DFA 7336 INHERITED, // 1DFB..1DFF 7337 LATIN, // 1E00..1EFF 7338 GREEK, // 1F00..1F15 7339 UNKNOWN, // 1F16..1F17 7340 GREEK, // 1F18..1F1D 7341 UNKNOWN, // 1F1E..1F1F 7342 GREEK, // 1F20..1F45 7343 UNKNOWN, // 1F46..1F47 7344 GREEK, // 1F48..1F4D 7345 UNKNOWN, // 1F4E..1F4F 7346 GREEK, // 1F50..1F57 7347 UNKNOWN, // 1F58 7348 GREEK, // 1F59 7349 UNKNOWN, // 1F5A 7350 GREEK, // 1F5B 7351 UNKNOWN, // 1F5C 7352 GREEK, // 1F5D 7353 UNKNOWN, // 1F5E 7354 GREEK, // 1F5F..1F7D 7355 UNKNOWN, // 1F7E..1F7F 7356 GREEK, // 1F80..1FB4 7357 UNKNOWN, // 1FB5 7358 GREEK, // 1FB6..1FC4 7359 UNKNOWN, // 1FC5 7360 GREEK, // 1FC6..1FD3 7361 UNKNOWN, // 1FD4..1FD5 7362 GREEK, // 1FD6..1FDB 7363 UNKNOWN, // 1FDC 7364 GREEK, // 1FDD..1FEF 7365 UNKNOWN, // 1FF0..1FF1 7366 GREEK, // 1FF2..1FF4 7367 UNKNOWN, // 1FF5 7368 GREEK, // 1FF6..1FFE 7369 UNKNOWN, // 1FFF 7370 COMMON, // 2000..200B 7371 INHERITED, // 200C..200D 7372 COMMON, // 200E..2064 7373 UNKNOWN, // 2065 7374 COMMON, // 2066..2070 7375 LATIN, // 2071 7376 UNKNOWN, // 2072..2073 7377 COMMON, // 2074..207E 7378 LATIN, // 207F 7379 COMMON, // 2080..208E 7380 UNKNOWN, // 208F 7381 LATIN, // 2090..209C 7382 UNKNOWN, // 209D..209F 7383 COMMON, // 20A0..20BF 7384 UNKNOWN, // 20C0..20CF 7385 INHERITED, // 20D0..20F0 7386 UNKNOWN, // 20F1..20FF 7387 COMMON, // 2100..2125 7388 GREEK, // 
2126 7389 COMMON, // 2127..2129 7390 LATIN, // 212A..212B 7391 COMMON, // 212C..2131 7392 LATIN, // 2132 7393 COMMON, // 2133..214D 7394 LATIN, // 214E 7395 COMMON, // 214F..215F 7396 LATIN, // 2160..2188 7397 COMMON, // 2189..218B 7398 UNKNOWN, // 218C..218F 7399 COMMON, // 2190..2426 7400 UNKNOWN, // 2427..243F 7401 COMMON, // 2440..244A 7402 UNKNOWN, // 244B..245F 7403 COMMON, // 2460..27FF 7404 BRAILLE, // 2800..28FF 7405 COMMON, // 2900..2B73 7406 UNKNOWN, // 2B74..2B75 7407 COMMON, // 2B76..2B95 7408 UNKNOWN, // 2B96 7409 COMMON, // 2B97..2BFF 7410 GLAGOLITIC, // 2C00..2C2E 7411 UNKNOWN, // 2C2F 7412 GLAGOLITIC, // 2C30..2C5E 7413 UNKNOWN, // 2C5F 7414 LATIN, // 2C60..2C7F 7415 COPTIC, // 2C80..2CF3 7416 UNKNOWN, // 2CF4..2CF8 7417 COPTIC, // 2CF9..2CFF 7418 GEORGIAN, // 2D00..2D25 7419 UNKNOWN, // 2D26 7420 GEORGIAN, // 2D27 7421 UNKNOWN, // 2D28..2D2C 7422 GEORGIAN, // 2D2D 7423 UNKNOWN, // 2D2E..2D2F 7424 TIFINAGH, // 2D30..2D67 7425 UNKNOWN, // 2D68..2D6E 7426 TIFINAGH, // 2D6F..2D70 7427 UNKNOWN, // 2D71..2D7E 7428 TIFINAGH, // 2D7F 7429 ETHIOPIC, // 2D80..2D96 7430 UNKNOWN, // 2D97..2D9F 7431 ETHIOPIC, // 2DA0..2DA6 7432 UNKNOWN, // 2DA7 7433 ETHIOPIC, // 2DA8..2DAE 7434 UNKNOWN, // 2DAF 7435 ETHIOPIC, // 2DB0..2DB6 7436 UNKNOWN, // 2DB7 7437 ETHIOPIC, // 2DB8..2DBE 7438 UNKNOWN, // 2DBF 7439 ETHIOPIC, // 2DC0..2DC6 7440 UNKNOWN, // 2DC7 7441 ETHIOPIC, // 2DC8..2DCE 7442 UNKNOWN, // 2DCF 7443 ETHIOPIC, // 2DD0..2DD6 7444 UNKNOWN, // 2DD7 7445 ETHIOPIC, // 2DD8..2DDE 7446 UNKNOWN, // 2DDF 7447 CYRILLIC, // 2DE0..2DFF 7448 COMMON, // 2E00..2E52 7449 UNKNOWN, // 2E53..2E7F 7450 HAN, // 2E80..2E99 7451 UNKNOWN, // 2E9A 7452 HAN, // 2E9B..2EF3 7453 UNKNOWN, // 2EF4..2EFF 7454 HAN, // 2F00..2FD5 7455 UNKNOWN, // 2FD6..2FEF 7456 COMMON, // 2FF0..2FFB 7457 UNKNOWN, // 2FFC..2FFF 7458 COMMON, // 3000..3004 7459 HAN, // 3005 7460 COMMON, // 3006 7461 HAN, // 3007 7462 COMMON, // 3008..3020 7463 HAN, // 3021..3029 7464 INHERITED, // 302A..302D 7465 HANGUL, // 
302E..302F 7466 COMMON, // 3030..3037 7467 HAN, // 3038..303B 7468 COMMON, // 303C..303F 7469 UNKNOWN, // 3040 7470 HIRAGANA, // 3041..3096 7471 UNKNOWN, // 3097..3098 7472 INHERITED, // 3099..309A 7473 COMMON, // 309B..309C 7474 HIRAGANA, // 309D..309F 7475 COMMON, // 30A0 7476 KATAKANA, // 30A1..30FA 7477 COMMON, // 30FB..30FC 7478 KATAKANA, // 30FD..30FF 7479 UNKNOWN, // 3100..3104 7480 BOPOMOFO, // 3105..312F 7481 UNKNOWN, // 3130 7482 HANGUL, // 3131..318E 7483 UNKNOWN, // 318F 7484 COMMON, // 3190..319F 7485 BOPOMOFO, // 31A0..31BF 7486 COMMON, // 31C0..31E3 7487 UNKNOWN, // 31E4..31EF 7488 KATAKANA, // 31F0..31FF 7489 HANGUL, // 3200..321E 7490 UNKNOWN, // 321F 7491 COMMON, // 3220..325F 7492 HANGUL, // 3260..327E 7493 COMMON, // 327F..32CF 7494 KATAKANA, // 32D0..32FE 7495 COMMON, // 32FF 7496 KATAKANA, // 3300..3357 7497 COMMON, // 3358..33FF 7498 HAN, // 3400..4DBF 7499 COMMON, // 4DC0..4DFF 7500 HAN, // 4E00..9FFC 7501 UNKNOWN, // 9FFD..9FFF 7502 YI, // A000..A48C 7503 UNKNOWN, // A48D..A48F 7504 YI, // A490..A4C6 7505 UNKNOWN, // A4C7..A4CF 7506 LISU, // A4D0..A4FF 7507 VAI, // A500..A62B 7508 UNKNOWN, // A62C..A63F 7509 CYRILLIC, // A640..A69F 7510 BAMUM, // A6A0..A6F7 7511 UNKNOWN, // A6F8..A6FF 7512 COMMON, // A700..A721 7513 LATIN, // A722..A787 7514 COMMON, // A788..A78A 7515 LATIN, // A78B..A7BF 7516 UNKNOWN, // A7C0..A7C1 7517 LATIN, // A7C2..A7CA 7518 UNKNOWN, // A7CB..A7F4 7519 LATIN, // A7F5..A7FF 7520 SYLOTI_NAGRI, // A800..A82C 7521 UNKNOWN, // A82D..A82F 7522 COMMON, // A830..A839 7523 UNKNOWN, // A83A..A83F 7524 PHAGS_PA, // A840..A877 7525 UNKNOWN, // A878..A87F 7526 SAURASHTRA, // A880..A8C5 7527 UNKNOWN, // A8C6..A8CD 7528 SAURASHTRA, // A8CE..A8D9 7529 UNKNOWN, // A8DA..A8DF 7530 DEVANAGARI, // A8E0..A8FF 7531 KAYAH_LI, // A900..A92D 7532 COMMON, // A92E 7533 KAYAH_LI, // A92F 7534 REJANG, // A930..A953 7535 UNKNOWN, // A954..A95E 7536 REJANG, // A95F 7537 HANGUL, // A960..A97C 7538 UNKNOWN, // A97D..A97F 7539 JAVANESE, // A980..A9CD 
7540 UNKNOWN, // A9CE 7541 COMMON, // A9CF 7542 JAVANESE, // A9D0..A9D9 7543 UNKNOWN, // A9DA..A9DD 7544 JAVANESE, // A9DE..A9DF 7545 MYANMAR, // A9E0..A9FE 7546 UNKNOWN, // A9FF 7547 CHAM, // AA00..AA36 7548 UNKNOWN, // AA37..AA3F 7549 CHAM, // AA40..AA4D 7550 UNKNOWN, // AA4E..AA4F 7551 CHAM, // AA50..AA59 7552 UNKNOWN, // AA5A..AA5B 7553 CHAM, // AA5C..AA5F 7554 MYANMAR, // AA60..AA7F 7555 TAI_VIET, // AA80..AAC2 7556 UNKNOWN, // AAC3..AADA 7557 TAI_VIET, // AADB..AADF 7558 MEETEI_MAYEK, // AAE0..AAF6 7559 UNKNOWN, // AAF7..AB00 7560 ETHIOPIC, // AB01..AB06 7561 UNKNOWN, // AB07..AB08 7562 ETHIOPIC, // AB09..AB0E 7563 UNKNOWN, // AB0F..AB10 7564 ETHIOPIC, // AB11..AB16 7565 UNKNOWN, // AB17..AB1F 7566 ETHIOPIC, // AB20..AB26 7567 UNKNOWN, // AB27 7568 ETHIOPIC, // AB28..AB2E 7569 UNKNOWN, // AB2F 7570 LATIN, // AB30..AB5A 7571 COMMON, // AB5B 7572 LATIN, // AB5C..AB64 7573 GREEK, // AB65 7574 LATIN, // AB66..AB69 7575 COMMON, // AB6A..AB6B 7576 UNKNOWN, // AB6C..AB6F 7577 CHEROKEE, // AB70..ABBF 7578 MEETEI_MAYEK, // ABC0..ABED 7579 UNKNOWN, // ABEE..ABEF 7580 MEETEI_MAYEK, // ABF0..ABF9 7581 UNKNOWN, // ABFA..ABFF 7582 HANGUL, // AC00..D7A3 7583 UNKNOWN, // D7A4..D7AF 7584 HANGUL, // D7B0..D7C6 7585 UNKNOWN, // D7C7..D7CA 7586 HANGUL, // D7CB..D7FB 7587 UNKNOWN, // D7FC..F8FF 7588 HAN, // F900..FA6D 7589 UNKNOWN, // FA6E..FA6F 7590 HAN, // FA70..FAD9 7591 UNKNOWN, // FADA..FAFF 7592 LATIN, // FB00..FB06 7593 UNKNOWN, // FB07..FB12 7594 ARMENIAN, // FB13..FB17 7595 UNKNOWN, // FB18..FB1C 7596 HEBREW, // FB1D..FB36 7597 UNKNOWN, // FB37 7598 HEBREW, // FB38..FB3C 7599 UNKNOWN, // FB3D 7600 HEBREW, // FB3E 7601 UNKNOWN, // FB3F 7602 HEBREW, // FB40..FB41 7603 UNKNOWN, // FB42 7604 HEBREW, // FB43..FB44 7605 UNKNOWN, // FB45 7606 HEBREW, // FB46..FB4F 7607 ARABIC, // FB50..FBC1 7608 UNKNOWN, // FBC2..FBD2 7609 ARABIC, // FBD3..FD3D 7610 COMMON, // FD3E..FD3F 7611 UNKNOWN, // FD40..FD4F 7612 ARABIC, // FD50..FD8F 7613 UNKNOWN, // FD90..FD91 7614 ARABIC, // 
FD92..FDC7 7615 UNKNOWN, // FDC8..FDEF 7616 ARABIC, // FDF0..FDFD 7617 UNKNOWN, // FDFE..FDFF 7618 INHERITED, // FE00..FE0F 7619 COMMON, // FE10..FE19 7620 UNKNOWN, // FE1A..FE1F 7621 INHERITED, // FE20..FE2D 7622 CYRILLIC, // FE2E..FE2F 7623 COMMON, // FE30..FE52 7624 UNKNOWN, // FE53 7625 COMMON, // FE54..FE66 7626 UNKNOWN, // FE67 7627 COMMON, // FE68..FE6B 7628 UNKNOWN, // FE6C..FE6F 7629 ARABIC, // FE70..FE74 7630 UNKNOWN, // FE75 7631 ARABIC, // FE76..FEFC 7632 UNKNOWN, // FEFD..FEFE 7633 COMMON, // FEFF 7634 UNKNOWN, // FF00 7635 COMMON, // FF01..FF20 7636 LATIN, // FF21..FF3A 7637 COMMON, // FF3B..FF40 7638 LATIN, // FF41..FF5A 7639 COMMON, // FF5B..FF65 7640 KATAKANA, // FF66..FF6F 7641 COMMON, // FF70 7642 KATAKANA, // FF71..FF9D 7643 COMMON, // FF9E..FF9F 7644 HANGUL, // FFA0..FFBE 7645 UNKNOWN, // FFBF..FFC1 7646 HANGUL, // FFC2..FFC7 7647 UNKNOWN, // FFC8..FFC9 7648 HANGUL, // FFCA..FFCF 7649 UNKNOWN, // FFD0..FFD1 7650 HANGUL, // FFD2..FFD7 7651 UNKNOWN, // FFD8..FFD9 7652 HANGUL, // FFDA..FFDC 7653 UNKNOWN, // FFDD..FFDF 7654 COMMON, // FFE0..FFE6 7655 UNKNOWN, // FFE7 7656 COMMON, // FFE8..FFEE 7657 UNKNOWN, // FFEF..FFF8 7658 COMMON, // FFF9..FFFD 7659 UNKNOWN, // FFFE..FFFF 7660 LINEAR_B, // 10000..1000B 7661 UNKNOWN, // 1000C 7662 LINEAR_B, // 1000D..10026 7663 UNKNOWN, // 10027 7664 LINEAR_B, // 10028..1003A 7665 UNKNOWN, // 1003B 7666 LINEAR_B, // 1003C..1003D 7667 UNKNOWN, // 1003E 7668 LINEAR_B, // 1003F..1004D 7669 UNKNOWN, // 1004E..1004F 7670 LINEAR_B, // 10050..1005D 7671 UNKNOWN, // 1005E..1007F 7672 LINEAR_B, // 10080..100FA 7673 UNKNOWN, // 100FB..100FF 7674 COMMON, // 10100..10102 7675 UNKNOWN, // 10103..10106 7676 COMMON, // 10107..10133 7677 UNKNOWN, // 10134..10136 7678 COMMON, // 10137..1013F 7679 GREEK, // 10140..1018E 7680 UNKNOWN, // 1018F 7681 COMMON, // 10190..1019C 7682 UNKNOWN, // 1019D..1019F 7683 GREEK, // 101A0 7684 UNKNOWN, // 101A1..101CF 7685 COMMON, // 101D0..101FC 7686 INHERITED, // 101FD 7687 UNKNOWN, // 
101FE..1027F 7688 LYCIAN, // 10280..1029C 7689 UNKNOWN, // 1029D..1029F 7690 CARIAN, // 102A0..102D0 7691 UNKNOWN, // 102D1..102DF 7692 INHERITED, // 102E0 7693 COMMON, // 102E1..102FB 7694 UNKNOWN, // 102FC..102FF 7695 OLD_ITALIC, // 10300..10323 7696 UNKNOWN, // 10324..1032C 7697 OLD_ITALIC, // 1032D..1032F 7698 GOTHIC, // 10330..1034A 7699 UNKNOWN, // 1034B..1034F 7700 OLD_PERMIC, // 10350..1037A 7701 UNKNOWN, // 1037B..1037F 7702 UGARITIC, // 10380..1039D 7703 UNKNOWN, // 1039E 7704 UGARITIC, // 1039F 7705 OLD_PERSIAN, // 103A0..103C3 7706 UNKNOWN, // 103C4..103C7 7707 OLD_PERSIAN, // 103C8..103D5 7708 UNKNOWN, // 103D6..103FF 7709 DESERET, // 10400..1044F 7710 SHAVIAN, // 10450..1047F 7711 OSMANYA, // 10480..1049D 7712 UNKNOWN, // 1049E..1049F 7713 OSMANYA, // 104A0..104A9 7714 UNKNOWN, // 104AA..104AF 7715 OSAGE, // 104B0..104D3 7716 UNKNOWN, // 104D4..104D7 7717 OSAGE, // 104D8..104FB 7718 UNKNOWN, // 104FC..104FF 7719 ELBASAN, // 10500..10527 7720 UNKNOWN, // 10528..1052F 7721 CAUCASIAN_ALBANIAN, // 10530..10563 7722 UNKNOWN, // 10564..1056E 7723 CAUCASIAN_ALBANIAN, // 1056F 7724 UNKNOWN, // 10570..105FF 7725 LINEAR_A, // 10600..10736 7726 UNKNOWN, // 10737..1073F 7727 LINEAR_A, // 10740..10755 7728 UNKNOWN, // 10756..1075F 7729 LINEAR_A, // 10760..10767 7730 UNKNOWN, // 10768..107FF 7731 CYPRIOT, // 10800..10805 7732 UNKNOWN, // 10806..10807 7733 CYPRIOT, // 10808 7734 UNKNOWN, // 10809 7735 CYPRIOT, // 1080A..10835 7736 UNKNOWN, // 10836 7737 CYPRIOT, // 10837..10838 7738 UNKNOWN, // 10839..1083B 7739 CYPRIOT, // 1083C 7740 UNKNOWN, // 1083D..1083E 7741 CYPRIOT, // 1083F 7742 IMPERIAL_ARAMAIC, // 10840..10855 7743 UNKNOWN, // 10856 7744 IMPERIAL_ARAMAIC, // 10857..1085F 7745 PALMYRENE, // 10860..1087F 7746 NABATAEAN, // 10880..1089E 7747 UNKNOWN, // 1089F..108A6 7748 NABATAEAN, // 108A7..108AF 7749 UNKNOWN, // 108B0..108DF 7750 HATRAN, // 108E0..108F2 7751 UNKNOWN, // 108F3 7752 HATRAN, // 108F4..108F5 7753 UNKNOWN, // 108F6..108FA 7754 HATRAN, // 
108FB..108FF 7755 PHOENICIAN, // 10900..1091B 7756 UNKNOWN, // 1091C..1091E 7757 PHOENICIAN, // 1091F 7758 LYDIAN, // 10920..10939 7759 UNKNOWN, // 1093A..1093E 7760 LYDIAN, // 1093F 7761 UNKNOWN, // 10940..1097F 7762 MEROITIC_HIEROGLYPHS, // 10980..1099F 7763 MEROITIC_CURSIVE, // 109A0..109B7 7764 UNKNOWN, // 109B8..109BB 7765 MEROITIC_CURSIVE, // 109BC..109CF 7766 UNKNOWN, // 109D0..109D1 7767 MEROITIC_CURSIVE, // 109D2..109FF 7768 KHAROSHTHI, // 10A00..10A03 7769 UNKNOWN, // 10A04 7770 KHAROSHTHI, // 10A05..10A06 7771 UNKNOWN, // 10A07..10A0B 7772 KHAROSHTHI, // 10A0C..10A13 7773 UNKNOWN, // 10A14 7774 KHAROSHTHI, // 10A15..10A17 7775 UNKNOWN, // 10A18 7776 KHAROSHTHI, // 10A19..10A35 7777 UNKNOWN, // 10A36..10A37 7778 KHAROSHTHI, // 10A38..10A3A 7779 UNKNOWN, // 10A3B..10A3E 7780 KHAROSHTHI, // 10A3F..10A48 7781 UNKNOWN, // 10A49..10A4F 7782 KHAROSHTHI, // 10A50..10A58 7783 UNKNOWN, // 10A59..10A5F 7784 OLD_SOUTH_ARABIAN, // 10A60..10A7F 7785 OLD_NORTH_ARABIAN, // 10A80..10A9F 7786 UNKNOWN, // 10AA0..10ABF 7787 MANICHAEAN, // 10AC0..10AE6 7788 UNKNOWN, // 10AE7..10AEA 7789 MANICHAEAN, // 10AEB..10AF6 7790 UNKNOWN, // 10AF7..10AFF 7791 AVESTAN, // 10B00..10B35 7792 UNKNOWN, // 10B36..10B38 7793 AVESTAN, // 10B39..10B3F 7794 INSCRIPTIONAL_PARTHIAN, // 10B40..10B55 7795 UNKNOWN, // 10B56..10B57 7796 INSCRIPTIONAL_PARTHIAN, // 10B58..10B5F 7797 INSCRIPTIONAL_PAHLAVI, // 10B60..10B72 7798 UNKNOWN, // 10B73..10B77 7799 INSCRIPTIONAL_PAHLAVI, // 10B78..10B7F 7800 PSALTER_PAHLAVI, // 10B80..10B91 7801 UNKNOWN, // 10B92..10B98 7802 PSALTER_PAHLAVI, // 10B99..10B9C 7803 UNKNOWN, // 10B9D..10BA8 7804 PSALTER_PAHLAVI, // 10BA9..10BAF 7805 UNKNOWN, // 10BB0..10BFF 7806 OLD_TURKIC, // 10C00..10C48 7807 UNKNOWN, // 10C49..10C7F 7808 OLD_HUNGARIAN, // 10C80..10CB2 7809 UNKNOWN, // 10CB3..10CBF 7810 OLD_HUNGARIAN, // 10CC0..10CF2 7811 UNKNOWN, // 10CF3..10CF9 7812 OLD_HUNGARIAN, // 10CFA..10CFF 7813 HANIFI_ROHINGYA, // 10D00..10D27 7814 UNKNOWN, // 10D28..10D2F 7815 
HANIFI_ROHINGYA, // 10D30..10D39 7816 UNKNOWN, // 10D3A..10E5F 7817 ARABIC, // 10E60..10E7E 7818 UNKNOWN, // 10E7F 7819 YEZIDI, // 10E80..10EA9 7820 UNKNOWN, // 10EAA 7821 YEZIDI, // 10EAB..10EAD 7822 UNKNOWN, // 10EAE..10EAF 7823 YEZIDI, // 10EB0..10EB1 7824 UNKNOWN, // 10EB2..10EFF 7825 OLD_SOGDIAN, // 10F00..10F27 7826 UNKNOWN, // 10F28..10F2F 7827 SOGDIAN, // 10F30..10F59 7828 UNKNOWN, // 10F5A..10FAF 7829 CHORASMIAN, // 10FB0..10FCB 7830 UNKNOWN, // 10FCC..10FDF 7831 ELYMAIC, // 10FE0..10FF6 7832 UNKNOWN, // 10FF7..10FFF 7833 BRAHMI, // 11000..1104D 7834 UNKNOWN, // 1104E..11051 7835 BRAHMI, // 11052..1106F 7836 UNKNOWN, // 11070..1107E 7837 BRAHMI, // 1107F 7838 KAITHI, // 11080..110C1 7839 UNKNOWN, // 110C2..110CC 7840 KAITHI, // 110CD 7841 UNKNOWN, // 110CE..110CF 7842 SORA_SOMPENG, // 110D0..110E8 7843 UNKNOWN, // 110E9..110EF 7844 SORA_SOMPENG, // 110F0..110F9 7845 UNKNOWN, // 110FA..110FF 7846 CHAKMA, // 11100..11134 7847 UNKNOWN, // 11135 7848 CHAKMA, // 11136..11147 7849 UNKNOWN, // 11148..1114F 7850 MAHAJANI, // 11150..11176 7851 UNKNOWN, // 11177..1117F 7852 SHARADA, // 11180..111DF 7853 UNKNOWN, // 111E0 7854 SINHALA, // 111E1..111F4 7855 UNKNOWN, // 111F5..111FF 7856 KHOJKI, // 11200..11211 7857 UNKNOWN, // 11212 7858 KHOJKI, // 11213..1123E 7859 UNKNOWN, // 1123F..1127F 7860 MULTANI, // 11280..11286 7861 UNKNOWN, // 11287 7862 MULTANI, // 11288 7863 UNKNOWN, // 11289 7864 MULTANI, // 1128A..1128D 7865 UNKNOWN, // 1128E 7866 MULTANI, // 1128F..1129D 7867 UNKNOWN, // 1129E 7868 MULTANI, // 1129F..112A9 7869 UNKNOWN, // 112AA..112AF 7870 KHUDAWADI, // 112B0..112EA 7871 UNKNOWN, // 112EB..112EF 7872 KHUDAWADI, // 112F0..112F9 7873 UNKNOWN, // 112FA..112FF 7874 GRANTHA, // 11300..11303 7875 UNKNOWN, // 11304 7876 GRANTHA, // 11305..1130C 7877 UNKNOWN, // 1130D..1130E 7878 GRANTHA, // 1130F..11310 7879 UNKNOWN, // 11311..11312 7880 GRANTHA, // 11313..11328 7881 UNKNOWN, // 11329 7882 GRANTHA, // 1132A..11330 7883 UNKNOWN, // 11331 7884 GRANTHA, // 
11332..11333 7885 UNKNOWN, // 11334 7886 GRANTHA, // 11335..11339 7887 UNKNOWN, // 1133A 7888 INHERITED, // 1133B 7889 GRANTHA, // 1133C..11344 7890 UNKNOWN, // 11345..11346 7891 GRANTHA, // 11347..11348 7892 UNKNOWN, // 11349..1134A 7893 GRANTHA, // 1134B..1134D 7894 UNKNOWN, // 1134E..1134F 7895 GRANTHA, // 11350 7896 UNKNOWN, // 11351..11356 7897 GRANTHA, // 11357 7898 UNKNOWN, // 11358..1135C 7899 GRANTHA, // 1135D..11363 7900 UNKNOWN, // 11364..11365 7901 GRANTHA, // 11366..1136C 7902 UNKNOWN, // 1136D..1136F 7903 GRANTHA, // 11370..11374 7904 UNKNOWN, // 11375..113FF 7905 NEWA, // 11400..1145B 7906 UNKNOWN, // 1145C 7907 NEWA, // 1145D..11461 7908 UNKNOWN, // 11462..1147F 7909 TIRHUTA, // 11480..114C7 7910 UNKNOWN, // 114C8..114CF 7911 TIRHUTA, // 114D0..114D9 7912 UNKNOWN, // 114DA..1157F 7913 SIDDHAM, // 11580..115B5 7914 UNKNOWN, // 115B6..115B7 7915 SIDDHAM, // 115B8..115DD 7916 UNKNOWN, // 115DE..115FF 7917 MODI, // 11600..11644 7918 UNKNOWN, // 11645..1164F 7919 MODI, // 11650..11659 7920 UNKNOWN, // 1165A..1165F 7921 MONGOLIAN, // 11660..1166C 7922 UNKNOWN, // 1166D..1167F 7923 TAKRI, // 11680..116B8 7924 UNKNOWN, // 116B9..116BF 7925 TAKRI, // 116C0..116C9 7926 UNKNOWN, // 116CA..116FF 7927 AHOM, // 11700..1171A 7928 UNKNOWN, // 1171B..1171C 7929 AHOM, // 1171D..1172B 7930 UNKNOWN, // 1172C..1172F 7931 AHOM, // 11730..1173F 7932 UNKNOWN, // 11740..117FF 7933 DOGRA, // 11800..1183B 7934 UNKNOWN, // 1183C..1189F 7935 WARANG_CITI, // 118A0..118F2 7936 UNKNOWN, // 118F3..118FE 7937 WARANG_CITI, // 118FF 7938 DIVES_AKURU, // 11900..11906 7939 UNKNOWN, // 11907..11908 7940 DIVES_AKURU, // 11909 7941 UNKNOWN, // 1190A..1190B 7942 DIVES_AKURU, // 1190C..11913 7943 UNKNOWN, // 11914 7944 DIVES_AKURU, // 11915..11916 7945 UNKNOWN, // 11917 7946 DIVES_AKURU, // 11918..11935 7947 UNKNOWN, // 11936 7948 DIVES_AKURU, // 11937..11938 7949 UNKNOWN, // 11939..1193A 7950 DIVES_AKURU, // 1193B..11946 7951 UNKNOWN, // 11947..1194F 7952 DIVES_AKURU, // 11950..11959 7953 
UNKNOWN, // 1195A..1199F 7954 NANDINAGARI, // 119A0..119A7 7955 UNKNOWN, // 119A8..119A9 7956 NANDINAGARI, // 119AA..119D7 7957 UNKNOWN, // 119D8..119D9 7958 NANDINAGARI, // 119DA..119E4 7959 UNKNOWN, // 119E5..119FF 7960 ZANABAZAR_SQUARE, // 11A00..11A47 7961 UNKNOWN, // 11A48..11A4F 7962 SOYOMBO, // 11A50..11AA2 7963 UNKNOWN, // 11AA3..11ABF 7964 PAU_CIN_HAU, // 11AC0..11AF8 7965 UNKNOWN, // 11AF9..11BFF 7966 BHAIKSUKI, // 11C00..11C08 7967 UNKNOWN, // 11C09 7968 BHAIKSUKI, // 11C0A..11C36 7969 UNKNOWN, // 11C37 7970 BHAIKSUKI, // 11C38..11C45 7971 UNKNOWN, // 11C46..11C4F 7972 BHAIKSUKI, // 11C50..11C6C 7973 UNKNOWN, // 11C6D..11C6F 7974 MARCHEN, // 11C70..11C8F 7975 UNKNOWN, // 11C90..11C91 7976 MARCHEN, // 11C92..11CA7 7977 UNKNOWN, // 11CA8 7978 MARCHEN, // 11CA9..11CB6 7979 UNKNOWN, // 11CB7..11CFF 7980 MASARAM_GONDI, // 11D00..11D06 7981 UNKNOWN, // 11D07 7982 MASARAM_GONDI, // 11D08..11D09 7983 UNKNOWN, // 11D0A 7984 MASARAM_GONDI, // 11D0B..11D36 7985 UNKNOWN, // 11D37..11D39 7986 MASARAM_GONDI, // 11D3A 7987 UNKNOWN, // 11D3B 7988 MASARAM_GONDI, // 11D3C..11D3D 7989 UNKNOWN, // 11D3E 7990 MASARAM_GONDI, // 11D3F..11D47 7991 UNKNOWN, // 11D48..11D4F 7992 MASARAM_GONDI, // 11D50..11D59 7993 UNKNOWN, // 11D5A..11D5F 7994 GUNJALA_GONDI, // 11D60..11D65 7995 UNKNOWN, // 11D66 7996 GUNJALA_GONDI, // 11D67..11D68 7997 UNKNOWN, // 11D69 7998 GUNJALA_GONDI, // 11D6A..11D8E 7999 UNKNOWN, // 11D8F 8000 GUNJALA_GONDI, // 11D90..11D91 8001 UNKNOWN, // 11D92 8002 GUNJALA_GONDI, // 11D93..11D98 8003 UNKNOWN, // 11D99..11D9F 8004 GUNJALA_GONDI, // 11DA0..11DA9 8005 UNKNOWN, // 11DAA..11EDF 8006 MAKASAR, // 11EE0..11EF8 8007 UNKNOWN, // 11EF9..11FAF 8008 LISU, // 11FB0 8009 UNKNOWN, // 11FB1..11FBF 8010 TAMIL, // 11FC0..11FF1 8011 UNKNOWN, // 11FF2..11FFE 8012 TAMIL, // 11FFF 8013 CUNEIFORM, // 12000..12399 8014 UNKNOWN, // 1239A..123FF 8015 CUNEIFORM, // 12400..1246E 8016 UNKNOWN, // 1246F 8017 CUNEIFORM, // 12470..12474 8018 UNKNOWN, // 12475..1247F 8019 CUNEIFORM, // 
12480..12543 8020 UNKNOWN, // 12544..12FFF 8021 EGYPTIAN_HIEROGLYPHS, // 13000..1342E 8022 UNKNOWN, // 1342F 8023 EGYPTIAN_HIEROGLYPHS, // 13430..13438 8024 UNKNOWN, // 13439..143FF 8025 ANATOLIAN_HIEROGLYPHS, // 14400..14646 8026 UNKNOWN, // 14647..167FF 8027 BAMUM, // 16800..16A38 8028 UNKNOWN, // 16A39..16A3F 8029 MRO, // 16A40..16A5E 8030 UNKNOWN, // 16A5F 8031 MRO, // 16A60..16A69 8032 UNKNOWN, // 16A6A..16A6D 8033 MRO, // 16A6E..16A6F 8034 UNKNOWN, // 16A70..16ACF 8035 BASSA_VAH, // 16AD0..16AED 8036 UNKNOWN, // 16AEE..16AEF 8037 BASSA_VAH, // 16AF0..16AF5 8038 UNKNOWN, // 16AF6..16AFF 8039 PAHAWH_HMONG, // 16B00..16B45 8040 UNKNOWN, // 16B46..16B4F 8041 PAHAWH_HMONG, // 16B50..16B59 8042 UNKNOWN, // 16B5A 8043 PAHAWH_HMONG, // 16B5B..16B61 8044 UNKNOWN, // 16B62 8045 PAHAWH_HMONG, // 16B63..16B77 8046 UNKNOWN, // 16B78..16B7C 8047 PAHAWH_HMONG, // 16B7D..16B8F 8048 UNKNOWN, // 16B90..16E3F 8049 MEDEFAIDRIN, // 16E40..16E9A 8050 UNKNOWN, // 16E9B..16EFF 8051 MIAO, // 16F00..16F4A 8052 UNKNOWN, // 16F4B..16F4E 8053 MIAO, // 16F4F..16F87 8054 UNKNOWN, // 16F88..16F8E 8055 MIAO, // 16F8F..16F9F 8056 UNKNOWN, // 16FA0..16FDF 8057 TANGUT, // 16FE0 8058 NUSHU, // 16FE1 8059 COMMON, // 16FE2..16FE3 8060 KHITAN_SMALL_SCRIPT, // 16FE4 8061 UNKNOWN, // 16FE5..16FEF 8062 HAN, // 16FF0..16FF1 8063 UNKNOWN, // 16FF2..16FFF 8064 TANGUT, // 17000..187F7 8065 UNKNOWN, // 187F8..187FF 8066 TANGUT, // 18800..18AFF 8067 KHITAN_SMALL_SCRIPT, // 18B00..18CD5 8068 UNKNOWN, // 18CD6..18CFF 8069 TANGUT, // 18D00..18D08 8070 UNKNOWN, // 18D09..1AFFF 8071 KATAKANA, // 1B000 8072 HIRAGANA, // 1B001..1B11E 8073 UNKNOWN, // 1B11F..1B14F 8074 HIRAGANA, // 1B150..1B152 8075 UNKNOWN, // 1B153..1B163 8076 KATAKANA, // 1B164..1B167 8077 UNKNOWN, // 1B168..1B16F 8078 NUSHU, // 1B170..1B2FB 8079 UNKNOWN, // 1B2FC..1BBFF 8080 DUPLOYAN, // 1BC00..1BC6A 8081 UNKNOWN, // 1BC6B..1BC6F 8082 DUPLOYAN, // 1BC70..1BC7C 8083 UNKNOWN, // 1BC7D..1BC7F 8084 DUPLOYAN, // 1BC80..1BC88 8085 UNKNOWN, // 
1BC89..1BC8F 8086 DUPLOYAN, // 1BC90..1BC99 8087 UNKNOWN, // 1BC9A..1BC9B 8088 DUPLOYAN, // 1BC9C..1BC9F 8089 COMMON, // 1BCA0..1BCA3 8090 UNKNOWN, // 1BCA4..1CFFF 8091 COMMON, // 1D000..1D0F5 8092 UNKNOWN, // 1D0F6..1D0FF 8093 COMMON, // 1D100..1D126 8094 UNKNOWN, // 1D127..1D128 8095 COMMON, // 1D129..1D166 8096 INHERITED, // 1D167..1D169 8097 COMMON, // 1D16A..1D17A 8098 INHERITED, // 1D17B..1D182 8099 COMMON, // 1D183..1D184 8100 INHERITED, // 1D185..1D18B 8101 COMMON, // 1D18C..1D1A9 8102 INHERITED, // 1D1AA..1D1AD 8103 COMMON, // 1D1AE..1D1E8 8104 UNKNOWN, // 1D1E9..1D1FF 8105 GREEK, // 1D200..1D245 8106 UNKNOWN, // 1D246..1D2DF 8107 COMMON, // 1D2E0..1D2F3 8108 UNKNOWN, // 1D2F4..1D2FF 8109 COMMON, // 1D300..1D356 8110 UNKNOWN, // 1D357..1D35F 8111 COMMON, // 1D360..1D378 8112 UNKNOWN, // 1D379..1D3FF 8113 COMMON, // 1D400..1D454 8114 UNKNOWN, // 1D455 8115 COMMON, // 1D456..1D49C 8116 UNKNOWN, // 1D49D 8117 COMMON, // 1D49E..1D49F 8118 UNKNOWN, // 1D4A0..1D4A1 8119 COMMON, // 1D4A2 8120 UNKNOWN, // 1D4A3..1D4A4 8121 COMMON, // 1D4A5..1D4A6 8122 UNKNOWN, // 1D4A7..1D4A8 8123 COMMON, // 1D4A9..1D4AC 8124 UNKNOWN, // 1D4AD 8125 COMMON, // 1D4AE..1D4B9 8126 UNKNOWN, // 1D4BA 8127 COMMON, // 1D4BB 8128 UNKNOWN, // 1D4BC 8129 COMMON, // 1D4BD..1D4C3 8130 UNKNOWN, // 1D4C4 8131 COMMON, // 1D4C5..1D505 8132 UNKNOWN, // 1D506 8133 COMMON, // 1D507..1D50A 8134 UNKNOWN, // 1D50B..1D50C 8135 COMMON, // 1D50D..1D514 8136 UNKNOWN, // 1D515 8137 COMMON, // 1D516..1D51C 8138 UNKNOWN, // 1D51D 8139 COMMON, // 1D51E..1D539 8140 UNKNOWN, // 1D53A 8141 COMMON, // 1D53B..1D53E 8142 UNKNOWN, // 1D53F 8143 COMMON, // 1D540..1D544 8144 UNKNOWN, // 1D545 8145 COMMON, // 1D546 8146 UNKNOWN, // 1D547..1D549 8147 COMMON, // 1D54A..1D550 8148 UNKNOWN, // 1D551 8149 COMMON, // 1D552..1D6A5 8150 UNKNOWN, // 1D6A6..1D6A7 8151 COMMON, // 1D6A8..1D7CB 8152 UNKNOWN, // 1D7CC..1D7CD 8153 COMMON, // 1D7CE..1D7FF 8154 SIGNWRITING, // 1D800..1DA8B 8155 UNKNOWN, // 1DA8C..1DA9A 8156 SIGNWRITING, 
// 1DA9B..1DA9F 8157 UNKNOWN, // 1DAA0 8158 SIGNWRITING, // 1DAA1..1DAAF 8159 UNKNOWN, // 1DAB0..1DFFF 8160 GLAGOLITIC, // 1E000..1E006 8161 UNKNOWN, // 1E007 8162 GLAGOLITIC, // 1E008..1E018 8163 UNKNOWN, // 1E019..1E01A 8164 GLAGOLITIC, // 1E01B..1E021 8165 UNKNOWN, // 1E022 8166 GLAGOLITIC, // 1E023..1E024 8167 UNKNOWN, // 1E025 8168 GLAGOLITIC, // 1E026..1E02A 8169 UNKNOWN, // 1E02B..1E0FF 8170 NYIAKENG_PUACHUE_HMONG, // 1E100..1E12C 8171 UNKNOWN, // 1E12D..1E12F 8172 NYIAKENG_PUACHUE_HMONG, // 1E130..1E13D 8173 UNKNOWN, // 1E13E..1E13F 8174 NYIAKENG_PUACHUE_HMONG, // 1E140..1E149 8175 UNKNOWN, // 1E14A..1E14D 8176 NYIAKENG_PUACHUE_HMONG, // 1E14E..1E14F 8177 UNKNOWN, // 1E150..1E2BF 8178 WANCHO, // 1E2C0..1E2F9 8179 UNKNOWN, // 1E2FA..1E2FE 8180 WANCHO, // 1E2FF 8181 UNKNOWN, // 1E300..1E7FF 8182 MENDE_KIKAKUI, // 1E800..1E8C4 8183 UNKNOWN, // 1E8C5..1E8C6 8184 MENDE_KIKAKUI, // 1E8C7..1E8D6 8185 UNKNOWN, // 1E8D7..1E8FF 8186 ADLAM, // 1E900..1E94B 8187 UNKNOWN, // 1E94C..1E94F 8188 ADLAM, // 1E950..1E959 8189 UNKNOWN, // 1E95A..1E95D 8190 ADLAM, // 1E95E..1E95F 8191 UNKNOWN, // 1E960..1EC70 8192 COMMON, // 1EC71..1ECB4 8193 UNKNOWN, // 1ECB5..1ED00 8194 COMMON, // 1ED01..1ED3D 8195 UNKNOWN, // 1ED3E..1EDFF 8196 ARABIC, // 1EE00..1EE03 8197 UNKNOWN, // 1EE04 8198 ARABIC, // 1EE05..1EE1F 8199 UNKNOWN, // 1EE20 8200 ARABIC, // 1EE21..1EE22 8201 UNKNOWN, // 1EE23 8202 ARABIC, // 1EE24 8203 UNKNOWN, // 1EE25..1EE26 8204 ARABIC, // 1EE27 8205 UNKNOWN, // 1EE28 8206 ARABIC, // 1EE29..1EE32 8207 UNKNOWN, // 1EE33 8208 ARABIC, // 1EE34..1EE37 8209 UNKNOWN, // 1EE38 8210 ARABIC, // 1EE39 8211 UNKNOWN, // 1EE3A 8212 ARABIC, // 1EE3B 8213 UNKNOWN, // 1EE3C..1EE41 8214 ARABIC, // 1EE42 8215 UNKNOWN, // 1EE43..1EE46 8216 ARABIC, // 1EE47 8217 UNKNOWN, // 1EE48 8218 ARABIC, // 1EE49 8219 UNKNOWN, // 1EE4A 8220 ARABIC, // 1EE4B 8221 UNKNOWN, // 1EE4C 8222 ARABIC, // 1EE4D..1EE4F 8223 UNKNOWN, // 1EE50 8224 ARABIC, // 1EE51..1EE52 8225 UNKNOWN, // 1EE53 8226 ARABIC, // 1EE54 
8227 UNKNOWN, // 1EE55..1EE56 8228 ARABIC, // 1EE57 8229 UNKNOWN, // 1EE58 8230 ARABIC, // 1EE59 8231 UNKNOWN, // 1EE5A 8232 ARABIC, // 1EE5B 8233 UNKNOWN, // 1EE5C 8234 ARABIC, // 1EE5D 8235 UNKNOWN, // 1EE5E 8236 ARABIC, // 1EE5F 8237 UNKNOWN, // 1EE60 8238 ARABIC, // 1EE61..1EE62 8239 UNKNOWN, // 1EE63 8240 ARABIC, // 1EE64 8241 UNKNOWN, // 1EE65..1EE66 8242 ARABIC, // 1EE67..1EE6A 8243 UNKNOWN, // 1EE6B 8244 ARABIC, // 1EE6C..1EE72 8245 UNKNOWN, // 1EE73 8246 ARABIC, // 1EE74..1EE77 8247 UNKNOWN, // 1EE78 8248 ARABIC, // 1EE79..1EE7C 8249 UNKNOWN, // 1EE7D 8250 ARABIC, // 1EE7E 8251 UNKNOWN, // 1EE7F 8252 ARABIC, // 1EE80..1EE89 8253 UNKNOWN, // 1EE8A 8254 ARABIC, // 1EE8B..1EE9B 8255 UNKNOWN, // 1EE9C..1EEA0 8256 ARABIC, // 1EEA1..1EEA3 8257 UNKNOWN, // 1EEA4 8258 ARABIC, // 1EEA5..1EEA9 8259 UNKNOWN, // 1EEAA 8260 ARABIC, // 1EEAB..1EEBB 8261 UNKNOWN, // 1EEBC..1EEEF 8262 ARABIC, // 1EEF0..1EEF1 8263 UNKNOWN, // 1EEF2..1EFFF 8264 COMMON, // 1F000..1F02B 8265 UNKNOWN, // 1F02C..1F02F 8266 COMMON, // 1F030..1F093 8267 UNKNOWN, // 1F094..1F09F 8268 COMMON, // 1F0A0..1F0AE 8269 UNKNOWN, // 1F0AF..1F0B0 8270 COMMON, // 1F0B1..1F0BF 8271 UNKNOWN, // 1F0C0 8272 COMMON, // 1F0C1..1F0CF 8273 UNKNOWN, // 1F0D0 8274 COMMON, // 1F0D1..1F0F5 8275 UNKNOWN, // 1F0F6..1F0FF 8276 COMMON, // 1F100..1F1AD 8277 UNKNOWN, // 1F1AE..1F1E5 8278 COMMON, // 1F1E6..1F1FF 8279 HIRAGANA, // 1F200 8280 COMMON, // 1F201..1F202 8281 UNKNOWN, // 1F203..1F20F 8282 COMMON, // 1F210..1F23B 8283 UNKNOWN, // 1F23C..1F23F 8284 COMMON, // 1F240..1F248 8285 UNKNOWN, // 1F249..1F24F 8286 COMMON, // 1F250..1F251 8287 UNKNOWN, // 1F252..1F25F 8288 COMMON, // 1F260..1F265 8289 UNKNOWN, // 1F266..1F2FF 8290 COMMON, // 1F300..1F6D7 8291 UNKNOWN, // 1F6D8..1F6DF 8292 COMMON, // 1F6E0..1F6EC 8293 UNKNOWN, // 1F6ED..1F6EF 8294 COMMON, // 1F6F0..1F6FC 8295 UNKNOWN, // 1F6FD..1F6FF 8296 COMMON, // 1F700..1F773 8297 UNKNOWN, // 1F774..1F77F 8298 COMMON, // 1F780..1F7D8 8299 UNKNOWN, // 1F7D9..1F7DF 8300 COMMON, 
// 1F7E0..1F7EB
            UNKNOWN, // 1F7EC..1F7FF
            COMMON, // 1F800..1F80B
            UNKNOWN, // 1F80C..1F80F
            COMMON, // 1F810..1F847
            UNKNOWN, // 1F848..1F84F
            COMMON, // 1F850..1F859
            UNKNOWN, // 1F85A..1F85F
            COMMON, // 1F860..1F887
            UNKNOWN, // 1F888..1F88F
            COMMON, // 1F890..1F8AD
            UNKNOWN, // 1F8AE..1F8AF
            COMMON, // 1F8B0..1F8B1
            UNKNOWN, // 1F8B2..1F8FF
            COMMON, // 1F900..1F978
            UNKNOWN, // 1F979
            COMMON, // 1F97A..1F9CB
            UNKNOWN, // 1F9CC
            COMMON, // 1F9CD..1FA53
            UNKNOWN, // 1FA54..1FA5F
            COMMON, // 1FA60..1FA6D
            UNKNOWN, // 1FA6E..1FA6F
            COMMON, // 1FA70..1FA74
            UNKNOWN, // 1FA75..1FA77
            COMMON, // 1FA78..1FA7A
            UNKNOWN, // 1FA7B..1FA7F
            COMMON, // 1FA80..1FA86
            UNKNOWN, // 1FA87..1FA8F
            COMMON, // 1FA90..1FAA8
            UNKNOWN, // 1FAA9..1FAAF
            COMMON, // 1FAB0..1FAB6
            UNKNOWN, // 1FAB7..1FABF
            COMMON, // 1FAC0..1FAC2
            UNKNOWN, // 1FAC3..1FACF
            COMMON, // 1FAD0..1FAD6
            UNKNOWN, // 1FAD7..1FAFF
            COMMON, // 1FB00..1FB92
            UNKNOWN, // 1FB93
            COMMON, // 1FB94..1FBCA
            UNKNOWN, // 1FBCB..1FBEF
            COMMON, // 1FBF0..1FBF9
            UNKNOWN, // 1FBFA..1FFFF
            HAN, // 20000..2A6DD
            UNKNOWN, // 2A6DE..2A6FF
            HAN, // 2A700..2B734
            UNKNOWN, // 2B735..2B73F
            HAN, // 2B740..2B81D
            UNKNOWN, // 2B81E..2B81F
            HAN, // 2B820..2CEA1
            UNKNOWN, // 2CEA2..2CEAF
            HAN, // 2CEB0..2EBE0
            UNKNOWN, // 2EBE1..2F7FF
            HAN, // 2F800..2FA1D
            UNKNOWN, // 2FA1E..2FFFF
            HAN, // 30000..3134A
            UNKNOWN, // 3134B..E0000
            COMMON, // E0001
            UNKNOWN, // E0002..E001F
            COMMON, // E0020..E007F
            UNKNOWN, // E0080..E00FF
            INHERITED, // E0100..E01EF
            UNKNOWN, // E01F0..10FFFF
        };

        // Lookup table from upper-case script alias to its UnicodeScript
        // constant. forName() upper-cases its argument, consults this map first
        // and only falls back to the enum's valueOf() when no alias matches.
        private static final HashMap<String, Character.UnicodeScript> aliases;
        static {
            // Exactly 157 put() calls follow. Dividing by 0.75f (HashMap's
            // default load factor) and adding 1 requests enough initial capacity
            // that the map should not have to resize while it is being filled.
            // Keep the literal in step with the number of entries below.
            aliases = new HashMap<>((int)(157 / 0.75f + 1.0f));
            aliases.put("ADLM", ADLAM);
            aliases.put("AGHB", CAUCASIAN_ALBANIAN);
            aliases.put("AHOM", AHOM);
            aliases.put("ARAB", ARABIC);
            aliases.put("ARMI", IMPERIAL_ARAMAIC);
            aliases.put("ARMN", ARMENIAN);
            aliases.put("AVST", AVESTAN);
            aliases.put("BALI", BALINESE);
            aliases.put("BAMU", BAMUM);
            aliases.put("BASS", BASSA_VAH);
            aliases.put("BATK", BATAK);
            aliases.put("BENG", BENGALI);
            aliases.put("BHKS", BHAIKSUKI);
            aliases.put("BOPO", BOPOMOFO);
            aliases.put("BRAH", BRAHMI);
            aliases.put("BRAI", BRAILLE);
            aliases.put("BUGI", BUGINESE);
            aliases.put("BUHD", BUHID);
            aliases.put("CAKM", CHAKMA);
            aliases.put("CANS", CANADIAN_ABORIGINAL);
            aliases.put("CARI", CARIAN);
            aliases.put("CHAM", CHAM);
            aliases.put("CHER", CHEROKEE);
            aliases.put("CHRS", CHORASMIAN);
            aliases.put("COPT", COPTIC);
            aliases.put("CPRT", CYPRIOT);
            aliases.put("CYRL", CYRILLIC);
            aliases.put("DEVA", DEVANAGARI);
            aliases.put("DIAK", DIVES_AKURU);
            aliases.put("DOGR", DOGRA);
            aliases.put("DSRT", DESERET);
            aliases.put("DUPL", DUPLOYAN);
            aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
            aliases.put("ELBA", ELBASAN);
            aliases.put("ELYM", ELYMAIC);
            aliases.put("ETHI", ETHIOPIC);
            aliases.put("GEOR", GEORGIAN);
            aliases.put("GLAG", GLAGOLITIC);
            aliases.put("GONM", MASARAM_GONDI);
            aliases.put("GOTH", GOTHIC);
            aliases.put("GONG", GUNJALA_GONDI);
            aliases.put("GRAN", GRANTHA);
            aliases.put("GREK", GREEK);
            aliases.put("GUJR", GUJARATI);
            aliases.put("GURU", GURMUKHI);
            aliases.put("HANG", HANGUL);
            aliases.put("HANI", HAN);
            aliases.put("HANO", HANUNOO);
            aliases.put("HATR", HATRAN);
            aliases.put("HEBR", HEBREW);
            aliases.put("HIRA", HIRAGANA);
            aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
            aliases.put("HMNG", PAHAWH_HMONG);
            aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG);
            // It appears this enum has no KATAKANA_OR_HIRAGANA constant, so the
            // "HRKT" alias is deliberately left unmapped.
            //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
            aliases.put("HUNG", OLD_HUNGARIAN);
            aliases.put("ITAL", OLD_ITALIC);
            aliases.put("JAVA", JAVANESE);
            aliases.put("KALI", KAYAH_LI);
            aliases.put("KANA", KATAKANA);
            aliases.put("KHAR", KHAROSHTHI);
            aliases.put("KHMR", KHMER);
            aliases.put("KHOJ", KHOJKI);
            aliases.put("KITS", KHITAN_SMALL_SCRIPT);
            aliases.put("KNDA", KANNADA);
            aliases.put("KTHI", KAITHI);
            aliases.put("LANA", TAI_THAM);
            aliases.put("LAOO", LAO);
            aliases.put("LATN", LATIN);
            aliases.put("LEPC", LEPCHA);
            aliases.put("LIMB", LIMBU);
            aliases.put("LINA", LINEAR_A);
            aliases.put("LINB", LINEAR_B);
            aliases.put("LISU", LISU);
            aliases.put("LYCI", LYCIAN);
            aliases.put("LYDI", LYDIAN);
            aliases.put("MAHJ", MAHAJANI);
            aliases.put("MAKA", MAKASAR);
            aliases.put("MARC", MARCHEN);
            aliases.put("MAND", MANDAIC);
            aliases.put("MANI", MANICHAEAN);
            aliases.put("MEDF", MEDEFAIDRIN);
            aliases.put("MEND", MENDE_KIKAKUI);
            aliases.put("MERC", MEROITIC_CURSIVE);
            aliases.put("MERO", MEROITIC_HIEROGLYPHS);
            aliases.put("MLYM", MALAYALAM);
            aliases.put("MODI", MODI);
            aliases.put("MONG", MONGOLIAN);
            aliases.put("MROO", MRO);
            aliases.put("MTEI", MEETEI_MAYEK);
            aliases.put("MULT", MULTANI);
            aliases.put("MYMR", MYANMAR);
            aliases.put("NAND", NANDINAGARI);
            aliases.put("NARB", OLD_NORTH_ARABIAN);
            aliases.put("NBAT", NABATAEAN);
            aliases.put("NEWA", NEWA);
            aliases.put("NKOO", NKO);
            aliases.put("NSHU", NUSHU);
            aliases.put("OGAM", OGHAM);
            aliases.put("OLCK", OL_CHIKI);
            aliases.put("ORKH", OLD_TURKIC);
            aliases.put("ORYA", ORIYA);
            aliases.put("OSGE", OSAGE);
            aliases.put("OSMA", OSMANYA);
            aliases.put("PALM", PALMYRENE);
            aliases.put("PAUC", PAU_CIN_HAU);
            aliases.put("PERM", OLD_PERMIC);
            aliases.put("PHAG", PHAGS_PA);
            aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
            aliases.put("PHLP", PSALTER_PAHLAVI);
            aliases.put("PHNX", PHOENICIAN);
            aliases.put("PLRD", MIAO);
            aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
            aliases.put("RJNG", REJANG);
            aliases.put("ROHG", HANIFI_ROHINGYA);
            aliases.put("RUNR", RUNIC);
            aliases.put("SAMR", SAMARITAN);
            aliases.put("SARB", OLD_SOUTH_ARABIAN);
            aliases.put("SAUR", SAURASHTRA);
            aliases.put("SGNW", SIGNWRITING);
            aliases.put("SHAW", SHAVIAN);
            aliases.put("SHRD", SHARADA);
            aliases.put("SIDD", SIDDHAM);
            aliases.put("SIND", KHUDAWADI);
            aliases.put("SINH", SINHALA);
            aliases.put("SOGD", SOGDIAN);
            aliases.put("SOGO", OLD_SOGDIAN);
            aliases.put("SORA", SORA_SOMPENG);
            aliases.put("SOYO", SOYOMBO);
            aliases.put("SUND", SUNDANESE);
            aliases.put("SYLO", SYLOTI_NAGRI);
            aliases.put("SYRC", SYRIAC);
            aliases.put("TAGB", TAGBANWA);
            aliases.put("TAKR", TAKRI);
            aliases.put("TALE", TAI_LE);
            aliases.put("TALU", NEW_TAI_LUE);
            aliases.put("TAML", TAMIL);
            aliases.put("TANG", TANGUT);
            aliases.put("TAVT", TAI_VIET);
            aliases.put("TELU", TELUGU);
            aliases.put("TFNG", TIFINAGH);
            aliases.put("TGLG", TAGALOG);
            aliases.put("THAA", THAANA);
            aliases.put("THAI", THAI);
            aliases.put("TIBT", TIBETAN);
            aliases.put("TIRH", TIRHUTA);
            aliases.put("UGAR", UGARITIC);
            aliases.put("VAII", VAI);
            aliases.put("WARA", WARANG_CITI);
            aliases.put("WCHO", WANCHO);
            aliases.put("XPEO", OLD_PERSIAN);
            aliases.put("XSUX", CUNEIFORM);
            aliases.put("YIII", YI);
            aliases.put("YEZI", YEZIDI);
            aliases.put("ZANB", ZANABAZAR_SQUARE);
            aliases.put("ZINH", INHERITED);
            aliases.put("ZYYY", COMMON);
            aliases.put("ZZZZ", UNKNOWN);
        }

        /**
         * Returns the enum constant representing the Unicode script to which
         * the given character (Unicode code point) is assigned.
8531 * 8532 * @param codePoint the character (Unicode code point) in question. 8533 * @return The {@code UnicodeScript} constant representing the 8534 * Unicode script of which this character is assigned to. 8535 * 8536 * @throws IllegalArgumentException if the specified 8537 * {@code codePoint} is an invalid Unicode code point. 8538 * @see Character#isValidCodePoint(int) 8539 * 8540 */ of(int codePoint)8541 public static UnicodeScript of(int codePoint) { 8542 if (!isValidCodePoint(codePoint)) 8543 throw new IllegalArgumentException( 8544 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 8545 int type = getType(codePoint); 8546 // leave SURROGATE and PRIVATE_USE for table lookup 8547 if (type == UNASSIGNED) 8548 return UNKNOWN; 8549 int index = Arrays.binarySearch(scriptStarts, codePoint); 8550 if (index < 0) 8551 index = -index - 2; 8552 return scripts[index]; 8553 } 8554 8555 /** 8556 * Returns the UnicodeScript constant with the given Unicode script 8557 * name or the script name alias. Script names and their aliases are 8558 * determined by The Unicode Standard. The files {@code Scripts<version>.txt} 8559 * and {@code PropertyValueAliases<version>.txt} define script names 8560 * and the script name aliases for a particular version of the 8561 * standard. The {@link Character} class specifies the version of 8562 * the standard that it supports. 8563 * <p> 8564 * Character case is ignored for all of the valid script names. 8565 * The en_US locale's case mapping rules are used to provide 8566 * case-insensitive string comparisons for script name validation. 8567 * 8568 * @param scriptName A {@code UnicodeScript} name. 
8569 * @return The {@code UnicodeScript} constant identified 8570 * by {@code scriptName} 8571 * @throws IllegalArgumentException if {@code scriptName} is an 8572 * invalid name 8573 * @throws NullPointerException if {@code scriptName} is null 8574 */ forName(String scriptName)8575 public static final UnicodeScript forName(String scriptName) { 8576 scriptName = scriptName.toUpperCase(Locale.ENGLISH); 8577 //.replace(' ', '_')); 8578 UnicodeScript sc = aliases.get(scriptName); 8579 if (sc != null) 8580 return sc; 8581 return valueOf(scriptName); 8582 } 8583 } 8584 8585 /** 8586 * The value of the {@code Character}. 8587 * 8588 * @serial 8589 */ 8590 private final char value; 8591 8592 /** use serialVersionUID from JDK 1.0.2 for interoperability */ 8593 @java.io.Serial 8594 private static final long serialVersionUID = 3786198910865385080L; 8595 8596 /** 8597 * Constructs a newly allocated {@code Character} object that 8598 * represents the specified {@code char} value. 8599 * 8600 * @param value the value to be represented by the 8601 * {@code Character} object. 8602 * 8603 * @deprecated 8604 * It is rarely appropriate to use this constructor. The static factory 8605 * {@link #valueOf(char)} is generally a better choice, as it is 8606 * likely to yield significantly better space and time performance. 8607 */ 8608 // Android-changed: not yet forRemoval on Android. 8609 @Deprecated(since="9"/*, forRemoval = true*/) Character(char value)8610 public Character(char value) { 8611 this.value = value; 8612 } 8613 8614 private static class CharacterCache { CharacterCache()8615 private CharacterCache(){} 8616 8617 static final Character[] cache; 8618 static Character[] archivedCache; 8619 8620 static { 8621 int size = 127 + 1; 8622 8623 // Load and use the archived cache if it exists 8624 // Android-removed: CDS is not used on Android. 
8625 // CDS.initializeFromArchive(CharacterCache.class); 8626 if (archivedCache == null || archivedCache.length != size) { 8627 Character[] c = new Character[size]; 8628 for (int i = 0; i < size; i++) { 8629 c[i] = new Character((char) i); 8630 } 8631 archivedCache = c; 8632 } 8633 cache = archivedCache; 8634 } 8635 } 8636 8637 /** 8638 * Returns a {@code Character} instance representing the specified 8639 * {@code char} value. 8640 * If a new {@code Character} instance is not required, this method 8641 * should generally be used in preference to the constructor 8642 * {@link #Character(char)}, as this method is likely to yield 8643 * significantly better space and time performance by caching 8644 * frequently requested values. 8645 * 8646 * This method will always cache values in the range {@code 8647 * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may 8648 * cache other values outside of this range. 8649 * 8650 * @param c a char value. 8651 * @return a {@code Character} instance representing {@code c}. 8652 * @since 1.5 8653 */ 8654 @IntrinsicCandidate valueOf(char c)8655 public static Character valueOf(char c) { 8656 if (c <= 127) { // must cache 8657 return CharacterCache.cache[(int)c]; 8658 } 8659 return new Character(c); 8660 } 8661 8662 /** 8663 * Returns the value of this {@code Character} object. 8664 * @return the primitive {@code char} value represented by 8665 * this object. 8666 */ 8667 @IntrinsicCandidate charValue()8668 public char charValue() { 8669 return value; 8670 } 8671 8672 /** 8673 * Returns a hash code for this {@code Character}; equal to the result 8674 * of invoking {@code charValue()}. 8675 * 8676 * @return a hash code value for this {@code Character} 8677 */ 8678 @Override hashCode()8679 public int hashCode() { 8680 return Character.hashCode(value); 8681 } 8682 8683 /** 8684 * Returns a hash code for a {@code char} value; compatible with 8685 * {@code Character.hashCode()}. 
8686 * 8687 * @since 1.8 8688 * 8689 * @param value The {@code char} for which to return a hash code. 8690 * @return a hash code value for a {@code char} value. 8691 */ hashCode(char value)8692 public static int hashCode(char value) { 8693 return (int)value; 8694 } 8695 8696 /** 8697 * Compares this object against the specified object. 8698 * The result is {@code true} if and only if the argument is not 8699 * {@code null} and is a {@code Character} object that 8700 * represents the same {@code char} value as this object. 8701 * 8702 * @param obj the object to compare with. 8703 * @return {@code true} if the objects are the same; 8704 * {@code false} otherwise. 8705 */ equals(Object obj)8706 public boolean equals(Object obj) { 8707 if (obj instanceof Character) { 8708 return value == ((Character)obj).charValue(); 8709 } 8710 return false; 8711 } 8712 8713 /** 8714 * Returns a {@code String} object representing this 8715 * {@code Character}'s value. The result is a string of 8716 * length 1 whose sole component is the primitive 8717 * {@code char} value represented by this 8718 * {@code Character} object. 8719 * 8720 * @return a string representation of this object. 8721 */ toString()8722 public String toString() { 8723 return String.valueOf(value); 8724 } 8725 8726 // Android-removed: reference to Character.toString(int) in javadoc. 8727 /** 8728 * Returns a {@code String} object representing the 8729 * specified {@code char}. The result is a string of length 8730 * 1 consisting solely of the specified {@code char}. 8731 * 8732 * @param c the {@code char} to be converted 8733 * @return the string representation of the specified {@code char} 8734 * @since 1.4 8735 */ toString(char c)8736 public static String toString(char c) { 8737 return String.valueOf(c); 8738 } 8739 8740 // BEGIN Android-removed: expose after String.valueOfCodePoint() is imported. 8741 /** 8742 * Returns a {@code String} object representing the 8743 * specified character (Unicode code point). 
The result is a string of 8744 * length 1 or 2, consisting solely of the specified {@code codePoint}. 8745 * 8746 * @param codePoint the {@code codePoint} to be converted 8747 * @return the string representation of the specified {@code codePoint} 8748 * @throws IllegalArgumentException if the specified 8749 * {@code codePoint} is not a {@linkplain #isValidCodePoint 8750 * valid Unicode code point}. 8751 * @since 11 8752 * 8753 public static String toString(int codePoint) { 8754 return String.valueOfCodePoint(codePoint); 8755 } 8756 */ 8757 // END Android-removed: expose after String.valueOfCodePoint() is imported. 8758 8759 /** 8760 * Determines whether the specified code point is a valid 8761 * <a href="http://www.unicode.org/glossary/#code_point"> 8762 * Unicode code point value</a>. 8763 * 8764 * @param codePoint the Unicode code point to be tested 8765 * @return {@code true} if the specified code point value is between 8766 * {@link #MIN_CODE_POINT} and 8767 * {@link #MAX_CODE_POINT} inclusive; 8768 * {@code false} otherwise. 8769 * @since 1.5 8770 */ isValidCodePoint(int codePoint)8771 public static boolean isValidCodePoint(int codePoint) { 8772 // Optimized form of: 8773 // codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT 8774 int plane = codePoint >>> 16; 8775 return plane < ((MAX_CODE_POINT + 1) >>> 16); 8776 } 8777 8778 /** 8779 * Determines whether the specified character (Unicode code point) 8780 * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>. 8781 * Such code points can be represented using a single {@code char}. 8782 * 8783 * @param codePoint the character (Unicode code point) to be to 8784 * @return {@code true} if the specified code point is between 8785 * {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive; 8786 * {@code false} otherwise. 
8787 * @since 1.7 8788 */ isBmpCodePoint(int codePoint)8789 public static boolean isBmpCodePoint(int codePoint) { 8790 return codePoint >>> 16 == 0; 8791 // Optimized form of: 8792 // codePoint >= MIN_VALUE && codePoint <= MAX_VALUE 8793 // We consistently use logical shift (>>>) to facilitate 8794 // additional runtime optimizations. 8795 } 8796 8797 /** 8798 * Determines whether the specified character (Unicode code point) 8799 * is in the <a href="#supplementary">supplementary character</a> range. 8800 * 8801 * @param codePoint the character (Unicode code point) to be tested 8802 * @return {@code true} if the specified code point is between 8803 * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and 8804 * {@link #MAX_CODE_POINT} inclusive; 8805 * {@code false} otherwise. 8806 * @since 1.5 8807 */ isSupplementaryCodePoint(int codePoint)8808 public static boolean isSupplementaryCodePoint(int codePoint) { 8809 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT 8810 && codePoint < MAX_CODE_POINT + 1; 8811 } 8812 8813 /** 8814 * Determines if the given {@code char} value is a 8815 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 8816 * Unicode high-surrogate code unit</a> 8817 * (also known as <i>leading-surrogate code unit</i>). 8818 * 8819 * <p>Such values do not represent characters by themselves, 8820 * but are used in the representation of 8821 * <a href="#supplementary">supplementary characters</a> 8822 * in the UTF-16 encoding. 8823 * 8824 * @param ch the {@code char} value to be tested. 8825 * @return {@code true} if the {@code char} value is between 8826 * {@link #MIN_HIGH_SURROGATE} and 8827 * {@link #MAX_HIGH_SURROGATE} inclusive; 8828 * {@code false} otherwise. 
8829 * @see Character#isLowSurrogate(char) 8830 * @see Character.UnicodeBlock#of(int) 8831 * @since 1.5 8832 */ isHighSurrogate(char ch)8833 public static boolean isHighSurrogate(char ch) { 8834 // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE 8835 return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1); 8836 } 8837 8838 /** 8839 * Determines if the given {@code char} value is a 8840 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 8841 * Unicode low-surrogate code unit</a> 8842 * (also known as <i>trailing-surrogate code unit</i>). 8843 * 8844 * <p>Such values do not represent characters by themselves, 8845 * but are used in the representation of 8846 * <a href="#supplementary">supplementary characters</a> 8847 * in the UTF-16 encoding. 8848 * 8849 * @param ch the {@code char} value to be tested. 8850 * @return {@code true} if the {@code char} value is between 8851 * {@link #MIN_LOW_SURROGATE} and 8852 * {@link #MAX_LOW_SURROGATE} inclusive; 8853 * {@code false} otherwise. 8854 * @see Character#isHighSurrogate(char) 8855 * @since 1.5 8856 */ isLowSurrogate(char ch)8857 public static boolean isLowSurrogate(char ch) { 8858 return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1); 8859 } 8860 8861 /** 8862 * Determines if the given {@code char} value is a Unicode 8863 * <i>surrogate code unit</i>. 8864 * 8865 * <p>Such values do not represent characters by themselves, 8866 * but are used in the representation of 8867 * <a href="#supplementary">supplementary characters</a> 8868 * in the UTF-16 encoding. 8869 * 8870 * <p>A char value is a surrogate code unit if and only if it is either 8871 * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or 8872 * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}. 8873 * 8874 * @param ch the {@code char} value to be tested. 
8875 * @return {@code true} if the {@code char} value is between 8876 * {@link #MIN_SURROGATE} and 8877 * {@link #MAX_SURROGATE} inclusive; 8878 * {@code false} otherwise. 8879 * @since 1.7 8880 */ isSurrogate(char ch)8881 public static boolean isSurrogate(char ch) { 8882 return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1); 8883 } 8884 8885 /** 8886 * Determines whether the specified pair of {@code char} 8887 * values is a valid 8888 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 8889 * Unicode surrogate pair</a>. 8890 * 8891 * <p>This method is equivalent to the expression: 8892 * <blockquote><pre>{@code 8893 * isHighSurrogate(high) && isLowSurrogate(low) 8894 * }</pre></blockquote> 8895 * 8896 * @param high the high-surrogate code value to be tested 8897 * @param low the low-surrogate code value to be tested 8898 * @return {@code true} if the specified high and 8899 * low-surrogate code values represent a valid surrogate pair; 8900 * {@code false} otherwise. 8901 * @since 1.5 8902 */ isSurrogatePair(char high, char low)8903 public static boolean isSurrogatePair(char high, char low) { 8904 return isHighSurrogate(high) && isLowSurrogate(low); 8905 } 8906 8907 /** 8908 * Determines the number of {@code char} values needed to 8909 * represent the specified character (Unicode code point). If the 8910 * specified character is equal to or greater than 0x10000, then 8911 * the method returns 2. Otherwise, the method returns 1. 8912 * 8913 * <p>This method doesn't validate the specified character to be a 8914 * valid Unicode code point. The caller must validate the 8915 * character value using {@link #isValidCodePoint(int) isValidCodePoint} 8916 * if necessary. 8917 * 8918 * @param codePoint the character (Unicode code point) to be tested. 8919 * @return 2 if the character is a valid supplementary character; 1 otherwise. 
8920 * @see Character#isSupplementaryCodePoint(int) 8921 * @since 1.5 8922 */ charCount(int codePoint)8923 public static int charCount(int codePoint) { 8924 return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1; 8925 } 8926 8927 /** 8928 * Converts the specified surrogate pair to its supplementary code 8929 * point value. This method does not validate the specified 8930 * surrogate pair. The caller must validate it using {@link 8931 * #isSurrogatePair(char, char) isSurrogatePair} if necessary. 8932 * 8933 * @param high the high-surrogate code unit 8934 * @param low the low-surrogate code unit 8935 * @return the supplementary code point composed from the 8936 * specified surrogate pair. 8937 * @since 1.5 8938 */ toCodePoint(char high, char low)8939 public static int toCodePoint(char high, char low) { 8940 // Optimized form of: 8941 // return ((high - MIN_HIGH_SURROGATE) << 10) 8942 // + (low - MIN_LOW_SURROGATE) 8943 // + MIN_SUPPLEMENTARY_CODE_POINT; 8944 return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT 8945 - (MIN_HIGH_SURROGATE << 10) 8946 - MIN_LOW_SURROGATE); 8947 } 8948 8949 /** 8950 * Returns the code point at the given index of the 8951 * {@code CharSequence}. If the {@code char} value at 8952 * the given index in the {@code CharSequence} is in the 8953 * high-surrogate range, the following index is less than the 8954 * length of the {@code CharSequence}, and the 8955 * {@code char} value at the following index is in the 8956 * low-surrogate range, then the supplementary code point 8957 * corresponding to this surrogate pair is returned. Otherwise, 8958 * the {@code char} value at the given index is returned. 8959 * 8960 * @param seq a sequence of {@code char} values (Unicode code 8961 * units) 8962 * @param index the index to the {@code char} values (Unicode 8963 * code units) in {@code seq} to be converted 8964 * @return the Unicode code point at the given index 8965 * @throws NullPointerException if {@code seq} is null. 
8966 * @throws IndexOutOfBoundsException if the value 8967 * {@code index} is negative or not less than 8968 * {@link CharSequence#length() seq.length()}. 8969 * @since 1.5 8970 */ codePointAt(CharSequence seq, int index)8971 public static int codePointAt(CharSequence seq, int index) { 8972 char c1 = seq.charAt(index); 8973 if (isHighSurrogate(c1) && ++index < seq.length()) { 8974 char c2 = seq.charAt(index); 8975 if (isLowSurrogate(c2)) { 8976 return toCodePoint(c1, c2); 8977 } 8978 } 8979 return c1; 8980 } 8981 8982 /** 8983 * Returns the code point at the given index of the 8984 * {@code char} array. If the {@code char} value at 8985 * the given index in the {@code char} array is in the 8986 * high-surrogate range, the following index is less than the 8987 * length of the {@code char} array, and the 8988 * {@code char} value at the following index is in the 8989 * low-surrogate range, then the supplementary code point 8990 * corresponding to this surrogate pair is returned. Otherwise, 8991 * the {@code char} value at the given index is returned. 8992 * 8993 * @param a the {@code char} array 8994 * @param index the index to the {@code char} values (Unicode 8995 * code units) in the {@code char} array to be converted 8996 * @return the Unicode code point at the given index 8997 * @throws NullPointerException if {@code a} is null. 8998 * @throws IndexOutOfBoundsException if the value 8999 * {@code index} is negative or not less than 9000 * the length of the {@code char} array. 9001 * @since 1.5 9002 */ codePointAt(char[] a, int index)9003 public static int codePointAt(char[] a, int index) { 9004 return codePointAtImpl(a, index, a.length); 9005 } 9006 9007 /** 9008 * Returns the code point at the given index of the 9009 * {@code char} array, where only array elements with 9010 * {@code index} less than {@code limit} can be used. 
If 9011 * the {@code char} value at the given index in the 9012 * {@code char} array is in the high-surrogate range, the 9013 * following index is less than the {@code limit}, and the 9014 * {@code char} value at the following index is in the 9015 * low-surrogate range, then the supplementary code point 9016 * corresponding to this surrogate pair is returned. Otherwise, 9017 * the {@code char} value at the given index is returned. 9018 * 9019 * @param a the {@code char} array 9020 * @param index the index to the {@code char} values (Unicode 9021 * code units) in the {@code char} array to be converted 9022 * @param limit the index after the last array element that 9023 * can be used in the {@code char} array 9024 * @return the Unicode code point at the given index 9025 * @throws NullPointerException if {@code a} is null. 9026 * @throws IndexOutOfBoundsException if the {@code index} 9027 * argument is negative or not less than the {@code limit} 9028 * argument, or if the {@code limit} argument is negative or 9029 * greater than the length of the {@code char} array. 9030 * @since 1.5 9031 */ codePointAt(char[] a, int index, int limit)9032 public static int codePointAt(char[] a, int index, int limit) { 9033 if (index >= limit || limit < 0 || limit > a.length) { 9034 throw new IndexOutOfBoundsException(); 9035 } 9036 return codePointAtImpl(a, index, limit); 9037 } 9038 9039 // throws ArrayIndexOutOfBoundsException if index out of bounds codePointAtImpl(char[] a, int index, int limit)9040 static int codePointAtImpl(char[] a, int index, int limit) { 9041 char c1 = a[index]; 9042 if (isHighSurrogate(c1) && ++index < limit) { 9043 char c2 = a[index]; 9044 if (isLowSurrogate(c2)) { 9045 return toCodePoint(c1, c2); 9046 } 9047 } 9048 return c1; 9049 } 9050 9051 /** 9052 * Returns the code point preceding the given index of the 9053 * {@code CharSequence}. 
If the {@code char} value at 9054 * {@code (index - 1)} in the {@code CharSequence} is in 9055 * the low-surrogate range, {@code (index - 2)} is not 9056 * negative, and the {@code char} value at {@code (index - 2)} 9057 * in the {@code CharSequence} is in the 9058 * high-surrogate range, then the supplementary code point 9059 * corresponding to this surrogate pair is returned. Otherwise, 9060 * the {@code char} value at {@code (index - 1)} is 9061 * returned. 9062 * 9063 * @param seq the {@code CharSequence} instance 9064 * @param index the index following the code point that should be returned 9065 * @return the Unicode code point value before the given index. 9066 * @throws NullPointerException if {@code seq} is null. 9067 * @throws IndexOutOfBoundsException if the {@code index} 9068 * argument is less than 1 or greater than {@link 9069 * CharSequence#length() seq.length()}. 9070 * @since 1.5 9071 */ codePointBefore(CharSequence seq, int index)9072 public static int codePointBefore(CharSequence seq, int index) { 9073 char c2 = seq.charAt(--index); 9074 if (isLowSurrogate(c2) && index > 0) { 9075 char c1 = seq.charAt(--index); 9076 if (isHighSurrogate(c1)) { 9077 return toCodePoint(c1, c2); 9078 } 9079 } 9080 return c2; 9081 } 9082 9083 /** 9084 * Returns the code point preceding the given index of the 9085 * {@code char} array. If the {@code char} value at 9086 * {@code (index - 1)} in the {@code char} array is in 9087 * the low-surrogate range, {@code (index - 2)} is not 9088 * negative, and the {@code char} value at {@code (index - 2)} 9089 * in the {@code char} array is in the 9090 * high-surrogate range, then the supplementary code point 9091 * corresponding to this surrogate pair is returned. Otherwise, 9092 * the {@code char} value at {@code (index - 1)} is 9093 * returned. 
9094 * 9095 * @param a the {@code char} array 9096 * @param index the index following the code point that should be returned 9097 * @return the Unicode code point value before the given index. 9098 * @throws NullPointerException if {@code a} is null. 9099 * @throws IndexOutOfBoundsException if the {@code index} 9100 * argument is less than 1 or greater than the length of the 9101 * {@code char} array 9102 * @since 1.5 9103 */ codePointBefore(char[] a, int index)9104 public static int codePointBefore(char[] a, int index) { 9105 return codePointBeforeImpl(a, index, 0); 9106 } 9107 9108 /** 9109 * Returns the code point preceding the given index of the 9110 * {@code char} array, where only array elements with 9111 * {@code index} greater than or equal to {@code start} 9112 * can be used. If the {@code char} value at {@code (index - 1)} 9113 * in the {@code char} array is in the 9114 * low-surrogate range, {@code (index - 2)} is not less than 9115 * {@code start}, and the {@code char} value at 9116 * {@code (index - 2)} in the {@code char} array is in 9117 * the high-surrogate range, then the supplementary code point 9118 * corresponding to this surrogate pair is returned. Otherwise, 9119 * the {@code char} value at {@code (index - 1)} is 9120 * returned. 9121 * 9122 * @param a the {@code char} array 9123 * @param index the index following the code point that should be returned 9124 * @param start the index of the first array element in the 9125 * {@code char} array 9126 * @return the Unicode code point value before the given index. 9127 * @throws NullPointerException if {@code a} is null. 9128 * @throws IndexOutOfBoundsException if the {@code index} 9129 * argument is not greater than the {@code start} argument or 9130 * is greater than the length of the {@code char} array, or 9131 * if the {@code start} argument is negative or not less than 9132 * the length of the {@code char} array. 
9133 * @since 1.5 9134 */ codePointBefore(char[] a, int index, int start)9135 public static int codePointBefore(char[] a, int index, int start) { 9136 if (index <= start || start < 0 || start >= a.length) { 9137 throw new IndexOutOfBoundsException(); 9138 } 9139 return codePointBeforeImpl(a, index, start); 9140 } 9141 9142 // throws ArrayIndexOutOfBoundsException if index-1 out of bounds codePointBeforeImpl(char[] a, int index, int start)9143 static int codePointBeforeImpl(char[] a, int index, int start) { 9144 char c2 = a[--index]; 9145 if (isLowSurrogate(c2) && index > start) { 9146 char c1 = a[--index]; 9147 if (isHighSurrogate(c1)) { 9148 return toCodePoint(c1, c2); 9149 } 9150 } 9151 return c2; 9152 } 9153 9154 /** 9155 * Returns the leading surrogate (a 9156 * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit"> 9157 * high surrogate code unit</a>) of the 9158 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9159 * surrogate pair</a> 9160 * representing the specified supplementary character (Unicode 9161 * code point) in the UTF-16 encoding. If the specified character 9162 * is not a 9163 * <a href="Character.html#supplementary">supplementary character</a>, 9164 * an unspecified {@code char} is returned. 9165 * 9166 * <p>If 9167 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9168 * is {@code true}, then 9169 * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and 9170 * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x} 9171 * are also always {@code true}. 
9172 * 9173 * @param codePoint a supplementary character (Unicode code point) 9174 * @return the leading surrogate code unit used to represent the 9175 * character in the UTF-16 encoding 9176 * @since 1.7 9177 */ highSurrogate(int codePoint)9178 public static char highSurrogate(int codePoint) { 9179 return (char) ((codePoint >>> 10) 9180 + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); 9181 } 9182 9183 /** 9184 * Returns the trailing surrogate (a 9185 * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit"> 9186 * low surrogate code unit</a>) of the 9187 * <a href="http://www.unicode.org/glossary/#surrogate_pair"> 9188 * surrogate pair</a> 9189 * representing the specified supplementary character (Unicode 9190 * code point) in the UTF-16 encoding. If the specified character 9191 * is not a 9192 * <a href="Character.html#supplementary">supplementary character</a>, 9193 * an unspecified {@code char} is returned. 9194 * 9195 * <p>If 9196 * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)} 9197 * is {@code true}, then 9198 * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and 9199 * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x} 9200 * are also always {@code true}. 9201 * 9202 * @param codePoint a supplementary character (Unicode code point) 9203 * @return the trailing surrogate code unit used to represent the 9204 * character in the UTF-16 encoding 9205 * @since 1.7 9206 */ lowSurrogate(int codePoint)9207 public static char lowSurrogate(int codePoint) { 9208 return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE); 9209 } 9210 9211 /** 9212 * Converts the specified character (Unicode code point) to its 9213 * UTF-16 representation. If the specified code point is a BMP 9214 * (Basic Multilingual Plane or Plane 0) value, the same value is 9215 * stored in {@code dst[dstIndex]}, and 1 is returned. 
If the 9216 * specified code point is a supplementary character, its 9217 * surrogate values are stored in {@code dst[dstIndex]} 9218 * (high-surrogate) and {@code dst[dstIndex+1]} 9219 * (low-surrogate), and 2 is returned. 9220 * 9221 * @param codePoint the character (Unicode code point) to be converted. 9222 * @param dst an array of {@code char} in which the 9223 * {@code codePoint}'s UTF-16 value is stored. 9224 * @param dstIndex the start index into the {@code dst} 9225 * array where the converted value is stored. 9226 * @return 1 if the code point is a BMP code point, 2 if the 9227 * code point is a supplementary code point. 9228 * @throws IllegalArgumentException if the specified 9229 * {@code codePoint} is not a valid Unicode code point. 9230 * @throws NullPointerException if the specified {@code dst} is null. 9231 * @throws IndexOutOfBoundsException if {@code dstIndex} 9232 * is negative or not less than {@code dst.length}, or if 9233 * {@code dst} at {@code dstIndex} doesn't have enough 9234 * array element(s) to store the resulting {@code char} 9235 * value(s). (If {@code dstIndex} is equal to 9236 * {@code dst.length-1} and the specified 9237 * {@code codePoint} is a supplementary character, the 9238 * high-surrogate value is not stored in 9239 * {@code dst[dstIndex]}.) 9240 * @since 1.5 9241 */ toChars(int codePoint, char[] dst, int dstIndex)9242 public static int toChars(int codePoint, char[] dst, int dstIndex) { 9243 if (isBmpCodePoint(codePoint)) { 9244 dst[dstIndex] = (char) codePoint; 9245 return 1; 9246 } else if (isValidCodePoint(codePoint)) { 9247 toSurrogates(codePoint, dst, dstIndex); 9248 return 2; 9249 } else { 9250 throw new IllegalArgumentException( 9251 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9252 } 9253 } 9254 9255 /** 9256 * Converts the specified character (Unicode code point) to its 9257 * UTF-16 representation stored in a {@code char} array. 
If 9258 * the specified code point is a BMP (Basic Multilingual Plane or 9259 * Plane 0) value, the resulting {@code char} array has 9260 * the same value as {@code codePoint}. If the specified code 9261 * point is a supplementary code point, the resulting 9262 * {@code char} array has the corresponding surrogate pair. 9263 * 9264 * @param codePoint a Unicode code point 9265 * @return a {@code char} array having 9266 * {@code codePoint}'s UTF-16 representation. 9267 * @throws IllegalArgumentException if the specified 9268 * {@code codePoint} is not a valid Unicode code point. 9269 * @since 1.5 9270 */ toChars(int codePoint)9271 public static char[] toChars(int codePoint) { 9272 if (isBmpCodePoint(codePoint)) { 9273 return new char[] { (char) codePoint }; 9274 } else if (isValidCodePoint(codePoint)) { 9275 char[] result = new char[2]; 9276 toSurrogates(codePoint, result, 0); 9277 return result; 9278 } else { 9279 throw new IllegalArgumentException( 9280 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 9281 } 9282 } 9283 toSurrogates(int codePoint, char[] dst, int index)9284 static void toSurrogates(int codePoint, char[] dst, int index) { 9285 // We write elements "backwards" to guarantee all-or-nothing 9286 dst[index+1] = lowSurrogate(codePoint); 9287 dst[index] = highSurrogate(codePoint); 9288 } 9289 9290 /** 9291 * Returns the number of Unicode code points in the text range of 9292 * the specified char sequence. The text range begins at the 9293 * specified {@code beginIndex} and extends to the 9294 * {@code char} at index {@code endIndex - 1}. Thus the 9295 * length (in {@code char}s) of the text range is 9296 * {@code endIndex-beginIndex}. Unpaired surrogates within 9297 * the text range count as one code point each. 9298 * 9299 * @param seq the char sequence 9300 * @param beginIndex the index to the first {@code char} of 9301 * the text range. 9302 * @param endIndex the index after the last {@code char} of 9303 * the text range. 
9304 * @return the number of Unicode code points in the specified text 9305 * range 9306 * @throws NullPointerException if {@code seq} is null. 9307 * @throws IndexOutOfBoundsException if the 9308 * {@code beginIndex} is negative, or {@code endIndex} 9309 * is larger than the length of the given sequence, or 9310 * {@code beginIndex} is larger than {@code endIndex}. 9311 * @since 1.5 9312 */ codePointCount(CharSequence seq, int beginIndex, int endIndex)9313 public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { 9314 int length = seq.length(); 9315 if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) { 9316 throw new IndexOutOfBoundsException(); 9317 } 9318 int n = endIndex - beginIndex; 9319 for (int i = beginIndex; i < endIndex; ) { 9320 if (isHighSurrogate(seq.charAt(i++)) && i < endIndex && 9321 isLowSurrogate(seq.charAt(i))) { 9322 n--; 9323 i++; 9324 } 9325 } 9326 return n; 9327 } 9328 9329 /** 9330 * Returns the number of Unicode code points in a subarray of the 9331 * {@code char} array argument. The {@code offset} 9332 * argument is the index of the first {@code char} of the 9333 * subarray and the {@code count} argument specifies the 9334 * length of the subarray in {@code char}s. Unpaired 9335 * surrogates within the subarray count as one code point each. 9336 * 9337 * @param a the {@code char} array 9338 * @param offset the index of the first {@code char} in the 9339 * given {@code char} array 9340 * @param count the length of the subarray in {@code char}s 9341 * @return the number of Unicode code points in the specified subarray 9342 * @throws NullPointerException if {@code a} is null. 9343 * @throws IndexOutOfBoundsException if {@code offset} or 9344 * {@code count} is negative, or if {@code offset + 9345 * count} is larger than the length of the given array. 
9346 * @since 1.5 9347 */ codePointCount(char[] a, int offset, int count)9348 public static int codePointCount(char[] a, int offset, int count) { 9349 if (count > a.length - offset || offset < 0 || count < 0) { 9350 throw new IndexOutOfBoundsException(); 9351 } 9352 return codePointCountImpl(a, offset, count); 9353 } 9354 codePointCountImpl(char[] a, int offset, int count)9355 static int codePointCountImpl(char[] a, int offset, int count) { 9356 int endIndex = offset + count; 9357 int n = count; 9358 for (int i = offset; i < endIndex; ) { 9359 if (isHighSurrogate(a[i++]) && i < endIndex && 9360 isLowSurrogate(a[i])) { 9361 n--; 9362 i++; 9363 } 9364 } 9365 return n; 9366 } 9367 9368 /** 9369 * Returns the index within the given char sequence that is offset 9370 * from the given {@code index} by {@code codePointOffset} 9371 * code points. Unpaired surrogates within the text range given by 9372 * {@code index} and {@code codePointOffset} count as 9373 * one code point each. 9374 * 9375 * @param seq the char sequence 9376 * @param index the index to be offset 9377 * @param codePointOffset the offset in code points 9378 * @return the index within the char sequence 9379 * @throws NullPointerException if {@code seq} is null. 9380 * @throws IndexOutOfBoundsException if {@code index} 9381 * is negative or larger then the length of the char sequence, 9382 * or if {@code codePointOffset} is positive and the 9383 * subsequence starting with {@code index} has fewer than 9384 * {@code codePointOffset} code points, or if 9385 * {@code codePointOffset} is negative and the subsequence 9386 * before {@code index} has fewer than the absolute value 9387 * of {@code codePointOffset} code points. 
9388 * @since 1.5 9389 */ offsetByCodePoints(CharSequence seq, int index, int codePointOffset)9390 public static int offsetByCodePoints(CharSequence seq, int index, 9391 int codePointOffset) { 9392 int length = seq.length(); 9393 if (index < 0 || index > length) { 9394 throw new IndexOutOfBoundsException(); 9395 } 9396 9397 int x = index; 9398 if (codePointOffset >= 0) { 9399 int i; 9400 for (i = 0; x < length && i < codePointOffset; i++) { 9401 if (isHighSurrogate(seq.charAt(x++)) && x < length && 9402 isLowSurrogate(seq.charAt(x))) { 9403 x++; 9404 } 9405 } 9406 if (i < codePointOffset) { 9407 throw new IndexOutOfBoundsException(); 9408 } 9409 } else { 9410 int i; 9411 for (i = codePointOffset; x > 0 && i < 0; i++) { 9412 if (isLowSurrogate(seq.charAt(--x)) && x > 0 && 9413 isHighSurrogate(seq.charAt(x-1))) { 9414 x--; 9415 } 9416 } 9417 if (i < 0) { 9418 throw new IndexOutOfBoundsException(); 9419 } 9420 } 9421 return x; 9422 } 9423 9424 /** 9425 * Returns the index within the given {@code char} subarray 9426 * that is offset from the given {@code index} by 9427 * {@code codePointOffset} code points. The 9428 * {@code start} and {@code count} arguments specify a 9429 * subarray of the {@code char} array. Unpaired surrogates 9430 * within the text range given by {@code index} and 9431 * {@code codePointOffset} count as one code point each. 9432 * 9433 * @param a the {@code char} array 9434 * @param start the index of the first {@code char} of the 9435 * subarray 9436 * @param count the length of the subarray in {@code char}s 9437 * @param index the index to be offset 9438 * @param codePointOffset the offset in code points 9439 * @return the index within the subarray 9440 * @throws NullPointerException if {@code a} is null. 
9441 * @throws IndexOutOfBoundsException 9442 * if {@code start} or {@code count} is negative, 9443 * or if {@code start + count} is larger than the length of 9444 * the given array, 9445 * or if {@code index} is less than {@code start} or 9446 * larger then {@code start + count}, 9447 * or if {@code codePointOffset} is positive and the text range 9448 * starting with {@code index} and ending with {@code start + count - 1} 9449 * has fewer than {@code codePointOffset} code 9450 * points, 9451 * or if {@code codePointOffset} is negative and the text range 9452 * starting with {@code start} and ending with {@code index - 1} 9453 * has fewer than the absolute value of 9454 * {@code codePointOffset} code points. 9455 * @since 1.5 9456 */ offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset)9457 public static int offsetByCodePoints(char[] a, int start, int count, 9458 int index, int codePointOffset) { 9459 if (count > a.length-start || start < 0 || count < 0 9460 || index < start || index > start+count) { 9461 throw new IndexOutOfBoundsException(); 9462 } 9463 return offsetByCodePointsImpl(a, start, count, index, codePointOffset); 9464 } 9465 offsetByCodePointsImpl(char[]a, int start, int count, int index, int codePointOffset)9466 static int offsetByCodePointsImpl(char[]a, int start, int count, 9467 int index, int codePointOffset) { 9468 int x = index; 9469 if (codePointOffset >= 0) { 9470 int limit = start + count; 9471 int i; 9472 for (i = 0; x < limit && i < codePointOffset; i++) { 9473 if (isHighSurrogate(a[x++]) && x < limit && 9474 isLowSurrogate(a[x])) { 9475 x++; 9476 } 9477 } 9478 if (i < codePointOffset) { 9479 throw new IndexOutOfBoundsException(); 9480 } 9481 } else { 9482 int i; 9483 for (i = codePointOffset; x > start && i < 0; i++) { 9484 if (isLowSurrogate(a[--x]) && x > start && 9485 isHighSurrogate(a[x-1])) { 9486 x--; 9487 } 9488 } 9489 if (i < 0) { 9490 throw new IndexOutOfBoundsException(); 9491 } 9492 } 9493 return x; 
9494 } 9495 9496 /** 9497 * Determines if the specified character is a lowercase character. 9498 * <p> 9499 * A character is lowercase if its general category type, provided 9500 * by {@code Character.getType(ch)}, is 9501 * {@code LOWERCASE_LETTER}, or it has contributory property 9502 * Other_Lowercase as defined by the Unicode Standard. 9503 * <p> 9504 * The following are examples of lowercase characters: 9505 * <blockquote><pre> 9506 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9507 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9508 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9509 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9510 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9511 * </pre></blockquote> 9512 * <p> Many other Unicode characters are lowercase too. 9513 * 9514 * <p><b>Note:</b> This method cannot handle <a 9515 * href="#supplementary"> supplementary characters</a>. To support 9516 * all Unicode characters, including supplementary characters, use 9517 * the {@link #isLowerCase(int)} method. 9518 * 9519 * @param ch the character to be tested. 9520 * @return {@code true} if the character is lowercase; 9521 * {@code false} otherwise. 9522 * @see Character#isLowerCase(char) 9523 * @see Character#isTitleCase(char) 9524 * @see Character#toLowerCase(char) 9525 * @see Character#getType(char) 9526 */ isLowerCase(char ch)9527 public static boolean isLowerCase(char ch) { 9528 return isLowerCase((int)ch); 9529 } 9530 9531 /** 9532 * Determines if the specified character (Unicode code point) is a 9533 * lowercase character. 9534 * <p> 9535 * A character is lowercase if its general category type, provided 9536 * by {@link Character#getType getType(codePoint)}, is 9537 * {@code LOWERCASE_LETTER}, or it has contributory property 9538 * Other_Lowercase as defined by the Unicode Standard. 
9539 * <p> 9540 * The following are examples of lowercase characters: 9541 * <blockquote><pre> 9542 * a b c d e f g h i j k l m n o p q r s t u v w x y z 9543 * '\u00DF' '\u00E0' '\u00E1' '\u00E2' '\u00E3' '\u00E4' '\u00E5' '\u00E6' 9544 * '\u00E7' '\u00E8' '\u00E9' '\u00EA' '\u00EB' '\u00EC' '\u00ED' '\u00EE' 9545 * '\u00EF' '\u00F0' '\u00F1' '\u00F2' '\u00F3' '\u00F4' '\u00F5' '\u00F6' 9546 * '\u00F8' '\u00F9' '\u00FA' '\u00FB' '\u00FC' '\u00FD' '\u00FE' '\u00FF' 9547 * </pre></blockquote> 9548 * <p> Many other Unicode characters are lowercase too. 9549 * 9550 * @param codePoint the character (Unicode code point) to be tested. 9551 * @return {@code true} if the character is lowercase; 9552 * {@code false} otherwise. 9553 * @see Character#isLowerCase(int) 9554 * @see Character#isTitleCase(int) 9555 * @see Character#toLowerCase(int) 9556 * @see Character#getType(int) 9557 * @since 1.5 9558 */ 9559 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 9560 /* 9561 public static boolean isLowerCase(int codePoint) { 9562 return CharacterData.of(codePoint).isLowerCase(codePoint); 9563 } 9564 */ isLowerCase(int codePoint)9565 public static boolean isLowerCase(int codePoint) { 9566 return isLowerCaseImpl(codePoint); 9567 } 9568 9569 @FastNative isLowerCaseImpl(int codePoint)9570 static native boolean isLowerCaseImpl(int codePoint); 9571 // END Android-changed: Reimplement methods natively on top of ICU4C. 9572 9573 /** 9574 * Determines if the specified character is an uppercase character. 9575 * <p> 9576 * A character is uppercase if its general category type, provided by 9577 * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}. 9578 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 
9579 * <p> 9580 * The following are examples of uppercase characters: 9581 * <blockquote><pre> 9582 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9583 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9584 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9585 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9586 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9587 * </pre></blockquote> 9588 * <p> Many other Unicode characters are uppercase too. 9589 * 9590 * <p><b>Note:</b> This method cannot handle <a 9591 * href="#supplementary"> supplementary characters</a>. To support 9592 * all Unicode characters, including supplementary characters, use 9593 * the {@link #isUpperCase(int)} method. 9594 * 9595 * @param ch the character to be tested. 9596 * @return {@code true} if the character is uppercase; 9597 * {@code false} otherwise. 9598 * @see Character#isLowerCase(char) 9599 * @see Character#isTitleCase(char) 9600 * @see Character#toUpperCase(char) 9601 * @see Character#getType(char) 9602 * @since 1.0 9603 */ isUpperCase(char ch)9604 public static boolean isUpperCase(char ch) { 9605 return isUpperCase((int)ch); 9606 } 9607 9608 /** 9609 * Determines if the specified character (Unicode code point) is an uppercase character. 9610 * <p> 9611 * A character is uppercase if its general category type, provided by 9612 * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}, 9613 * or it has contributory property Other_Uppercase as defined by the Unicode Standard. 
9614 * <p> 9615 * The following are examples of uppercase characters: 9616 * <blockquote><pre> 9617 * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z 9618 * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' 9619 * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' 9620 * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' 9621 * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' 9622 * </pre></blockquote> 9623 * <p> Many other Unicode characters are uppercase too. 9624 * 9625 * @param codePoint the character (Unicode code point) to be tested. 9626 * @return {@code true} if the character is uppercase; 9627 * {@code false} otherwise. 9628 * @see Character#isLowerCase(int) 9629 * @see Character#isTitleCase(int) 9630 * @see Character#toUpperCase(int) 9631 * @see Character#getType(int) 9632 * @since 1.5 9633 */ 9634 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 9635 /* 9636 public static boolean isUpperCase(int codePoint) { 9637 return CharacterData.of(codePoint).isUpperCase(codePoint); 9638 } 9639 */ isUpperCase(int codePoint)9640 public static boolean isUpperCase(int codePoint) { 9641 return isUpperCaseImpl(codePoint); 9642 } 9643 9644 @FastNative isUpperCaseImpl(int codePoint)9645 static native boolean isUpperCaseImpl(int codePoint); 9646 // END Android-changed: Reimplement methods natively on top of ICU4C. 9647 9648 /** 9649 * Determines if the specified character is a titlecase character. 9650 * <p> 9651 * A character is a titlecase character if its general 9652 * category type, provided by {@code Character.getType(ch)}, 9653 * is {@code TITLECASE_LETTER}. 9654 * <p> 9655 * Some characters look like pairs of Latin letters. For example, there 9656 * is an uppercase letter that looks like "LJ" and has a corresponding 9657 * lowercase letter that looks like "lj". 
A third form, which looks like "Lj", 9658 * is the appropriate form to use when rendering a word in lowercase 9659 * with initial capitals, as for a book title. 9660 * <p> 9661 * These are some of the Unicode characters for which this method returns 9662 * {@code true}: 9663 * <ul> 9664 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON} 9665 * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J} 9666 * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J} 9667 * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z} 9668 * </ul> 9669 * <p> Many other Unicode characters are titlecase too. 9670 * 9671 * <p><b>Note:</b> This method cannot handle <a 9672 * href="#supplementary"> supplementary characters</a>. To support 9673 * all Unicode characters, including supplementary characters, use 9674 * the {@link #isTitleCase(int)} method. 9675 * 9676 * @param ch the character to be tested. 9677 * @return {@code true} if the character is titlecase; 9678 * {@code false} otherwise. 9679 * @see Character#isLowerCase(char) 9680 * @see Character#isUpperCase(char) 9681 * @see Character#toTitleCase(char) 9682 * @see Character#getType(char) 9683 * @since 1.0.2 9684 */ isTitleCase(char ch)9685 public static boolean isTitleCase(char ch) { 9686 return isTitleCase((int)ch); 9687 } 9688 9689 /** 9690 * Determines if the specified character (Unicode code point) is a titlecase character. 9691 * <p> 9692 * A character is a titlecase character if its general 9693 * category type, provided by {@link Character#getType(int) getType(codePoint)}, 9694 * is {@code TITLECASE_LETTER}. 9695 * <p> 9696 * Some characters look like pairs of Latin letters. For example, there 9697 * is an uppercase letter that looks like "LJ" and has a corresponding 9698 * lowercase letter that looks like "lj". A third form, which looks like "Lj", 9699 * is the appropriate form to use when rendering a word in lowercase 9700 * with initial capitals, as for a book title. 
* <p>
     * These are some of the Unicode characters for which this method returns
     * {@code true}:
     * <ul>
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
     * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
     * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
     * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
     * </ul>
     * <p> Many other Unicode characters are titlecase too.
     *
     * @param codePoint the character (Unicode code point) to be tested.
     * @return {@code true} if the character is titlecase;
     * {@code false} otherwise.
     * @see Character#isLowerCase(int)
     * @see Character#isUpperCase(int)
     * @see Character#toTitleCase(int)
     * @see Character#getType(int)
     * @since 1.5
     */
    // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
    /*
    public static boolean isTitleCase(int codePoint) {
        return getType(codePoint) == Character.TITLECASE_LETTER;
    }
    */
    public static boolean isTitleCase(int codePoint) {
        // The whole answer comes from the native method declared below.
        return isTitleCaseImpl(codePoint);
    }

    // Native method that isTitleCase(int) forwards to; it has no Java body.
    @FastNative
    static native boolean isTitleCaseImpl(int codePoint);
    // END Android-changed: Reimplement methods natively on top of ICU4C.

    /**
     * Determines if the specified character is a digit.
     * <p>
     * A character is a digit if its general category type, provided
     * by {@code Character.getType(ch)}, is
     * {@code DECIMAL_DIGIT_NUMBER}.
* <p>
     * Some Unicode character ranges that contain digits:
     * <ul>
     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
     * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
     * Arabic-Indic digits
     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
     * Extended Arabic-Indic digits
     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
     * Devanagari digits
     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
     * Fullwidth digits
     * </ul>
     *
     * Many other character ranges contain digits as well.
     *
     * <p><b>Note:</b> This method cannot handle <a
     * href="#supplementary"> supplementary characters</a>. To support
     * all Unicode characters, including supplementary characters, use
     * the {@link #isDigit(int)} method.
     *
     * @param ch the character to be tested.
     * @return {@code true} if the character is a digit;
     * {@code false} otherwise.
     * @see Character#digit(char, int)
     * @see Character#forDigit(int, int)
     * @see Character#getType(char)
     */
    public static boolean isDigit(char ch) {
        // The explicit (int) cast selects the code point overload; without it
        // this call would resolve back to this char overload.
        return isDigit((int)ch);
    }

    /**
     * Determines if the specified character (Unicode code point) is a digit.
     * <p>
     * A character is a digit if its general category type, provided
     * by {@link Character#getType(int) getType(codePoint)}, is
     * {@code DECIMAL_DIGIT_NUMBER}.
* <p>
     * Some Unicode character ranges that contain digits:
     * <ul>
     * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
     * ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
     * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
     * Arabic-Indic digits
     * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
     * Extended Arabic-Indic digits
     * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
     * Devanagari digits
     * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
     * Fullwidth digits
     * </ul>
     *
     * Many other character ranges contain digits as well.
     *
     * @param codePoint the character (Unicode code point) to be tested.
     * @return {@code true} if the character is a digit;
     * {@code false} otherwise.
     * @see Character#forDigit(int, int)
     * @see Character#getType(int)
     * @since 1.5
     */
    // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
    /*
    public static boolean isDigit(int codePoint) {
        return CharacterData.of(codePoint).isDigit(codePoint);
    }
    */
    public static boolean isDigit(int codePoint) {
        // The whole answer comes from the native method declared below.
        return isDigitImpl(codePoint);
    }

    // Native method that isDigit(int) forwards to; it has no Java body.
    @FastNative
    static native boolean isDigitImpl(int codePoint);
    // END Android-changed: Reimplement methods natively on top of ICU4C.

    /**
     * Determines if a character is defined in Unicode.
     * <p>
     * A character is defined if at least one of the following is true:
     * <ul>
     * <li>It has an entry in the UnicodeData file.
     * <li>It has a value in a range defined by the UnicodeData file.
     * </ul>
     *
     * <p><b>Note:</b> This method cannot handle <a
     * href="#supplementary"> supplementary characters</a>. To support
     * all Unicode characters, including supplementary characters, use
     * the {@link #isDefined(int)} method.
*
     * @param ch the character to be tested
     * @return {@code true} if the character has a defined meaning
     * in Unicode; {@code false} otherwise.
     * @see Character#isDigit(char)
     * @see Character#isLetter(char)
     * @see Character#isLetterOrDigit(char)
     * @see Character#isLowerCase(char)
     * @see Character#isTitleCase(char)
     * @see Character#isUpperCase(char)
     * @since 1.0.2
     */
    public static boolean isDefined(char ch) {
        // The explicit (int) cast selects the code point overload; without it
        // this call would resolve back to this char overload.
        return isDefined((int)ch);
    }

    /**
     * Determines if a character (Unicode code point) is defined in Unicode.
     * <p>
     * A character is defined if at least one of the following is true:
     * <ul>
     * <li>It has an entry in the UnicodeData file.
     * <li>It has a value in a range defined by the UnicodeData file.
     * </ul>
     *
     * @param codePoint the character (Unicode code point) to be tested.
     * @return {@code true} if the character has a defined meaning
     * in Unicode; {@code false} otherwise.
     * @see Character#isDigit(int)
     * @see Character#isLetter(int)
     * @see Character#isLetterOrDigit(int)
     * @see Character#isLowerCase(int)
     * @see Character#isTitleCase(int)
     * @see Character#isUpperCase(int)
     * @since 1.5
     */
    // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
    /*
    public static boolean isDefined(int codePoint) {
        return getType(codePoint) != Character.UNASSIGNED;
    }
    */
    public static boolean isDefined(int codePoint) {
        // The whole answer comes from the native method declared below.
        return isDefinedImpl(codePoint);
    }

    // Native method that isDefined(int) forwards to; it has no Java body.
    @FastNative
    static native boolean isDefinedImpl(int codePoint);
    // END Android-changed: Reimplement methods natively on top of ICU4C.

    /**
     * Determines if the specified character is a letter.
* <p>
     * A character is considered to be a letter if its general
     * category type, provided by {@code Character.getType(ch)},
     * is any of the following:
     * <ul>
     * <li> {@code UPPERCASE_LETTER}
     * <li> {@code LOWERCASE_LETTER}
     * <li> {@code TITLECASE_LETTER}
     * <li> {@code MODIFIER_LETTER}
     * <li> {@code OTHER_LETTER}
     * </ul>
     *
     * Not all letters have case. Many characters are
     * letters but are neither uppercase nor lowercase nor titlecase.
     *
     * <p><b>Note:</b> This method cannot handle <a
     * href="#supplementary"> supplementary characters</a>. To support
     * all Unicode characters, including supplementary characters, use
     * the {@link #isLetter(int)} method.
     *
     * @param ch the character to be tested.
     * @return {@code true} if the character is a letter;
     * {@code false} otherwise.
     * @see Character#isDigit(char)
     * @see Character#isJavaIdentifierStart(char)
     * @see Character#isJavaLetter(char)
     * @see Character#isJavaLetterOrDigit(char)
     * @see Character#isLetterOrDigit(char)
     * @see Character#isLowerCase(char)
     * @see Character#isTitleCase(char)
     * @see Character#isUnicodeIdentifierStart(char)
     * @see Character#isUpperCase(char)
     */
    public static boolean isLetter(char ch) {
        // The explicit (int) cast selects the code point overload; without it
        // this call would resolve back to this char overload.
        return isLetter((int)ch);
    }

    /**
     * Determines if the specified character (Unicode code point) is a letter.
     * <p>
     * A character is considered to be a letter if its general
     * category type, provided by {@link Character#getType(int) getType(codePoint)},
     * is any of the following:
     * <ul>
     * <li> {@code UPPERCASE_LETTER}
     * <li> {@code LOWERCASE_LETTER}
     * <li> {@code TITLECASE_LETTER}
     * <li> {@code MODIFIER_LETTER}
     * <li> {@code OTHER_LETTER}
     * </ul>
     *
     * Not all letters have case.
* Many characters are
     * letters but are neither uppercase nor lowercase nor titlecase.
     *
     * @param codePoint the character (Unicode code point) to be tested.
     * @return {@code true} if the character is a letter;
     * {@code false} otherwise.
     * @see Character#isDigit(int)
     * @see Character#isJavaIdentifierStart(int)
     * @see Character#isLetterOrDigit(int)
     * @see Character#isLowerCase(int)
     * @see Character#isTitleCase(int)
     * @see Character#isUnicodeIdentifierStart(int)
     * @see Character#isUpperCase(int)
     * @since 1.5
     */
    // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
    /*
    public static boolean isLetter(int codePoint) {
        return ((((1 << Character.UPPERCASE_LETTER) |
            (1 << Character.LOWERCASE_LETTER) |
            (1 << Character.TITLECASE_LETTER) |
            (1 << Character.MODIFIER_LETTER) |
            (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
            != 0;
    }
    */
    public static boolean isLetter(int codePoint) {
        // The whole answer comes from the native method declared below.
        return isLetterImpl(codePoint);
    }

    // Native method that isLetter(int) forwards to; it has no Java body.
    @FastNative
    static native boolean isLetterImpl(int codePoint);
    // END Android-changed: Reimplement methods natively on top of ICU4C.

    /**
     * Determines if the specified character is a letter or digit.
     * <p>
     * A character is considered to be a letter or digit if either
     * {@code Character.isLetter(char ch)} or
     * {@code Character.isDigit(char ch)} returns
     * {@code true} for the character.
     *
     * <p><b>Note:</b> This method cannot handle <a
     * href="#supplementary"> supplementary characters</a>. To support
     * all Unicode characters, including supplementary characters, use
     * the {@link #isLetterOrDigit(int)} method.
     *
     * @param ch the character to be tested.
     * @return {@code true} if the character is a letter or digit;
     * {@code false} otherwise.
* @see Character#isDigit(char)
     * @see Character#isJavaIdentifierPart(char)
     * @see Character#isJavaLetter(char)
     * @see Character#isJavaLetterOrDigit(char)
     * @see Character#isLetter(char)
     * @see Character#isUnicodeIdentifierPart(char)
     * @since 1.0.2
     */
    public static boolean isLetterOrDigit(char ch) {
        // The explicit (int) cast selects the code point overload; without it
        // this call would resolve back to this char overload.
        return isLetterOrDigit((int)ch);
    }

    /**
     * Determines if the specified character (Unicode code point) is a letter or digit.
     * <p>
     * A character is considered to be a letter or digit if either
     * {@link #isLetter(int) isLetter(codePoint)} or
     * {@link #isDigit(int) isDigit(codePoint)} returns
     * {@code true} for the character.
     *
     * @param codePoint the character (Unicode code point) to be tested.
     * @return {@code true} if the character is a letter or digit;
     * {@code false} otherwise.
     * @see Character#isDigit(int)
     * @see Character#isJavaIdentifierPart(int)
     * @see Character#isLetter(int)
     * @see Character#isUnicodeIdentifierPart(int)
     * @since 1.5
     */
    // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
    /*
    public static boolean isLetterOrDigit(int codePoint) {
        return ((((1 << Character.UPPERCASE_LETTER) |
            (1 << Character.LOWERCASE_LETTER) |
            (1 << Character.TITLECASE_LETTER) |
            (1 << Character.MODIFIER_LETTER) |
            (1 << Character.OTHER_LETTER) |
            (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
            != 0;
    }
    */
    public static boolean isLetterOrDigit(int codePoint) {
        // The whole answer comes from the native method declared below.
        return isLetterOrDigitImpl(codePoint);
    }

    // Native method that isLetterOrDigit(int) forwards to; it has no Java body.
    @FastNative
    static native boolean isLetterOrDigitImpl(int codePoint);
    // END Android-changed: Reimplement methods natively on top of ICU4C.

    /**
     * Determines if the specified character is permissible as the first
     * character in a Java identifier.
     * <p>
     * A character may start a Java identifier if and only if
     * one of the following conditions is true:
     * <ul>
     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
     * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
     * <li> {@code ch} is a currency symbol (such as {@code '$'})
     * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
     * </ul>
     *
     * @param ch the character to be tested.
     * @return {@code true} if the character may start a Java
     * identifier; {@code false} otherwise.
     * @see Character#isJavaLetterOrDigit(char)
     * @see Character#isJavaIdentifierStart(char)
     * @see Character#isJavaIdentifierPart(char)
     * @see Character#isLetter(char)
     * @see Character#isLetterOrDigit(char)
     * @see Character#isUnicodeIdentifierStart(char)
     * @since 1.0.2
     * @deprecated Replaced by {@link #isJavaIdentifierStart(char)}.
     */
    @Deprecated(since="1.1")
    public static boolean isJavaLetter(char ch) {
        // Pure forwarder to the replacement method named in @deprecated.
        return isJavaIdentifierStart(ch);
    }

    /**
     * Determines if the specified character may be part of a Java
     * identifier as other than the first character.
     * <p>
     * A character may be part of a Java identifier if and only if one
     * of the following conditions is true:
     * <ul>
     * <li> it is a letter
     * <li> it is a currency symbol (such as {@code '$'})
     * <li> it is a connecting punctuation character (such as {@code '_'})
     * <li> it is a digit
     * <li> it is a numeric letter (such as a Roman numeral character)
     * <li> it is a combining mark
     * <li> it is a non-spacing mark
     * <li> {@code isIdentifierIgnorable} returns
     * {@code true} for the character.
* </ul>
     *
     * @param ch the character to be tested.
     * @return {@code true} if the character may be part of a
     * Java identifier; {@code false} otherwise.
     * @see Character#isJavaLetter(char)
     * @see Character#isJavaIdentifierStart(char)
     * @see Character#isJavaIdentifierPart(char)
     * @see Character#isLetter(char)
     * @see Character#isLetterOrDigit(char)
     * @see Character#isUnicodeIdentifierPart(char)
     * @see Character#isIdentifierIgnorable(char)
     * @since 1.0.2
     * @deprecated Replaced by {@link #isJavaIdentifierPart(char)}.
     */
    @Deprecated(since="1.1")
    public static boolean isJavaLetterOrDigit(char ch) {
        // Pure forwarder to the replacement method named in @deprecated.
        return isJavaIdentifierPart(ch);
    }

    /**
     * Determines if the specified character (Unicode code point) is alphabetic.
     * <p>
     * A character is considered to be alphabetic if its general category type,
     * provided by {@link Character#getType(int) getType(codePoint)}, is any of
     * the following:
     * <ul>
     * <li> {@code UPPERCASE_LETTER}
     * <li> {@code LOWERCASE_LETTER}
     * <li> {@code TITLECASE_LETTER}
     * <li> {@code MODIFIER_LETTER}
     * <li> {@code OTHER_LETTER}
     * <li> {@code LETTER_NUMBER}
     * </ul>
     * or it has contributory property Other_Alphabetic as defined by the
     * Unicode Standard.
     *
     * @param codePoint the character (Unicode code point) to be tested.
     * @return {@code true} if the character is a Unicode alphabet
     * character, {@code false} otherwise.
     * @since 1.7
     */
    // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
/*
    public static boolean isAlphabetic(int codePoint) {
        return (((((1 << Character.UPPERCASE_LETTER) |
            (1 << Character.LOWERCASE_LETTER) |
            (1 << Character.TITLECASE_LETTER) |
            (1 << Character.MODIFIER_LETTER) |
            (1 << Character.OTHER_LETTER) |
            (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
            CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
    }
    */
    public static boolean isAlphabetic(int codePoint) {
        // The whole answer comes from the native method declared below.
        return isAlphabeticImpl(codePoint);
    }

    // Native method that isAlphabetic(int) forwards to; it has no Java body.
    @FastNative
    static native boolean isAlphabeticImpl(int codePoint);
    // END Android-changed: Reimplement methods natively on top of ICU4C.

    /**
     * Determines if the specified character (Unicode code point) is a CJKV
     * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
     * the Unicode Standard.
     *
     * @param codePoint the character (Unicode code point) to be tested.
     * @return {@code true} if the character is a Unicode ideograph
     * character, {@code false} otherwise.
     * @since 1.7
     */
    // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
    /*
    public static boolean isIdeographic(int codePoint) {
        return CharacterData.of(codePoint).isIdeographic(codePoint);
    }
    */
    public static boolean isIdeographic(int codePoint) {
        // The whole answer comes from the native method declared below.
        return isIdeographicImpl(codePoint);
    }
    // Native method that isIdeographic(int) forwards to; it has no Java body.
    @FastNative
    static native boolean isIdeographicImpl(int codePoint);
    // END Android-changed: Reimplement methods natively on top of ICU4C.
10163 10164 // Android-changed: Removed @see tag (target does not exist on Android): 10165 // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10166 /** 10167 * Determines if the specified character is 10168 * permissible as the first character in a Java identifier. 10169 * <p> 10170 * A character may start a Java identifier if and only if 10171 * one of the following conditions is true: 10172 * <ul> 10173 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10174 * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER} 10175 * <li> {@code ch} is a currency symbol (such as {@code '$'}) 10176 * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}). 10177 * </ul> 10178 * 10179 * <p><b>Note:</b> This method cannot handle <a 10180 * href="#supplementary"> supplementary characters</a>. To support 10181 * all Unicode characters, including supplementary characters, use 10182 * the {@link #isJavaIdentifierStart(int)} method. 10183 * 10184 * @param ch the character to be tested. 10185 * @return {@code true} if the character may start a Java identifier; 10186 * {@code false} otherwise. 10187 * @see Character#isJavaIdentifierPart(char) 10188 * @see Character#isLetter(char) 10189 * @see Character#isUnicodeIdentifierStart(char) 10190 * @since 1.1 10191 */ isJavaIdentifierStart(char ch)10192 public static boolean isJavaIdentifierStart(char ch) { 10193 return isJavaIdentifierStart((int)ch); 10194 } 10195 10196 // Android-changed: Removed @see tag (target does not exist on Android): 10197 // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence) 10198 /** 10199 * Determines if the character (Unicode code point) is 10200 * permissible as the first character in a Java identifier. 
10201 * <p> 10202 * A character may start a Java identifier if and only if 10203 * one of the following conditions is true: 10204 * <ul> 10205 * <li> {@link #isLetter(int) isLetter(codePoint)} 10206 * returns {@code true} 10207 * <li> {@link #getType(int) getType(codePoint)} 10208 * returns {@code LETTER_NUMBER} 10209 * <li> the referenced character is a currency symbol (such as {@code '$'}) 10210 * <li> the referenced character is a connecting punctuation character 10211 * (such as {@code '_'}). 10212 * </ul> 10213 * 10214 * @param codePoint the character (Unicode code point) to be tested. 10215 * @return {@code true} if the character may start a Java identifier; 10216 * {@code false} otherwise. 10217 * @see Character#isJavaIdentifierPart(int) 10218 * @see Character#isLetter(int) 10219 * @see Character#isUnicodeIdentifierStart(int) 10220 * @since 1.5 10221 */ 10222 // BEGIN Android-changed: Use ICU. 10223 /* 10224 public static boolean isJavaIdentifierStart(int codePoint) { 10225 return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint); 10226 } 10227 */ isJavaIdentifierStart(int codePoint)10228 public static boolean isJavaIdentifierStart(int codePoint) { 10229 // Use precomputed bitmasks to optimize the ASCII range. 10230 if (codePoint < 64) { 10231 return (codePoint == '$'); // There's only one character in this range. 10232 } else if (codePoint < 128) { 10233 return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 10234 } 10235 return ((1 << getType(codePoint)) 10236 & ((1 << UPPERCASE_LETTER) 10237 | (1 << LOWERCASE_LETTER) 10238 | (1 << TITLECASE_LETTER) 10239 | (1 << MODIFIER_LETTER) 10240 | (1 << OTHER_LETTER) 10241 | (1 << CURRENCY_SYMBOL) 10242 | (1 << CONNECTOR_PUNCTUATION) 10243 | (1 << LETTER_NUMBER))) != 0; 10244 } 10245 // END Android-changed: Use ICU. 

    // Android-changed: Removed @see tag (target does not exist on Android):
    // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
    /**
     * Determines if the specified character may be part of a Java
     * identifier as other than the first character.
     * <p>
     * A character may be part of a Java identifier if any of the following
     * conditions are true:
     * <ul>
     * <li> it is a letter
     * <li> it is a currency symbol (such as {@code '$'})
     * <li> it is a connecting punctuation character (such as {@code '_'})
     * <li> it is a digit
     * <li> it is a numeric letter (such as a Roman numeral character)
     * <li> it is a combining mark
     * <li> it is a non-spacing mark
     * <li> {@code isIdentifierIgnorable} returns
     * {@code true} for the character
     * </ul>
     *
     * <p><b>Note:</b> This method cannot handle <a
     * href="#supplementary"> supplementary characters</a>. To support
     * all Unicode characters, including supplementary characters, use
     * the {@link #isJavaIdentifierPart(int)} method.
     *
     * @param ch the character to be tested.
     * @return {@code true} if the character may be part of a
     * Java identifier; {@code false} otherwise.
     * @see Character#isIdentifierIgnorable(char)
     * @see Character#isJavaIdentifierStart(char)
     * @see Character#isLetterOrDigit(char)
     * @see Character#isUnicodeIdentifierPart(char)
     * @since 1.1
     */
    public static boolean isJavaIdentifierPart(char ch) {
        // The explicit (int) cast selects the code point overload; without it
        // this call would resolve back to this char overload.
        return isJavaIdentifierPart((int)ch);
    }

    // Android-changed: Removed @see tag (target does not exist on Android):
    // @see javax.lang.model.SourceVersion#isIdentifier(CharSequence)
    /**
     * Determines if the character (Unicode code point) may be part of a Java
     * identifier as other than the first character.
10290 * <p> 10291 * A character may be part of a Java identifier if any of the following 10292 * conditions are true: 10293 * <ul> 10294 * <li> it is a letter 10295 * <li> it is a currency symbol (such as {@code '$'}) 10296 * <li> it is a connecting punctuation character (such as {@code '_'}) 10297 * <li> it is a digit 10298 * <li> it is a numeric letter (such as a Roman numeral character) 10299 * <li> it is a combining mark 10300 * <li> it is a non-spacing mark 10301 * <li> {@link #isIdentifierIgnorable(int) 10302 * isIdentifierIgnorable(codePoint)} returns {@code true} for 10303 * the code point 10304 * </ul> 10305 * 10306 * @param codePoint the character (Unicode code point) to be tested. 10307 * @return {@code true} if the character may be part of a 10308 * Java identifier; {@code false} otherwise. 10309 * @see Character#isIdentifierIgnorable(int) 10310 * @see Character#isJavaIdentifierStart(int) 10311 * @see Character#isLetterOrDigit(int) 10312 * @see Character#isUnicodeIdentifierPart(int) 10313 * @since 1.5 10314 */ 10315 // BEGIN Android-changed: Use ICU. 10316 /* 10317 public static boolean isJavaIdentifierPart(int codePoint) { 10318 return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint); 10319 } 10320 */ isJavaIdentifierPart(int codePoint)10321 public static boolean isJavaIdentifierPart(int codePoint) { 10322 // Use precomputed bitmasks to optimize the ASCII range. 
10323 if (codePoint < 64) { 10324 return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; 10325 } else if (codePoint < 128) { 10326 return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; 10327 } 10328 return ((1 << getType(codePoint)) 10329 & ((1 << UPPERCASE_LETTER) 10330 | (1 << LOWERCASE_LETTER) 10331 | (1 << TITLECASE_LETTER) 10332 | (1 << MODIFIER_LETTER) 10333 | (1 << OTHER_LETTER) 10334 | (1 << CURRENCY_SYMBOL) 10335 | (1 << CONNECTOR_PUNCTUATION) 10336 | (1 << DECIMAL_DIGIT_NUMBER) 10337 | (1 << LETTER_NUMBER) 10338 | (1 << FORMAT) 10339 | (1 << COMBINING_SPACING_MARK) 10340 | (1 << NON_SPACING_MARK))) != 0 10341 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) 10342 || (codePoint >= 0x7f && codePoint <= 0x9f); 10343 } 10344 // END Android-changed: Use ICU. 10345 10346 /** 10347 * Determines if the specified character is permissible as the 10348 * first character in a Unicode identifier. 10349 * <p> 10350 * A character may start a Unicode identifier if and only if 10351 * one of the following conditions is true: 10352 * <ul> 10353 * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true} 10354 * <li> {@link #getType(char) getType(ch)} returns 10355 * {@code LETTER_NUMBER}. 10356 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10357 * {@code Other_ID_Start}</a> character. 10358 * </ul> 10359 * <p> 10360 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10361 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10362 * with the following profile of UAX31: 10363 * <pre> 10364 * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F) 10365 * </pre> 10366 * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward 10367 * compatibility. 10368 * 10369 * <p><b>Note:</b> This method cannot handle <a 10370 * href="#supplementary"> supplementary characters</a>. 
* To support
     * all Unicode characters, including supplementary characters, use
     * the {@link #isUnicodeIdentifierStart(int)} method.
     *
     * @param ch the character to be tested.
     * @return {@code true} if the character may start a Unicode
     * identifier; {@code false} otherwise.
     * @see Character#isJavaIdentifierStart(char)
     * @see Character#isLetter(char)
     * @see Character#isUnicodeIdentifierPart(char)
     * @since 1.1
     */
    public static boolean isUnicodeIdentifierStart(char ch) {
        // The explicit (int) cast selects the code point overload; without it
        // this call would resolve back to this char overload.
        return isUnicodeIdentifierStart((int)ch);
    }

    /**
     * Determines if the specified character (Unicode code point) is permissible as the
     * first character in a Unicode identifier.
     * <p>
     * A character may start a Unicode identifier if and only if
     * one of the following conditions is true:
     * <ul>
     * <li> {@link #isLetter(int) isLetter(codePoint)}
     * returns {@code true}
     * <li> {@link #getType(int) getType(codePoint)}
     * returns {@code LETTER_NUMBER}.
     * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
     * {@code Other_ID_Start}</a> character.
     * </ul>
     * <p>
     * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
     * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
     * with the following profile of UAX31:
     * <pre>
     * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
     * </pre>
     * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
     * compatibility.
     *
     * @param codePoint the character (Unicode code point) to be tested.
     * @return {@code true} if the character may start a Unicode
     * identifier; {@code false} otherwise.
* @see Character#isJavaIdentifierStart(int)
     * @see Character#isLetter(int)
     * @see Character#isUnicodeIdentifierPart(int)
     * @since 1.5
     */
    // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
    /*
    public static boolean isUnicodeIdentifierStart(int codePoint) {
        return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
    }
    */
    public static boolean isUnicodeIdentifierStart(int codePoint) {
        // The whole answer comes from the native method declared below.
        return isUnicodeIdentifierStartImpl(codePoint);
    }

    // Native method that isUnicodeIdentifierStart(int) forwards to; it has no Java body.
    @FastNative
    static native boolean isUnicodeIdentifierStartImpl(int codePoint);
    // END Android-changed: Reimplement methods natively on top of ICU4C.

    /**
     * Determines if the specified character may be part of a Unicode
     * identifier as other than the first character.
     * <p>
     * A character may be part of a Unicode identifier if and only if
     * one of the following statements is true:
     * <ul>
     * <li> it is a letter
     * <li> it is a connecting punctuation character (such as {@code '_'})
     * <li> it is a digit
     * <li> it is a numeric letter (such as a Roman numeral character)
     * <li> it is a combining mark
     * <li> it is a non-spacing mark
     * <li> {@code isIdentifierIgnorable} returns
     * {@code true} for this character.
     * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
     * {@code Other_ID_Start}</a> character.
     * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
     * {@code Other_ID_Continue}</a> character.
* </ul>
     * <p>
     * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
     * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
     * with the following profile of UAX31:
     * <pre>
     * Continue := Start + ID_Continue + ignorable
     * Medial := empty
     * ignorable := isIdentifierIgnorable(char) returns true for the character
     * </pre>
     * {@code ignorable} is added to {@code Continue} for backward
     * compatibility.
     *
     * <p><b>Note:</b> This method cannot handle <a
     * href="#supplementary"> supplementary characters</a>. To support
     * all Unicode characters, including supplementary characters, use
     * the {@link #isUnicodeIdentifierPart(int)} method.
     *
     * @param ch the character to be tested.
     * @return {@code true} if the character may be part of a
     * Unicode identifier; {@code false} otherwise.
     * @see Character#isIdentifierIgnorable(char)
     * @see Character#isJavaIdentifierPart(char)
     * @see Character#isLetterOrDigit(char)
     * @see Character#isUnicodeIdentifierStart(char)
     * @since 1.1
     */
    public static boolean isUnicodeIdentifierPart(char ch) {
        // The explicit (int) cast selects the code point overload; without it
        // this call would resolve back to this char overload.
        return isUnicodeIdentifierPart((int)ch);
    }

    /**
     * Determines if the specified character (Unicode code point) may be part of a Unicode
     * identifier as other than the first character.
     * <p>
     * A character may be part of a Unicode identifier if and only if
     * one of the following statements is true:
     * <ul>
     * <li> it is a letter
     * <li> it is a connecting punctuation character (such as {@code '_'})
     * <li> it is a digit
     * <li> it is a numeric letter (such as a Roman numeral character)
     * <li> it is a combining mark
     * <li> it is a non-spacing mark
     * <li> {@code isIdentifierIgnorable} returns
     * {@code true} for this character.
10497 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start"> 10498 * {@code Other_ID_Start}</a> character. 10499 * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue"> 10500 * {@code Other_ID_Continue}</a> character. 10501 * </ul> 10502 * <p> 10503 * This method conforms to <a href="https://unicode.org/reports/tr31/#R1"> 10504 * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard, 10505 * with the following profile of UAX31: 10506 * <pre> 10507 * Continue := Start + ID_Continue + ignorable 10508 * Medial := empty 10509 * ignorable := isIdentifierIgnorable(int) returns true for the character 10510 * </pre> 10511 * {@code ignorable} is added to {@code Continue} for backward 10512 * compatibility. 10513 * 10514 * @param codePoint the character (Unicode code point) to be tested. 10515 * @return {@code true} if the character may be part of a 10516 * Unicode identifier; {@code false} otherwise. 10517 * @see Character#isIdentifierIgnorable(int) 10518 * @see Character#isJavaIdentifierPart(int) 10519 * @see Character#isLetterOrDigit(int) 10520 * @see Character#isUnicodeIdentifierStart(int) 10521 * @since 1.5 10522 */ 10523 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10524 /* 10525 public static boolean isUnicodeIdentifierPart(int codePoint) { 10526 return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint); 10527 } 10528 */ isUnicodeIdentifierPart(int codePoint)10529 public static boolean isUnicodeIdentifierPart(int codePoint) { 10530 return isUnicodeIdentifierPartImpl(codePoint); 10531 } 10532 10533 @FastNative isUnicodeIdentifierPartImpl(int codePoint)10534 static native boolean isUnicodeIdentifierPartImpl(int codePoint); 10535 // END Android-changed: Reimplement methods natively on top of ICU4C. 10536 10537 /** 10538 * Determines if the specified character should be regarded as 10539 * an ignorable character in a Java identifier or a Unicode identifier. 
10540 * <p> 10541 * The following Unicode characters are ignorable in a Java identifier 10542 * or a Unicode identifier: 10543 * <ul> 10544 * <li>ISO control characters that are not whitespace 10545 * <ul> 10546 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10547 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10548 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10549 * </ul> 10550 * 10551 * <li>all characters that have the {@code FORMAT} general 10552 * category value 10553 * </ul> 10554 * 10555 * <p><b>Note:</b> This method cannot handle <a 10556 * href="#supplementary"> supplementary characters</a>. To support 10557 * all Unicode characters, including supplementary characters, use 10558 * the {@link #isIdentifierIgnorable(int)} method. 10559 * 10560 * @param ch the character to be tested. 10561 * @return {@code true} if the character is an ignorable control 10562 * character that may be part of a Java or Unicode identifier; 10563 * {@code false} otherwise. 10564 * @see Character#isJavaIdentifierPart(char) 10565 * @see Character#isUnicodeIdentifierPart(char) 10566 * @since 1.1 10567 */ isIdentifierIgnorable(char ch)10568 public static boolean isIdentifierIgnorable(char ch) { 10569 return isIdentifierIgnorable((int)ch); 10570 } 10571 10572 /** 10573 * Determines if the specified character (Unicode code point) should be regarded as 10574 * an ignorable character in a Java identifier or a Unicode identifier. 
10575 * <p> 10576 * The following Unicode characters are ignorable in a Java identifier 10577 * or a Unicode identifier: 10578 * <ul> 10579 * <li>ISO control characters that are not whitespace 10580 * <ul> 10581 * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'} 10582 * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'} 10583 * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'} 10584 * </ul> 10585 * 10586 * <li>all characters that have the {@code FORMAT} general 10587 * category value 10588 * </ul> 10589 * 10590 * @param codePoint the character (Unicode code point) to be tested. 10591 * @return {@code true} if the character is an ignorable control 10592 * character that may be part of a Java or Unicode identifier; 10593 * {@code false} otherwise. 10594 * @see Character#isJavaIdentifierPart(int) 10595 * @see Character#isUnicodeIdentifierPart(int) 10596 * @since 1.5 10597 */ 10598 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10599 /* 10600 public static boolean isIdentifierIgnorable(int codePoint) { 10601 return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint); 10602 } 10603 */ isIdentifierIgnorable(int codePoint)10604 public static boolean isIdentifierIgnorable(int codePoint) { 10605 return isIdentifierIgnorableImpl(codePoint); 10606 } 10607 10608 @FastNative isIdentifierIgnorableImpl(int codePoint)10609 static native boolean isIdentifierIgnorableImpl(int codePoint); 10610 // END Android-changed: Reimplement methods natively on top of ICU4C. 10611 10612 /** 10613 * Converts the character argument to lowercase using case 10614 * mapping information from the UnicodeData file. 10615 * <p> 10616 * Note that 10617 * {@code Character.isLowerCase(Character.toLowerCase(ch))} 10618 * does not always return {@code true} for some ranges of 10619 * characters, particularly those that are symbols or ideographs. 10620 * 10621 * <p>In general, {@link String#toLowerCase()} should be used to map 10622 * characters to lowercase. 
{@code String} case mapping methods 10623 * have several benefits over {@code Character} case mapping methods. 10624 * {@code String} case mapping methods can perform locale-sensitive 10625 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10626 * the {@code Character} case mapping methods cannot. 10627 * 10628 * <p><b>Note:</b> This method cannot handle <a 10629 * href="#supplementary"> supplementary characters</a>. To support 10630 * all Unicode characters, including supplementary characters, use 10631 * the {@link #toLowerCase(int)} method. 10632 * 10633 * @param ch the character to be converted. 10634 * @return the lowercase equivalent of the character, if any; 10635 * otherwise, the character itself. 10636 * @see Character#isLowerCase(char) 10637 * @see String#toLowerCase() 10638 */ toLowerCase(char ch)10639 public static char toLowerCase(char ch) { 10640 return (char)toLowerCase((int)ch); 10641 } 10642 10643 /** 10644 * Converts the character (Unicode code point) argument to 10645 * lowercase using case mapping information from the UnicodeData 10646 * file. 10647 * 10648 * <p> Note that 10649 * {@code Character.isLowerCase(Character.toLowerCase(codePoint))} 10650 * does not always return {@code true} for some ranges of 10651 * characters, particularly those that are symbols or ideographs. 10652 * 10653 * <p>In general, {@link String#toLowerCase()} should be used to map 10654 * characters to lowercase. {@code String} case mapping methods 10655 * have several benefits over {@code Character} case mapping methods. 10656 * {@code String} case mapping methods can perform locale-sensitive 10657 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10658 * the {@code Character} case mapping methods cannot. 10659 * 10660 * @param codePoint the character (Unicode code point) to be converted. 10661 * @return the lowercase equivalent of the character (Unicode code 10662 * point), if any; otherwise, the character itself. 
10663 * @see Character#isLowerCase(int) 10664 * @see String#toLowerCase() 10665 * 10666 * @since 1.5 10667 */ 10668 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10669 /* 10670 public static int toLowerCase(int codePoint) { 10671 return CharacterData.of(codePoint).toLowerCase(codePoint); 10672 } 10673 */ toLowerCase(int codePoint)10674 public static int toLowerCase(int codePoint) { 10675 if (codePoint >= 'A' && codePoint <= 'Z') { 10676 return codePoint + ('a' - 'A'); 10677 } 10678 10679 // All ASCII codepoints except the ones above remain unchanged. 10680 if (codePoint < 0x80) { 10681 return codePoint; 10682 } 10683 10684 return toLowerCaseImpl(codePoint); 10685 } 10686 10687 @FastNative toLowerCaseImpl(int codePoint)10688 static native int toLowerCaseImpl(int codePoint); 10689 // END Android-changed: Reimplement methods natively on top of ICU4C. 10690 10691 /** 10692 * Converts the character argument to uppercase using case mapping 10693 * information from the UnicodeData file. 10694 * <p> 10695 * Note that 10696 * {@code Character.isUpperCase(Character.toUpperCase(ch))} 10697 * does not always return {@code true} for some ranges of 10698 * characters, particularly those that are symbols or ideographs. 10699 * 10700 * <p>In general, {@link String#toUpperCase()} should be used to map 10701 * characters to uppercase. {@code String} case mapping methods 10702 * have several benefits over {@code Character} case mapping methods. 10703 * {@code String} case mapping methods can perform locale-sensitive 10704 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10705 * the {@code Character} case mapping methods cannot. 10706 * 10707 * <p><b>Note:</b> This method cannot handle <a 10708 * href="#supplementary"> supplementary characters</a>. To support 10709 * all Unicode characters, including supplementary characters, use 10710 * the {@link #toUpperCase(int)} method. 10711 * 10712 * @param ch the character to be converted. 
10713 * @return the uppercase equivalent of the character, if any; 10714 * otherwise, the character itself. 10715 * @see Character#isUpperCase(char) 10716 * @see String#toUpperCase() 10717 */ toUpperCase(char ch)10718 public static char toUpperCase(char ch) { 10719 return (char)toUpperCase((int)ch); 10720 } 10721 10722 /** 10723 * Converts the character (Unicode code point) argument to 10724 * uppercase using case mapping information from the UnicodeData 10725 * file. 10726 * 10727 * <p>Note that 10728 * {@code Character.isUpperCase(Character.toUpperCase(codePoint))} 10729 * does not always return {@code true} for some ranges of 10730 * characters, particularly those that are symbols or ideographs. 10731 * 10732 * <p>In general, {@link String#toUpperCase()} should be used to map 10733 * characters to uppercase. {@code String} case mapping methods 10734 * have several benefits over {@code Character} case mapping methods. 10735 * {@code String} case mapping methods can perform locale-sensitive 10736 * mappings, context-sensitive mappings, and 1:M character mappings, whereas 10737 * the {@code Character} case mapping methods cannot. 10738 * 10739 * @param codePoint the character (Unicode code point) to be converted. 10740 * @return the uppercase equivalent of the character, if any; 10741 * otherwise, the character itself. 10742 * @see Character#isUpperCase(int) 10743 * @see String#toUpperCase() 10744 * 10745 * @since 1.5 10746 */ 10747 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10748 /* 10749 public static int toUpperCase(int codePoint) { 10750 return CharacterData.of(codePoint).toUpperCase(codePoint); 10751 } 10752 */ toUpperCase(int codePoint)10753 public static int toUpperCase(int codePoint) { 10754 if (codePoint >= 'a' && codePoint <= 'z') { 10755 return codePoint - ('a' - 'A'); 10756 } 10757 10758 // All ASCII codepoints except the ones above remain unchanged. 
10759 if (codePoint < 0x80) { 10760 return codePoint; 10761 } 10762 10763 return toUpperCaseImpl(codePoint); 10764 } 10765 10766 @FastNative toUpperCaseImpl(int codePoint)10767 static native int toUpperCaseImpl(int codePoint); 10768 // END Android-changed: Reimplement methods natively on top of ICU4C. 10769 10770 /** 10771 * Converts the character argument to titlecase using case mapping 10772 * information from the UnicodeData file. If a character has no 10773 * explicit titlecase mapping and is not itself a titlecase char 10774 * according to UnicodeData, then the uppercase mapping is 10775 * returned as an equivalent titlecase mapping. If the 10776 * {@code char} argument is already a titlecase 10777 * {@code char}, the same {@code char} value will be 10778 * returned. 10779 * <p> 10780 * Note that 10781 * {@code Character.isTitleCase(Character.toTitleCase(ch))} 10782 * does not always return {@code true} for some ranges of 10783 * characters. 10784 * 10785 * <p><b>Note:</b> This method cannot handle <a 10786 * href="#supplementary"> supplementary characters</a>. To support 10787 * all Unicode characters, including supplementary characters, use 10788 * the {@link #toTitleCase(int)} method. 10789 * 10790 * @param ch the character to be converted. 10791 * @return the titlecase equivalent of the character, if any; 10792 * otherwise, the character itself. 10793 * @see Character#isTitleCase(char) 10794 * @see Character#toLowerCase(char) 10795 * @see Character#toUpperCase(char) 10796 * @since 1.0.2 10797 */ toTitleCase(char ch)10798 public static char toTitleCase(char ch) { 10799 return (char)toTitleCase((int)ch); 10800 } 10801 10802 /** 10803 * Converts the character (Unicode code point) argument to titlecase using case mapping 10804 * information from the UnicodeData file. 
If a character has no 10805 * explicit titlecase mapping and is not itself a titlecase char 10806 * according to UnicodeData, then the uppercase mapping is 10807 * returned as an equivalent titlecase mapping. If the 10808 * character argument is already a titlecase 10809 * character, the same character value will be 10810 * returned. 10811 * 10812 * <p>Note that 10813 * {@code Character.isTitleCase(Character.toTitleCase(codePoint))} 10814 * does not always return {@code true} for some ranges of 10815 * characters. 10816 * 10817 * @param codePoint the character (Unicode code point) to be converted. 10818 * @return the titlecase equivalent of the character, if any; 10819 * otherwise, the character itself. 10820 * @see Character#isTitleCase(int) 10821 * @see Character#toLowerCase(int) 10822 * @see Character#toUpperCase(int) 10823 * @since 1.5 10824 */ 10825 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10826 /* 10827 public static int toTitleCase(int codePoint) { 10828 return CharacterData.of(codePoint).toTitleCase(codePoint); 10829 } 10830 */ toTitleCase(int codePoint)10831 public static int toTitleCase(int codePoint) { 10832 return toTitleCaseImpl(codePoint); 10833 } 10834 10835 @FastNative toTitleCaseImpl(int codePoint)10836 static native int toTitleCaseImpl(int codePoint); 10837 // END Android-changed: Reimplement methods natively on top of ICU4C. 10838 10839 /** 10840 * Returns the numeric value of the character {@code ch} in the 10841 * specified radix. 10842 * <p> 10843 * If the radix is not in the range {@code MIN_RADIX} ≤ 10844 * {@code radix} ≤ {@code MAX_RADIX} or if the 10845 * value of {@code ch} is not a valid digit in the specified 10846 * radix, {@code -1} is returned. 
A character is a valid digit 10847 * if at least one of the following is true: 10848 * <ul> 10849 * <li>The method {@code isDigit} is {@code true} of the character 10850 * and the Unicode decimal digit value of the character (or its 10851 * single-character decomposition) is less than the specified radix. 10852 * In this case the decimal digit value is returned. 10853 * <li>The character is one of the uppercase Latin letters 10854 * {@code 'A'} through {@code 'Z'} and its code is less than 10855 * {@code radix + 'A' - 10}. 10856 * In this case, {@code ch - 'A' + 10} 10857 * is returned. 10858 * <li>The character is one of the lowercase Latin letters 10859 * {@code 'a'} through {@code 'z'} and its code is less than 10860 * {@code radix + 'a' - 10}. 10861 * In this case, {@code ch - 'a' + 10} 10862 * is returned. 10863 * <li>The character is one of the fullwidth uppercase Latin letters A 10864 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 10865 * and its code is less than 10866 * {@code radix + '\u005CuFF21' - 10}. 10867 * In this case, {@code ch - '\u005CuFF21' + 10} 10868 * is returned. 10869 * <li>The character is one of the fullwidth lowercase Latin letters a 10870 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 10871 * and its code is less than 10872 * {@code radix + '\u005CuFF41' - 10}. 10873 * In this case, {@code ch - '\u005CuFF41' + 10} 10874 * is returned. 10875 * </ul> 10876 * 10877 * <p><b>Note:</b> This method cannot handle <a 10878 * href="#supplementary"> supplementary characters</a>. To support 10879 * all Unicode characters, including supplementary characters, use 10880 * the {@link #digit(int, int)} method. 10881 * 10882 * @param ch the character to be converted. 10883 * @param radix the radix. 10884 * @return the numeric value represented by the character in the 10885 * specified radix. 
10886 * @see Character#forDigit(int, int) 10887 * @see Character#isDigit(char) 10888 */ digit(char ch, int radix)10889 public static int digit(char ch, int radix) { 10890 return digit((int)ch, radix); 10891 } 10892 10893 /** 10894 * Returns the numeric value of the specified character (Unicode 10895 * code point) in the specified radix. 10896 * 10897 * <p>If the radix is not in the range {@code MIN_RADIX} ≤ 10898 * {@code radix} ≤ {@code MAX_RADIX} or if the 10899 * character is not a valid digit in the specified 10900 * radix, {@code -1} is returned. A character is a valid digit 10901 * if at least one of the following is true: 10902 * <ul> 10903 * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character 10904 * and the Unicode decimal digit value of the character (or its 10905 * single-character decomposition) is less than the specified radix. 10906 * In this case the decimal digit value is returned. 10907 * <li>The character is one of the uppercase Latin letters 10908 * {@code 'A'} through {@code 'Z'} and its code is less than 10909 * {@code radix + 'A' - 10}. 10910 * In this case, {@code codePoint - 'A' + 10} 10911 * is returned. 10912 * <li>The character is one of the lowercase Latin letters 10913 * {@code 'a'} through {@code 'z'} and its code is less than 10914 * {@code radix + 'a' - 10}. 10915 * In this case, {@code codePoint - 'a' + 10} 10916 * is returned. 10917 * <li>The character is one of the fullwidth uppercase Latin letters A 10918 * ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'}) 10919 * and its code is less than 10920 * {@code radix + '\u005CuFF21' - 10}. 10921 * In this case, 10922 * {@code codePoint - '\u005CuFF21' + 10} 10923 * is returned. 10924 * <li>The character is one of the fullwidth lowercase Latin letters a 10925 * ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'}) 10926 * and its code is less than 10927 * {@code radix + '\u005CuFF41'- 10}. 
10928 * In this case, 10929 * {@code codePoint - '\u005CuFF41' + 10} 10930 * is returned. 10931 * </ul> 10932 * 10933 * @param codePoint the character (Unicode code point) to be converted. 10934 * @param radix the radix. 10935 * @return the numeric value represented by the character in the 10936 * specified radix. 10937 * @see Character#forDigit(int, int) 10938 * @see Character#isDigit(int) 10939 * @since 1.5 10940 */ 10941 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 10942 /* 10943 public static int digit(int codePoint, int radix) { 10944 return CharacterData.of(codePoint).digit(codePoint, radix); 10945 } 10946 */ digit(int codePoint, int radix)10947 public static int digit(int codePoint, int radix) { 10948 if (radix < MIN_RADIX || radix > MAX_RADIX) { 10949 return -1; 10950 } 10951 if (codePoint < 128) { 10952 // Optimized for ASCII 10953 int result = -1; 10954 if ('0' <= codePoint && codePoint <= '9') { 10955 result = codePoint - '0'; 10956 } else if ('a' <= codePoint && codePoint <= 'z') { 10957 result = 10 + (codePoint - 'a'); 10958 } else if ('A' <= codePoint && codePoint <= 'Z') { 10959 result = 10 + (codePoint - 'A'); 10960 } 10961 return result < radix ? result : -1; 10962 } 10963 return digitImpl(codePoint, radix); 10964 } 10965 10966 @FastNative digitImpl(int codePoint, int radix)10967 native static int digitImpl(int codePoint, int radix); 10968 // END Android-changed: Reimplement methods natively on top of ICU4C. 10969 10970 /** 10971 * Returns the {@code int} value that the specified Unicode 10972 * character represents. For example, the character 10973 * {@code '\u005Cu216C'} (the roman numeral fifty) will return 10974 * an int with a value of 50. 
10975 * <p> 10976 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 10977 * {@code '\u005Cu005A'}), lowercase 10978 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 10979 * full width variant ({@code '\u005CuFF21'} through 10980 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 10981 * {@code '\u005CuFF5A'}) forms have numeric values from 10 10982 * through 35. This is independent of the Unicode specification, 10983 * which does not assign numeric values to these {@code char} 10984 * values. 10985 * <p> 10986 * If the character does not have a numeric value, then -1 is returned. 10987 * If the character has a numeric value that cannot be represented as a 10988 * nonnegative integer (for example, a fractional value), then -2 10989 * is returned. 10990 * 10991 * <p><b>Note:</b> This method cannot handle <a 10992 * href="#supplementary"> supplementary characters</a>. To support 10993 * all Unicode characters, including supplementary characters, use 10994 * the {@link #getNumericValue(int)} method. 10995 * 10996 * @param ch the character to be converted. 10997 * @return the numeric value of the character, as a nonnegative {@code int} 10998 * value; -2 if the character has a numeric value but the value 10999 * can not be represented as a nonnegative {@code int} value; 11000 * -1 if the character has no numeric value. 11001 * @see Character#forDigit(int, int) 11002 * @see Character#isDigit(char) 11003 * @since 1.1 11004 */ getNumericValue(char ch)11005 public static int getNumericValue(char ch) { 11006 return getNumericValue((int)ch); 11007 } 11008 11009 /** 11010 * Returns the {@code int} value that the specified 11011 * character (Unicode code point) represents. For example, the character 11012 * {@code '\u005Cu216C'} (the Roman numeral fifty) will return 11013 * an {@code int} with a value of 50. 
11014 * <p> 11015 * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through 11016 * {@code '\u005Cu005A'}), lowercase 11017 * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and 11018 * full width variant ({@code '\u005CuFF21'} through 11019 * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through 11020 * {@code '\u005CuFF5A'}) forms have numeric values from 10 11021 * through 35. This is independent of the Unicode specification, 11022 * which does not assign numeric values to these {@code char} 11023 * values. 11024 * <p> 11025 * If the character does not have a numeric value, then -1 is returned. 11026 * If the character has a numeric value that cannot be represented as a 11027 * nonnegative integer (for example, a fractional value), then -2 11028 * is returned. 11029 * 11030 * @param codePoint the character (Unicode code point) to be converted. 11031 * @return the numeric value of the character, as a nonnegative {@code int} 11032 * value; -2 if the character has a numeric value but the value 11033 * can not be represented as a nonnegative {@code int} value; 11034 * -1 if the character has no numeric value. 11035 * @see Character#forDigit(int, int) 11036 * @see Character#isDigit(int) 11037 * @since 1.5 11038 */ 11039 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11040 /* 11041 public static int getNumericValue(int codePoint) { 11042 return CharacterData.of(codePoint).getNumericValue(codePoint); 11043 } 11044 */ getNumericValue(int codePoint)11045 public static int getNumericValue(int codePoint) { 11046 // This is both an optimization and papers over differences between Java and ICU. 
11047 if (codePoint < 128) { 11048 if (codePoint >= '0' && codePoint <= '9') { 11049 return codePoint - '0'; 11050 } 11051 if (codePoint >= 'a' && codePoint <= 'z') { 11052 return codePoint - ('a' - 10); 11053 } 11054 if (codePoint >= 'A' && codePoint <= 'Z') { 11055 return codePoint - ('A' - 10); 11056 } 11057 return -1; 11058 } 11059 // Full-width uppercase A-Z. 11060 if (codePoint >= 0xff21 && codePoint <= 0xff3a) { 11061 return codePoint - 0xff17; 11062 } 11063 // Full-width lowercase a-z. 11064 if (codePoint >= 0xff41 && codePoint <= 0xff5a) { 11065 return codePoint - 0xff37; 11066 } 11067 return getNumericValueImpl(codePoint); 11068 } 11069 11070 @FastNative getNumericValueImpl(int codePoint)11071 native static int getNumericValueImpl(int codePoint); 11072 // END Android-changed: Reimplement methods natively on top of ICU4C. 11073 11074 /** 11075 * Determines if the specified character is ISO-LATIN-1 white space. 11076 * This method returns {@code true} for the following five 11077 * characters only: 11078 * <table class="striped"> 11079 * <caption style="display:none">truechars</caption> 11080 * <thead> 11081 * <tr><th scope="col">Character 11082 * <th scope="col">Code 11083 * <th scope="col">Name 11084 * </thead> 11085 * <tbody> 11086 * <tr><th scope="row">{@code '\t'}</th> <td>{@code U+0009}</td> 11087 * <td>{@code HORIZONTAL TABULATION}</td></tr> 11088 * <tr><th scope="row">{@code '\n'}</th> <td>{@code U+000A}</td> 11089 * <td>{@code NEW LINE}</td></tr> 11090 * <tr><th scope="row">{@code '\f'}</th> <td>{@code U+000C}</td> 11091 * <td>{@code FORM FEED}</td></tr> 11092 * <tr><th scope="row">{@code '\r'}</th> <td>{@code U+000D}</td> 11093 * <td>{@code CARRIAGE RETURN}</td></tr> 11094 * <tr><th scope="row">{@code ' '}</th> <td>{@code U+0020}</td> 11095 * <td>{@code SPACE}</td></tr> 11096 * </tbody> 11097 * </table> 11098 * 11099 * @param ch the character to be tested. 
11100 * @return {@code true} if the character is ISO-LATIN-1 white 11101 * space; {@code false} otherwise. 11102 * @see Character#isSpaceChar(char) 11103 * @see Character#isWhitespace(char) 11104 * @deprecated Replaced by isWhitespace(char). 11105 */ 11106 @Deprecated(since="1.1") isSpace(char ch)11107 public static boolean isSpace(char ch) { 11108 return (ch <= 0x0020) && 11109 (((((1L << 0x0009) | 11110 (1L << 0x000A) | 11111 (1L << 0x000C) | 11112 (1L << 0x000D) | 11113 (1L << 0x0020)) >> ch) & 1L) != 0); 11114 } 11115 11116 11117 /** 11118 * Determines if the specified character is a Unicode space character. 11119 * A character is considered to be a space character if and only if 11120 * it is specified to be a space character by the Unicode Standard. This 11121 * method returns true if the character's general category type is any of 11122 * the following: 11123 * <ul> 11124 * <li> {@code SPACE_SEPARATOR} 11125 * <li> {@code LINE_SEPARATOR} 11126 * <li> {@code PARAGRAPH_SEPARATOR} 11127 * </ul> 11128 * 11129 * <p><b>Note:</b> This method cannot handle <a 11130 * href="#supplementary"> supplementary characters</a>. To support 11131 * all Unicode characters, including supplementary characters, use 11132 * the {@link #isSpaceChar(int)} method. 11133 * 11134 * @param ch the character to be tested. 11135 * @return {@code true} if the character is a space character; 11136 * {@code false} otherwise. 11137 * @see Character#isWhitespace(char) 11138 * @since 1.1 11139 */ isSpaceChar(char ch)11140 public static boolean isSpaceChar(char ch) { 11141 return isSpaceChar((int)ch); 11142 } 11143 11144 /** 11145 * Determines if the specified character (Unicode code point) is a 11146 * Unicode space character. A character is considered to be a 11147 * space character if and only if it is specified to be a space 11148 * character by the Unicode Standard. 
This method returns true if 11149 * the character's general category type is any of the following: 11150 * 11151 * <ul> 11152 * <li> {@link #SPACE_SEPARATOR} 11153 * <li> {@link #LINE_SEPARATOR} 11154 * <li> {@link #PARAGRAPH_SEPARATOR} 11155 * </ul> 11156 * 11157 * @param codePoint the character (Unicode code point) to be tested. 11158 * @return {@code true} if the character is a space character; 11159 * {@code false} otherwise. 11160 * @see Character#isWhitespace(int) 11161 * @since 1.5 11162 */ 11163 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11164 /* 11165 public static boolean isSpaceChar(int codePoint) { 11166 return ((((1 << Character.SPACE_SEPARATOR) | 11167 (1 << Character.LINE_SEPARATOR) | 11168 (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1) 11169 != 0; 11170 } 11171 */ isSpaceChar(int codePoint)11172 public static boolean isSpaceChar(int codePoint) { 11173 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 11174 // SPACE or NO-BREAK SPACE? 11175 if (codePoint == 0x20 || codePoint == 0xa0) { 11176 return true; 11177 } 11178 if (codePoint < 0x1000) { 11179 return false; 11180 } 11181 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 11182 if (codePoint == 0x1680 || codePoint == 0x180e) { 11183 return true; 11184 } 11185 if (codePoint < 0x2000) { 11186 return false; 11187 } 11188 if (codePoint <= 0xffff) { 11189 // Other whitespace from General Punctuation... 11190 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f || 11191 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 11192 } 11193 // Let icu4c worry about non-BMP code points. 11194 return isSpaceCharImpl(codePoint); 11195 } 11196 11197 @FastNative isSpaceCharImpl(int codePoint)11198 static native boolean isSpaceCharImpl(int codePoint); 11199 // END Android-changed: Reimplement methods natively on top of ICU4C. 
11200 11201 /** 11202 * Determines if the specified character is white space according to Java. 11203 * A character is a Java whitespace character if and only if it satisfies 11204 * one of the following criteria: 11205 * <ul> 11206 * <li> It is a Unicode space character ({@code SPACE_SEPARATOR}, 11207 * {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR}) 11208 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11209 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11210 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11211 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11212 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11213 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11214 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11215 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11216 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11217 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11218 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11219 * </ul> 11220 * 11221 * <p><b>Note:</b> This method cannot handle <a 11222 * href="#supplementary"> supplementary characters</a>. To support 11223 * all Unicode characters, including supplementary characters, use 11224 * the {@link #isWhitespace(int)} method. 11225 * 11226 * @param ch the character to be tested. 11227 * @return {@code true} if the character is a Java whitespace 11228 * character; {@code false} otherwise. 11229 * @see Character#isSpaceChar(char) 11230 * @since 1.1 11231 */ isWhitespace(char ch)11232 public static boolean isWhitespace(char ch) { 11233 return isWhitespace((int)ch); 11234 } 11235 11236 /** 11237 * Determines if the specified character (Unicode code point) is 11238 * white space according to Java. 
A character is a Java 11239 * whitespace character if and only if it satisfies one of the 11240 * following criteria: 11241 * <ul> 11242 * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR}, 11243 * {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR}) 11244 * but is not also a non-breaking space ({@code '\u005Cu00A0'}, 11245 * {@code '\u005Cu2007'}, {@code '\u005Cu202F'}). 11246 * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION. 11247 * <li> It is {@code '\u005Cn'}, U+000A LINE FEED. 11248 * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION. 11249 * <li> It is {@code '\u005Cf'}, U+000C FORM FEED. 11250 * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN. 11251 * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR. 11252 * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR. 11253 * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR. 11254 * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR. 11255 * </ul> 11256 * 11257 * @param codePoint the character (Unicode code point) to be tested. 11258 * @return {@code true} if the character is a Java whitespace 11259 * character; {@code false} otherwise. 11260 * @see Character#isSpaceChar(int) 11261 * @since 1.5 11262 */ 11263 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11264 /* 11265 public static boolean isWhitespace(int codePoint) { 11266 return CharacterData.of(codePoint).isWhitespace(codePoint); 11267 } 11268 */ isWhitespace(int codePoint)11269 public static boolean isWhitespace(int codePoint) { 11270 // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. 11271 // Any ASCII whitespace character? 11272 if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) { 11273 return true; 11274 } 11275 if (codePoint < 0x1000) { 11276 return false; 11277 } 11278 // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? 
11279 if (codePoint == 0x1680 || codePoint == 0x180e) { 11280 return true; 11281 } 11282 if (codePoint < 0x2000) { 11283 return false; 11284 } 11285 // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE). 11286 if (codePoint == 0x2007 || codePoint == 0x202f) { 11287 return false; 11288 } 11289 if (codePoint <= 0xffff) { 11290 // Other whitespace from General Punctuation... 11291 return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f || 11292 codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 11293 } 11294 // Let icu4c worry about non-BMP code points. 11295 return isWhitespaceImpl(codePoint); 11296 } 11297 11298 @FastNative isWhitespaceImpl(int codePoint)11299 native static boolean isWhitespaceImpl(int codePoint); 11300 // END Android-changed: Reimplement methods natively on top of ICU4C. 11301 11302 /** 11303 * Determines if the specified character is an ISO control 11304 * character. A character is considered to be an ISO control 11305 * character if its code is in the range {@code '\u005Cu0000'} 11306 * through {@code '\u005Cu001F'} or in the range 11307 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11308 * 11309 * <p><b>Note:</b> This method cannot handle <a 11310 * href="#supplementary"> supplementary characters</a>. To support 11311 * all Unicode characters, including supplementary characters, use 11312 * the {@link #isISOControl(int)} method. 11313 * 11314 * @param ch the character to be tested. 11315 * @return {@code true} if the character is an ISO control character; 11316 * {@code false} otherwise. 11317 * 11318 * @see Character#isSpaceChar(char) 11319 * @see Character#isWhitespace(char) 11320 * @since 1.1 11321 */ isISOControl(char ch)11322 public static boolean isISOControl(char ch) { 11323 return isISOControl((int)ch); 11324 } 11325 11326 /** 11327 * Determines if the referenced character (Unicode code point) is an ISO control 11328 * character. 
A character is considered to be an ISO control 11329 * character if its code is in the range {@code '\u005Cu0000'} 11330 * through {@code '\u005Cu001F'} or in the range 11331 * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}. 11332 * 11333 * @param codePoint the character (Unicode code point) to be tested. 11334 * @return {@code true} if the character is an ISO control character; 11335 * {@code false} otherwise. 11336 * @see Character#isSpaceChar(int) 11337 * @see Character#isWhitespace(int) 11338 * @since 1.5 11339 */ isISOControl(int codePoint)11340 public static boolean isISOControl(int codePoint) { 11341 // Optimized form of: 11342 // (codePoint >= 0x00 && codePoint <= 0x1F) || 11343 // (codePoint >= 0x7F && codePoint <= 0x9F); 11344 return codePoint <= 0x9F && 11345 (codePoint >= 0x7F || (codePoint >>> 5 == 0)); 11346 } 11347 11348 /** 11349 * Returns a value indicating a character's general category. 11350 * 11351 * <p><b>Note:</b> This method cannot handle <a 11352 * href="#supplementary"> supplementary characters</a>. To support 11353 * all Unicode characters, including supplementary characters, use 11354 * the {@link #getType(int)} method. 11355 * 11356 * @param ch the character to be tested. 11357 * @return a value of type {@code int} representing the 11358 * character's general category. 
11359 * @see Character#COMBINING_SPACING_MARK 11360 * @see Character#CONNECTOR_PUNCTUATION 11361 * @see Character#CONTROL 11362 * @see Character#CURRENCY_SYMBOL 11363 * @see Character#DASH_PUNCTUATION 11364 * @see Character#DECIMAL_DIGIT_NUMBER 11365 * @see Character#ENCLOSING_MARK 11366 * @see Character#END_PUNCTUATION 11367 * @see Character#FINAL_QUOTE_PUNCTUATION 11368 * @see Character#FORMAT 11369 * @see Character#INITIAL_QUOTE_PUNCTUATION 11370 * @see Character#LETTER_NUMBER 11371 * @see Character#LINE_SEPARATOR 11372 * @see Character#LOWERCASE_LETTER 11373 * @see Character#MATH_SYMBOL 11374 * @see Character#MODIFIER_LETTER 11375 * @see Character#MODIFIER_SYMBOL 11376 * @see Character#NON_SPACING_MARK 11377 * @see Character#OTHER_LETTER 11378 * @see Character#OTHER_NUMBER 11379 * @see Character#OTHER_PUNCTUATION 11380 * @see Character#OTHER_SYMBOL 11381 * @see Character#PARAGRAPH_SEPARATOR 11382 * @see Character#PRIVATE_USE 11383 * @see Character#SPACE_SEPARATOR 11384 * @see Character#START_PUNCTUATION 11385 * @see Character#SURROGATE 11386 * @see Character#TITLECASE_LETTER 11387 * @see Character#UNASSIGNED 11388 * @see Character#UPPERCASE_LETTER 11389 * @since 1.1 11390 */ getType(char ch)11391 public static int getType(char ch) { 11392 return getType((int)ch); 11393 } 11394 11395 /** 11396 * Returns a value indicating a character's general category. 11397 * 11398 * @param codePoint the character (Unicode code point) to be tested. 11399 * @return a value of type {@code int} representing the 11400 * character's general category. 
11401 * @see Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK 11402 * @see Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION 11403 * @see Character#CONTROL CONTROL 11404 * @see Character#CURRENCY_SYMBOL CURRENCY_SYMBOL 11405 * @see Character#DASH_PUNCTUATION DASH_PUNCTUATION 11406 * @see Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER 11407 * @see Character#ENCLOSING_MARK ENCLOSING_MARK 11408 * @see Character#END_PUNCTUATION END_PUNCTUATION 11409 * @see Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION 11410 * @see Character#FORMAT FORMAT 11411 * @see Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION 11412 * @see Character#LETTER_NUMBER LETTER_NUMBER 11413 * @see Character#LINE_SEPARATOR LINE_SEPARATOR 11414 * @see Character#LOWERCASE_LETTER LOWERCASE_LETTER 11415 * @see Character#MATH_SYMBOL MATH_SYMBOL 11416 * @see Character#MODIFIER_LETTER MODIFIER_LETTER 11417 * @see Character#MODIFIER_SYMBOL MODIFIER_SYMBOL 11418 * @see Character#NON_SPACING_MARK NON_SPACING_MARK 11419 * @see Character#OTHER_LETTER OTHER_LETTER 11420 * @see Character#OTHER_NUMBER OTHER_NUMBER 11421 * @see Character#OTHER_PUNCTUATION OTHER_PUNCTUATION 11422 * @see Character#OTHER_SYMBOL OTHER_SYMBOL 11423 * @see Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR 11424 * @see Character#PRIVATE_USE PRIVATE_USE 11425 * @see Character#SPACE_SEPARATOR SPACE_SEPARATOR 11426 * @see Character#START_PUNCTUATION START_PUNCTUATION 11427 * @see Character#SURROGATE SURROGATE 11428 * @see Character#TITLECASE_LETTER TITLECASE_LETTER 11429 * @see Character#UNASSIGNED UNASSIGNED 11430 * @see Character#UPPERCASE_LETTER UPPERCASE_LETTER 11431 * @since 1.5 11432 */ 11433 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 
11434 /* 11435 public static int getType(int codePoint) { 11436 return CharacterData.of(codePoint).getType(codePoint); 11437 } 11438 */ getType(int codePoint)11439 public static int getType(int codePoint) { 11440 int type = getTypeImpl(codePoint); 11441 // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 11442 if (type <= Character.FORMAT) { 11443 return type; 11444 } 11445 return (type + 1); 11446 } 11447 11448 @FastNative getTypeImpl(int codePoint)11449 static native int getTypeImpl(int codePoint); 11450 // END Android-changed: Reimplement methods natively on top of ICU4C. 11451 11452 /** 11453 * Determines the character representation for a specific digit in 11454 * the specified radix. If the value of {@code radix} is not a 11455 * valid radix, or the value of {@code digit} is not a valid 11456 * digit in the specified radix, the null character 11457 * ({@code '\u005Cu0000'}) is returned. 11458 * <p> 11459 * The {@code radix} argument is valid if it is greater than or 11460 * equal to {@code MIN_RADIX} and less than or equal to 11461 * {@code MAX_RADIX}. The {@code digit} argument is valid if 11462 * {@code 0 <= digit < radix}. 11463 * <p> 11464 * If the digit is less than 10, then 11465 * {@code '0' + digit} is returned. Otherwise, the value 11466 * {@code 'a' + digit - 10} is returned. 11467 * 11468 * @param digit the number to convert to a character. 11469 * @param radix the radix. 11470 * @return the {@code char} representation of the specified digit 11471 * in the specified radix. 
11472 * @see Character#MIN_RADIX 11473 * @see Character#MAX_RADIX 11474 * @see Character#digit(char, int) 11475 */ forDigit(int digit, int radix)11476 public static char forDigit(int digit, int radix) { 11477 if ((digit >= radix) || (digit < 0)) { 11478 return '\0'; 11479 } 11480 if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) { 11481 return '\0'; 11482 } 11483 if (digit < 10) { 11484 return (char)('0' + digit); 11485 } 11486 return (char)('a' - 10 + digit); 11487 } 11488 11489 /** 11490 * Returns the Unicode directionality property for the given 11491 * character. Character directionality is used to calculate the 11492 * visual ordering of text. The directionality value of undefined 11493 * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}. 11494 * 11495 * <p><b>Note:</b> This method cannot handle <a 11496 * href="#supplementary"> supplementary characters</a>. To support 11497 * all Unicode characters, including supplementary characters, use 11498 * the {@link #getDirectionality(int)} method. 11499 * 11500 * @param ch {@code char} for which the directionality property 11501 * is requested. 11502 * @return the directionality property of the {@code char} value. 
11503 * 11504 * @see Character#DIRECTIONALITY_UNDEFINED 11505 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT 11506 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT 11507 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11508 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER 11509 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11510 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11511 * @see Character#DIRECTIONALITY_ARABIC_NUMBER 11512 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11513 * @see Character#DIRECTIONALITY_NONSPACING_MARK 11514 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL 11515 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR 11516 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR 11517 * @see Character#DIRECTIONALITY_WHITESPACE 11518 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS 11519 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11520 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11521 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11522 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11523 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11524 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11525 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11526 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE 11527 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11528 * @since 1.4 11529 */ getDirectionality(char ch)11530 public static byte getDirectionality(char ch) { 11531 return getDirectionality((int)ch); 11532 } 11533 11534 /** 11535 * Returns the Unicode directionality property for the given 11536 * character (Unicode code point). Character directionality is 11537 * used to calculate the visual ordering of text. The 11538 * directionality value of undefined character is {@link 11539 * #DIRECTIONALITY_UNDEFINED}. 11540 * 11541 * @param codePoint the character (Unicode code point) for which 11542 * the directionality property is requested. 
11543 * @return the directionality property of the character. 11544 * 11545 * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED 11546 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT 11547 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT 11548 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC 11549 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER 11550 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR 11551 * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR 11552 * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER 11553 * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR 11554 * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK 11555 * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL 11556 * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR 11557 * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR 11558 * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE 11559 * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS 11560 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING 11561 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE 11562 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING 11563 * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE 11564 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT 11565 * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE 11566 * @see 
Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE 11567 * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE 11568 * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE 11569 * @since 1.5 11570 */ 11571 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11572 /* 11573 public static byte getDirectionality(int codePoint) { 11574 return CharacterData.of(codePoint).getDirectionality(codePoint); 11575 } 11576 */ getDirectionality(int codePoint)11577 public static byte getDirectionality(int codePoint) { 11578 if (getType(codePoint) == Character.UNASSIGNED) { 11579 return Character.DIRECTIONALITY_UNDEFINED; 11580 } 11581 11582 byte directionality = getDirectionalityImpl(codePoint); 11583 if (directionality >= 0 && directionality < DIRECTIONALITY.length) { 11584 return DIRECTIONALITY[directionality]; 11585 } 11586 return Character.DIRECTIONALITY_UNDEFINED; 11587 } 11588 11589 @FastNative getDirectionalityImpl(int codePoint)11590 native static byte getDirectionalityImpl(int codePoint); 11591 // END Android-changed: Reimplement methods natively on top of ICU4C. 11592 11593 /** 11594 * Determines whether the character is mirrored according to the 11595 * Unicode specification. Mirrored characters should have their 11596 * glyphs horizontally mirrored when displayed in text that is 11597 * right-to-left. For example, {@code '\u005Cu0028'} LEFT 11598 * PARENTHESIS is semantically defined to be an <i>opening 11599 * parenthesis</i>. This will appear as a "(" in text that is 11600 * left-to-right but as a ")" in text that is right-to-left. 11601 * 11602 * <p><b>Note:</b> This method cannot handle <a 11603 * href="#supplementary"> supplementary characters</a>. To support 11604 * all Unicode characters, including supplementary characters, use 11605 * the {@link #isMirrored(int)} method. 
11606 * 11607 * @param ch {@code char} for which the mirrored property is requested 11608 * @return {@code true} if the char is mirrored, {@code false} 11609 * if the {@code char} is not mirrored or is not defined. 11610 * @since 1.4 11611 */ isMirrored(char ch)11612 public static boolean isMirrored(char ch) { 11613 return isMirrored((int)ch); 11614 } 11615 11616 /** 11617 * Determines whether the specified character (Unicode code point) 11618 * is mirrored according to the Unicode specification. Mirrored 11619 * characters should have their glyphs horizontally mirrored when 11620 * displayed in text that is right-to-left. For example, 11621 * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically 11622 * defined to be an <i>opening parenthesis</i>. This will appear 11623 * as a "(" in text that is left-to-right but as a ")" in text 11624 * that is right-to-left. 11625 * 11626 * @param codePoint the character (Unicode code point) to be tested. 11627 * @return {@code true} if the character is mirrored, {@code false} 11628 * if the character is not mirrored or is not defined. 11629 * @since 1.5 11630 */ 11631 // BEGIN Android-changed: Reimplement methods natively on top of ICU4C. 11632 /* 11633 public static boolean isMirrored(int codePoint) { 11634 return CharacterData.of(codePoint).isMirrored(codePoint); 11635 } 11636 */ isMirrored(int codePoint)11637 public static boolean isMirrored(int codePoint) { 11638 return isMirroredImpl(codePoint); 11639 } 11640 11641 @FastNative isMirroredImpl(int codePoint)11642 native static boolean isMirroredImpl(int codePoint); 11643 // END Android-changed: Reimplement methods natively on top of ICU4C. 11644 11645 /** 11646 * Compares two {@code Character} objects numerically. 11647 * 11648 * @param anotherCharacter the {@code Character} to be compared. 
11649 * @return the value {@code 0} if the argument {@code Character} 11650 * is equal to this {@code Character}; a value less than 11651 * {@code 0} if this {@code Character} is numerically less 11652 * than the {@code Character} argument; and a value greater than 11653 * {@code 0} if this {@code Character} is numerically greater 11654 * than the {@code Character} argument (unsigned comparison). 11655 * Note that this is strictly a numerical comparison; it is not 11656 * locale-dependent. 11657 * @since 1.2 11658 */ compareTo(Character anotherCharacter)11659 public int compareTo(Character anotherCharacter) { 11660 return compare(this.value, anotherCharacter.value); 11661 } 11662 11663 /** 11664 * Compares two {@code char} values numerically. 11665 * The value returned is identical to what would be returned by: 11666 * <pre> 11667 * Character.valueOf(x).compareTo(Character.valueOf(y)) 11668 * </pre> 11669 * 11670 * @param x the first {@code char} to compare 11671 * @param y the second {@code char} to compare 11672 * @return the value {@code 0} if {@code x == y}; 11673 * a value less than {@code 0} if {@code x < y}; and 11674 * a value greater than {@code 0} if {@code x > y} 11675 * @since 1.7 11676 */ compare(char x, char y)11677 public static int compare(char x, char y) { 11678 return x - y; 11679 } 11680 11681 // BEGIN Android-removed: Use ICU. 11682 /** 11683 * Converts the character (Unicode code point) argument to uppercase using 11684 * information from the UnicodeData file. 11685 * 11686 * @param codePoint the character (Unicode code point) to be converted. 11687 * @return either the uppercase equivalent of the character, if 11688 * any, or an error flag ({@code Character.ERROR}) 11689 * that indicates that a 1:M {@code char} mapping exists. 
11690 * @see Character#isLowerCase(char) 11691 * @see Character#isUpperCase(char) 11692 * @see Character#toLowerCase(char) 11693 * @see Character#toTitleCase(char) 11694 * @since 1.4 11695 * 11696 static int toUpperCaseEx(int codePoint) { 11697 assert isValidCodePoint(codePoint); 11698 return CharacterData.of(codePoint).toUpperCaseEx(codePoint); 11699 } 11700 11701 /** 11702 * Converts the character (Unicode code point) argument to uppercase using case 11703 * mapping information from the SpecialCasing file in the Unicode 11704 * specification. If a character has no explicit uppercase 11705 * mapping, then the {@code char} itself is returned in the 11706 * {@code char[]}. 11707 * 11708 * @param codePoint the character (Unicode code point) to be converted. 11709 * @return a {@code char[]} with the uppercased character. 11710 * @since 1.4 11711 * 11712 static char[] toUpperCaseCharArray(int codePoint) { 11713 // As of Unicode 6.0, 1:M uppercasings only happen in the BMP. 11714 assert isBmpCodePoint(codePoint); 11715 return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); 11716 } 11717 */ 11718 // END Android-removed: Use ICU. 11719 11720 /** 11721 * The number of bits used to represent a {@code char} value in unsigned 11722 * binary form, constant {@code 16}. 11723 * 11724 * @since 1.5 11725 */ 11726 public static final int SIZE = 16; 11727 11728 /** 11729 * The number of bytes used to represent a {@code char} value in unsigned 11730 * binary form. 11731 * 11732 * @since 1.8 11733 */ 11734 public static final int BYTES = SIZE / Byte.SIZE; 11735 11736 /** 11737 * Returns the value obtained by reversing the order of the bytes in the 11738 * specified {@code char} value. 11739 * 11740 * @param ch The {@code char} of which to reverse the byte order. 11741 * @return the value obtained by reversing (or, equivalently, swapping) 11742 * the bytes in the specified {@code char} value. 
11743 * @since 1.5 11744 */ 11745 @IntrinsicCandidate reverseBytes(char ch)11746 public static char reverseBytes(char ch) { 11747 return (char) (((ch & 0xFF00) >> 8) | (ch << 8)); 11748 } 11749 11750 /** 11751 * Returns the Unicode name of the specified character 11752 * {@code codePoint}, or null if the code point is 11753 * {@link #UNASSIGNED unassigned}. 11754 * <p> 11755 * Note: if the specified character is not assigned a name by 11756 * the <i>UnicodeData</i> file (part of the Unicode Character 11757 * Database maintained by the Unicode Consortium), the returned 11758 * name is the same as the result of expression: 11759 * 11760 * <blockquote>{@code 11761 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11762 * + " " 11763 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11764 * 11765 * }</blockquote> 11766 * 11767 * @param codePoint the character (Unicode code point) 11768 * 11769 * @return the Unicode name of the specified character, or null if 11770 * the code point is unassigned. 11771 * 11772 * @throws IllegalArgumentException if the specified 11773 * {@code codePoint} is not a valid Unicode 11774 * code point. 11775 * 11776 * @since 1.7 11777 */ getName(int codePoint)11778 public static String getName(int codePoint) { 11779 if (!isValidCodePoint(codePoint)) { 11780 throw new IllegalArgumentException( 11781 String.format("Not a valid Unicode code point: 0x%X", codePoint)); 11782 } 11783 // Android-changed: Use ICU. 
11784 // String name = CharacterName.get(codePoint); 11785 String name = getNameImpl(codePoint); 11786 if (name != null) 11787 return name; 11788 if (getType(codePoint) == UNASSIGNED) 11789 return null; 11790 UnicodeBlock block = UnicodeBlock.of(codePoint); 11791 if (block != null) 11792 return block.toString().replace('_', ' ') + " " 11793 + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11794 // should never come here 11795 return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11796 } 11797 11798 // BEGIN Android-removed: expose after CharacterName.getCodePoint() is imported. 11799 /** 11800 * Returns the code point value of the Unicode character specified by 11801 * the given Unicode character name. 11802 * <p> 11803 * Note: if a character is not assigned a name by the <i>UnicodeData</i> 11804 * file (part of the Unicode Character Database maintained by the Unicode 11805 * Consortium), its name is defined as the result of expression: 11806 * 11807 * <blockquote>{@code 11808 * Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ') 11809 * + " " 11810 * + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT); 11811 * 11812 * }</blockquote> 11813 * <p> 11814 * The {@code name} matching is case insensitive, with any leading and 11815 * trailing whitespace character removed. 11816 * 11817 * @param name the Unicode character name 11818 * 11819 * @return the code point value of the character specified by its name. 11820 * 11821 * @throws IllegalArgumentException if the specified {@code name} 11822 * is not a valid Unicode character name. 
11823 * @throws NullPointerException if {@code name} is {@code null} 11824 * 11825 * @since 9 11826 * 11827 public static int codePointOf(String name) { 11828 name = name.trim().toUpperCase(Locale.ROOT); 11829 int cp = CharacterName.getInstance().getCodePoint(name); 11830 if (cp != -1) 11831 return cp; 11832 try { 11833 int off = name.lastIndexOf(' '); 11834 if (off != -1) { 11835 cp = Integer.parseInt(name, off + 1, name.length(), 16); 11836 if (isValidCodePoint(cp) && name.equals(getName(cp))) 11837 return cp; 11838 } 11839 } catch (Exception x) {} 11840 throw new IllegalArgumentException("Unrecognized character name :" + name); 11841 } 11842 */ 11843 // END Android-removed: expose after CharacterName.getCodePoint() is imported. 11844 11845 // Android-added: Use ICU. 11846 // Implement getNameImpl() natively. getNameImpl(int codePoint)11847 private static native String getNameImpl(int codePoint); 11848 } 11849