• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2002, 2023, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package java.lang;
27 
28 import dalvik.annotation.optimization.FastNative;
29 // Android-removed: CDS is not used on Android.
30 // import jdk.internal.misc.CDS;
31 import jdk.internal.vm.annotation.IntrinsicCandidate;
32 import jdk.internal.vm.annotation.Stable;
33 
34 import java.util.Arrays;
35 import java.util.HashMap;
36 import java.util.Locale;
37 import java.util.Map;
38 import java.util.Objects;
39 import java.util.Optional;
40 
41 import java.lang.constant.Constable;
42 import java.lang.constant.DynamicConstantDesc;
43 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST;
44 import static java.lang.constant.ConstantDescs.CD_char;
45 import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
46 
47 import android.icu.lang.UProperty;
48 
49 import libcore.icu.ICU;
50 
51 // Android-changed: Remove reference to a specific unicode standard version
52 /**
53  * The {@code Character} class wraps a value of the primitive
54  * type {@code char} in an object. An object of class
55  * {@code Character} contains a single field whose type is
56  * {@code char}.
57  * <p>
58  * In addition, this class provides several methods for determining
59  * a character's category (lowercase letter, digit, etc.) and for converting
60  * characters from uppercase to lowercase and vice versa.
61  * <p>
62  * Character information is based on the Unicode Standard.
63  * <p>
64  * The methods and data of class {@code Character} are defined by
65  * the information in the <i>UnicodeData</i> file that is part of the
66  * Unicode Character Database maintained by the Unicode
67  * Consortium. This file specifies various properties including name
68  * and general category for every defined Unicode code point or
69  * character range.
70  * <p>
71  * The file and its description are available from the Unicode Consortium at:
72  * <ul>
73  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
74  * </ul>
75  *
76  * <h2><a id="conformance">Unicode Conformance</a></h2>
77  * <p>
78  * The fields and methods of class {@code Character} are defined in terms
79  * of character information from the Unicode Standard, specifically the
80  * <i>UnicodeData</i> file that is part of the Unicode Character Database.
81  * This file specifies properties including name and category for every
82  * assigned Unicode code point or character range. The file is available
83  * from the Unicode Consortium at
84  * <a href="http://www.unicode.org">http://www.unicode.org</a>.
85  * <p>
86  * Character information is based on the Unicode Standard, version 15.0.
87  * <p>
88  * The Java platform has supported different versions of the Unicode
89  * Standard over time. Upgrades to newer versions of the Unicode Standard
90  * occurred in the following Java releases, each indicating the new version:
91  * <table class="striped">
92  * <caption style="display:none">Shows Java releases and supported Unicode versions</caption>
93  * <thead>
94  * <tr><th scope="col">Java release</th>
95  *     <th scope="col">Unicode version</th></tr>
96  * </thead>
97  * <tbody>
98  * <tr><th scope="row" style="text-align:left">Java SE 20</th>
99  *     <td>Unicode 15.0</td></tr>
100  * <tr><th scope="row" style="text-align:left">Java SE 19</th>
101  *     <td>Unicode 14.0</td></tr>
102  * <tr><th scope="row" style="text-align:left">Java SE 15</th>
103  *     <td>Unicode 13.0</td></tr>
104  * <tr><th scope="row" style="text-align:left">Java SE 13</th>
105  *     <td>Unicode 12.1</td></tr>
106  * <tr><th scope="row" style="text-align:left">Java SE 12</th>
107  *     <td>Unicode 11.0</td></tr>
108  * <tr><th scope="row" style="text-align:left">Java SE 11</th>
109  *     <td>Unicode 10.0</td></tr>
110  * <tr><th scope="row" style="text-align:left">Java SE 9</th>
111  *     <td>Unicode 8.0</td></tr>
112  * <tr><th scope="row" style="text-align:left">Java SE 8</th>
113  *     <td>Unicode 6.2</td></tr>
114  * <tr><th scope="row" style="text-align:left">Java SE 7</th>
115  *     <td>Unicode 6.0</td></tr>
116  * <tr><th scope="row" style="text-align:left">Java SE 5.0</th>
117  *     <td>Unicode 4.0</td></tr>
118  * <tr><th scope="row" style="text-align:left">Java SE 1.4</th>
119  *     <td>Unicode 3.0</td></tr>
120  * <tr><th scope="row" style="text-align:left">JDK 1.1</th>
121  *     <td>Unicode 2.0</td></tr>
122  * <tr><th scope="row" style="text-align:left">JDK 1.0.2</th>
123  *     <td>Unicode 1.1.5</td></tr>
124  * </tbody>
125  * </table>
126  * Variations from these base Unicode versions, such as recognized appendixes,
127  * are documented elsewhere.
128  * <h2><a id="unicode">Unicode Character Representations</a></h2>
129  *
130  * <p>The {@code char} data type (and therefore the value that a
131  * {@code Character} object encapsulates) is based on the
132  * original Unicode specification, which defined characters as
133  * fixed-width 16-bit entities. The Unicode Standard has since been
134  * changed to allow for characters whose representation requires more
135  * than 16 bits.  The range of legal <em>code point</em>s is now
136  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
137  * (Refer to the <a
138  * href="http://www.unicode.org/reports/tr27/#notation"><i>
139  * definition</i></a> of the U+<i>n</i> notation in the Unicode
140  * Standard.)
141  *
142  * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
143  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
144  * <a id="supplementary">Characters</a> whose code points are greater
145  * than U+FFFF are called <em>supplementary character</em>s.  The Java
146  * platform uses the UTF-16 representation in {@code char} arrays and
147  * in the {@code String} and {@code StringBuffer} classes. In
148  * this representation, supplementary characters are represented as a pair
149  * of {@code char} values, the first from the <em>high-surrogates</em>
150  * range, (&#92;uD800-&#92;uDBFF), the second from the
151  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
152  *
153  * <p>A {@code char} value, therefore, represents Basic
154  * Multilingual Plane (BMP) code points, including the surrogate
155  * code points, or code units of the UTF-16 encoding. An
156  * {@code int} value represents all Unicode code points,
157  * including supplementary code points. The lower (least significant)
158  * 21 bits of {@code int} are used to represent Unicode code
159  * points and the upper (most significant) 11 bits must be zero.
160  * Unless otherwise specified, the behavior with respect to
161  * supplementary characters and surrogate {@code char} values is
162  * as follows:
163  *
164  * <ul>
165  * <li>The methods that only accept a {@code char} value cannot support
166  * supplementary characters. They treat {@code char} values from the
167  * surrogate ranges as undefined characters. For example,
168  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
169  * this specific value if followed by any low-surrogate value in a string
170  * would represent a letter.
171  *
172  * <li>The methods that accept an {@code int} value support all
173  * Unicode characters, including supplementary characters. For
174  * example, {@code Character.isLetter(0x2F81A)} returns
175  * {@code true} because the code point value represents a letter
176  * (a CJK ideograph).
177  * </ul>
178  *
179  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
180  * used for character values in the range between U+0000 and U+10FFFF,
181  * and <em>Unicode code unit</em> is used for 16-bit
182  * {@code char} values that are code units of the <em>UTF-16</em>
183  * encoding. For more information on Unicode terminology, refer to the
184  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
185  *
186  * <!-- Android-removed: paragraph on ValueBased
187  * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
188  * class; programmers should treat instances that are
189  * {@linkplain #equals(Object) equal} as interchangeable and should not
190  * use instances for synchronization, or unpredictable behavior may
191  * occur. For example, in a future release, synchronization may fail.
192  * -->
193  *
194  * @spec https://www.unicode.org/reports/tr27 Unicode 3.1.0
195  * @author  Lee Boynton
196  * @author  Guy Steele
197  * @author  Akira Tanaka
198  * @author  Martin Buchholz
199  * @author  Ulf Zibis
200  * @since   1.0
201  */
202 @jdk.internal.ValueBased
203 public final
204 class Character implements java.io.Serializable, Comparable<Character>, Constable {
205     /**
206      * The minimum radix available for conversion to and from strings.
207      * The constant value of this field is the smallest value permitted
208      * for the radix argument in radix-conversion methods such as the
209      * {@code digit} method, the {@code forDigit} method, and the
210      * {@code toString} method of class {@code Integer}.
211      *
212      * @see     Character#digit(char, int)
213      * @see     Character#forDigit(int, int)
214      * @see     Integer#toString(int, int)
215      * @see     Integer#valueOf(String)
216      */
217     public static final int MIN_RADIX = 2;
218 
219     /**
220      * The maximum radix available for conversion to and from strings.
221      * The constant value of this field is the largest value permitted
222      * for the radix argument in radix-conversion methods such as the
223      * {@code digit} method, the {@code forDigit} method, and the
224      * {@code toString} method of class {@code Integer}.
225      *
226      * @see     Character#digit(char, int)
227      * @see     Character#forDigit(int, int)
228      * @see     Integer#toString(int, int)
229      * @see     Integer#valueOf(String)
230      */
231     public static final int MAX_RADIX = 36;
232 
233     /**
234      * The constant value of this field is the smallest value of type
235      * {@code char}, {@code '\u005Cu0000'}.
236      *
237      * @since   1.0.2
238      */
239     public static final char MIN_VALUE = '\u0000';
240 
241     /**
242      * The constant value of this field is the largest value of type
243      * {@code char}, {@code '\u005CuFFFF'}.
244      *
245      * @since   1.0.2
246      */
247     public static final char MAX_VALUE = '\uFFFF';
248 
249     /**
250      * The {@code Class} instance representing the primitive type
251      * {@code char}.
252      *
253      * @since   1.1
254      */
255     @SuppressWarnings("unchecked")
256     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
257 
258     /*
259      * Normative general types
260      */
261 
262     /*
263      * General character types
264      */
265 
266     /**
267      * General category "Cn" in the Unicode specification.
268      * @since   1.1
269      */
270     public static final byte UNASSIGNED = 0;
271 
272     /**
273      * General category "Lu" in the Unicode specification.
274      * @since   1.1
275      */
276     public static final byte UPPERCASE_LETTER = 1;
277 
278     /**
279      * General category "Ll" in the Unicode specification.
280      * @since   1.1
281      */
282     public static final byte LOWERCASE_LETTER = 2;
283 
284     /**
285      * General category "Lt" in the Unicode specification.
286      * @since   1.1
287      */
288     public static final byte TITLECASE_LETTER = 3;
289 
290     /**
291      * General category "Lm" in the Unicode specification.
292      * @since   1.1
293      */
294     public static final byte MODIFIER_LETTER = 4;
295 
296     /**
297      * General category "Lo" in the Unicode specification.
298      * @since   1.1
299      */
300     public static final byte OTHER_LETTER = 5;
301 
302     /**
303      * General category "Mn" in the Unicode specification.
304      * @since   1.1
305      */
306     public static final byte NON_SPACING_MARK = 6;
307 
308     /**
309      * General category "Me" in the Unicode specification.
310      * @since   1.1
311      */
312     public static final byte ENCLOSING_MARK = 7;
313 
314     /**
315      * General category "Mc" in the Unicode specification.
316      * @since   1.1
317      */
318     public static final byte COMBINING_SPACING_MARK = 8;
319 
320     /**
321      * General category "Nd" in the Unicode specification.
322      * @since   1.1
323      */
324     public static final byte DECIMAL_DIGIT_NUMBER = 9;
325 
326     /**
327      * General category "Nl" in the Unicode specification.
328      * @since   1.1
329      */
330     public static final byte LETTER_NUMBER = 10;
331 
332     /**
333      * General category "No" in the Unicode specification.
334      * @since   1.1
335      */
336     public static final byte OTHER_NUMBER = 11;
337 
338     /**
339      * General category "Zs" in the Unicode specification.
340      * @since   1.1
341      */
342     public static final byte SPACE_SEPARATOR = 12;
343 
344     /**
345      * General category "Zl" in the Unicode specification.
346      * @since   1.1
347      */
348     public static final byte LINE_SEPARATOR = 13;
349 
350     /**
351      * General category "Zp" in the Unicode specification.
352      * @since   1.1
353      */
354     public static final byte PARAGRAPH_SEPARATOR = 14;
355 
356     /**
357      * General category "Cc" in the Unicode specification.
358      * @since   1.1
359      */
360     public static final byte CONTROL = 15;
361 
362     /**
363      * General category "Cf" in the Unicode specification.
364      * @since   1.1
365      */
366     public static final byte FORMAT = 16;
367 
368     /**
369      * General category "Co" in the Unicode specification.
370      * @since   1.1
371      */
372     public static final byte PRIVATE_USE = 18;
373 
374     /**
375      * General category "Cs" in the Unicode specification.
376      * @since   1.1
377      */
378     public static final byte SURROGATE = 19;
379 
380     /**
381      * General category "Pd" in the Unicode specification.
382      * @since   1.1
383      */
384     public static final byte DASH_PUNCTUATION = 20;
385 
386     /**
387      * General category "Ps" in the Unicode specification.
388      * @since   1.1
389      */
390     public static final byte START_PUNCTUATION = 21;
391 
392     /**
393      * General category "Pe" in the Unicode specification.
394      * @since   1.1
395      */
396     public static final byte END_PUNCTUATION = 22;
397 
398     /**
399      * General category "Pc" in the Unicode specification.
400      * @since   1.1
401      */
402     public static final byte CONNECTOR_PUNCTUATION = 23;
403 
404     /**
405      * General category "Po" in the Unicode specification.
406      * @since   1.1
407      */
408     public static final byte OTHER_PUNCTUATION = 24;
409 
410     /**
411      * General category "Sm" in the Unicode specification.
412      * @since   1.1
413      */
414     public static final byte MATH_SYMBOL = 25;
415 
416     /**
417      * General category "Sc" in the Unicode specification.
418      * @since   1.1
419      */
420     public static final byte CURRENCY_SYMBOL = 26;
421 
422     /**
423      * General category "Sk" in the Unicode specification.
424      * @since   1.1
425      */
426     public static final byte MODIFIER_SYMBOL = 27;
427 
428     /**
429      * General category "So" in the Unicode specification.
430      * @since   1.1
431      */
432     public static final byte OTHER_SYMBOL = 28;
433 
434     /**
435      * General category "Pi" in the Unicode specification.
436      * @since   1.4
437      */
438     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
439 
440     /**
441      * General category "Pf" in the Unicode specification.
442      * @since   1.4
443      */
444     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
445 
446     /**
447      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
448      */
449     static final int ERROR = 0xFFFFFFFF;
450 
451 
452     /**
453      * Undefined bidirectional character type. Undefined {@code char}
454      * values have undefined directionality in the Unicode specification.
455      * @since 1.4
456      */
457     public static final byte DIRECTIONALITY_UNDEFINED = -1;
458 
459     /**
460      * Strong bidirectional character type "L" in the Unicode specification.
461      * @since 1.4
462      */
463     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
464 
465     /**
466      * Strong bidirectional character type "R" in the Unicode specification.
467      * @since 1.4
468      */
469     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
470 
471     /**
472      * Strong bidirectional character type "AL" in the Unicode specification.
473      * @since 1.4
474      */
475     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
476 
477     /**
478      * Weak bidirectional character type "EN" in the Unicode specification.
479      * @since 1.4
480      */
481     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
482 
483     /**
484      * Weak bidirectional character type "ES" in the Unicode specification.
485      * @since 1.4
486      */
487     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
488 
489     /**
490      * Weak bidirectional character type "ET" in the Unicode specification.
491      * @since 1.4
492      */
493     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
494 
495     /**
496      * Weak bidirectional character type "AN" in the Unicode specification.
497      * @since 1.4
498      */
499     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
500 
501     /**
502      * Weak bidirectional character type "CS" in the Unicode specification.
503      * @since 1.4
504      */
505     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
506 
507     /**
508      * Weak bidirectional character type "NSM" in the Unicode specification.
509      * @since 1.4
510      */
511     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
512 
513     /**
514      * Weak bidirectional character type "BN" in the Unicode specification.
515      * @since 1.4
516      */
517     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
518 
519     /**
520      * Neutral bidirectional character type "B" in the Unicode specification.
521      * @since 1.4
522      */
523     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
524 
525     /**
526      * Neutral bidirectional character type "S" in the Unicode specification.
527      * @since 1.4
528      */
529     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
530 
531     /**
532      * Neutral bidirectional character type "WS" in the Unicode specification.
533      * @since 1.4
534      */
535     public static final byte DIRECTIONALITY_WHITESPACE = 12;
536 
537     /**
538      * Neutral bidirectional character type "ON" in the Unicode specification.
539      * @since 1.4
540      */
541     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
542 
543     /**
544      * Strong bidirectional character type "LRE" in the Unicode specification.
545      * @since 1.4
546      */
547     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
548 
549     /**
550      * Strong bidirectional character type "LRO" in the Unicode specification.
551      * @since 1.4
552      */
553     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
554 
555     /**
556      * Strong bidirectional character type "RLE" in the Unicode specification.
557      * @since 1.4
558      */
559     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
560 
561     /**
562      * Strong bidirectional character type "RLO" in the Unicode specification.
563      * @since 1.4
564      */
565     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
566 
567     /**
568      * Weak bidirectional character type "PDF" in the Unicode specification.
569      * @since 1.4
570      */
571     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
572 
573     /**
574      * Weak bidirectional character type "LRI" in the Unicode specification.
575      * @since 9
576      */
577     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
578 
579     /**
580      * Weak bidirectional character type "RLI" in the Unicode specification.
581      * @since 9
582      */
583     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
584 
585     /**
586      * Weak bidirectional character type "FSI" in the Unicode specification.
587      * @since 9
588      */
589     public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
590 
591     /**
592      * Weak bidirectional character type "PDI" in the Unicode specification.
593      * @since 9
594      */
595     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
596 
597     /**
598      * The minimum value of a
599      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
600      * Unicode high-surrogate code unit</a>
601      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
602      * A high-surrogate is also known as a <i>leading-surrogate</i>.
603      *
604      * @since 1.5
605      */
606     public static final char MIN_HIGH_SURROGATE = '\uD800';
607 
608     /**
609      * The maximum value of a
610      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
611      * Unicode high-surrogate code unit</a>
612      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
613      * A high-surrogate is also known as a <i>leading-surrogate</i>.
614      *
615      * @since 1.5
616      */
617     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
618 
619     /**
620      * The minimum value of a
621      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
622      * Unicode low-surrogate code unit</a>
623      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
624      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
625      *
626      * @since 1.5
627      */
628     public static final char MIN_LOW_SURROGATE  = '\uDC00';
629 
630     /**
631      * The maximum value of a
632      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
633      * Unicode low-surrogate code unit</a>
634      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
635      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
636      *
637      * @since 1.5
638      */
639     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
640 
641     /**
642      * The minimum value of a Unicode surrogate code unit in the
643      * UTF-16 encoding, constant {@code '\u005CuD800'}.
644      *
645      * @since 1.5
646      */
647     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
648 
649     /**
650      * The maximum value of a Unicode surrogate code unit in the
651      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
652      *
653      * @since 1.5
654      */
655     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
656 
657     /**
658      * The minimum value of a
659      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
660      * Unicode supplementary code point</a>, constant {@code U+10000}.
661      *
662      * @since 1.5
663      */
664     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
665 
666     /**
667      * The minimum value of a
668      * <a href="http://www.unicode.org/glossary/#code_point">
669      * Unicode code point</a>, constant {@code U+0000}.
670      *
671      * @since 1.5
672      */
673     public static final int MIN_CODE_POINT = 0x000000;
674 
675     /**
676      * The maximum value of a
677      * <a href="http://www.unicode.org/glossary/#code_point">
678      * Unicode code point</a>, constant {@code U+10FFFF}.
679      *
680      * @since 1.5
681      */
682     public static final int MAX_CODE_POINT = 0X10FFFF;
683 
684     // BEGIN Android-added: Use ICU.
685     // The indices in int[] DIRECTIONALITY are based on icu4c's u_charDirection(),
686     // accessed via getDirectionalityImpl(), implemented in Character.cpp.
687     private static final byte[] DIRECTIONALITY = new byte[] {
688             DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
689             DIRECTIONALITY_EUROPEAN_NUMBER,
690             DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
691             DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
692             DIRECTIONALITY_ARABIC_NUMBER,
693             DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
694             DIRECTIONALITY_PARAGRAPH_SEPARATOR,
695             DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
696             DIRECTIONALITY_OTHER_NEUTRALS,
697             DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
698             DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
699             DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
700             DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
701             DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
702             DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
703             DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
704     // END Android-added: Use ICU.
705 
706     /**
707      * Returns an {@link Optional} containing the nominal descriptor for this
708      * instance.
709      *
710      * @return an {@link Optional} describing the {@linkplain Character} instance
711      * @since 15
712      * @hide
713      */
714     @Override
describeConstable()715     public Optional<DynamicConstantDesc<Character>> describeConstable() {
716         return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value));
717     }
718 
719     /**
720      * Instances of this class represent particular subsets of the Unicode
721      * character set.  The only family of subsets defined in the
722      * {@code Character} class is {@link Character.UnicodeBlock}.
723      * Other portions of the Java API may define other subsets for their
724      * own purposes.
725      *
726      * @since 1.2
727      */
728     public static class Subset  {
729 
730         private String name;
731 
732         /**
733          * Constructs a new {@code Subset} instance.
734          *
735          * @param  name  The name of this subset
736          * @throws NullPointerException if name is {@code null}
737          */
Subset(String name)738         protected Subset(String name) {
739             if (name == null) {
740                 throw new NullPointerException("name");
741             }
742             this.name = name;
743         }
744 
745         /**
746          * Compares two {@code Subset} objects for equality.
747          * This method returns {@code true} if and only if
748          * {@code this} and the argument refer to the same
749          * object; since this method is {@code final}, this
750          * guarantee holds for all subclasses.
751          */
equals(Object obj)752         public final boolean equals(Object obj) {
753             return (this == obj);
754         }
755 
756         /**
757          * Returns the standard hash code as defined by the
758          * {@link Object#hashCode} method.  This method
759          * is {@code final} in order to ensure that the
760          * {@code equals} and {@code hashCode} methods will
761          * be consistent in all subclasses.
762          */
hashCode()763         public final int hashCode() {
764             return super.hashCode();
765         }
766 
767         /**
768          * Returns the name of this subset.
769          */
toString()770         public final String toString() {
771             return name;
772         }
773     }
774 
775     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
776     // for the latest specification of Unicode Blocks.
777 
778     /**
779      * A family of character subsets representing the character blocks in the
780      * Unicode specification. Character blocks generally define characters
781      * used for a specific script or purpose. A character is contained by
782      * at most one Unicode block.
783      *
784      * @since 1.2
785      */
786     public static final class UnicodeBlock extends Subset {
787         /**
788          * NUM_ENTITIES should match the total number of UnicodeBlocks.
789          * It should be adjusted whenever the Unicode Character Database
790          * is upgraded.
791          */
792         private static final int NUM_ENTITIES = 756;
793         private static Map<String, UnicodeBlock> map = HashMap.newHashMap(NUM_ENTITIES);
794 
795         /**
796          * Creates a UnicodeBlock with the given identifier name.
797          * This name must be the same as the block identifier.
798          */
UnicodeBlock(String idName)799         private UnicodeBlock(String idName) {
800             super(idName);
801             map.put(idName, this);
802         }
803 
804         // BEGIN Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
805         // Add a (String, boolean) constructor for use by SURROGATES_AREA.
UnicodeBlock(String idName, boolean isMap)806         private UnicodeBlock(String idName, boolean isMap) {
807             super(idName);
808             if (isMap) {
809                 map.put(idName, this);
810             }
811         }
812         // END Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
813 
814         /**
815          * Creates a UnicodeBlock with the given identifier name and
816          * alias name.
817          */
UnicodeBlock(String idName, String alias)818         private UnicodeBlock(String idName, String alias) {
819             this(idName);
820             map.put(alias, this);
821         }
822 
823         /**
824          * Creates a UnicodeBlock with the given identifier name and
825          * alias names.
826          */
UnicodeBlock(String idName, String... aliases)827         private UnicodeBlock(String idName, String... aliases) {
828             this(idName);
829             for (String alias : aliases)
830                 map.put(alias, this);
831         }
832 
833         /**
834          * Constant for the "Basic Latin" Unicode character block.
835          * @since 1.2
836          */
837         public static final UnicodeBlock  BASIC_LATIN =
838             new UnicodeBlock("BASIC_LATIN",
839                              "BASIC LATIN",
840                              "BASICLATIN");
841 
842         /**
843          * Constant for the "Latin-1 Supplement" Unicode character block.
844          * @since 1.2
845          */
846         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
847             new UnicodeBlock("LATIN_1_SUPPLEMENT",
848                              "LATIN-1 SUPPLEMENT",
849                              "LATIN-1SUPPLEMENT");
850 
851         /**
852          * Constant for the "Latin Extended-A" Unicode character block.
853          * @since 1.2
854          */
855         public static final UnicodeBlock LATIN_EXTENDED_A =
856             new UnicodeBlock("LATIN_EXTENDED_A",
857                              "LATIN EXTENDED-A",
858                              "LATINEXTENDED-A");
859 
860         /**
861          * Constant for the "Latin Extended-B" Unicode character block.
862          * @since 1.2
863          */
864         public static final UnicodeBlock LATIN_EXTENDED_B =
865             new UnicodeBlock("LATIN_EXTENDED_B",
866                              "LATIN EXTENDED-B",
867                              "LATINEXTENDED-B");
868 
869         /**
870          * Constant for the "IPA Extensions" Unicode character block.
871          * @since 1.2
872          */
873         public static final UnicodeBlock IPA_EXTENSIONS =
874             new UnicodeBlock("IPA_EXTENSIONS",
875                              "IPA EXTENSIONS",
876                              "IPAEXTENSIONS");
877 
878         /**
879          * Constant for the "Spacing Modifier Letters" Unicode character block.
880          * @since 1.2
881          */
882         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
883             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
884                              "SPACING MODIFIER LETTERS",
885                              "SPACINGMODIFIERLETTERS");
886 
887         /**
888          * Constant for the "Combining Diacritical Marks" Unicode character block.
889          * @since 1.2
890          */
891         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
892             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
893                              "COMBINING DIACRITICAL MARKS",
894                              "COMBININGDIACRITICALMARKS");
895 
896         /**
897          * Constant for the "Greek and Coptic" Unicode character block.
898          * <p>
899          * This block was previously known as the "Greek" block.
900          *
901          * @since 1.2
902          */
903         public static final UnicodeBlock GREEK =
904             new UnicodeBlock("GREEK",
905                              "GREEK AND COPTIC",
906                              "GREEKANDCOPTIC");
907 
908         /**
909          * Constant for the "Cyrillic" Unicode character block.
910          * @since 1.2
911          */
912         public static final UnicodeBlock CYRILLIC =
913             new UnicodeBlock("CYRILLIC");
914 
915         /**
916          * Constant for the "Armenian" Unicode character block.
917          * @since 1.2
918          */
919         public static final UnicodeBlock ARMENIAN =
920             new UnicodeBlock("ARMENIAN");
921 
922         /**
923          * Constant for the "Hebrew" Unicode character block.
924          * @since 1.2
925          */
926         public static final UnicodeBlock HEBREW =
927             new UnicodeBlock("HEBREW");
928 
929         /**
930          * Constant for the "Arabic" Unicode character block.
931          * @since 1.2
932          */
933         public static final UnicodeBlock ARABIC =
934             new UnicodeBlock("ARABIC");
935 
936         /**
937          * Constant for the "Devanagari" Unicode character block.
938          * @since 1.2
939          */
940         public static final UnicodeBlock DEVANAGARI =
941             new UnicodeBlock("DEVANAGARI");
942 
943         /**
944          * Constant for the "Bengali" Unicode character block.
945          * @since 1.2
946          */
947         public static final UnicodeBlock BENGALI =
948             new UnicodeBlock("BENGALI");
949 
950         /**
951          * Constant for the "Gurmukhi" Unicode character block.
952          * @since 1.2
953          */
954         public static final UnicodeBlock GURMUKHI =
955             new UnicodeBlock("GURMUKHI");
956 
957         /**
958          * Constant for the "Gujarati" Unicode character block.
959          * @since 1.2
960          */
961         public static final UnicodeBlock GUJARATI =
962             new UnicodeBlock("GUJARATI");
963 
964         /**
965          * Constant for the "Oriya" Unicode character block.
966          * @since 1.2
967          */
968         public static final UnicodeBlock ORIYA =
969             new UnicodeBlock("ORIYA");
970 
971         /**
972          * Constant for the "Tamil" Unicode character block.
973          * @since 1.2
974          */
975         public static final UnicodeBlock TAMIL =
976             new UnicodeBlock("TAMIL");
977 
978         /**
979          * Constant for the "Telugu" Unicode character block.
980          * @since 1.2
981          */
982         public static final UnicodeBlock TELUGU =
983             new UnicodeBlock("TELUGU");
984 
985         /**
986          * Constant for the "Kannada" Unicode character block.
987          * @since 1.2
988          */
989         public static final UnicodeBlock KANNADA =
990             new UnicodeBlock("KANNADA");
991 
992         /**
993          * Constant for the "Malayalam" Unicode character block.
994          * @since 1.2
995          */
996         public static final UnicodeBlock MALAYALAM =
997             new UnicodeBlock("MALAYALAM");
998 
999         /**
1000          * Constant for the "Thai" Unicode character block.
1001          * @since 1.2
1002          */
1003         public static final UnicodeBlock THAI =
1004             new UnicodeBlock("THAI");
1005 
1006         /**
1007          * Constant for the "Lao" Unicode character block.
1008          * @since 1.2
1009          */
1010         public static final UnicodeBlock LAO =
1011             new UnicodeBlock("LAO");
1012 
1013         /**
1014          * Constant for the "Tibetan" Unicode character block.
1015          * @since 1.2
1016          */
1017         public static final UnicodeBlock TIBETAN =
1018             new UnicodeBlock("TIBETAN");
1019 
1020         /**
1021          * Constant for the "Georgian" Unicode character block.
1022          * @since 1.2
1023          */
1024         public static final UnicodeBlock GEORGIAN =
1025             new UnicodeBlock("GEORGIAN");
1026 
1027         /**
1028          * Constant for the "Hangul Jamo" Unicode character block.
1029          * @since 1.2
1030          */
1031         public static final UnicodeBlock HANGUL_JAMO =
1032             new UnicodeBlock("HANGUL_JAMO",
1033                              "HANGUL JAMO",
1034                              "HANGULJAMO");
1035 
1036         /**
1037          * Constant for the "Latin Extended Additional" Unicode character block.
1038          * @since 1.2
1039          */
1040         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
1041             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
1042                              "LATIN EXTENDED ADDITIONAL",
1043                              "LATINEXTENDEDADDITIONAL");
1044 
1045         /**
1046          * Constant for the "Greek Extended" Unicode character block.
1047          * @since 1.2
1048          */
1049         public static final UnicodeBlock GREEK_EXTENDED =
1050             new UnicodeBlock("GREEK_EXTENDED",
1051                              "GREEK EXTENDED",
1052                              "GREEKEXTENDED");
1053 
1054         /**
1055          * Constant for the "General Punctuation" Unicode character block.
1056          * @since 1.2
1057          */
1058         public static final UnicodeBlock GENERAL_PUNCTUATION =
1059             new UnicodeBlock("GENERAL_PUNCTUATION",
1060                              "GENERAL PUNCTUATION",
1061                              "GENERALPUNCTUATION");
1062 
1063         /**
1064          * Constant for the "Superscripts and Subscripts" Unicode character
1065          * block.
1066          * @since 1.2
1067          */
1068         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
1069             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
1070                              "SUPERSCRIPTS AND SUBSCRIPTS",
1071                              "SUPERSCRIPTSANDSUBSCRIPTS");
1072 
1073         /**
1074          * Constant for the "Currency Symbols" Unicode character block.
1075          * @since 1.2
1076          */
1077         public static final UnicodeBlock CURRENCY_SYMBOLS =
1078             new UnicodeBlock("CURRENCY_SYMBOLS",
1079                              "CURRENCY SYMBOLS",
1080                              "CURRENCYSYMBOLS");
1081 
1082         /**
1083          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
1084          * character block.
1085          * <p>
1086          * This block was previously known as "Combining Marks for Symbols".
1087          * @since 1.2
1088          */
1089         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
1090             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
1091                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
1092                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
1093                              "COMBINING MARKS FOR SYMBOLS",
1094                              "COMBININGMARKSFORSYMBOLS");
1095 
1096         /**
1097          * Constant for the "Letterlike Symbols" Unicode character block.
1098          * @since 1.2
1099          */
1100         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
1101             new UnicodeBlock("LETTERLIKE_SYMBOLS",
1102                              "LETTERLIKE SYMBOLS",
1103                              "LETTERLIKESYMBOLS");
1104 
1105         /**
1106          * Constant for the "Number Forms" Unicode character block.
1107          * @since 1.2
1108          */
1109         public static final UnicodeBlock NUMBER_FORMS =
1110             new UnicodeBlock("NUMBER_FORMS",
1111                              "NUMBER FORMS",
1112                              "NUMBERFORMS");
1113 
1114         /**
1115          * Constant for the "Arrows" Unicode character block.
1116          * @since 1.2
1117          */
1118         public static final UnicodeBlock ARROWS =
1119             new UnicodeBlock("ARROWS");
1120 
1121         /**
1122          * Constant for the "Mathematical Operators" Unicode character block.
1123          * @since 1.2
1124          */
1125         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1126             new UnicodeBlock("MATHEMATICAL_OPERATORS",
1127                              "MATHEMATICAL OPERATORS",
1128                              "MATHEMATICALOPERATORS");
1129 
1130         /**
1131          * Constant for the "Miscellaneous Technical" Unicode character block.
1132          * @since 1.2
1133          */
1134         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1135             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1136                              "MISCELLANEOUS TECHNICAL",
1137                              "MISCELLANEOUSTECHNICAL");
1138 
1139         /**
1140          * Constant for the "Control Pictures" Unicode character block.
1141          * @since 1.2
1142          */
1143         public static final UnicodeBlock CONTROL_PICTURES =
1144             new UnicodeBlock("CONTROL_PICTURES",
1145                              "CONTROL PICTURES",
1146                              "CONTROLPICTURES");
1147 
1148         /**
1149          * Constant for the "Optical Character Recognition" Unicode character block.
1150          * @since 1.2
1151          */
1152         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1153             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1154                              "OPTICAL CHARACTER RECOGNITION",
1155                              "OPTICALCHARACTERRECOGNITION");
1156 
1157         /**
1158          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1159          * @since 1.2
1160          */
1161         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1162             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1163                              "ENCLOSED ALPHANUMERICS",
1164                              "ENCLOSEDALPHANUMERICS");
1165 
1166         /**
1167          * Constant for the "Box Drawing" Unicode character block.
1168          * @since 1.2
1169          */
1170         public static final UnicodeBlock BOX_DRAWING =
1171             new UnicodeBlock("BOX_DRAWING",
1172                              "BOX DRAWING",
1173                              "BOXDRAWING");
1174 
1175         /**
1176          * Constant for the "Block Elements" Unicode character block.
1177          * @since 1.2
1178          */
1179         public static final UnicodeBlock BLOCK_ELEMENTS =
1180             new UnicodeBlock("BLOCK_ELEMENTS",
1181                              "BLOCK ELEMENTS",
1182                              "BLOCKELEMENTS");
1183 
1184         /**
1185          * Constant for the "Geometric Shapes" Unicode character block.
1186          * @since 1.2
1187          */
1188         public static final UnicodeBlock GEOMETRIC_SHAPES =
1189             new UnicodeBlock("GEOMETRIC_SHAPES",
1190                              "GEOMETRIC SHAPES",
1191                              "GEOMETRICSHAPES");
1192 
1193         /**
1194          * Constant for the "Miscellaneous Symbols" Unicode character block.
1195          * @since 1.2
1196          */
1197         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1198             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1199                              "MISCELLANEOUS SYMBOLS",
1200                              "MISCELLANEOUSSYMBOLS");
1201 
1202         /**
1203          * Constant for the "Dingbats" Unicode character block.
1204          * @since 1.2
1205          */
1206         public static final UnicodeBlock DINGBATS =
1207             new UnicodeBlock("DINGBATS");
1208 
1209         /**
1210          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1211          * @since 1.2
1212          */
1213         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1214             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1215                              "CJK SYMBOLS AND PUNCTUATION",
1216                              "CJKSYMBOLSANDPUNCTUATION");
1217 
1218         /**
1219          * Constant for the "Hiragana" Unicode character block.
1220          * @since 1.2
1221          */
1222         public static final UnicodeBlock HIRAGANA =
1223             new UnicodeBlock("HIRAGANA");
1224 
1225         /**
1226          * Constant for the "Katakana" Unicode character block.
1227          * @since 1.2
1228          */
1229         public static final UnicodeBlock KATAKANA =
1230             new UnicodeBlock("KATAKANA");
1231 
1232         /**
1233          * Constant for the "Bopomofo" Unicode character block.
1234          * @since 1.2
1235          */
1236         public static final UnicodeBlock BOPOMOFO =
1237             new UnicodeBlock("BOPOMOFO");
1238 
1239         /**
1240          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1241          * @since 1.2
1242          */
1243         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1244             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1245                              "HANGUL COMPATIBILITY JAMO",
1246                              "HANGULCOMPATIBILITYJAMO");
1247 
1248         /**
1249          * Constant for the "Kanbun" Unicode character block.
1250          * @since 1.2
1251          */
1252         public static final UnicodeBlock KANBUN =
1253             new UnicodeBlock("KANBUN");
1254 
1255         /**
1256          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1257          * @since 1.2
1258          */
1259         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1260             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1261                              "ENCLOSED CJK LETTERS AND MONTHS",
1262                              "ENCLOSEDCJKLETTERSANDMONTHS");
1263 
1264         /**
1265          * Constant for the "CJK Compatibility" Unicode character block.
1266          * @since 1.2
1267          */
1268         public static final UnicodeBlock CJK_COMPATIBILITY =
1269             new UnicodeBlock("CJK_COMPATIBILITY",
1270                              "CJK COMPATIBILITY",
1271                              "CJKCOMPATIBILITY");
1272 
1273         /**
1274          * Constant for the "CJK Unified Ideographs" Unicode character block.
1275          * @since 1.2
1276          */
1277         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1278             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1279                              "CJK UNIFIED IDEOGRAPHS",
1280                              "CJKUNIFIEDIDEOGRAPHS");
1281 
1282         /**
1283          * Constant for the "Hangul Syllables" Unicode character block.
1284          * @since 1.2
1285          */
1286         public static final UnicodeBlock HANGUL_SYLLABLES =
1287             new UnicodeBlock("HANGUL_SYLLABLES",
1288                              "HANGUL SYLLABLES",
1289                              "HANGULSYLLABLES");
1290 
1291         /**
1292          * Constant for the "Private Use Area" Unicode character block.
1293          * @since 1.2
1294          */
1295         public static final UnicodeBlock PRIVATE_USE_AREA =
1296             new UnicodeBlock("PRIVATE_USE_AREA",
1297                              "PRIVATE USE AREA",
1298                              "PRIVATEUSEAREA");
1299 
1300         /**
1301          * Constant for the "CJK Compatibility Ideographs" Unicode character
1302          * block.
1303          * @since 1.2
1304          */
1305         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1306             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1307                              "CJK COMPATIBILITY IDEOGRAPHS",
1308                              "CJKCOMPATIBILITYIDEOGRAPHS");
1309 
1310         /**
1311          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1312          * @since 1.2
1313          */
1314         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1315             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1316                              "ALPHABETIC PRESENTATION FORMS",
1317                              "ALPHABETICPRESENTATIONFORMS");
1318 
1319         /**
1320          * Constant for the "Arabic Presentation Forms-A" Unicode character
1321          * block.
1322          * @since 1.2
1323          */
1324         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1325             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1326                              "ARABIC PRESENTATION FORMS-A",
1327                              "ARABICPRESENTATIONFORMS-A");
1328 
1329         /**
1330          * Constant for the "Combining Half Marks" Unicode character block.
1331          * @since 1.2
1332          */
1333         public static final UnicodeBlock COMBINING_HALF_MARKS =
1334             new UnicodeBlock("COMBINING_HALF_MARKS",
1335                              "COMBINING HALF MARKS",
1336                              "COMBININGHALFMARKS");
1337 
1338         /**
1339          * Constant for the "CJK Compatibility Forms" Unicode character block.
1340          * @since 1.2
1341          */
1342         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1343             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1344                              "CJK COMPATIBILITY FORMS",
1345                              "CJKCOMPATIBILITYFORMS");
1346 
1347         /**
1348          * Constant for the "Small Form Variants" Unicode character block.
1349          * @since 1.2
1350          */
1351         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1352             new UnicodeBlock("SMALL_FORM_VARIANTS",
1353                              "SMALL FORM VARIANTS",
1354                              "SMALLFORMVARIANTS");
1355 
1356         /**
1357          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1358          * @since 1.2
1359          */
1360         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1361             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1362                              "ARABIC PRESENTATION FORMS-B",
1363                              "ARABICPRESENTATIONFORMS-B");
1364 
1365         /**
1366          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1367          * block.
1368          * @since 1.2
1369          */
1370         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1371             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1372                              "HALFWIDTH AND FULLWIDTH FORMS",
1373                              "HALFWIDTHANDFULLWIDTHFORMS");
1374 
1375         /**
1376          * Constant for the "Specials" Unicode character block.
1377          * @since 1.2
1378          */
1379         public static final UnicodeBlock SPECIALS =
1380             new UnicodeBlock("SPECIALS");
1381 
1382         /**
1383          * @deprecated
1384          * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES},
1385          * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1386          * These constants match the block definitions of the Unicode Standard.
1387          * The {@link #of(char)} and {@link #of(int)} methods return the
1388          * standard constants.
1389          */
1390         @Deprecated(since="1.5")
1391         public static final UnicodeBlock SURROGATES_AREA =
1392             // Android-changed: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
1393             // new UnicodeBlock("SURROGATES_AREA");
1394             new UnicodeBlock("SURROGATES_AREA", false);
1395 
1396         /**
1397          * Constant for the "Syriac" Unicode character block.
1398          * @since 1.4
1399          */
1400         public static final UnicodeBlock SYRIAC =
1401             new UnicodeBlock("SYRIAC");
1402 
1403         /**
1404          * Constant for the "Thaana" Unicode character block.
1405          * @since 1.4
1406          */
1407         public static final UnicodeBlock THAANA =
1408             new UnicodeBlock("THAANA");
1409 
1410         /**
1411          * Constant for the "Sinhala" Unicode character block.
1412          * @since 1.4
1413          */
1414         public static final UnicodeBlock SINHALA =
1415             new UnicodeBlock("SINHALA");
1416 
1417         /**
1418          * Constant for the "Myanmar" Unicode character block.
1419          * @since 1.4
1420          */
1421         public static final UnicodeBlock MYANMAR =
1422             new UnicodeBlock("MYANMAR");
1423 
1424         /**
1425          * Constant for the "Ethiopic" Unicode character block.
1426          * @since 1.4
1427          */
1428         public static final UnicodeBlock ETHIOPIC =
1429             new UnicodeBlock("ETHIOPIC");
1430 
1431         /**
1432          * Constant for the "Cherokee" Unicode character block.
1433          * @since 1.4
1434          */
1435         public static final UnicodeBlock CHEROKEE =
1436             new UnicodeBlock("CHEROKEE");
1437 
1438         /**
1439          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1440          * @since 1.4
1441          */
1442         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1443             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1444                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1445                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1446 
1447         /**
1448          * Constant for the "Ogham" Unicode character block.
1449          * @since 1.4
1450          */
1451         public static final UnicodeBlock OGHAM =
1452             new UnicodeBlock("OGHAM");
1453 
1454         /**
1455          * Constant for the "Runic" Unicode character block.
1456          * @since 1.4
1457          */
1458         public static final UnicodeBlock RUNIC =
1459             new UnicodeBlock("RUNIC");
1460 
1461         /**
1462          * Constant for the "Khmer" Unicode character block.
1463          * @since 1.4
1464          */
1465         public static final UnicodeBlock KHMER =
1466             new UnicodeBlock("KHMER");
1467 
1468         /**
1469          * Constant for the "Mongolian" Unicode character block.
1470          * @since 1.4
1471          */
1472         public static final UnicodeBlock MONGOLIAN =
1473             new UnicodeBlock("MONGOLIAN");
1474 
1475         /**
1476          * Constant for the "Braille Patterns" Unicode character block.
1477          * @since 1.4
1478          */
1479         public static final UnicodeBlock BRAILLE_PATTERNS =
1480             new UnicodeBlock("BRAILLE_PATTERNS",
1481                              "BRAILLE PATTERNS",
1482                              "BRAILLEPATTERNS");
1483 
1484         /**
1485          * Constant for the "CJK Radicals Supplement" Unicode character block.
1486          * @since 1.4
1487          */
1488         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1489             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1490                              "CJK RADICALS SUPPLEMENT",
1491                              "CJKRADICALSSUPPLEMENT");
1492 
1493         /**
1494          * Constant for the "Kangxi Radicals" Unicode character block.
1495          * @since 1.4
1496          */
1497         public static final UnicodeBlock KANGXI_RADICALS =
1498             new UnicodeBlock("KANGXI_RADICALS",
1499                              "KANGXI RADICALS",
1500                              "KANGXIRADICALS");
1501 
1502         /**
1503          * Constant for the "Ideographic Description Characters" Unicode character block.
1504          * @since 1.4
1505          */
1506         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1507             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1508                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1509                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1510 
1511         /**
1512          * Constant for the "Bopomofo Extended" Unicode character block.
1513          * @since 1.4
1514          */
1515         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1516             new UnicodeBlock("BOPOMOFO_EXTENDED",
1517                              "BOPOMOFO EXTENDED",
1518                              "BOPOMOFOEXTENDED");
1519 
1520         /**
1521          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1522          * @since 1.4
1523          */
1524         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1525             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1526                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1527                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1528 
1529         /**
1530          * Constant for the "Yi Syllables" Unicode character block.
1531          * @since 1.4
1532          */
1533         public static final UnicodeBlock YI_SYLLABLES =
1534             new UnicodeBlock("YI_SYLLABLES",
1535                              "YI SYLLABLES",
1536                              "YISYLLABLES");
1537 
1538         /**
1539          * Constant for the "Yi Radicals" Unicode character block.
1540          * @since 1.4
1541          */
1542         public static final UnicodeBlock YI_RADICALS =
1543             new UnicodeBlock("YI_RADICALS",
1544                              "YI RADICALS",
1545                              "YIRADICALS");
1546 
1547         /**
1548          * Constant for the "Cyrillic Supplement" Unicode character block.
1549          * This block was previously known as the "Cyrillic Supplementary" block.
1550          * @since 1.5
1551          */
1552         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1553             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1554                              "CYRILLIC SUPPLEMENTARY",
1555                              "CYRILLICSUPPLEMENTARY",
1556                              "CYRILLIC SUPPLEMENT",
1557                              "CYRILLICSUPPLEMENT");
1558 
1559         /**
1560          * Constant for the "Tagalog" Unicode character block.
1561          * @since 1.5
1562          */
1563         public static final UnicodeBlock TAGALOG =
1564             new UnicodeBlock("TAGALOG");
1565 
1566         /**
1567          * Constant for the "Hanunoo" Unicode character block.
1568          * @since 1.5
1569          */
1570         public static final UnicodeBlock HANUNOO =
1571             new UnicodeBlock("HANUNOO");
1572 
1573         /**
1574          * Constant for the "Buhid" Unicode character block.
1575          * @since 1.5
1576          */
1577         public static final UnicodeBlock BUHID =
1578             new UnicodeBlock("BUHID");
1579 
1580         /**
1581          * Constant for the "Tagbanwa" Unicode character block.
1582          * @since 1.5
1583          */
1584         public static final UnicodeBlock TAGBANWA =
1585             new UnicodeBlock("TAGBANWA");
1586 
1587         /**
1588          * Constant for the "Limbu" Unicode character block.
1589          * @since 1.5
1590          */
1591         public static final UnicodeBlock LIMBU =
1592             new UnicodeBlock("LIMBU");
1593 
1594         /**
1595          * Constant for the "Tai Le" Unicode character block.
1596          * @since 1.5
1597          */
1598         public static final UnicodeBlock TAI_LE =
1599             new UnicodeBlock("TAI_LE",
1600                              "TAI LE",
1601                              "TAILE");
1602 
1603         /**
1604          * Constant for the "Khmer Symbols" Unicode character block.
1605          * @since 1.5
1606          */
1607         public static final UnicodeBlock KHMER_SYMBOLS =
1608             new UnicodeBlock("KHMER_SYMBOLS",
1609                              "KHMER SYMBOLS",
1610                              "KHMERSYMBOLS");
1611 
1612         /**
1613          * Constant for the "Phonetic Extensions" Unicode character block.
1614          * @since 1.5
1615          */
1616         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1617             new UnicodeBlock("PHONETIC_EXTENSIONS",
1618                              "PHONETIC EXTENSIONS",
1619                              "PHONETICEXTENSIONS");
1620 
1621         /**
1622          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1623          * @since 1.5
1624          */
1625         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1626             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1627                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1628                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1629 
1630         /**
1631          * Constant for the "Supplemental Arrows-A" Unicode character block.
1632          * @since 1.5
1633          */
1634         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1635             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1636                              "SUPPLEMENTAL ARROWS-A",
1637                              "SUPPLEMENTALARROWS-A");
1638 
1639         /**
1640          * Constant for the "Supplemental Arrows-B" Unicode character block.
1641          * @since 1.5
1642          */
1643         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1644             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1645                              "SUPPLEMENTAL ARROWS-B",
1646                              "SUPPLEMENTALARROWS-B");
1647 
1648         /**
1649          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1650          * character block.
1651          * @since 1.5
1652          */
1653         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1654             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1655                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1656                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1657 
1658         /**
1659          * Constant for the "Supplemental Mathematical Operators" Unicode
1660          * character block.
1661          * @since 1.5
1662          */
1663         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1664             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1665                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1666                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1667 
1668         /**
1669          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1670          * block.
1671          * @since 1.5
1672          */
1673         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1674             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1675                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1676                              "MISCELLANEOUSSYMBOLSANDARROWS");
1677 
1678         /**
1679          * Constant for the "Katakana Phonetic Extensions" Unicode character
1680          * block.
1681          * @since 1.5
1682          */
1683         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1684             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1685                              "KATAKANA PHONETIC EXTENSIONS",
1686                              "KATAKANAPHONETICEXTENSIONS");
1687 
1688         /**
1689          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1690          * @since 1.5
1691          */
1692         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1693             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1694                              "YIJING HEXAGRAM SYMBOLS",
1695                              "YIJINGHEXAGRAMSYMBOLS");
1696 
1697         /**
1698          * Constant for the "Variation Selectors" Unicode character block.
1699          * @since 1.5
1700          */
1701         public static final UnicodeBlock VARIATION_SELECTORS =
1702             new UnicodeBlock("VARIATION_SELECTORS",
1703                              "VARIATION SELECTORS",
1704                              "VARIATIONSELECTORS");
1705 
1706         /**
1707          * Constant for the "Linear B Syllabary" Unicode character block.
1708          * @since 1.5
1709          */
1710         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1711             new UnicodeBlock("LINEAR_B_SYLLABARY",
1712                              "LINEAR B SYLLABARY",
1713                              "LINEARBSYLLABARY");
1714 
1715         /**
1716          * Constant for the "Linear B Ideograms" Unicode character block.
1717          * @since 1.5
1718          */
1719         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1720             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1721                              "LINEAR B IDEOGRAMS",
1722                              "LINEARBIDEOGRAMS");
1723 
1724         /**
1725          * Constant for the "Aegean Numbers" Unicode character block.
1726          * @since 1.5
1727          */
1728         public static final UnicodeBlock AEGEAN_NUMBERS =
1729             new UnicodeBlock("AEGEAN_NUMBERS",
1730                              "AEGEAN NUMBERS",
1731                              "AEGEANNUMBERS");
1732 
1733         /**
1734          * Constant for the "Old Italic" Unicode character block.
1735          * @since 1.5
1736          */
1737         public static final UnicodeBlock OLD_ITALIC =
1738             new UnicodeBlock("OLD_ITALIC",
1739                              "OLD ITALIC",
1740                              "OLDITALIC");
1741 
1742         /**
1743          * Constant for the "Gothic" Unicode character block.
1744          * @since 1.5
1745          */
1746         public static final UnicodeBlock GOTHIC =
1747             new UnicodeBlock("GOTHIC");
1748 
1749         /**
1750          * Constant for the "Ugaritic" Unicode character block.
1751          * @since 1.5
1752          */
1753         public static final UnicodeBlock UGARITIC =
1754             new UnicodeBlock("UGARITIC");
1755 
1756         /**
1757          * Constant for the "Deseret" Unicode character block.
1758          * @since 1.5
1759          */
1760         public static final UnicodeBlock DESERET =
1761             new UnicodeBlock("DESERET");
1762 
1763         /**
1764          * Constant for the "Shavian" Unicode character block.
1765          * @since 1.5
1766          */
1767         public static final UnicodeBlock SHAVIAN =
1768             new UnicodeBlock("SHAVIAN");
1769 
1770         /**
1771          * Constant for the "Osmanya" Unicode character block.
1772          * @since 1.5
1773          */
1774         public static final UnicodeBlock OSMANYA =
1775             new UnicodeBlock("OSMANYA");
1776 
1777         /**
1778          * Constant for the "Cypriot Syllabary" Unicode character block.
1779          * @since 1.5
1780          */
1781         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1782             new UnicodeBlock("CYPRIOT_SYLLABARY",
1783                              "CYPRIOT SYLLABARY",
1784                              "CYPRIOTSYLLABARY");
1785 
1786         /**
1787          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1788          * @since 1.5
1789          */
1790         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1791             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1792                              "BYZANTINE MUSICAL SYMBOLS",
1793                              "BYZANTINEMUSICALSYMBOLS");
1794 
1795         /**
1796          * Constant for the "Musical Symbols" Unicode character block.
1797          * @since 1.5
1798          */
1799         public static final UnicodeBlock MUSICAL_SYMBOLS =
1800             new UnicodeBlock("MUSICAL_SYMBOLS",
1801                              "MUSICAL SYMBOLS",
1802                              "MUSICALSYMBOLS");
1803 
1804         /**
1805          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1806          * @since 1.5
1807          */
1808         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1809             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1810                              "TAI XUAN JING SYMBOLS",
1811                              "TAIXUANJINGSYMBOLS");
1812 
1813         /**
1814          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1815          * character block.
1816          * @since 1.5
1817          */
1818         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1819             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1820                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1821                              "MATHEMATICALALPHANUMERICSYMBOLS");
1822 
1823         /**
1824          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1825          * character block.
1826          * @since 1.5
1827          */
1828         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1829             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1830                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1831                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1832 
1833         /**
1834          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1835          * @since 1.5
1836          */
1837         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1838             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1839                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1840                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1841 
1842         /**
1843          * Constant for the "Tags" Unicode character block.
1844          * @since 1.5
1845          */
1846         public static final UnicodeBlock TAGS =
1847             new UnicodeBlock("TAGS");
1848 
1849         /**
1850          * Constant for the "Variation Selectors Supplement" Unicode character
1851          * block.
1852          * @since 1.5
1853          */
1854         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1855             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1856                              "VARIATION SELECTORS SUPPLEMENT",
1857                              "VARIATIONSELECTORSSUPPLEMENT");
1858 
1859         /**
1860          * Constant for the "Supplementary Private Use Area-A" Unicode character
1861          * block.
1862          * @since 1.5
1863          */
1864         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1865             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1866                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1867                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1868 
1869         /**
1870          * Constant for the "Supplementary Private Use Area-B" Unicode character
1871          * block.
1872          * @since 1.5
1873          */
1874         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1875             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1876                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1877                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1878 
1879         /**
1880          * Constant for the "High Surrogates" Unicode character block.
1881          * This block represents codepoint values in the high surrogate
1882          * range: U+D800 through U+DB7F
1883          *
1884          * @since 1.5
1885          */
1886         public static final UnicodeBlock HIGH_SURROGATES =
1887             new UnicodeBlock("HIGH_SURROGATES",
1888                              "HIGH SURROGATES",
1889                              "HIGHSURROGATES");
1890 
1891         /**
1892          * Constant for the "High Private Use Surrogates" Unicode character
1893          * block.
1894          * This block represents codepoint values in the private use high
1895          * surrogate range: U+DB80 through U+DBFF
1896          *
1897          * @since 1.5
1898          */
1899         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1900             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1901                              "HIGH PRIVATE USE SURROGATES",
1902                              "HIGHPRIVATEUSESURROGATES");
1903 
1904         /**
1905          * Constant for the "Low Surrogates" Unicode character block.
1906          * This block represents codepoint values in the low surrogate
1907          * range: U+DC00 through U+DFFF
1908          *
1909          * @since 1.5
1910          */
1911         public static final UnicodeBlock LOW_SURROGATES =
1912             new UnicodeBlock("LOW_SURROGATES",
1913                              "LOW SURROGATES",
1914                              "LOWSURROGATES");
1915 
1916         /**
1917          * Constant for the "Arabic Supplement" Unicode character block.
1918          * @since 1.7
1919          */
1920         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1921             new UnicodeBlock("ARABIC_SUPPLEMENT",
1922                              "ARABIC SUPPLEMENT",
1923                              "ARABICSUPPLEMENT");
1924 
1925         /**
1926          * Constant for the "NKo" Unicode character block.
1927          * @since 1.7
1928          */
1929         public static final UnicodeBlock NKO =
1930             new UnicodeBlock("NKO");
1931 
1932         /**
1933          * Constant for the "Samaritan" Unicode character block.
1934          * @since 1.7
1935          */
1936         public static final UnicodeBlock SAMARITAN =
1937             new UnicodeBlock("SAMARITAN");
1938 
1939         /**
1940          * Constant for the "Mandaic" Unicode character block.
1941          * @since 1.7
1942          */
1943         public static final UnicodeBlock MANDAIC =
1944             new UnicodeBlock("MANDAIC");
1945 
1946         /**
1947          * Constant for the "Ethiopic Supplement" Unicode character block.
1948          * @since 1.7
1949          */
1950         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1951             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1952                              "ETHIOPIC SUPPLEMENT",
1953                              "ETHIOPICSUPPLEMENT");
1954 
1955         /**
1956          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1957          * Unicode character block.
1958          * @since 1.7
1959          */
1960         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1961             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1962                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1963                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1964 
1965         /**
1966          * Constant for the "New Tai Lue" Unicode character block.
1967          * @since 1.7
1968          */
1969         public static final UnicodeBlock NEW_TAI_LUE =
1970             new UnicodeBlock("NEW_TAI_LUE",
1971                              "NEW TAI LUE",
1972                              "NEWTAILUE");
1973 
1974         /**
1975          * Constant for the "Buginese" Unicode character block.
1976          * @since 1.7
1977          */
1978         public static final UnicodeBlock BUGINESE =
1979             new UnicodeBlock("BUGINESE");
1980 
1981         /**
1982          * Constant for the "Tai Tham" Unicode character block.
1983          * @since 1.7
1984          */
1985         public static final UnicodeBlock TAI_THAM =
1986             new UnicodeBlock("TAI_THAM",
1987                              "TAI THAM",
1988                              "TAITHAM");
1989 
1990         /**
1991          * Constant for the "Balinese" Unicode character block.
1992          * @since 1.7
1993          */
1994         public static final UnicodeBlock BALINESE =
1995             new UnicodeBlock("BALINESE");
1996 
1997         /**
1998          * Constant for the "Sundanese" Unicode character block.
1999          * @since 1.7
2000          */
2001         public static final UnicodeBlock SUNDANESE =
2002             new UnicodeBlock("SUNDANESE");
2003 
2004         /**
2005          * Constant for the "Batak" Unicode character block.
2006          * @since 1.7
2007          */
2008         public static final UnicodeBlock BATAK =
2009             new UnicodeBlock("BATAK");
2010 
2011         /**
2012          * Constant for the "Lepcha" Unicode character block.
2013          * @since 1.7
2014          */
2015         public static final UnicodeBlock LEPCHA =
2016             new UnicodeBlock("LEPCHA");
2017 
2018         /**
2019          * Constant for the "Ol Chiki" Unicode character block.
2020          * @since 1.7
2021          */
2022         public static final UnicodeBlock OL_CHIKI =
2023             new UnicodeBlock("OL_CHIKI",
2024                              "OL CHIKI",
2025                              "OLCHIKI");
2026 
2027         /**
2028          * Constant for the "Vedic Extensions" Unicode character block.
2029          * @since 1.7
2030          */
2031         public static final UnicodeBlock VEDIC_EXTENSIONS =
2032             new UnicodeBlock("VEDIC_EXTENSIONS",
2033                              "VEDIC EXTENSIONS",
2034                              "VEDICEXTENSIONS");
2035 
2036         /**
2037          * Constant for the "Phonetic Extensions Supplement" Unicode character
2038          * block.
2039          * @since 1.7
2040          */
2041         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
2042             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
2043                              "PHONETIC EXTENSIONS SUPPLEMENT",
2044                              "PHONETICEXTENSIONSSUPPLEMENT");
2045 
2046         /**
2047          * Constant for the "Combining Diacritical Marks Supplement" Unicode
2048          * character block.
2049          * @since 1.7
2050          */
2051         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
2052             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
2053                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
2054                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
2055 
2056         /**
2057          * Constant for the "Glagolitic" Unicode character block.
2058          * @since 1.7
2059          */
2060         public static final UnicodeBlock GLAGOLITIC =
2061             new UnicodeBlock("GLAGOLITIC");
2062 
2063         /**
2064          * Constant for the "Latin Extended-C" Unicode character block.
2065          * @since 1.7
2066          */
2067         public static final UnicodeBlock LATIN_EXTENDED_C =
2068             new UnicodeBlock("LATIN_EXTENDED_C",
2069                              "LATIN EXTENDED-C",
2070                              "LATINEXTENDED-C");
2071 
2072         /**
2073          * Constant for the "Coptic" Unicode character block.
2074          * @since 1.7
2075          */
2076         public static final UnicodeBlock COPTIC =
2077             new UnicodeBlock("COPTIC");
2078 
2079         /**
2080          * Constant for the "Georgian Supplement" Unicode character block.
2081          * @since 1.7
2082          */
2083         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
2084             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
2085                              "GEORGIAN SUPPLEMENT",
2086                              "GEORGIANSUPPLEMENT");
2087 
2088         /**
2089          * Constant for the "Tifinagh" Unicode character block.
2090          * @since 1.7
2091          */
2092         public static final UnicodeBlock TIFINAGH =
2093             new UnicodeBlock("TIFINAGH");
2094 
2095         /**
2096          * Constant for the "Ethiopic Extended" Unicode character block.
2097          * @since 1.7
2098          */
2099         public static final UnicodeBlock ETHIOPIC_EXTENDED =
2100             new UnicodeBlock("ETHIOPIC_EXTENDED",
2101                              "ETHIOPIC EXTENDED",
2102                              "ETHIOPICEXTENDED");
2103 
2104         /**
2105          * Constant for the "Cyrillic Extended-A" Unicode character block.
2106          * @since 1.7
2107          */
2108         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2109             new UnicodeBlock("CYRILLIC_EXTENDED_A",
2110                              "CYRILLIC EXTENDED-A",
2111                              "CYRILLICEXTENDED-A");
2112 
2113         /**
2114          * Constant for the "Supplemental Punctuation" Unicode character block.
2115          * @since 1.7
2116          */
2117         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2118             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
2119                              "SUPPLEMENTAL PUNCTUATION",
2120                              "SUPPLEMENTALPUNCTUATION");
2121 
2122         /**
2123          * Constant for the "CJK Strokes" Unicode character block.
2124          * @since 1.7
2125          */
2126         public static final UnicodeBlock CJK_STROKES =
2127             new UnicodeBlock("CJK_STROKES",
2128                              "CJK STROKES",
2129                              "CJKSTROKES");
2130 
2131         /**
2132          * Constant for the "Lisu" Unicode character block.
2133          * @since 1.7
2134          */
2135         public static final UnicodeBlock LISU =
2136             new UnicodeBlock("LISU");
2137 
2138         /**
2139          * Constant for the "Vai" Unicode character block.
2140          * @since 1.7
2141          */
2142         public static final UnicodeBlock VAI =
2143             new UnicodeBlock("VAI");
2144 
2145         /**
2146          * Constant for the "Cyrillic Extended-B" Unicode character block.
2147          * @since 1.7
2148          */
2149         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2150             new UnicodeBlock("CYRILLIC_EXTENDED_B",
2151                              "CYRILLIC EXTENDED-B",
2152                              "CYRILLICEXTENDED-B");
2153 
2154         /**
2155          * Constant for the "Bamum" Unicode character block.
2156          * @since 1.7
2157          */
2158         public static final UnicodeBlock BAMUM =
2159             new UnicodeBlock("BAMUM");
2160 
2161         /**
2162          * Constant for the "Modifier Tone Letters" Unicode character block.
2163          * @since 1.7
2164          */
2165         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2166             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2167                              "MODIFIER TONE LETTERS",
2168                              "MODIFIERTONELETTERS");
2169 
2170         /**
2171          * Constant for the "Latin Extended-D" Unicode character block.
2172          * @since 1.7
2173          */
2174         public static final UnicodeBlock LATIN_EXTENDED_D =
2175             new UnicodeBlock("LATIN_EXTENDED_D",
2176                              "LATIN EXTENDED-D",
2177                              "LATINEXTENDED-D");
2178 
2179         /**
2180          * Constant for the "Syloti Nagri" Unicode character block.
2181          * @since 1.7
2182          */
2183         public static final UnicodeBlock SYLOTI_NAGRI =
2184             new UnicodeBlock("SYLOTI_NAGRI",
2185                              "SYLOTI NAGRI",
2186                              "SYLOTINAGRI");
2187 
2188         /**
2189          * Constant for the "Common Indic Number Forms" Unicode character block.
2190          * @since 1.7
2191          */
2192         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2193             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2194                              "COMMON INDIC NUMBER FORMS",
2195                              "COMMONINDICNUMBERFORMS");
2196 
2197         /**
2198          * Constant for the "Phags-pa" Unicode character block.
2199          * @since 1.7
2200          */
2201         public static final UnicodeBlock PHAGS_PA =
2202             new UnicodeBlock("PHAGS_PA",
2203                              "PHAGS-PA");
2204 
2205         /**
2206          * Constant for the "Saurashtra" Unicode character block.
2207          * @since 1.7
2208          */
2209         public static final UnicodeBlock SAURASHTRA =
2210             new UnicodeBlock("SAURASHTRA");
2211 
2212         /**
2213          * Constant for the "Devanagari Extended" Unicode character block.
2214          * @since 1.7
2215          */
2216         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2217             new UnicodeBlock("DEVANAGARI_EXTENDED",
2218                              "DEVANAGARI EXTENDED",
2219                              "DEVANAGARIEXTENDED");
2220 
2221         /**
2222          * Constant for the "Kayah Li" Unicode character block.
2223          * @since 1.7
2224          */
2225         public static final UnicodeBlock KAYAH_LI =
2226             new UnicodeBlock("KAYAH_LI",
2227                              "KAYAH LI",
2228                              "KAYAHLI");
2229 
2230         /**
2231          * Constant for the "Rejang" Unicode character block.
2232          * @since 1.7
2233          */
2234         public static final UnicodeBlock REJANG =
2235             new UnicodeBlock("REJANG");
2236 
2237         /**
2238          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2239          * @since 1.7
2240          */
2241         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2242             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2243                              "HANGUL JAMO EXTENDED-A",
2244                              "HANGULJAMOEXTENDED-A");
2245 
2246         /**
2247          * Constant for the "Javanese" Unicode character block.
2248          * @since 1.7
2249          */
2250         public static final UnicodeBlock JAVANESE =
2251             new UnicodeBlock("JAVANESE");
2252 
2253         /**
2254          * Constant for the "Cham" Unicode character block.
2255          * @since 1.7
2256          */
2257         public static final UnicodeBlock CHAM =
2258             new UnicodeBlock("CHAM");
2259 
2260         /**
2261          * Constant for the "Myanmar Extended-A" Unicode character block.
2262          * @since 1.7
2263          */
2264         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2265             new UnicodeBlock("MYANMAR_EXTENDED_A",
2266                              "MYANMAR EXTENDED-A",
2267                              "MYANMAREXTENDED-A");
2268 
2269         /**
2270          * Constant for the "Tai Viet" Unicode character block.
2271          * @since 1.7
2272          */
2273         public static final UnicodeBlock TAI_VIET =
2274             new UnicodeBlock("TAI_VIET",
2275                              "TAI VIET",
2276                              "TAIVIET");
2277 
2278         /**
2279          * Constant for the "Ethiopic Extended-A" Unicode character block.
2280          * @since 1.7
2281          */
2282         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2283             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2284                              "ETHIOPIC EXTENDED-A",
2285                              "ETHIOPICEXTENDED-A");
2286 
2287         /**
2288          * Constant for the "Meetei Mayek" Unicode character block.
2289          * @since 1.7
2290          */
2291         public static final UnicodeBlock MEETEI_MAYEK =
2292             new UnicodeBlock("MEETEI_MAYEK",
2293                              "MEETEI MAYEK",
2294                              "MEETEIMAYEK");
2295 
2296         /**
2297          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2298          * @since 1.7
2299          */
2300         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2301             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2302                              "HANGUL JAMO EXTENDED-B",
2303                              "HANGULJAMOEXTENDED-B");
2304 
2305         /**
2306          * Constant for the "Vertical Forms" Unicode character block.
2307          * @since 1.7
2308          */
2309         public static final UnicodeBlock VERTICAL_FORMS =
2310             new UnicodeBlock("VERTICAL_FORMS",
2311                              "VERTICAL FORMS",
2312                              "VERTICALFORMS");
2313 
2314         /**
2315          * Constant for the "Ancient Greek Numbers" Unicode character block.
2316          * @since 1.7
2317          */
2318         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2319             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2320                              "ANCIENT GREEK NUMBERS",
2321                              "ANCIENTGREEKNUMBERS");
2322 
2323         /**
2324          * Constant for the "Ancient Symbols" Unicode character block.
2325          * @since 1.7
2326          */
2327         public static final UnicodeBlock ANCIENT_SYMBOLS =
2328             new UnicodeBlock("ANCIENT_SYMBOLS",
2329                              "ANCIENT SYMBOLS",
2330                              "ANCIENTSYMBOLS");
2331 
2332         /**
2333          * Constant for the "Phaistos Disc" Unicode character block.
2334          * @since 1.7
2335          */
2336         public static final UnicodeBlock PHAISTOS_DISC =
2337             new UnicodeBlock("PHAISTOS_DISC",
2338                              "PHAISTOS DISC",
2339                              "PHAISTOSDISC");
2340 
2341         /**
2342          * Constant for the "Lycian" Unicode character block.
2343          * @since 1.7
2344          */
2345         public static final UnicodeBlock LYCIAN =
2346             new UnicodeBlock("LYCIAN");
2347 
2348         /**
2349          * Constant for the "Carian" Unicode character block.
2350          * @since 1.7
2351          */
2352         public static final UnicodeBlock CARIAN =
2353             new UnicodeBlock("CARIAN");
2354 
2355         /**
2356          * Constant for the "Old Persian" Unicode character block.
2357          * @since 1.7
2358          */
2359         public static final UnicodeBlock OLD_PERSIAN =
2360             new UnicodeBlock("OLD_PERSIAN",
2361                              "OLD PERSIAN",
2362                              "OLDPERSIAN");
2363 
2364         /**
2365          * Constant for the "Imperial Aramaic" Unicode character block.
2366          * @since 1.7
2367          */
2368         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2369             new UnicodeBlock("IMPERIAL_ARAMAIC",
2370                              "IMPERIAL ARAMAIC",
2371                              "IMPERIALARAMAIC");
2372 
2373         /**
2374          * Constant for the "Phoenician" Unicode character block.
2375          * @since 1.7
2376          */
2377         public static final UnicodeBlock PHOENICIAN =
2378             new UnicodeBlock("PHOENICIAN");
2379 
2380         /**
2381          * Constant for the "Lydian" Unicode character block.
2382          * @since 1.7
2383          */
2384         public static final UnicodeBlock LYDIAN =
2385             new UnicodeBlock("LYDIAN");
2386 
2387         /**
2388          * Constant for the "Kharoshthi" Unicode character block.
2389          * @since 1.7
2390          */
2391         public static final UnicodeBlock KHAROSHTHI =
2392             new UnicodeBlock("KHAROSHTHI");
2393 
2394         /**
2395          * Constant for the "Old South Arabian" Unicode character block.
2396          * @since 1.7
2397          */
2398         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2399             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2400                              "OLD SOUTH ARABIAN",
2401                              "OLDSOUTHARABIAN");
2402 
2403         /**
2404          * Constant for the "Avestan" Unicode character block.
2405          * @since 1.7
2406          */
2407         public static final UnicodeBlock AVESTAN =
2408             new UnicodeBlock("AVESTAN");
2409 
2410         /**
2411          * Constant for the "Inscriptional Parthian" Unicode character block.
2412          * @since 1.7
2413          */
2414         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2415             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2416                              "INSCRIPTIONAL PARTHIAN",
2417                              "INSCRIPTIONALPARTHIAN");
2418 
2419         /**
2420          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2421          * @since 1.7
2422          */
2423         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2424             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2425                              "INSCRIPTIONAL PAHLAVI",
2426                              "INSCRIPTIONALPAHLAVI");
2427 
2428         /**
2429          * Constant for the "Old Turkic" Unicode character block.
2430          * @since 1.7
2431          */
2432         public static final UnicodeBlock OLD_TURKIC =
2433             new UnicodeBlock("OLD_TURKIC",
2434                              "OLD TURKIC",
2435                              "OLDTURKIC");
2436 
2437         /**
2438          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2439          * @since 1.7
2440          */
2441         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2442             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2443                              "RUMI NUMERAL SYMBOLS",
2444                              "RUMINUMERALSYMBOLS");
2445 
2446         /**
2447          * Constant for the "Brahmi" Unicode character block.
2448          * @since 1.7
2449          */
2450         public static final UnicodeBlock BRAHMI =
2451             new UnicodeBlock("BRAHMI");
2452 
2453         /**
2454          * Constant for the "Kaithi" Unicode character block.
2455          * @since 1.7
2456          */
2457         public static final UnicodeBlock KAITHI =
2458             new UnicodeBlock("KAITHI");
2459 
2460         /**
2461          * Constant for the "Cuneiform" Unicode character block.
2462          * @since 1.7
2463          */
2464         public static final UnicodeBlock CUNEIFORM =
2465             new UnicodeBlock("CUNEIFORM");
2466 
2467         /**
2468          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2469          * character block.
2470          * @since 1.7
2471          */
2472         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2473             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2474                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2475                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2476 
2477         /**
2478          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2479          * @since 1.7
2480          */
2481         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2482             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2483                              "EGYPTIAN HIEROGLYPHS",
2484                              "EGYPTIANHIEROGLYPHS");
2485 
2486         /**
2487          * Constant for the "Bamum Supplement" Unicode character block.
2488          * @since 1.7
2489          */
2490         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2491             new UnicodeBlock("BAMUM_SUPPLEMENT",
2492                              "BAMUM SUPPLEMENT",
2493                              "BAMUMSUPPLEMENT");
2494 
2495         /**
2496          * Constant for the "Kana Supplement" Unicode character block.
2497          * @since 1.7
2498          */
2499         public static final UnicodeBlock KANA_SUPPLEMENT =
2500             new UnicodeBlock("KANA_SUPPLEMENT",
2501                              "KANA SUPPLEMENT",
2502                              "KANASUPPLEMENT");
2503 
2504         /**
2505          * Constant for the "Ancient Greek Musical Notation" Unicode character
2506          * block.
2507          * @since 1.7
2508          */
2509         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2510             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2511                              "ANCIENT GREEK MUSICAL NOTATION",
2512                              "ANCIENTGREEKMUSICALNOTATION");
2513 
2514         /**
2515          * Constant for the "Counting Rod Numerals" Unicode character block.
2516          * @since 1.7
2517          */
2518         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2519             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2520                              "COUNTING ROD NUMERALS",
2521                              "COUNTINGRODNUMERALS");
2522 
2523         /**
2524          * Constant for the "Mahjong Tiles" Unicode character block.
2525          * @since 1.7
2526          */
2527         public static final UnicodeBlock MAHJONG_TILES =
2528             new UnicodeBlock("MAHJONG_TILES",
2529                              "MAHJONG TILES",
2530                              "MAHJONGTILES");
2531 
2532         /**
2533          * Constant for the "Domino Tiles" Unicode character block.
2534          * @since 1.7
2535          */
2536         public static final UnicodeBlock DOMINO_TILES =
2537             new UnicodeBlock("DOMINO_TILES",
2538                              "DOMINO TILES",
2539                              "DOMINOTILES");
2540 
2541         /**
2542          * Constant for the "Playing Cards" Unicode character block.
2543          * @since 1.7
2544          */
2545         public static final UnicodeBlock PLAYING_CARDS =
2546             new UnicodeBlock("PLAYING_CARDS",
2547                              "PLAYING CARDS",
2548                              "PLAYINGCARDS");
2549 
2550         /**
2551          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2552          * block.
2553          * @since 1.7
2554          */
2555         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2556             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2557                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2558                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2559 
2560         /**
2561          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2562          * block.
2563          * @since 1.7
2564          */
2565         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2566             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2567                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2568                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2569 
2570         /**
2571          * Constant for the "Miscellaneous Symbols and Pictographs" Unicode
2572          * character block.
2573          * @since 1.7
2574          */
2575         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2576             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2577                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2578                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2579 
2580         /**
2581          * Constant for the "Emoticons" Unicode character block.
2582          * @since 1.7
2583          */
2584         public static final UnicodeBlock EMOTICONS =
2585             new UnicodeBlock("EMOTICONS");
2586 
2587         /**
2588          * Constant for the "Transport and Map Symbols" Unicode character block.
2589          * @since 1.7
2590          */
2591         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2592             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2593                              "TRANSPORT AND MAP SYMBOLS",
2594                              "TRANSPORTANDMAPSYMBOLS");
2595 
2596         /**
2597          * Constant for the "Alchemical Symbols" Unicode character block.
2598          * @since 1.7
2599          */
2600         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2601             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2602                              "ALCHEMICAL SYMBOLS",
2603                              "ALCHEMICALSYMBOLS");
2604 
2605         /**
2606          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2607          * character block.
2608          * @since 1.7
2609          */
2610         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2611             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2612                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2613                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2614 
2615         /**
2616          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2617          * character block.
2618          * @since 1.7
2619          */
2620         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2621             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2622                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2623                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2624 
2625         /**
2626          * Constant for the "Arabic Extended-A" Unicode character block.
2627          * @since 1.8
2628          */
2629         public static final UnicodeBlock ARABIC_EXTENDED_A =
2630             new UnicodeBlock("ARABIC_EXTENDED_A",
2631                              "ARABIC EXTENDED-A",
2632                              "ARABICEXTENDED-A");
2633 
2634         /**
2635          * Constant for the "Sundanese Supplement" Unicode character block.
2636          * @since 1.8
2637          */
2638         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2639             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2640                              "SUNDANESE SUPPLEMENT",
2641                              "SUNDANESESUPPLEMENT");
2642 
2643         /**
2644          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2645          * @since 1.8
2646          */
2647         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2648             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2649                              "MEETEI MAYEK EXTENSIONS",
2650                              "MEETEIMAYEKEXTENSIONS");
2651 
2652         /**
2653          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2654          * @since 1.8
2655          */
2656         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2657             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2658                              "MEROITIC HIEROGLYPHS",
2659                              "MEROITICHIEROGLYPHS");
2660 
2661         /**
2662          * Constant for the "Meroitic Cursive" Unicode character block.
2663          * @since 1.8
2664          */
2665         public static final UnicodeBlock MEROITIC_CURSIVE =
2666             new UnicodeBlock("MEROITIC_CURSIVE",
2667                              "MEROITIC CURSIVE",
2668                              "MEROITICCURSIVE");
2669 
2670         /**
2671          * Constant for the "Sora Sompeng" Unicode character block.
2672          * @since 1.8
2673          */
2674         public static final UnicodeBlock SORA_SOMPENG =
2675             new UnicodeBlock("SORA_SOMPENG",
2676                              "SORA SOMPENG",
2677                              "SORASOMPENG");
2678 
2679         /**
2680          * Constant for the "Chakma" Unicode character block.
2681          * @since 1.8
2682          */
2683         public static final UnicodeBlock CHAKMA =
2684             new UnicodeBlock("CHAKMA");
2685 
2686         /**
2687          * Constant for the "Sharada" Unicode character block.
2688          * @since 1.8
2689          */
2690         public static final UnicodeBlock SHARADA =
2691             new UnicodeBlock("SHARADA");
2692 
2693         /**
2694          * Constant for the "Takri" Unicode character block.
2695          * @since 1.8
2696          */
2697         public static final UnicodeBlock TAKRI =
2698             new UnicodeBlock("TAKRI");
2699 
2700         /**
2701          * Constant for the "Miao" Unicode character block.
2702          * @since 1.8
2703          */
2704         public static final UnicodeBlock MIAO =
2705             new UnicodeBlock("MIAO");
2706 
2707         /**
2708          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2709          * character block.
2710          * @since 1.8
2711          */
2712         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2713             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2714                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2715                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2716 
2717         /**
2718          * Constant for the "Combining Diacritical Marks Extended" Unicode
2719          * character block.
2720          * @since 9
2721          */
2722         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2723             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2724                              "COMBINING DIACRITICAL MARKS EXTENDED",
2725                              "COMBININGDIACRITICALMARKSEXTENDED");
2726 
2727         /**
2728          * Constant for the "Myanmar Extended-B" Unicode character block.
2729          * @since 9
2730          */
2731         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2732             new UnicodeBlock("MYANMAR_EXTENDED_B",
2733                              "MYANMAR EXTENDED-B",
2734                              "MYANMAREXTENDED-B");
2735 
2736         /**
2737          * Constant for the "Latin Extended-E" Unicode character block.
2738          * @since 9
2739          */
2740         public static final UnicodeBlock LATIN_EXTENDED_E =
2741             new UnicodeBlock("LATIN_EXTENDED_E",
2742                              "LATIN EXTENDED-E",
2743                              "LATINEXTENDED-E");
2744 
2745         /**
2746          * Constant for the "Coptic Epact Numbers" Unicode character block.
2747          * @since 9
2748          */
2749         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2750             new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2751                              "COPTIC EPACT NUMBERS",
2752                              "COPTICEPACTNUMBERS");
2753 
2754         /**
2755          * Constant for the "Old Permic" Unicode character block.
2756          * @since 9
2757          */
2758         public static final UnicodeBlock OLD_PERMIC =
2759             new UnicodeBlock("OLD_PERMIC",
2760                              "OLD PERMIC",
2761                              "OLDPERMIC");
2762 
2763         /**
2764          * Constant for the "Elbasan" Unicode character block.
2765          * @since 9
2766          */
2767         public static final UnicodeBlock ELBASAN =
2768             new UnicodeBlock("ELBASAN");
2769 
2770         /**
2771          * Constant for the "Caucasian Albanian" Unicode character block.
2772          * @since 9
2773          */
2774         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2775             new UnicodeBlock("CAUCASIAN_ALBANIAN",
2776                              "CAUCASIAN ALBANIAN",
2777                              "CAUCASIANALBANIAN");
2778 
2779         /**
2780          * Constant for the "Linear A" Unicode character block.
2781          * @since 9
2782          */
2783         public static final UnicodeBlock LINEAR_A =
2784             new UnicodeBlock("LINEAR_A",
2785                              "LINEAR A",
2786                              "LINEARA");
2787 
2788         /**
2789          * Constant for the "Palmyrene" Unicode character block.
2790          * @since 9
2791          */
2792         public static final UnicodeBlock PALMYRENE =
2793             new UnicodeBlock("PALMYRENE");
2794 
2795         /**
2796          * Constant for the "Nabataean" Unicode character block.
2797          * @since 9
2798          */
2799         public static final UnicodeBlock NABATAEAN =
2800             new UnicodeBlock("NABATAEAN");
2801 
2802         /**
2803          * Constant for the "Old North Arabian" Unicode character block.
2804          * @since 9
2805          */
2806         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2807             new UnicodeBlock("OLD_NORTH_ARABIAN",
2808                              "OLD NORTH ARABIAN",
2809                              "OLDNORTHARABIAN");
2810 
2811         /**
2812          * Constant for the "Manichaean" Unicode character block.
2813          * @since 9
2814          */
2815         public static final UnicodeBlock MANICHAEAN =
2816             new UnicodeBlock("MANICHAEAN");
2817 
2818         /**
2819          * Constant for the "Psalter Pahlavi" Unicode character block.
2820          * @since 9
2821          */
2822         public static final UnicodeBlock PSALTER_PAHLAVI =
2823             new UnicodeBlock("PSALTER_PAHLAVI",
2824                              "PSALTER PAHLAVI",
2825                              "PSALTERPAHLAVI");
2826 
2827         /**
2828          * Constant for the "Mahajani" Unicode character block.
2829          * @since 9
2830          */
2831         public static final UnicodeBlock MAHAJANI =
2832             new UnicodeBlock("MAHAJANI");
2833 
2834         /**
2835          * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2836          * @since 9
2837          */
2838         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2839             new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2840                              "SINHALA ARCHAIC NUMBERS",
2841                              "SINHALAARCHAICNUMBERS");
2842 
2843         /**
2844          * Constant for the "Khojki" Unicode character block.
2845          * @since 9
2846          */
2847         public static final UnicodeBlock KHOJKI =
2848             new UnicodeBlock("KHOJKI");
2849 
2850         /**
2851          * Constant for the "Khudawadi" Unicode character block.
2852          * @since 9
2853          */
2854         public static final UnicodeBlock KHUDAWADI =
2855             new UnicodeBlock("KHUDAWADI");
2856 
2857         /**
2858          * Constant for the "Grantha" Unicode character block.
2859          * @since 9
2860          */
2861         public static final UnicodeBlock GRANTHA =
2862             new UnicodeBlock("GRANTHA");
2863 
2864         /**
2865          * Constant for the "Tirhuta" Unicode character block.
2866          * @since 9
2867          */
2868         public static final UnicodeBlock TIRHUTA =
2869             new UnicodeBlock("TIRHUTA");
2870 
2871         /**
2872          * Constant for the "Siddham" Unicode character block.
2873          * @since 9
2874          */
2875         public static final UnicodeBlock SIDDHAM =
2876             new UnicodeBlock("SIDDHAM");
2877 
2878         /**
2879          * Constant for the "Modi" Unicode character block.
2880          * @since 9
2881          */
2882         public static final UnicodeBlock MODI =
2883             new UnicodeBlock("MODI");
2884 
2885         /**
2886          * Constant for the "Warang Citi" Unicode character block.
2887          * @since 9
2888          */
2889         public static final UnicodeBlock WARANG_CITI =
2890             new UnicodeBlock("WARANG_CITI",
2891                              "WARANG CITI",
2892                              "WARANGCITI");
2893 
2894         /**
2895          * Constant for the "Pau Cin Hau" Unicode character block.
2896          * @since 9
2897          */
2898         public static final UnicodeBlock PAU_CIN_HAU =
2899             new UnicodeBlock("PAU_CIN_HAU",
2900                              "PAU CIN HAU",
2901                              "PAUCINHAU");
2902 
2903         /**
2904          * Constant for the "Mro" Unicode character block.
2905          * @since 9
2906          */
2907         public static final UnicodeBlock MRO =
2908             new UnicodeBlock("MRO");
2909 
2910         /**
2911          * Constant for the "Bassa Vah" Unicode character block.
2912          * @since 9
2913          */
2914         public static final UnicodeBlock BASSA_VAH =
2915             new UnicodeBlock("BASSA_VAH",
2916                              "BASSA VAH",
2917                              "BASSAVAH");
2918 
2919         /**
2920          * Constant for the "Pahawh Hmong" Unicode character block.
2921          * @since 9
2922          */
2923         public static final UnicodeBlock PAHAWH_HMONG =
2924             new UnicodeBlock("PAHAWH_HMONG",
2925                              "PAHAWH HMONG",
2926                              "PAHAWHHMONG");
2927 
2928         /**
2929          * Constant for the "Duployan" Unicode character block.
2930          * @since 9
2931          */
2932         public static final UnicodeBlock DUPLOYAN =
2933             new UnicodeBlock("DUPLOYAN");
2934 
2935         /**
2936          * Constant for the "Shorthand Format Controls" Unicode character block.
2937          * @since 9
2938          */
2939         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2940             new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2941                              "SHORTHAND FORMAT CONTROLS",
2942                              "SHORTHANDFORMATCONTROLS");
2943 
2944         /**
2945          * Constant for the "Mende Kikakui" Unicode character block.
2946          * @since 9
2947          */
2948         public static final UnicodeBlock MENDE_KIKAKUI =
2949             new UnicodeBlock("MENDE_KIKAKUI",
2950                              "MENDE KIKAKUI",
2951                              "MENDEKIKAKUI");
2952 
2953         /**
2954          * Constant for the "Ornamental Dingbats" Unicode character block.
2955          * @since 9
2956          */
2957         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2958             new UnicodeBlock("ORNAMENTAL_DINGBATS",
2959                              "ORNAMENTAL DINGBATS",
2960                              "ORNAMENTALDINGBATS");
2961 
2962         /**
2963          * Constant for the "Geometric Shapes Extended" Unicode character block.
2964          * @since 9
2965          */
2966         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2967             new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2968                              "GEOMETRIC SHAPES EXTENDED",
2969                              "GEOMETRICSHAPESEXTENDED");
2970 
2971         /**
2972          * Constant for the "Supplemental Arrows-C" Unicode character block.
2973          * @since 9
2974          */
2975         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2976             new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2977                              "SUPPLEMENTAL ARROWS-C",
2978                              "SUPPLEMENTALARROWS-C");
2979 
2980         /**
2981          * Constant for the "Cherokee Supplement" Unicode character block.
2982          * @since 9
2983          */
2984         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2985             new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2986                              "CHEROKEE SUPPLEMENT",
2987                              "CHEROKEESUPPLEMENT");
2988 
2989         /**
2990          * Constant for the "Hatran" Unicode character block.
2991          * @since 9
2992          */
2993         public static final UnicodeBlock HATRAN =
2994             new UnicodeBlock("HATRAN");
2995 
2996         /**
2997          * Constant for the "Old Hungarian" Unicode character block.
2998          * @since 9
2999          */
3000         public static final UnicodeBlock OLD_HUNGARIAN =
3001             new UnicodeBlock("OLD_HUNGARIAN",
3002                              "OLD HUNGARIAN",
3003                              "OLDHUNGARIAN");
3004 
3005         /**
3006          * Constant for the "Multani" Unicode character block.
3007          * @since 9
3008          */
3009         public static final UnicodeBlock MULTANI =
3010             new UnicodeBlock("MULTANI");
3011 
3012         /**
3013          * Constant for the "Ahom" Unicode character block.
3014          * @since 9
3015          */
3016         public static final UnicodeBlock AHOM =
3017             new UnicodeBlock("AHOM");
3018 
3019         /**
3020          * Constant for the "Early Dynastic Cuneiform" Unicode character block.
3021          * @since 9
3022          */
3023         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
3024             new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
3025                              "EARLY DYNASTIC CUNEIFORM",
3026                              "EARLYDYNASTICCUNEIFORM");
3027 
3028         /**
3029          * Constant for the "Anatolian Hieroglyphs" Unicode character block.
3030          * @since 9
3031          */
3032         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
3033             new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
3034                              "ANATOLIAN HIEROGLYPHS",
3035                              "ANATOLIANHIEROGLYPHS");
3036 
3037         /**
3038          * Constant for the "Sutton SignWriting" Unicode character block.
3039          * @since 9
3040          */
3041         public static final UnicodeBlock SUTTON_SIGNWRITING =
3042             new UnicodeBlock("SUTTON_SIGNWRITING",
3043                              "SUTTON SIGNWRITING",
3044                              "SUTTONSIGNWRITING");
3045 
3046         /**
3047          * Constant for the "Supplemental Symbols and Pictographs" Unicode
3048          * character block.
3049          * @since 9
3050          */
3051         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
3052             new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
3053                              "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
3054                              "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
3055 
3056         /**
3057          * Constant for the "CJK Unified Ideographs Extension E" Unicode
3058          * character block.
3059          * @since 9
3060          */
3061         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
3062             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
3063                              "CJK UNIFIED IDEOGRAPHS EXTENSION E",
3064                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
3065 
3066         /**
3067          * Constant for the "Syriac Supplement" Unicode
3068          * character block.
3069          * @since 11
3070          */
3071         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
3072             new UnicodeBlock("SYRIAC_SUPPLEMENT",
3073                              "SYRIAC SUPPLEMENT",
3074                              "SYRIACSUPPLEMENT");
3075 
3076         /**
3077          * Constant for the "Cyrillic Extended-C" Unicode
3078          * character block.
3079          * @since 11
3080          */
3081         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
3082             new UnicodeBlock("CYRILLIC_EXTENDED_C",
3083                              "CYRILLIC EXTENDED-C",
3084                              "CYRILLICEXTENDED-C");
3085 
3086         /**
3087          * Constant for the "Osage" Unicode
3088          * character block.
3089          * @since 11
3090          */
3091         public static final UnicodeBlock OSAGE =
3092             new UnicodeBlock("OSAGE");
3093 
3094         /**
3095          * Constant for the "Newa" Unicode
3096          * character block.
3097          * @since 11
3098          */
3099         public static final UnicodeBlock NEWA =
3100             new UnicodeBlock("NEWA");
3101 
3102         /**
3103          * Constant for the "Mongolian Supplement" Unicode
3104          * character block.
3105          * @since 11
3106          */
3107         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
3108             new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
3109                              "MONGOLIAN SUPPLEMENT",
3110                              "MONGOLIANSUPPLEMENT");
3111 
3112         /**
3113          * Constant for the "Marchen" Unicode
3114          * character block.
3115          * @since 11
3116          */
3117         public static final UnicodeBlock MARCHEN =
3118             new UnicodeBlock("MARCHEN");
3119 
3120         /**
3121          * Constant for the "Ideographic Symbols and Punctuation" Unicode
3122          * character block.
3123          * @since 11
3124          */
3125         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3126             new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3127                              "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3128                              "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3129 
3130         /**
3131          * Constant for the "Tangut" Unicode
3132          * character block.
3133          * @since 11
3134          */
3135         public static final UnicodeBlock TANGUT =
3136             new UnicodeBlock("TANGUT");
3137 
3138         /**
3139          * Constant for the "Tangut Components" Unicode
3140          * character block.
3141          * @since 11
3142          */
3143         public static final UnicodeBlock TANGUT_COMPONENTS =
3144             new UnicodeBlock("TANGUT_COMPONENTS",
3145                              "TANGUT COMPONENTS",
3146                              "TANGUTCOMPONENTS");
3147 
3148         /**
3149          * Constant for the "Kana Extended-A" Unicode
3150          * character block.
3151          * @since 11
3152          */
3153         public static final UnicodeBlock KANA_EXTENDED_A =
3154             new UnicodeBlock("KANA_EXTENDED_A",
3155                              "KANA EXTENDED-A",
3156                              "KANAEXTENDED-A");
3157         /**
3158          * Constant for the "Glagolitic Supplement" Unicode
3159          * character block.
3160          * @since 11
3161          */
3162         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3163             new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3164                              "GLAGOLITIC SUPPLEMENT",
3165                              "GLAGOLITICSUPPLEMENT");
3166         /**
3167          * Constant for the "Adlam" Unicode
3168          * character block.
3169          * @since 11
3170          */
3171         public static final UnicodeBlock ADLAM =
3172             new UnicodeBlock("ADLAM");
3173 
3174         /**
3175          * Constant for the "Masaram Gondi" Unicode
3176          * character block (U+11D00..U+11D5F).
3177          * @since 11
3178          */
3179         public static final UnicodeBlock MASARAM_GONDI =
3180             new UnicodeBlock("MASARAM_GONDI",
3181                              "MASARAM GONDI",
3182                              "MASARAMGONDI");
3183 
3184         /**
3185          * Constant for the "Zanabazar Square" Unicode
3186          * character block (U+11A00..U+11A4F).
3187          * @since 11
3188          */
3189         public static final UnicodeBlock ZANABAZAR_SQUARE =
3190             new UnicodeBlock("ZANABAZAR_SQUARE",
3191                              "ZANABAZAR SQUARE",
3192                              "ZANABAZARSQUARE");
3193 
3194         /**
3195          * Constant for the "Nushu" Unicode
3196          * character block.
3197          * @since 11
3198          */
3199         public static final UnicodeBlock NUSHU =
3200             new UnicodeBlock("NUSHU");
3201 
3202         /**
3203          * Constant for the "Soyombo" Unicode
3204          * character block (U+11A50..U+11AAF).
3205          * @since 11
3206          */
3207         public static final UnicodeBlock SOYOMBO =
3208             new UnicodeBlock("SOYOMBO");
3209 
3210         /**
3211          * Constant for the "Bhaiksuki" Unicode
3212          * character block (U+11C00..U+11C6F).
3213          * @since 11
3214          */
3215         public static final UnicodeBlock BHAIKSUKI =
3216             new UnicodeBlock("BHAIKSUKI");
3217 
3218         /**
3219          * Constant for the "CJK Unified Ideographs Extension F" Unicode
3220          * character block.
3221          * @since 11
3222          */
3223         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3224             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3225                              "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3226                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3227         /**
3228          * Constant for the "Georgian Extended" Unicode
3229          * character block (U+1C90..U+1CBF).
3230          * @since 12
3231          */
3232         public static final UnicodeBlock GEORGIAN_EXTENDED =
3233             new UnicodeBlock("GEORGIAN_EXTENDED",
3234                              "GEORGIAN EXTENDED",
3235                              "GEORGIANEXTENDED");
3236 
3237         /**
3238          * Constant for the "Hanifi Rohingya" Unicode
3239          * character block (U+10D00..U+10D3F).
3240          * @since 12
3241          */
3242         public static final UnicodeBlock HANIFI_ROHINGYA =
3243             new UnicodeBlock("HANIFI_ROHINGYA",
3244                              "HANIFI ROHINGYA",
3245                              "HANIFIROHINGYA");
3246 
3247         /**
3248          * Constant for the "Old Sogdian" Unicode
3249          * character block (U+10F00..U+10F2F).
3250          * @since 12
3251          */
3252         public static final UnicodeBlock OLD_SOGDIAN =
3253             new UnicodeBlock("OLD_SOGDIAN",
3254                              "OLD SOGDIAN",
3255                              "OLDSOGDIAN");
3256 
3257         /**
3258          * Constant for the "Sogdian" Unicode
3259          * character block (U+10F30..U+10F6F).
3260          * @since 12
3261          */
3262         public static final UnicodeBlock SOGDIAN =
3263             new UnicodeBlock("SOGDIAN");
3264 
3265         /**
3266          * Constant for the "Dogra" Unicode
3267          * character block (U+11800..U+1184F).
3268          * @since 12
3269          */
3270         public static final UnicodeBlock DOGRA =
3271             new UnicodeBlock("DOGRA");
3272 
3273         /**
3274          * Constant for the "Gunjala Gondi" Unicode
3275          * character block (U+11D60..U+11DAF).
3276          * @since 12
3277          */
3278         public static final UnicodeBlock GUNJALA_GONDI =
3279             new UnicodeBlock("GUNJALA_GONDI",
3280                              "GUNJALA GONDI",
3281                              "GUNJALAGONDI");
3282 
3283         /**
3284          * Constant for the "Makasar" Unicode
3285          * character block (U+11EE0..U+11EFF).
3286          * @since 12
3287          */
3288         public static final UnicodeBlock MAKASAR =
3289             new UnicodeBlock("MAKASAR");
3290 
3291         /**
3292          * Constant for the "Medefaidrin" Unicode
3293          * character block (U+16E40..U+16E9F).
3294          * @since 12
3295          */
3296         public static final UnicodeBlock MEDEFAIDRIN =
3297             new UnicodeBlock("MEDEFAIDRIN");
3298 
3299         /**
3300          * Constant for the "Mayan Numerals" Unicode
3301          * character block.
3302          * @since 12
3303          */
3304         public static final UnicodeBlock MAYAN_NUMERALS =
3305             new UnicodeBlock("MAYAN_NUMERALS",
3306                              "MAYAN NUMERALS",
3307                              "MAYANNUMERALS");
3308 
3309         /**
3310          * Constant for the "Indic Siyaq Numbers" Unicode
3311          * character block.
3312          * @since 12
3313          */
3314         public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
3315             new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
3316                              "INDIC SIYAQ NUMBERS",
3317                              "INDICSIYAQNUMBERS");
3318 
3319         /**
3320          * Constant for the "Chess Symbols" Unicode
3321          * character block.
3322          * @since 12
3323          */
3324         public static final UnicodeBlock CHESS_SYMBOLS =
3325             new UnicodeBlock("CHESS_SYMBOLS",
3326                              "CHESS SYMBOLS",
3327                              "CHESSSYMBOLS");
3328 
3329         /**
3330          * Constant for the "Elymaic" Unicode
3331          * character block (U+10FE0..U+10FFF).
3332          * @since 13
3333          */
3334         public static final UnicodeBlock ELYMAIC =
3335             new UnicodeBlock("ELYMAIC");
3336 
3337         /**
3338          * Constant for the "Nandinagari" Unicode
3339          * character block (U+119A0..U+119FF).
3340          * @since 13
3341          */
3342         public static final UnicodeBlock NANDINAGARI =
3343             new UnicodeBlock("NANDINAGARI");
3344 
3345         /**
3346          * Constant for the "Tamil Supplement" Unicode
3347          * character block (U+11FC0..U+11FFF).
3348          * @since 13
3349          */
3350         public static final UnicodeBlock TAMIL_SUPPLEMENT =
3351             new UnicodeBlock("TAMIL_SUPPLEMENT",
3352                              "TAMIL SUPPLEMENT",
3353                              "TAMILSUPPLEMENT");
3354 
3355         /**
3356          * Constant for the "Egyptian Hieroglyph Format Controls" Unicode
3357          * character block (U+13430..U+1345F).
3358          * @since 13
3359          */
3360         public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
3361             new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
3362                              "EGYPTIAN HIEROGLYPH FORMAT CONTROLS",
3363                              "EGYPTIANHIEROGLYPHFORMATCONTROLS");
3364 
3365         /**
3366          * Constant for the "Small Kana Extension" Unicode
3367          * character block.
3368          * @since 13
3369          */
3370         public static final UnicodeBlock SMALL_KANA_EXTENSION =
3371             new UnicodeBlock("SMALL_KANA_EXTENSION",
3372                              "SMALL KANA EXTENSION",
3373                              "SMALLKANAEXTENSION");
3374 
3375         /**
3376          * Constant for the "Nyiakeng Puachue Hmong" Unicode
3377          * character block.
3378          * @since 13
3379          */
3380         public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
3381             new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
3382                              "NYIAKENG PUACHUE HMONG",
3383                              "NYIAKENGPUACHUEHMONG");
3384 
3385         /**
3386          * Constant for the "Wancho" Unicode
3387          * character block.
3388          * @since 13
3389          */
3390         public static final UnicodeBlock WANCHO =
3391             new UnicodeBlock("WANCHO");
3392 
3393         /**
3394          * Constant for the "Ottoman Siyaq Numbers" Unicode
3395          * character block.
3396          * @since 13
3397          */
3398         public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
3399             new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
3400                              "OTTOMAN SIYAQ NUMBERS",
3401                              "OTTOMANSIYAQNUMBERS");
3402 
3403         /**
3404          * Constant for the "Symbols and Pictographs Extended-A" Unicode
3405          * character block.
3406          * @since 13
3407          */
3408         public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
3409             new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
3410                              "SYMBOLS AND PICTOGRAPHS EXTENDED-A",
3411                              "SYMBOLSANDPICTOGRAPHSEXTENDED-A");
3412 
3413         /**
3414          * Constant for the "Yezidi" Unicode
3415          * character block (U+10E80..U+10EBF).
3416          * @since 15
3417          */
3418         public static final UnicodeBlock YEZIDI =
3419             new UnicodeBlock("YEZIDI");
3420 
3421         /**
3422          * Constant for the "Chorasmian" Unicode
3423          * character block (U+10FB0..U+10FDF).
3424          * @since 15
3425          */
3426         public static final UnicodeBlock CHORASMIAN =
3427             new UnicodeBlock("CHORASMIAN");
3428 
3429         /**
3430          * Constant for the "Dives Akuru" Unicode
3431          * character block (U+11900..U+1195F).
3432          * @since 15
3433          */
3434         public static final UnicodeBlock DIVES_AKURU =
3435             new UnicodeBlock("DIVES_AKURU",
3436                              "DIVES AKURU",
3437                              "DIVESAKURU");
3438 
3439         /**
3440          * Constant for the "Lisu Supplement" Unicode
3441          * character block (U+11FB0..U+11FBF).
3442          * @since 15
3443          */
3444         public static final UnicodeBlock LISU_SUPPLEMENT =
3445             new UnicodeBlock("LISU_SUPPLEMENT",
3446                              "LISU SUPPLEMENT",
3447                              "LISUSUPPLEMENT");
3448 
3449         /**
3450          * Constant for the "Khitan Small Script" Unicode
3451          * character block (U+18B00..U+18CFF).
3452          * @since 15
3453          */
3454         public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
3455             new UnicodeBlock("KHITAN_SMALL_SCRIPT",
3456                              "KHITAN SMALL SCRIPT",
3457                              "KHITANSMALLSCRIPT");
3458 
3459         /**
3460          * Constant for the "Tangut Supplement" Unicode
3461          * character block.
3462          * @since 15
3463          */
3464         public static final UnicodeBlock TANGUT_SUPPLEMENT =
3465             new UnicodeBlock("TANGUT_SUPPLEMENT",
3466                              "TANGUT SUPPLEMENT",
3467                              "TANGUTSUPPLEMENT");
3468 
3469         /**
3470          * Constant for the "Symbols for Legacy Computing" Unicode
3471          * character block.
3472          * @since 15
3473          */
3474         public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
3475             new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
3476                              "SYMBOLS FOR LEGACY COMPUTING",
3477                              "SYMBOLSFORLEGACYCOMPUTING");
3478 
3479         /**
3480          * Constant for the "CJK Unified Ideographs Extension G" Unicode
3481          * character block.
3482          * @since 15
3483          */
3484         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
3485             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
3486                              "CJK UNIFIED IDEOGRAPHS EXTENSION G",
3487                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONG");
3488 
3489         /**
3490          * Constant for the "Arabic Extended-B" Unicode
3491          * character block (U+0870..U+089F).
3492          * @since 19
3493          */
3494         public static final UnicodeBlock ARABIC_EXTENDED_B =
3495             new UnicodeBlock("ARABIC_EXTENDED_B",
3496                     "ARABIC EXTENDED-B",
3497                     "ARABICEXTENDED-B");
3498 
3499         /**
3500          * Constant for the "Vithkuqi" Unicode
3501          * character block (U+10570..U+105BF).
3502          * @since 19
3503          */
3504         public static final UnicodeBlock VITHKUQI =
3505             new UnicodeBlock("VITHKUQI");
3506 
3507         /**
3508          * Constant for the "Latin Extended-F" Unicode
3509          * character block (U+10780..U+107BF).
3510          * @since 19
3511          */
3512         public static final UnicodeBlock LATIN_EXTENDED_F =
3513             new UnicodeBlock("LATIN_EXTENDED_F",
3514                     "LATIN EXTENDED-F",
3515                     "LATINEXTENDED-F");
3516 
3517         /**
3518          * Constant for the "Old Uyghur" Unicode
3519          * character block (U+10F70..U+10FAF).
3520          * @since 19
3521          */
3522         public static final UnicodeBlock OLD_UYGHUR =
3523             new UnicodeBlock("OLD_UYGHUR",
3524                     "OLD UYGHUR",
3525                     "OLDUYGHUR");
3526 
3527         /**
3528          * Constant for the "Unified Canadian Aboriginal Syllabics Extended-A" Unicode
3529          * character block (U+11AB0..U+11ABF).
3530          * @since 19
3531          */
3532         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
3533             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
3534                     "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED-A",
3535                     "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED-A");
3536 
3537         /**
3538          * Constant for the "Cypro-Minoan" Unicode
3539          * character block (U+12F90..U+12FFF).
3540          * @since 19
3541          */
3542         public static final UnicodeBlock CYPRO_MINOAN =
3543             new UnicodeBlock("CYPRO_MINOAN",
3544                     "CYPRO-MINOAN",
3545                     "CYPRO-MINOAN"); // same literal as the alias above: the name contains no spaces to drop
3546 
3547         /**
3548          * Constant for the "Tangsa" Unicode
3549          * character block (U+16A70..U+16ACF).
3550          * @since 19
3551          */
3552         public static final UnicodeBlock TANGSA =
3553             new UnicodeBlock("TANGSA");
3554 
3555         /**
3556          * Constant for the "Kana Extended-B" Unicode
3557          * character block.
3558          * @since 19
3559          */
3560         public static final UnicodeBlock KANA_EXTENDED_B =
3561             new UnicodeBlock("KANA_EXTENDED_B",
3562                     "KANA EXTENDED-B",
3563                     "KANAEXTENDED-B");
3564 
3565         /**
3566          * Constant for the "Znamenny Musical Notation" Unicode
3567          * character block.
3568          * @since 19
3569          */
3570         public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
3571             new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
3572                     "ZNAMENNY MUSICAL NOTATION",
3573                     "ZNAMENNYMUSICALNOTATION");
3574 
3575         /**
3576          * Constant for the "Latin Extended-G" Unicode
3577          * character block.
3578          * @since 19
3579          */
3580         public static final UnicodeBlock LATIN_EXTENDED_G =
3581             new UnicodeBlock("LATIN_EXTENDED_G",
3582                     "LATIN EXTENDED-G",
3583                     "LATINEXTENDED-G");
3584 
3585         /**
3586          * Constant for the "Toto" Unicode
3587          * character block.
3588          * @since 19
3589          */
3590         public static final UnicodeBlock TOTO =
3591             new UnicodeBlock("TOTO");
3592 
3593         /**
3594          * Constant for the "Ethiopic Extended-B" Unicode
3595          * character block.
3596          * @since 19
3597          */
3598         public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
3599             new UnicodeBlock("ETHIOPIC_EXTENDED_B",
3600                     "ETHIOPIC EXTENDED-B",
3601                     "ETHIOPICEXTENDED-B");
3602 
3603         /**
3604          * Constant for the "Arabic Extended-C" Unicode
3605          * character block (U+10EC0..U+10EFF).
3606          * @since 20
3607          */
3608         public static final UnicodeBlock ARABIC_EXTENDED_C =
3609             new UnicodeBlock("ARABIC_EXTENDED_C",
3610                              "ARABIC EXTENDED-C",
3611                              "ARABICEXTENDED-C");
3612 
3613         /**
3614          * Constant for the "Devanagari Extended-A" Unicode
3615          * character block (U+11B00..U+11B5F).
3616          * @since 20
3617          */
3618         public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
3619             new UnicodeBlock("DEVANAGARI_EXTENDED_A",
3620                              "DEVANAGARI EXTENDED-A",
3621                              "DEVANAGARIEXTENDED-A");
3622 
3623         /**
3624          * Constant for the "Kawi" Unicode
3625          * character block (U+11F00..U+11F5F).
3626          * @since 20
3627          */
3628         public static final UnicodeBlock KAWI =
3629             new UnicodeBlock("KAWI");
3630 
3631         /**
3632          * Constant for the "Kaktovik Numerals" Unicode
3633          * character block.
3634          * @since 20
3635          */
3636         public static final UnicodeBlock KAKTOVIK_NUMERALS =
3637             new UnicodeBlock("KAKTOVIK_NUMERALS",
3638                              "KAKTOVIK NUMERALS",
3639                              "KAKTOVIKNUMERALS");
3640 
3641         /**
3642          * Constant for the "Cyrillic Extended-D" Unicode
3643          * character block.
3644          * @since 20
3645          */
3646         public static final UnicodeBlock CYRILLIC_EXTENDED_D =
3647             new UnicodeBlock("CYRILLIC_EXTENDED_D",
3648                              "CYRILLIC EXTENDED-D",
3649                              "CYRILLICEXTENDED-D");
3650 
3651         /**
3652          * Constant for the "Nag Mundari" Unicode
3653          * character block.
3654          * @since 20
3655          */
3656         public static final UnicodeBlock NAG_MUNDARI =
3657             new UnicodeBlock("NAG_MUNDARI",
3658                              "NAG MUNDARI",
3659                              "NAGMUNDARI");
3660 
3661         /**
3662          * Constant for the "CJK Unified Ideographs Extension H" Unicode
3663          * character block.
3664          * @since 20
3665          */
3666         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
3667             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
3668                              "CJK UNIFIED IDEOGRAPHS EXTENSION H",
3669                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONH");
3670 
3671         private static final int[] blockStarts = {
3672             0x0000,   // 0000..007F; Basic Latin
3673             0x0080,   // 0080..00FF; Latin-1 Supplement
3674             0x0100,   // 0100..017F; Latin Extended-A
3675             0x0180,   // 0180..024F; Latin Extended-B
3676             0x0250,   // 0250..02AF; IPA Extensions
3677             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
3678             0x0300,   // 0300..036F; Combining Diacritical Marks
3679             0x0370,   // 0370..03FF; Greek and Coptic
3680             0x0400,   // 0400..04FF; Cyrillic
3681             0x0500,   // 0500..052F; Cyrillic Supplement
3682             0x0530,   // 0530..058F; Armenian
3683             0x0590,   // 0590..05FF; Hebrew
3684             0x0600,   // 0600..06FF; Arabic
3685             0x0700,   // 0700..074F; Syriac
3686             0x0750,   // 0750..077F; Arabic Supplement
3687             0x0780,   // 0780..07BF; Thaana
3688             0x07C0,   // 07C0..07FF; NKo
3689             0x0800,   // 0800..083F; Samaritan
3690             0x0840,   // 0840..085F; Mandaic
3691             0x0860,   // 0860..086F; Syriac Supplement
3692             0x0870,   // 0870..089F; Arabic Extended-B
3693             0x08A0,   // 08A0..08FF; Arabic Extended-A
3694             0x0900,   // 0900..097F; Devanagari
3695             0x0980,   // 0980..09FF; Bengali
3696             0x0A00,   // 0A00..0A7F; Gurmukhi
3697             0x0A80,   // 0A80..0AFF; Gujarati
3698             0x0B00,   // 0B00..0B7F; Oriya
3699             0x0B80,   // 0B80..0BFF; Tamil
3700             0x0C00,   // 0C00..0C7F; Telugu
3701             0x0C80,   // 0C80..0CFF; Kannada
3702             0x0D00,   // 0D00..0D7F; Malayalam
3703             0x0D80,   // 0D80..0DFF; Sinhala
3704             0x0E00,   // 0E00..0E7F; Thai
3705             0x0E80,   // 0E80..0EFF; Lao
3706             0x0F00,   // 0F00..0FFF; Tibetan
3707             0x1000,   // 1000..109F; Myanmar
3708             0x10A0,   // 10A0..10FF; Georgian
3709             0x1100,   // 1100..11FF; Hangul Jamo
3710             0x1200,   // 1200..137F; Ethiopic
3711             0x1380,   // 1380..139F; Ethiopic Supplement
3712             0x13A0,   // 13A0..13FF; Cherokee
3713             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
3714             0x1680,   // 1680..169F; Ogham
3715             0x16A0,   // 16A0..16FF; Runic
3716             0x1700,   // 1700..171F; Tagalog
3717             0x1720,   // 1720..173F; Hanunoo
3718             0x1740,   // 1740..175F; Buhid
3719             0x1760,   // 1760..177F; Tagbanwa
3720             0x1780,   // 1780..17FF; Khmer
3721             0x1800,   // 1800..18AF; Mongolian
3722             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3723             0x1900,   // 1900..194F; Limbu
3724             0x1950,   // 1950..197F; Tai Le
3725             0x1980,   // 1980..19DF; New Tai Lue
3726             0x19E0,   // 19E0..19FF; Khmer Symbols
3727             0x1A00,   // 1A00..1A1F; Buginese
3728             0x1A20,   // 1A20..1AAF; Tai Tham
3729             0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
3730             0x1B00,   // 1B00..1B7F; Balinese
3731             0x1B80,   // 1B80..1BBF; Sundanese
3732             0x1BC0,   // 1BC0..1BFF; Batak
3733             0x1C00,   // 1C00..1C4F; Lepcha
3734             0x1C50,   // 1C50..1C7F; Ol Chiki
3735             0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
3736             0x1C90,   // 1C90..1CBF; Georgian Extended
3737             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
3738             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
3739             0x1D00,   // 1D00..1D7F; Phonetic Extensions
3740             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
3741             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
3742             0x1E00,   // 1E00..1EFF; Latin Extended Additional
3743             0x1F00,   // 1F00..1FFF; Greek Extended
3744             0x2000,   // 2000..206F; General Punctuation
3745             0x2070,   // 2070..209F; Superscripts and Subscripts
3746             0x20A0,   // 20A0..20CF; Currency Symbols
3747             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
3748             0x2100,   // 2100..214F; Letterlike Symbols
3749             0x2150,   // 2150..218F; Number Forms
3750             0x2190,   // 2190..21FF; Arrows
3751             0x2200,   // 2200..22FF; Mathematical Operators
3752             0x2300,   // 2300..23FF; Miscellaneous Technical
3753             0x2400,   // 2400..243F; Control Pictures
3754             0x2440,   // 2440..245F; Optical Character Recognition
3755             0x2460,   // 2460..24FF; Enclosed Alphanumerics
3756             0x2500,   // 2500..257F; Box Drawing
3757             0x2580,   // 2580..259F; Block Elements
3758             0x25A0,   // 25A0..25FF; Geometric Shapes
3759             0x2600,   // 2600..26FF; Miscellaneous Symbols
3760             0x2700,   // 2700..27BF; Dingbats
3761             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3762             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
3763             0x2800,   // 2800..28FF; Braille Patterns
3764             0x2900,   // 2900..297F; Supplemental Arrows-B
3765             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
3766             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
3767             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
3768             0x2C00,   // 2C00..2C5F; Glagolitic
3769             0x2C60,   // 2C60..2C7F; Latin Extended-C
3770             0x2C80,   // 2C80..2CFF; Coptic
3771             0x2D00,   // 2D00..2D2F; Georgian Supplement
3772             0x2D30,   // 2D30..2D7F; Tifinagh
3773             0x2D80,   // 2D80..2DDF; Ethiopic Extended
3774             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
3775             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
3776             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
3777             0x2F00,   // 2F00..2FDF; Kangxi Radicals
3778             0x2FE0,   //             unassigned
3779             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
3780             0x3000,   // 3000..303F; CJK Symbols and Punctuation
3781             0x3040,   // 3040..309F; Hiragana
3782             0x30A0,   // 30A0..30FF; Katakana
3783             0x3100,   // 3100..312F; Bopomofo
3784             0x3130,   // 3130..318F; Hangul Compatibility Jamo
3785             0x3190,   // 3190..319F; Kanbun
3786             0x31A0,   // 31A0..31BF; Bopomofo Extended
3787             0x31C0,   // 31C0..31EF; CJK Strokes
3788             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
3789             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
3790             0x3300,   // 3300..33FF; CJK Compatibility
3791             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
3792             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
3793             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
3794             0xA000,   // A000..A48F; Yi Syllables
3795             0xA490,   // A490..A4CF; Yi Radicals
3796             0xA4D0,   // A4D0..A4FF; Lisu
3797             0xA500,   // A500..A63F; Vai
3798             0xA640,   // A640..A69F; Cyrillic Extended-B
3799             0xA6A0,   // A6A0..A6FF; Bamum
3800             0xA700,   // A700..A71F; Modifier Tone Letters
3801             0xA720,   // A720..A7FF; Latin Extended-D
3802             0xA800,   // A800..A82F; Syloti Nagri
3803             0xA830,   // A830..A83F; Common Indic Number Forms
3804             0xA840,   // A840..A87F; Phags-pa
3805             0xA880,   // A880..A8DF; Saurashtra
3806             0xA8E0,   // A8E0..A8FF; Devanagari Extended
3807             0xA900,   // A900..A92F; Kayah Li
3808             0xA930,   // A930..A95F; Rejang
3809             0xA960,   // A960..A97F; Hangul Jamo Extended-A
3810             0xA980,   // A980..A9DF; Javanese
3811             0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
3812             0xAA00,   // AA00..AA5F; Cham
3813             0xAA60,   // AA60..AA7F; Myanmar Extended-A
3814             0xAA80,   // AA80..AADF; Tai Viet
3815             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
3816             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
3817             0xAB30,   // AB30..AB6F; Latin Extended-E
3818             0xAB70,   // AB70..ABBF; Cherokee Supplement
3819             0xABC0,   // ABC0..ABFF; Meetei Mayek
3820             0xAC00,   // AC00..D7AF; Hangul Syllables
3821             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
3822             0xD800,   // D800..DB7F; High Surrogates
3823             0xDB80,   // DB80..DBFF; High Private Use Surrogates
3824             0xDC00,   // DC00..DFFF; Low Surrogates
3825             0xE000,   // E000..F8FF; Private Use Area
3826             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
3827             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
3828             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
3829             0xFE00,   // FE00..FE0F; Variation Selectors
3830             0xFE10,   // FE10..FE1F; Vertical Forms
3831             0xFE20,   // FE20..FE2F; Combining Half Marks
3832             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
3833             0xFE50,   // FE50..FE6F; Small Form Variants
3834             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
3835             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
3836             0xFFF0,   // FFF0..FFFF; Specials
3837             0x10000,  // 10000..1007F; Linear B Syllabary
3838             0x10080,  // 10080..100FF; Linear B Ideograms
3839             0x10100,  // 10100..1013F; Aegean Numbers
3840             0x10140,  // 10140..1018F; Ancient Greek Numbers
3841             0x10190,  // 10190..101CF; Ancient Symbols
3842             0x101D0,  // 101D0..101FF; Phaistos Disc
3843             0x10200,  //               unassigned
3844             0x10280,  // 10280..1029F; Lycian
3845             0x102A0,  // 102A0..102DF; Carian
3846             0x102E0,  // 102E0..102FF; Coptic Epact Numbers
3847             0x10300,  // 10300..1032F; Old Italic
3848             0x10330,  // 10330..1034F; Gothic
3849             0x10350,  // 10350..1037F; Old Permic
3850             0x10380,  // 10380..1039F; Ugaritic
3851             0x103A0,  // 103A0..103DF; Old Persian
3852             0x103E0,  //               unassigned
3853             0x10400,  // 10400..1044F; Deseret
3854             0x10450,  // 10450..1047F; Shavian
3855             0x10480,  // 10480..104AF; Osmanya
3856             0x104B0,  // 104B0..104FF; Osage
3857             0x10500,  // 10500..1052F; Elbasan
3858             0x10530,  // 10530..1056F; Caucasian Albanian
3859             0x10570,  // 10570..105BF; Vithkuqi
3860             0x105C0,  //               unassigned
3861             0x10600,  // 10600..1077F; Linear A
3862             0x10780,  // 10780..107BF; Latin Extended-F
3863             0x107C0,  //               unassigned
3864             0x10800,  // 10800..1083F; Cypriot Syllabary
3865             0x10840,  // 10840..1085F; Imperial Aramaic
3866             0x10860,  // 10860..1087F; Palmyrene
3867             0x10880,  // 10880..108AF; Nabataean
3868             0x108B0,  //               unassigned
3869             0x108E0,  // 108E0..108FF; Hatran
3870             0x10900,  // 10900..1091F; Phoenician
3871             0x10920,  // 10920..1093F; Lydian
3872             0x10940,  //               unassigned
3873             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
3874             0x109A0,  // 109A0..109FF; Meroitic Cursive
3875             0x10A00,  // 10A00..10A5F; Kharoshthi
3876             0x10A60,  // 10A60..10A7F; Old South Arabian
3877             0x10A80,  // 10A80..10A9F; Old North Arabian
3878             0x10AA0,  //               unassigned
3879             0x10AC0,  // 10AC0..10AFF; Manichaean
3880             0x10B00,  // 10B00..10B3F; Avestan
3881             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
3882             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
3883             0x10B80,  // 10B80..10BAF; Psalter Pahlavi
3884             0x10BB0,  //               unassigned
3885             0x10C00,  // 10C00..10C4F; Old Turkic
3886             0x10C50,  //               unassigned
3887             0x10C80,  // 10C80..10CFF; Old Hungarian
3888             0x10D00,  // 10D00..10D3F; Hanifi Rohingya
3889             0x10D40,  //               unassigned
3890             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
3891             0x10E80,  // 10E80..10EBF; Yezidi
3892             0x10EC0,  // 10EC0..10EFF; Arabic Extended-C
3893             0x10F00,  // 10F00..10F2F; Old Sogdian
3894             0x10F30,  // 10F30..10F6F; Sogdian
3895             0x10F70,  // 10F70..10FAF; Old Uyghur
3896             0x10FB0,  // 10FB0..10FDF; Chorasmian
3897             0x10FE0,  // 10FE0..10FFF; Elymaic
3898             0x11000,  // 11000..1107F; Brahmi
3899             0x11080,  // 11080..110CF; Kaithi
3900             0x110D0,  // 110D0..110FF; Sora Sompeng
3901             0x11100,  // 11100..1114F; Chakma
3902             0x11150,  // 11150..1117F; Mahajani
3903             0x11180,  // 11180..111DF; Sharada
3904             0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
3905             0x11200,  // 11200..1124F; Khojki
3906             0x11250,  //               unassigned
3907             0x11280,  // 11280..112AF; Multani
3908             0x112B0,  // 112B0..112FF; Khudawadi
3909             0x11300,  // 11300..1137F; Grantha
3910             0x11380,  //               unassigned
3911             0x11400,  // 11400..1147F; Newa
3912             0x11480,  // 11480..114DF; Tirhuta
3913             0x114E0,  //               unassigned
3914             0x11580,  // 11580..115FF; Siddham
3915             0x11600,  // 11600..1165F; Modi
3916             0x11660,  // 11660..1167F; Mongolian Supplement
3917             0x11680,  // 11680..116CF; Takri
3918             0x116D0,  //               unassigned
3919             0x11700,  // 11700..1174F; Ahom
3920             0x11750,  //               unassigned
3921             0x11800,  // 11800..1184F; Dogra
3922             0x11850,  //               unassigned
3923             0x118A0,  // 118A0..118FF; Warang Citi
3924             0x11900,  // 11900..1195F; Dives Akuru
3925             0x11960,  //               unassigned
3926             0x119A0,  // 119A0..119FF; Nandinagari
3927             0x11A00,  // 11A00..11A4F; Zanabazar Square
3928             0x11A50,  // 11A50..11AAF; Soyombo
3929             0x11AB0,  // 11AB0..11ABF; Unified Canadian Aboriginal Syllabics Extended-A
3930             0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
3931             0x11B00,  // 11B00..11B5F; Devanagari Extended-A
3932             0x11B60,  //               unassigned
3933             0x11C00,  // 11C00..11C6F; Bhaiksuki
3934             0x11C70,  // 11C70..11CBF; Marchen
3935             0x11CC0,  //               unassigned
3936             0x11D00,  // 11D00..11D5F; Masaram Gondi
3937             0x11D60,  // 11D60..11DAF; Gunjala Gondi
3938             0x11DB0,  //               unassigned
3939             0x11EE0,  // 11EE0..11EFF; Makasar
3940             0x11F00,  // 11F00..11F5F; Kawi
3941             0x11F60,  //               unassigned
3942             0x11FB0,  // 11FB0..11FBF; Lisu Supplement
3943             0x11FC0,  // 11FC0..11FFF; Tamil Supplement
3944             0x12000,  // 12000..123FF; Cuneiform
3945             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
3946             0x12480,  // 12480..1254F; Early Dynastic Cuneiform
3947             0x12550,  //               unassigned
3948             0x12F90,  // 12F90..12FFF; Cypro-Minoan
3949             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
3950             0x13430,  // 13430..1345F; Egyptian Hieroglyph Format Controls
3951             0x13460,  //               unassigned
3952             0x14400,  // 14400..1467F; Anatolian Hieroglyphs
3953             0x14680,  //               unassigned
3954             0x16800,  // 16800..16A3F; Bamum Supplement
3955             0x16A40,  // 16A40..16A6F; Mro
3956             0x16A70,  // 16A70..16ACF; Tangsa
3957             0x16AD0,  // 16AD0..16AFF; Bassa Vah
3958             0x16B00,  // 16B00..16B8F; Pahawh Hmong
3959             0x16B90,  //               unassigned
3960             0x16E40,  // 16E40..16E9F; Medefaidrin
3961             0x16EA0,  //               unassigned
3962             0x16F00,  // 16F00..16F9F; Miao
3963             0x16FA0,  //               unassigned
3964             0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
3965             0x17000,  // 17000..187FF; Tangut
3966             0x18800,  // 18800..18AFF; Tangut Components
3967             0x18B00,  // 18B00..18CFF; Khitan Small Script
3968             0x18D00,  // 18D00..18D7F; Tangut Supplement
3969             0x18D80,  //               unassigned
3970             0x1AFF0,  // 1AFF0..1AFFF; Kana Extended-B
3971             0x1B000,  // 1B000..1B0FF; Kana Supplement
3972             0x1B100,  // 1B100..1B12F; Kana Extended-A
3973             0x1B130,  // 1B130..1B16F; Small Kana Extension
3974             0x1B170,  // 1B170..1B2FF; Nushu
3975             0x1B300,  //               unassigned
3976             0x1BC00,  // 1BC00..1BC9F; Duployan
3977             0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
3978             0x1BCB0,  //               unassigned
3979             0x1CF00,  // 1CF00..1CFCF; Znamenny Musical Notation
3980             0x1CFD0,  //               unassigned
3981             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
3982             0x1D100,  // 1D100..1D1FF; Musical Symbols
3983             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
3984             0x1D250,  //               unassigned
3985             0x1D2C0,  // 1D2C0..1D2DF; Kaktovik Numerals
3986             0x1D2E0,  // 1D2E0..1D2FF; Mayan Numerals
3987             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
3988             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
3989             0x1D380,  //               unassigned
3990             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
3991             0x1D800,  // 1D800..1DAAF; Sutton SignWriting
3992             0x1DAB0,  //               unassigned
3993             0x1DF00,  // 1DF00..1DFFF; Latin Extended-G
3994             0x1E000,  // 1E000..1E02F; Glagolitic Supplement
3995             0x1E030,  // 1E030..1E08F; Cyrillic Extended-D
3996             0x1E090,  //               unassigned
3997             0x1E100,  // 1E100..1E14F; Nyiakeng Puachue Hmong
3998             0x1E150,  //               unassigned
3999             0x1E290,  // 1E290..1E2BF; Toto
4000             0x1E2C0,  // 1E2C0..1E2FF; Wancho
4001             0x1E300,  //               unassigned
4002             0x1E4D0,  // 1E4D0..1E4FF; Nag Mundari
4003             0x1E500,  //               unassigned
4004             0x1E7E0,  // 1E7E0..1E7FF; Ethiopic Extended-B
4005             0x1E800,  // 1E800..1E8DF; Mende Kikakui
4006             0x1E8E0,  //               unassigned
4007             0x1E900,  // 1E900..1E95F; Adlam
4008             0x1E960,  //               unassigned
4009             0x1EC70,  // 1EC70..1ECBF; Indic Siyaq Numbers
4010             0x1ECC0,  //               unassigned
4011             0x1ED00,  // 1ED00..1ED4F; Ottoman Siyaq Numbers
4012             0x1ED50,  //               unassigned
4013             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
4014             0x1EF00,  //               unassigned
4015             0x1F000,  // 1F000..1F02F; Mahjong Tiles
4016             0x1F030,  // 1F030..1F09F; Domino Tiles
4017             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
4018             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
4019             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
4020             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
4021             0x1F600,  // 1F600..1F64F; Emoticons
4022             0x1F650,  // 1F650..1F67F; Ornamental Dingbats
4023             0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
4024             0x1F700,  // 1F700..1F77F; Alchemical Symbols
4025             0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
4026             0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
4027             0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
4028             0x1FA00,  // 1FA00..1FA6F; Chess Symbols
4029             0x1FA70,  // 1FA70..1FAFF; Symbols and Pictographs Extended-A
4030             0x1FB00,  // 1FB00..1FBFF; Symbols for Legacy Computing
4031             0x1FC00,  //               unassigned
4032             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
4033             0x2A6E0,  //               unassigned
4034             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
4035             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
4036             0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
4037             0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
4038             0x2EBF0,  //               unassigned
4039             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
4040             0x2FA20,  //               unassigned
4041             0x30000,  // 30000..3134F; CJK Unified Ideographs Extension G
4042             0x31350,  // 31350..323AF; CJK Unified Ideographs Extension H
4043             0x323B0,  //               unassigned
4044             0xE0000,  // E0000..E007F; Tags
4045             0xE0080,  //               unassigned
4046             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
4047             0xE01F0,  //               unassigned
4048             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
4049             0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
4050         };
4051 
        // Lookup table consulted by of(int): that method binary-searches
        // blockStarts for the range containing a code point and returns the
        // entry stored at the same index in this array. The two arrays must
        // therefore stay index-aligned. A null entry lines up with a range
        // that blockStarts comments as "unassigned", which is how of(int)
        // ends up returning null for code points outside every block.
        private static final UnicodeBlock[] blocks = {
            BASIC_LATIN,
            LATIN_1_SUPPLEMENT,
            LATIN_EXTENDED_A,
            LATIN_EXTENDED_B,
            IPA_EXTENSIONS,
            SPACING_MODIFIER_LETTERS,
            COMBINING_DIACRITICAL_MARKS,
            GREEK,
            CYRILLIC,
            CYRILLIC_SUPPLEMENTARY,
            ARMENIAN,
            HEBREW,
            ARABIC,
            SYRIAC,
            ARABIC_SUPPLEMENT,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            SYRIAC_SUPPLEMENT,
            ARABIC_EXTENDED_B,
            ARABIC_EXTENDED_A,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            LAO,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            HANGUL_JAMO,
            ETHIOPIC,
            ETHIOPIC_SUPPLEMENT,
            CHEROKEE,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
            OGHAM,
            RUNIC,
            TAGALOG,
            HANUNOO,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER_SYMBOLS,
            BUGINESE,
            TAI_THAM,
            COMBINING_DIACRITICAL_MARKS_EXTENDED,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            CYRILLIC_EXTENDED_C,
            GEORGIAN_EXTENDED,
            SUNDANESE_SUPPLEMENT,
            VEDIC_EXTENSIONS,
            PHONETIC_EXTENSIONS,
            PHONETIC_EXTENSIONS_SUPPLEMENT,
            COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
            LATIN_EXTENDED_ADDITIONAL,
            GREEK_EXTENDED,
            GENERAL_PUNCTUATION,
            SUPERSCRIPTS_AND_SUBSCRIPTS,
            CURRENCY_SYMBOLS,
            COMBINING_MARKS_FOR_SYMBOLS,
            LETTERLIKE_SYMBOLS,
            NUMBER_FORMS,
            ARROWS,
            MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_TECHNICAL,
            CONTROL_PICTURES,
            OPTICAL_CHARACTER_RECOGNITION,
            ENCLOSED_ALPHANUMERICS,
            BOX_DRAWING,
            BLOCK_ELEMENTS,
            GEOMETRIC_SHAPES,
            MISCELLANEOUS_SYMBOLS,
            DINGBATS,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
            SUPPLEMENTAL_ARROWS_A,
            BRAILLE_PATTERNS,
            SUPPLEMENTAL_ARROWS_B,
            MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
            SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
            MISCELLANEOUS_SYMBOLS_AND_ARROWS,
            GLAGOLITIC,
            LATIN_EXTENDED_C,
            COPTIC,
            GEORGIAN_SUPPLEMENT,
            TIFINAGH,
            ETHIOPIC_EXTENDED,
            CYRILLIC_EXTENDED_A,
            SUPPLEMENTAL_PUNCTUATION,
            CJK_RADICALS_SUPPLEMENT,
            KANGXI_RADICALS,
            null,
            IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
            CJK_SYMBOLS_AND_PUNCTUATION,
            HIRAGANA,
            KATAKANA,
            BOPOMOFO,
            HANGUL_COMPATIBILITY_JAMO,
            KANBUN,
            BOPOMOFO_EXTENDED,
            CJK_STROKES,
            KATAKANA_PHONETIC_EXTENSIONS,
            ENCLOSED_CJK_LETTERS_AND_MONTHS,
            CJK_COMPATIBILITY,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
            YIJING_HEXAGRAM_SYMBOLS,
            CJK_UNIFIED_IDEOGRAPHS,
            YI_SYLLABLES,
            YI_RADICALS,
            LISU,
            VAI,
            CYRILLIC_EXTENDED_B,
            BAMUM,
            MODIFIER_TONE_LETTERS,
            LATIN_EXTENDED_D,
            SYLOTI_NAGRI,
            COMMON_INDIC_NUMBER_FORMS,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI_EXTENDED,
            KAYAH_LI,
            REJANG,
            HANGUL_JAMO_EXTENDED_A,
            JAVANESE,
            MYANMAR_EXTENDED_B,
            CHAM,
            MYANMAR_EXTENDED_A,
            TAI_VIET,
            MEETEI_MAYEK_EXTENSIONS,
            ETHIOPIC_EXTENDED_A,
            LATIN_EXTENDED_E,
            CHEROKEE_SUPPLEMENT,
            MEETEI_MAYEK,
            HANGUL_SYLLABLES,
            HANGUL_JAMO_EXTENDED_B,
            HIGH_SURROGATES,
            HIGH_PRIVATE_USE_SURROGATES,
            LOW_SURROGATES,
            PRIVATE_USE_AREA,
            CJK_COMPATIBILITY_IDEOGRAPHS,
            ALPHABETIC_PRESENTATION_FORMS,
            ARABIC_PRESENTATION_FORMS_A,
            VARIATION_SELECTORS,
            VERTICAL_FORMS,
            COMBINING_HALF_MARKS,
            CJK_COMPATIBILITY_FORMS,
            SMALL_FORM_VARIANTS,
            ARABIC_PRESENTATION_FORMS_B,
            HALFWIDTH_AND_FULLWIDTH_FORMS,
            SPECIALS,
            LINEAR_B_SYLLABARY,
            LINEAR_B_IDEOGRAMS,
            AEGEAN_NUMBERS,
            ANCIENT_GREEK_NUMBERS,
            ANCIENT_SYMBOLS,
            PHAISTOS_DISC,
            null,
            LYCIAN,
            CARIAN,
            COPTIC_EPACT_NUMBERS,
            OLD_ITALIC,
            GOTHIC,
            OLD_PERMIC,
            UGARITIC,
            OLD_PERSIAN,
            null,
            DESERET,
            SHAVIAN,
            OSMANYA,
            OSAGE,
            ELBASAN,
            CAUCASIAN_ALBANIAN,
            VITHKUQI,
            null,
            LINEAR_A,
            LATIN_EXTENDED_F,
            null,
            CYPRIOT_SYLLABARY,
            IMPERIAL_ARAMAIC,
            PALMYRENE,
            NABATAEAN,
            null,
            HATRAN,
            PHOENICIAN,
            LYDIAN,
            null,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            OLD_NORTH_ARABIAN,
            null,
            MANICHAEAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            PSALTER_PAHLAVI,
            null,
            OLD_TURKIC,
            null,
            OLD_HUNGARIAN,
            HANIFI_ROHINGYA,
            null,
            RUMI_NUMERAL_SYMBOLS,
            YEZIDI,
            ARABIC_EXTENDED_C,
            OLD_SOGDIAN,
            SOGDIAN,
            OLD_UYGHUR,
            CHORASMIAN,
            ELYMAIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            MAHAJANI,
            SHARADA,
            SINHALA_ARCHAIC_NUMBERS,
            KHOJKI,
            null,
            MULTANI,
            KHUDAWADI,
            GRANTHA,
            null,
            NEWA,
            TIRHUTA,
            null,
            SIDDHAM,
            MODI,
            MONGOLIAN_SUPPLEMENT,
            TAKRI,
            null,
            AHOM,
            null,
            DOGRA,
            null,
            WARANG_CITI,
            DIVES_AKURU,
            null,
            NANDINAGARI,
            ZANABAZAR_SQUARE,
            SOYOMBO,
            UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A,
            PAU_CIN_HAU,
            DEVANAGARI_EXTENDED_A,
            null,
            BHAIKSUKI,
            MARCHEN,
            null,
            MASARAM_GONDI,
            GUNJALA_GONDI,
            null,
            MAKASAR,
            KAWI,
            null,
            LISU_SUPPLEMENT,
            TAMIL_SUPPLEMENT,
            CUNEIFORM,
            CUNEIFORM_NUMBERS_AND_PUNCTUATION,
            EARLY_DYNASTIC_CUNEIFORM,
            null,
            CYPRO_MINOAN,
            EGYPTIAN_HIEROGLYPHS,
            EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
            null,
            ANATOLIAN_HIEROGLYPHS,
            null,
            BAMUM_SUPPLEMENT,
            MRO,
            TANGSA,
            BASSA_VAH,
            PAHAWH_HMONG,
            null,
            MEDEFAIDRIN,
            null,
            MIAO,
            null,
            IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
            TANGUT,
            TANGUT_COMPONENTS,
            KHITAN_SMALL_SCRIPT,
            TANGUT_SUPPLEMENT,
            null,
            KANA_EXTENDED_B,
            KANA_SUPPLEMENT,
            KANA_EXTENDED_A,
            SMALL_KANA_EXTENSION,
            NUSHU,
            null,
            DUPLOYAN,
            SHORTHAND_FORMAT_CONTROLS,
            null,
            ZNAMENNY_MUSICAL_NOTATION,
            null,
            BYZANTINE_MUSICAL_SYMBOLS,
            MUSICAL_SYMBOLS,
            ANCIENT_GREEK_MUSICAL_NOTATION,
            null,
            KAKTOVIK_NUMERALS,
            MAYAN_NUMERALS,
            TAI_XUAN_JING_SYMBOLS,
            COUNTING_ROD_NUMERALS,
            null,
            MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
            SUTTON_SIGNWRITING,
            null,
            LATIN_EXTENDED_G,
            GLAGOLITIC_SUPPLEMENT,
            CYRILLIC_EXTENDED_D,
            null,
            NYIAKENG_PUACHUE_HMONG,
            null,
            TOTO,
            WANCHO,
            null,
            NAG_MUNDARI,
            null,
            ETHIOPIC_EXTENDED_B,
            MENDE_KIKAKUI,
            null,
            ADLAM,
            null,
            INDIC_SIYAQ_NUMBERS,
            null,
            OTTOMAN_SIYAQ_NUMBERS,
            null,
            ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
            null,
            MAHJONG_TILES,
            DOMINO_TILES,
            PLAYING_CARDS,
            ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
            ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
            MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
            EMOTICONS,
            ORNAMENTAL_DINGBATS,
            TRANSPORT_AND_MAP_SYMBOLS,
            ALCHEMICAL_SYMBOLS,
            GEOMETRIC_SHAPES_EXTENDED,
            SUPPLEMENTAL_ARROWS_C,
            SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
            CHESS_SYMBOLS,
            SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
            SYMBOLS_FOR_LEGACY_COMPUTING,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
            null,
            CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
            null,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
            CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H,
            null,
            TAGS,
            null,
            VARIATION_SELECTORS_SUPPLEMENT,
            null,
            SUPPLEMENTARY_PRIVATE_USE_AREA_A,
            SUPPLEMENTARY_PRIVATE_USE_AREA_B,
        };
4432 
4433 
4434         /**
4435          * Returns the object representing the Unicode block containing the
4436          * given character, or {@code null} if the character is not a
4437          * member of a defined block.
4438          *
4439          * <p><b>Note:</b> This method cannot handle
4440          * <a href="Character.html#supplementary"> supplementary
4441          * characters</a>.  To support all Unicode characters, including
4442          * supplementary characters, use the {@link #of(int)} method.
4443          *
4444          * @param   c  The character in question
4445          * @return  The {@code UnicodeBlock} instance representing the
4446          *          Unicode block of which this character is a member, or
4447          *          {@code null} if the character is not a member of any
4448          *          Unicode block
4449          */
of(char c)4450         public static UnicodeBlock of(char c) {
4451             return of((int)c);
4452         }
4453 
4454         /**
4455          * Returns the object representing the Unicode block
4456          * containing the given character (Unicode code point), or
4457          * {@code null} if the character is not a member of a
4458          * defined block.
4459          *
4460          * @param   codePoint the character (Unicode code point) in question.
4461          * @return  The {@code UnicodeBlock} instance representing the
4462          *          Unicode block of which this character is a member, or
4463          *          {@code null} if the character is not a member of any
4464          *          Unicode block
4465          * @throws  IllegalArgumentException if the specified
4466          * {@code codePoint} is an invalid Unicode code point.
4467          * @see Character#isValidCodePoint(int)
4468          * @since   1.5
4469          */
of(int codePoint)4470         public static UnicodeBlock of(int codePoint) {
4471             if (!isValidCodePoint(codePoint)) {
4472                 throw new IllegalArgumentException(
4473                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
4474             }
4475 
4476             int top, bottom, current;
4477             bottom = 0;
4478             top = blockStarts.length;
4479             current = top/2;
4480 
4481             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
4482             while (top - bottom > 1) {
4483                 if (codePoint >= blockStarts[current]) {
4484                     bottom = current;
4485                 } else {
4486                     top = current;
4487                 }
4488                 current = (top + bottom) / 2;
4489             }
4490             return blocks[current];
4491         }
4492 
4493         /**
4494          * Returns the UnicodeBlock with the given name. Block
4495          * names are determined by The Unicode Standard. The file
4496          * {@code Blocks.txt} defines blocks for a particular
4497          * version of the standard. The {@link Character} class specifies
4498          * the version of the standard that it supports.
4499          * <p>
4500          * This method accepts block names in the following forms:
4501          * <ol>
4502          * <li> Canonical block names as defined by the Unicode Standard.
4503          * For example, the standard defines a "Basic Latin" block. Therefore, this
4504          * method accepts "Basic Latin" as a valid block name. The documentation of
4505          * each UnicodeBlock provides the canonical name.
4506          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
4507          * is a valid block name for the "Basic Latin" block.
4508          * <li>The text representation of each constant UnicodeBlock identifier.
4509          * For example, this method will return the {@link #BASIC_LATIN} block if
4510          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
4511          * hyphens in the canonical name with underscores.
4512          * </ol>
4513          * Finally, character case is ignored for all of the valid block name forms.
4514          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
4515          * The en_US locale's case mapping rules are used to provide case-insensitive
4516          * string comparisons for block name validation.
4517          * <p>
4518          * If the Unicode Standard changes block names, both the previous and
4519          * current names will be accepted.
4520          *
4521          * @param blockName A {@code UnicodeBlock} name.
4522          * @return The {@code UnicodeBlock} instance identified
4523          *         by {@code blockName}
4524          * @throws IllegalArgumentException if {@code blockName} is an
4525          *         invalid name
4526          * @throws NullPointerException if {@code blockName} is null
4527          * @since 1.5
4528          */
forName(String blockName)4529         public static final UnicodeBlock forName(String blockName) {
4530             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
4531             if (block == null) {
4532                 throw new IllegalArgumentException("Not a valid block name: "
4533                             + blockName);
4534             }
4535             return block;
4536         }
4537     }
4538 
4539 
4540     /**
4541      * A family of character subsets representing the character scripts
4542      * defined in the <a href="http://www.unicode.org/reports/tr24/">
4543      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
4544      * character is assigned to a single Unicode script, either a specific
4545      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
4546      * one of the following three special values,
4547      * {@link Character.UnicodeScript#INHERITED Inherited},
4548      * {@link Character.UnicodeScript#COMMON Common} or
4549      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
4550      *
4551      * @spec https://www.unicode.org/reports/tr24 Unicode Script Property
4552      * @since 1.7
4553      */
4554     public static enum UnicodeScript {
4555         /**
4556          * Unicode script "Common".
4557          */
4558         COMMON,
4559 
4560         /**
4561          * Unicode script "Latin".
4562          */
4563         LATIN,
4564 
4565         /**
4566          * Unicode script "Greek".
4567          */
4568         GREEK,
4569 
4570         /**
4571          * Unicode script "Cyrillic".
4572          */
4573         CYRILLIC,
4574 
4575         /**
4576          * Unicode script "Armenian".
4577          */
4578         ARMENIAN,
4579 
4580         /**
4581          * Unicode script "Hebrew".
4582          */
4583         HEBREW,
4584 
4585         /**
4586          * Unicode script "Arabic".
4587          */
4588         ARABIC,
4589 
4590         /**
4591          * Unicode script "Syriac".
4592          */
4593         SYRIAC,
4594 
4595         /**
4596          * Unicode script "Thaana".
4597          */
4598         THAANA,
4599 
4600         /**
4601          * Unicode script "Devanagari".
4602          */
4603         DEVANAGARI,
4604 
4605         /**
4606          * Unicode script "Bengali".
4607          */
4608         BENGALI,
4609 
4610         /**
4611          * Unicode script "Gurmukhi".
4612          */
4613         GURMUKHI,
4614 
4615         /**
4616          * Unicode script "Gujarati".
4617          */
4618         GUJARATI,
4619 
4620         /**
4621          * Unicode script "Oriya".
4622          */
4623         ORIYA,
4624 
4625         /**
4626          * Unicode script "Tamil".
4627          */
4628         TAMIL,
4629 
4630         /**
4631          * Unicode script "Telugu".
4632          */
4633         TELUGU,
4634 
4635         /**
4636          * Unicode script "Kannada".
4637          */
4638         KANNADA,
4639 
4640         /**
4641          * Unicode script "Malayalam".
4642          */
4643         MALAYALAM,
4644 
4645         /**
4646          * Unicode script "Sinhala".
4647          */
4648         SINHALA,
4649 
4650         /**
4651          * Unicode script "Thai".
4652          */
4653         THAI,
4654 
4655         /**
4656          * Unicode script "Lao".
4657          */
4658         LAO,
4659 
4660         /**
4661          * Unicode script "Tibetan".
4662          */
4663         TIBETAN,
4664 
4665         /**
4666          * Unicode script "Myanmar".
4667          */
4668         MYANMAR,
4669 
4670         /**
4671          * Unicode script "Georgian".
4672          */
4673         GEORGIAN,
4674 
4675         /**
4676          * Unicode script "Hangul".
4677          */
4678         HANGUL,
4679 
4680         /**
4681          * Unicode script "Ethiopic".
4682          */
4683         ETHIOPIC,
4684 
4685         /**
4686          * Unicode script "Cherokee".
4687          */
4688         CHEROKEE,
4689 
4690         /**
4691          * Unicode script "Canadian_Aboriginal".
4692          */
4693         CANADIAN_ABORIGINAL,
4694 
4695         /**
4696          * Unicode script "Ogham".
4697          */
4698         OGHAM,
4699 
4700         /**
4701          * Unicode script "Runic".
4702          */
4703         RUNIC,
4704 
4705         /**
4706          * Unicode script "Khmer".
4707          */
4708         KHMER,
4709 
4710         /**
4711          * Unicode script "Mongolian".
4712          */
4713         MONGOLIAN,
4714 
4715         /**
4716          * Unicode script "Hiragana".
4717          */
4718         HIRAGANA,
4719 
4720         /**
4721          * Unicode script "Katakana".
4722          */
4723         KATAKANA,
4724 
4725         /**
4726          * Unicode script "Bopomofo".
4727          */
4728         BOPOMOFO,
4729 
4730         /**
4731          * Unicode script "Han".
4732          */
4733         HAN,
4734 
4735         /**
4736          * Unicode script "Yi".
4737          */
4738         YI,
4739 
4740         /**
4741          * Unicode script "Old_Italic".
4742          */
4743         OLD_ITALIC,
4744 
4745         /**
4746          * Unicode script "Gothic".
4747          */
4748         GOTHIC,
4749 
4750         /**
4751          * Unicode script "Deseret".
4752          */
4753         DESERET,
4754 
4755         /**
4756          * Unicode script "Inherited".
4757          */
4758         INHERITED,
4759 
4760         /**
4761          * Unicode script "Tagalog".
4762          */
4763         TAGALOG,
4764 
4765         /**
4766          * Unicode script "Hanunoo".
4767          */
4768         HANUNOO,
4769 
4770         /**
4771          * Unicode script "Buhid".
4772          */
4773         BUHID,
4774 
4775         /**
4776          * Unicode script "Tagbanwa".
4777          */
4778         TAGBANWA,
4779 
4780         /**
4781          * Unicode script "Limbu".
4782          */
4783         LIMBU,
4784 
4785         /**
4786          * Unicode script "Tai_Le".
4787          */
4788         TAI_LE,
4789 
4790         /**
4791          * Unicode script "Linear_B".
4792          */
4793         LINEAR_B,
4794 
4795         /**
4796          * Unicode script "Ugaritic".
4797          */
4798         UGARITIC,
4799 
4800         /**
4801          * Unicode script "Shavian".
4802          */
4803         SHAVIAN,
4804 
4805         /**
4806          * Unicode script "Osmanya".
4807          */
4808         OSMANYA,
4809 
4810         /**
4811          * Unicode script "Cypriot".
4812          */
4813         CYPRIOT,
4814 
4815         /**
4816          * Unicode script "Braille".
4817          */
4818         BRAILLE,
4819 
4820         /**
4821          * Unicode script "Buginese".
4822          */
4823         BUGINESE,
4824 
4825         /**
4826          * Unicode script "Coptic".
4827          */
4828         COPTIC,
4829 
4830         /**
4831          * Unicode script "New_Tai_Lue".
4832          */
4833         NEW_TAI_LUE,
4834 
4835         /**
4836          * Unicode script "Glagolitic".
4837          */
4838         GLAGOLITIC,
4839 
4840         /**
4841          * Unicode script "Tifinagh".
4842          */
4843         TIFINAGH,
4844 
4845         /**
4846          * Unicode script "Syloti_Nagri".
4847          */
4848         SYLOTI_NAGRI,
4849 
4850         /**
4851          * Unicode script "Old_Persian".
4852          */
4853         OLD_PERSIAN,
4854 
4855         /**
4856          * Unicode script "Kharoshthi".
4857          */
4858         KHAROSHTHI,
4859 
4860         /**
4861          * Unicode script "Balinese".
4862          */
4863         BALINESE,
4864 
4865         /**
4866          * Unicode script "Cuneiform".
4867          */
4868         CUNEIFORM,
4869 
4870         /**
4871          * Unicode script "Phoenician".
4872          */
4873         PHOENICIAN,
4874 
4875         /**
4876          * Unicode script "Phags_Pa".
4877          */
4878         PHAGS_PA,
4879 
4880         /**
4881          * Unicode script "Nko".
4882          */
4883         NKO,
4884 
4885         /**
4886          * Unicode script "Sundanese".
4887          */
4888         SUNDANESE,
4889 
4890         /**
4891          * Unicode script "Batak".
4892          */
4893         BATAK,
4894 
4895         /**
4896          * Unicode script "Lepcha".
4897          */
4898         LEPCHA,
4899 
4900         /**
4901          * Unicode script "Ol_Chiki".
4902          */
4903         OL_CHIKI,
4904 
4905         /**
4906          * Unicode script "Vai".
4907          */
4908         VAI,
4909 
4910         /**
4911          * Unicode script "Saurashtra".
4912          */
4913         SAURASHTRA,
4914 
4915         /**
4916          * Unicode script "Kayah_Li".
4917          */
4918         KAYAH_LI,
4919 
4920         /**
4921          * Unicode script "Rejang".
4922          */
4923         REJANG,
4924 
4925         /**
4926          * Unicode script "Lycian".
4927          */
4928         LYCIAN,
4929 
4930         /**
4931          * Unicode script "Carian".
4932          */
4933         CARIAN,
4934 
4935         /**
4936          * Unicode script "Lydian".
4937          */
4938         LYDIAN,
4939 
4940         /**
4941          * Unicode script "Cham".
4942          */
4943         CHAM,
4944 
4945         /**
4946          * Unicode script "Tai_Tham".
4947          */
4948         TAI_THAM,
4949 
4950         /**
4951          * Unicode script "Tai_Viet".
4952          */
4953         TAI_VIET,
4954 
4955         /**
4956          * Unicode script "Avestan".
4957          */
4958         AVESTAN,
4959 
4960         /**
4961          * Unicode script "Egyptian_Hieroglyphs".
4962          */
4963         EGYPTIAN_HIEROGLYPHS,
4964 
4965         /**
4966          * Unicode script "Samaritan".
4967          */
4968         SAMARITAN,
4969 
4970         /**
4971          * Unicode script "Mandaic".
4972          */
4973         MANDAIC,
4974 
4975         /**
4976          * Unicode script "Lisu".
4977          */
4978         LISU,
4979 
4980         /**
4981          * Unicode script "Bamum".
4982          */
4983         BAMUM,
4984 
4985         /**
4986          * Unicode script "Javanese".
4987          */
4988         JAVANESE,
4989 
4990         /**
4991          * Unicode script "Meetei_Mayek".
4992          */
4993         MEETEI_MAYEK,
4994 
4995         /**
4996          * Unicode script "Imperial_Aramaic".
4997          */
4998         IMPERIAL_ARAMAIC,
4999 
5000         /**
5001          * Unicode script "Old_South_Arabian".
5002          */
5003         OLD_SOUTH_ARABIAN,
5004 
5005         /**
5006          * Unicode script "Inscriptional_Parthian".
5007          */
5008         INSCRIPTIONAL_PARTHIAN,
5009 
5010         /**
5011          * Unicode script "Inscriptional_Pahlavi".
5012          */
5013         INSCRIPTIONAL_PAHLAVI,
5014 
5015         /**
5016          * Unicode script "Old_Turkic".
5017          */
5018         OLD_TURKIC,
5019 
5020         /**
5021          * Unicode script "Brahmi".
5022          */
5023         BRAHMI,
5024 
5025         /**
5026          * Unicode script "Kaithi".
5027          */
5028         KAITHI,
5029 
5030         /**
5031          * Unicode script "Meroitic Hieroglyphs".
5032          * @since 1.8
5033          */
5034         MEROITIC_HIEROGLYPHS,
5035 
5036         /**
5037          * Unicode script "Meroitic Cursive".
5038          * @since 1.8
5039          */
5040         MEROITIC_CURSIVE,
5041 
5042         /**
5043          * Unicode script "Sora Sompeng".
5044          * @since 1.8
5045          */
5046         SORA_SOMPENG,
5047 
5048         /**
5049          * Unicode script "Chakma".
5050          * @since 1.8
5051          */
5052         CHAKMA,
5053 
5054         /**
5055          * Unicode script "Sharada".
5056          * @since 1.8
5057          */
5058         SHARADA,
5059 
5060         /**
5061          * Unicode script "Takri".
5062          * @since 1.8
5063          */
5064         TAKRI,
5065 
5066         /**
5067          * Unicode script "Miao".
5068          * @since 1.8
5069          */
5070         MIAO,
5071 
5072         /**
5073          * Unicode script "Caucasian Albanian".
5074          * @since 9
5075          */
5076         CAUCASIAN_ALBANIAN,
5077 
5078         /**
5079          * Unicode script "Bassa Vah".
5080          * @since 9
5081          */
5082         BASSA_VAH,
5083 
5084         /**
5085          * Unicode script "Duployan".
5086          * @since 9
5087          */
5088         DUPLOYAN,
5089 
5090         /**
5091          * Unicode script "Elbasan".
5092          * @since 9
5093          */
5094         ELBASAN,
5095 
5096         /**
5097          * Unicode script "Grantha".
5098          * @since 9
5099          */
5100         GRANTHA,
5101 
5102         /**
5103          * Unicode script "Pahawh Hmong".
5104          * @since 9
5105          */
5106         PAHAWH_HMONG,
5107 
5108         /**
5109          * Unicode script "Khojki".
5110          * @since 9
5111          */
5112         KHOJKI,
5113 
5114         /**
5115          * Unicode script "Linear A".
5116          * @since 9
5117          */
5118         LINEAR_A,
5119 
5120         /**
5121          * Unicode script "Mahajani".
5122          * @since 9
5123          */
5124         MAHAJANI,
5125 
5126         /**
5127          * Unicode script "Manichaean".
5128          * @since 9
5129          */
5130         MANICHAEAN,
5131 
5132         /**
5133          * Unicode script "Mende Kikakui".
5134          * @since 9
5135          */
5136         MENDE_KIKAKUI,
5137 
5138         /**
5139          * Unicode script "Modi".
5140          * @since 9
5141          */
5142         MODI,
5143 
5144         /**
5145          * Unicode script "Mro".
5146          * @since 9
5147          */
5148         MRO,
5149 
5150         /**
5151          * Unicode script "Old North Arabian".
5152          * @since 9
5153          */
5154         OLD_NORTH_ARABIAN,
5155 
5156         /**
5157          * Unicode script "Nabataean".
5158          * @since 9
5159          */
5160         NABATAEAN,
5161 
5162         /**
5163          * Unicode script "Palmyrene".
5164          * @since 9
5165          */
5166         PALMYRENE,
5167 
5168         /**
5169          * Unicode script "Pau Cin Hau".
5170          * @since 9
5171          */
5172         PAU_CIN_HAU,
5173 
5174         /**
5175          * Unicode script "Old Permic".
5176          * @since 9
5177          */
5178         OLD_PERMIC,
5179 
5180         /**
5181          * Unicode script "Psalter Pahlavi".
5182          * @since 9
5183          */
5184         PSALTER_PAHLAVI,
5185 
5186         /**
5187          * Unicode script "Siddham".
5188          * @since 9
5189          */
5190         SIDDHAM,
5191 
5192         /**
5193          * Unicode script "Khudawadi".
5194          * @since 9
5195          */
5196         KHUDAWADI,
5197 
5198         /**
5199          * Unicode script "Tirhuta".
5200          * @since 9
5201          */
5202         TIRHUTA,
5203 
5204         /**
5205          * Unicode script "Warang Citi".
5206          * @since 9
5207          */
5208         WARANG_CITI,
5209 
5210         /**
5211          * Unicode script "Ahom".
5212          * @since 9
5213          */
5214         AHOM,
5215 
5216         /**
5217          * Unicode script "Anatolian Hieroglyphs".
5218          * @since 9
5219          */
5220         ANATOLIAN_HIEROGLYPHS,
5221 
5222         /**
5223          * Unicode script "Hatran".
5224          * @since 9
5225          */
5226         HATRAN,
5227 
5228         /**
5229          * Unicode script "Multani".
5230          * @since 9
5231          */
5232         MULTANI,
5233 
5234         /**
5235          * Unicode script "Old Hungarian".
5236          * @since 9
5237          */
5238         OLD_HUNGARIAN,
5239 
5240         /**
5241          * Unicode script "SignWriting".
5242          * @since 9
5243          */
5244         SIGNWRITING,
5245 
5246         /**
5247          * Unicode script "Adlam".
5248          * @since 11
5249          */
5250         ADLAM,
5251 
5252         /**
5253          * Unicode script "Bhaiksuki".
5254          * @since 11
5255          */
5256         BHAIKSUKI,
5257 
5258         /**
5259          * Unicode script "Marchen".
5260          * @since 11
5261          */
5262         MARCHEN,
5263 
5264         /**
5265          * Unicode script "Newa".
5266          * @since 11
5267          */
5268         NEWA,
5269 
5270         /**
5271          * Unicode script "Osage".
5272          * @since 11
5273          */
5274         OSAGE,
5275 
5276         /**
5277          * Unicode script "Tangut".
5278          * @since 11
5279          */
5280         TANGUT,
5281 
5282         /**
5283          * Unicode script "Masaram Gondi".
5284          * @since 11
5285          */
5286         MASARAM_GONDI,
5287 
5288         /**
5289          * Unicode script "Nushu".
5290          * @since 11
5291          */
5292         NUSHU,
5293 
5294         /**
5295          * Unicode script "Soyombo".
5296          * @since 11
5297          */
5298         SOYOMBO,
5299 
5300         /**
5301          * Unicode script "Zanabazar Square".
5302          * @since 11
5303          */
5304         ZANABAZAR_SQUARE,
5305 
5306         /**
5307          * Unicode script "Hanifi Rohingya".
5308          * @since 12
5309          */
5310         HANIFI_ROHINGYA,
5311 
5312         /**
5313          * Unicode script "Old Sogdian".
5314          * @since 12
5315          */
5316         OLD_SOGDIAN,
5317 
5318         /**
5319          * Unicode script "Sogdian".
5320          * @since 12
5321          */
5322         SOGDIAN,
5323 
5324         /**
5325          * Unicode script "Dogra".
5326          * @since 12
5327          */
5328         DOGRA,
5329 
5330         /**
5331          * Unicode script "Gunjala Gondi".
5332          * @since 12
5333          */
5334         GUNJALA_GONDI,
5335 
5336         /**
5337          * Unicode script "Makasar".
5338          * @since 12
5339          */
5340         MAKASAR,
5341 
5342         /**
5343          * Unicode script "Medefaidrin".
5344          * @since 12
5345          */
5346         MEDEFAIDRIN,
5347 
5348         /**
5349          * Unicode script "Elymaic".
5350          * @since 13
5351          */
5352         ELYMAIC,
5353 
5354         /**
5355          * Unicode script "Nandinagari".
5356          * @since 13
5357          */
5358         NANDINAGARI,
5359 
5360         /**
5361          * Unicode script "Nyiakeng Puachue Hmong".
5362          * @since 13
5363          */
5364         NYIAKENG_PUACHUE_HMONG,
5365 
5366         /**
5367          * Unicode script "Wancho".
5368          * @since 13
5369          */
5370         WANCHO,
5371 
5372         /**
5373          * Unicode script "Yezidi".
5374          * @since 15
5375          */
5376         YEZIDI,
5377 
5378         /**
5379          * Unicode script "Chorasmian".
5380          * @since 15
5381          */
5382         CHORASMIAN,
5383 
5384         /**
5385          * Unicode script "Dives Akuru".
5386          * @since 15
5387          */
5388         DIVES_AKURU,
5389 
5390         /**
5391          * Unicode script "Khitan Small Script".
5392          * @since 15
5393          */
5394         KHITAN_SMALL_SCRIPT,
5395 
5396         /**
5397          * Unicode script "Vithkuqi".
5398          * @since 19
5399          */
5400         VITHKUQI,
5401 
5402         /**
5403          * Unicode script "Old Uyghur".
5404          * @since 19
5405          */
5406         OLD_UYGHUR,
5407 
5408         /**
5409          * Unicode script "Cypro Minoan".
5410          * @since 19
5411          */
5412         CYPRO_MINOAN,
5413 
5414         /**
5415          * Unicode script "Tangsa".
5416          * @since 19
5417          */
5418         TANGSA,
5419 
5420         /**
5421          * Unicode script "Toto".
5422          * @since 19
5423          */
5424         TOTO,
5425 
5426         /**
5427          * Unicode script "Kawi".
5428          * @since 20
5429          */
5430         KAWI,
5431 
5432         /**
5433          * Unicode script "Nag Mundari".
5434          * @since 20
5435          */
5436         NAG_MUNDARI,
5437 
5438         /**
5439          * Unicode script "Unknown".
5440          */
5441         UNKNOWN; // must be the last enum constant for calculating the size of "aliases" hash map.
5442 
5443         private static final int[] scriptStarts = {
5444             0x0000,   // 0000..0040; COMMON
5445             0x0041,   // 0041..005A; LATIN
5446             0x005B,   // 005B..0060; COMMON
5447             0x0061,   // 0061..007A; LATIN
5448             0x007B,   // 007B..00A9; COMMON
5449             0x00AA,   // 00AA      ; LATIN
5450             0x00AB,   // 00AB..00B9; COMMON
5451             0x00BA,   // 00BA      ; LATIN
5452             0x00BB,   // 00BB..00BF; COMMON
5453             0x00C0,   // 00C0..00D6; LATIN
5454             0x00D7,   // 00D7      ; COMMON
5455             0x00D8,   // 00D8..00F6; LATIN
5456             0x00F7,   // 00F7      ; COMMON
5457             0x00F8,   // 00F8..02B8; LATIN
5458             0x02B9,   // 02B9..02DF; COMMON
5459             0x02E0,   // 02E0..02E4; LATIN
5460             0x02E5,   // 02E5..02E9; COMMON
5461             0x02EA,   // 02EA..02EB; BOPOMOFO
5462             0x02EC,   // 02EC..02FF; COMMON
5463             0x0300,   // 0300..036F; INHERITED
5464             0x0370,   // 0370..0373; GREEK
5465             0x0374,   // 0374      ; COMMON
5466             0x0375,   // 0375..0377; GREEK
5467             0x0378,   // 0378..0379; UNKNOWN
5468             0x037A,   // 037A..037D; GREEK
5469             0x037E,   // 037E      ; COMMON
5470             0x037F,   // 037F      ; GREEK
5471             0x0380,   // 0380..0383; UNKNOWN
5472             0x0384,   // 0384      ; GREEK
5473             0x0385,   // 0385      ; COMMON
5474             0x0386,   // 0386      ; GREEK
5475             0x0387,   // 0387      ; COMMON
5476             0x0388,   // 0388..038A; GREEK
5477             0x038B,   // 038B      ; UNKNOWN
5478             0x038C,   // 038C      ; GREEK
5479             0x038D,   // 038D      ; UNKNOWN
5480             0x038E,   // 038E..03A1; GREEK
5481             0x03A2,   // 03A2      ; UNKNOWN
5482             0x03A3,   // 03A3..03E1; GREEK
5483             0x03E2,   // 03E2..03EF; COPTIC
5484             0x03F0,   // 03F0..03FF; GREEK
5485             0x0400,   // 0400..0484; CYRILLIC
5486             0x0485,   // 0485..0486; INHERITED
5487             0x0487,   // 0487..052F; CYRILLIC
5488             0x0530,   // 0530      ; UNKNOWN
5489             0x0531,   // 0531..0556; ARMENIAN
5490             0x0557,   // 0557..0558; UNKNOWN
5491             0x0559,   // 0559..058A; ARMENIAN
5492             0x058B,   // 058B..058C; UNKNOWN
5493             0x058D,   // 058D..058F; ARMENIAN
5494             0x0590,   // 0590      ; UNKNOWN
5495             0x0591,   // 0591..05C7; HEBREW
5496             0x05C8,   // 05C8..05CF; UNKNOWN
5497             0x05D0,   // 05D0..05EA; HEBREW
5498             0x05EB,   // 05EB..05EE; UNKNOWN
5499             0x05EF,   // 05EF..05F4; HEBREW
5500             0x05F5,   // 05F5..05FF; UNKNOWN
5501             0x0600,   // 0600..0604; ARABIC
5502             0x0605,   // 0605      ; COMMON
5503             0x0606,   // 0606..060B; ARABIC
5504             0x060C,   // 060C      ; COMMON
5505             0x060D,   // 060D..061A; ARABIC
5506             0x061B,   // 061B      ; COMMON
5507             0x061C,   // 061C..061E; ARABIC
5508             0x061F,   // 061F      ; COMMON
5509             0x0620,   // 0620..063F; ARABIC
5510             0x0640,   // 0640      ; COMMON
5511             0x0641,   // 0641..064A; ARABIC
5512             0x064B,   // 064B..0655; INHERITED
5513             0x0656,   // 0656..066F; ARABIC
5514             0x0670,   // 0670      ; INHERITED
5515             0x0671,   // 0671..06DC; ARABIC
5516             0x06DD,   // 06DD      ; COMMON
5517             0x06DE,   // 06DE..06FF; ARABIC
5518             0x0700,   // 0700..070D; SYRIAC
5519             0x070E,   // 070E      ; UNKNOWN
5520             0x070F,   // 070F..074A; SYRIAC
5521             0x074B,   // 074B..074C; UNKNOWN
5522             0x074D,   // 074D..074F; SYRIAC
5523             0x0750,   // 0750..077F; ARABIC
5524             0x0780,   // 0780..07B1; THAANA
5525             0x07B2,   // 07B2..07BF; UNKNOWN
5526             0x07C0,   // 07C0..07FA; NKO
5527             0x07FB,   // 07FB..07FC; UNKNOWN
5528             0x07FD,   // 07FD..07FF; NKO
5529             0x0800,   // 0800..082D; SAMARITAN
5530             0x082E,   // 082E..082F; UNKNOWN
5531             0x0830,   // 0830..083E; SAMARITAN
5532             0x083F,   // 083F      ; UNKNOWN
5533             0x0840,   // 0840..085B; MANDAIC
5534             0x085C,   // 085C..085D; UNKNOWN
5535             0x085E,   // 085E      ; MANDAIC
5536             0x085F,   // 085F      ; UNKNOWN
5537             0x0860,   // 0860..086A; SYRIAC
5538             0x086B,   // 086B..086F; UNKNOWN
5539             0x0870,   // 0870..088E; ARABIC
5540             0x088F,   // 088F      ; UNKNOWN
5541             0x0890,   // 0890..0891; ARABIC
5542             0x0892,   // 0892..0897; UNKNOWN
5543             0x0898,   // 0898..08E1; ARABIC
5544             0x08E2,   // 08E2      ; COMMON
5545             0x08E3,   // 08E3..08FF; ARABIC
5546             0x0900,   // 0900..0950; DEVANAGARI
5547             0x0951,   // 0951..0954; INHERITED
5548             0x0955,   // 0955..0963; DEVANAGARI
5549             0x0964,   // 0964..0965; COMMON
5550             0x0966,   // 0966..097F; DEVANAGARI
5551             0x0980,   // 0980..0983; BENGALI
5552             0x0984,   // 0984      ; UNKNOWN
5553             0x0985,   // 0985..098C; BENGALI
5554             0x098D,   // 098D..098E; UNKNOWN
5555             0x098F,   // 098F..0990; BENGALI
5556             0x0991,   // 0991..0992; UNKNOWN
5557             0x0993,   // 0993..09A8; BENGALI
5558             0x09A9,   // 09A9      ; UNKNOWN
5559             0x09AA,   // 09AA..09B0; BENGALI
5560             0x09B1,   // 09B1      ; UNKNOWN
5561             0x09B2,   // 09B2      ; BENGALI
5562             0x09B3,   // 09B3..09B5; UNKNOWN
5563             0x09B6,   // 09B6..09B9; BENGALI
5564             0x09BA,   // 09BA..09BB; UNKNOWN
5565             0x09BC,   // 09BC..09C4; BENGALI
5566             0x09C5,   // 09C5..09C6; UNKNOWN
5567             0x09C7,   // 09C7..09C8; BENGALI
5568             0x09C9,   // 09C9..09CA; UNKNOWN
5569             0x09CB,   // 09CB..09CE; BENGALI
5570             0x09CF,   // 09CF..09D6; UNKNOWN
5571             0x09D7,   // 09D7      ; BENGALI
5572             0x09D8,   // 09D8..09DB; UNKNOWN
5573             0x09DC,   // 09DC..09DD; BENGALI
5574             0x09DE,   // 09DE      ; UNKNOWN
5575             0x09DF,   // 09DF..09E3; BENGALI
5576             0x09E4,   // 09E4..09E5; UNKNOWN
5577             0x09E6,   // 09E6..09FE; BENGALI
5578             0x09FF,   // 09FF..0A00; UNKNOWN
5579             0x0A01,   // 0A01..0A03; GURMUKHI
5580             0x0A04,   // 0A04      ; UNKNOWN
5581             0x0A05,   // 0A05..0A0A; GURMUKHI
5582             0x0A0B,   // 0A0B..0A0E; UNKNOWN
5583             0x0A0F,   // 0A0F..0A10; GURMUKHI
5584             0x0A11,   // 0A11..0A12; UNKNOWN
5585             0x0A13,   // 0A13..0A28; GURMUKHI
5586             0x0A29,   // 0A29      ; UNKNOWN
5587             0x0A2A,   // 0A2A..0A30; GURMUKHI
5588             0x0A31,   // 0A31      ; UNKNOWN
5589             0x0A32,   // 0A32..0A33; GURMUKHI
5590             0x0A34,   // 0A34      ; UNKNOWN
5591             0x0A35,   // 0A35..0A36; GURMUKHI
5592             0x0A37,   // 0A37      ; UNKNOWN
5593             0x0A38,   // 0A38..0A39; GURMUKHI
5594             0x0A3A,   // 0A3A..0A3B; UNKNOWN
5595             0x0A3C,   // 0A3C      ; GURMUKHI
5596             0x0A3D,   // 0A3D      ; UNKNOWN
5597             0x0A3E,   // 0A3E..0A42; GURMUKHI
5598             0x0A43,   // 0A43..0A46; UNKNOWN
5599             0x0A47,   // 0A47..0A48; GURMUKHI
5600             0x0A49,   // 0A49..0A4A; UNKNOWN
5601             0x0A4B,   // 0A4B..0A4D; GURMUKHI
5602             0x0A4E,   // 0A4E..0A50; UNKNOWN
5603             0x0A51,   // 0A51      ; GURMUKHI
5604             0x0A52,   // 0A52..0A58; UNKNOWN
5605             0x0A59,   // 0A59..0A5C; GURMUKHI
5606             0x0A5D,   // 0A5D      ; UNKNOWN
5607             0x0A5E,   // 0A5E      ; GURMUKHI
5608             0x0A5F,   // 0A5F..0A65; UNKNOWN
5609             0x0A66,   // 0A66..0A76; GURMUKHI
5610             0x0A77,   // 0A77..0A80; UNKNOWN
5611             0x0A81,   // 0A81..0A83; GUJARATI
5612             0x0A84,   // 0A84      ; UNKNOWN
5613             0x0A85,   // 0A85..0A8D; GUJARATI
5614             0x0A8E,   // 0A8E      ; UNKNOWN
5615             0x0A8F,   // 0A8F..0A91; GUJARATI
5616             0x0A92,   // 0A92      ; UNKNOWN
5617             0x0A93,   // 0A93..0AA8; GUJARATI
5618             0x0AA9,   // 0AA9      ; UNKNOWN
5619             0x0AAA,   // 0AAA..0AB0; GUJARATI
5620             0x0AB1,   // 0AB1      ; UNKNOWN
5621             0x0AB2,   // 0AB2..0AB3; GUJARATI
5622             0x0AB4,   // 0AB4      ; UNKNOWN
5623             0x0AB5,   // 0AB5..0AB9; GUJARATI
5624             0x0ABA,   // 0ABA..0ABB; UNKNOWN
5625             0x0ABC,   // 0ABC..0AC5; GUJARATI
5626             0x0AC6,   // 0AC6      ; UNKNOWN
5627             0x0AC7,   // 0AC7..0AC9; GUJARATI
5628             0x0ACA,   // 0ACA      ; UNKNOWN
5629             0x0ACB,   // 0ACB..0ACD; GUJARATI
5630             0x0ACE,   // 0ACE..0ACF; UNKNOWN
5631             0x0AD0,   // 0AD0      ; GUJARATI
5632             0x0AD1,   // 0AD1..0ADF; UNKNOWN
5633             0x0AE0,   // 0AE0..0AE3; GUJARATI
5634             0x0AE4,   // 0AE4..0AE5; UNKNOWN
5635             0x0AE6,   // 0AE6..0AF1; GUJARATI
5636             0x0AF2,   // 0AF2..0AF8; UNKNOWN
5637             0x0AF9,   // 0AF9..0AFF; GUJARATI
5638             0x0B00,   // 0B00      ; UNKNOWN
5639             0x0B01,   // 0B01..0B03; ORIYA
5640             0x0B04,   // 0B04      ; UNKNOWN
5641             0x0B05,   // 0B05..0B0C; ORIYA
5642             0x0B0D,   // 0B0D..0B0E; UNKNOWN
5643             0x0B0F,   // 0B0F..0B10; ORIYA
5644             0x0B11,   // 0B11..0B12; UNKNOWN
5645             0x0B13,   // 0B13..0B28; ORIYA
5646             0x0B29,   // 0B29      ; UNKNOWN
5647             0x0B2A,   // 0B2A..0B30; ORIYA
5648             0x0B31,   // 0B31      ; UNKNOWN
5649             0x0B32,   // 0B32..0B33; ORIYA
5650             0x0B34,   // 0B34      ; UNKNOWN
5651             0x0B35,   // 0B35..0B39; ORIYA
5652             0x0B3A,   // 0B3A..0B3B; UNKNOWN
5653             0x0B3C,   // 0B3C..0B44; ORIYA
5654             0x0B45,   // 0B45..0B46; UNKNOWN
5655             0x0B47,   // 0B47..0B48; ORIYA
5656             0x0B49,   // 0B49..0B4A; UNKNOWN
5657             0x0B4B,   // 0B4B..0B4D; ORIYA
5658             0x0B4E,   // 0B4E..0B54; UNKNOWN
5659             0x0B55,   // 0B55..0B57; ORIYA
5660             0x0B58,   // 0B58..0B5B; UNKNOWN
5661             0x0B5C,   // 0B5C..0B5D; ORIYA
5662             0x0B5E,   // 0B5E      ; UNKNOWN
5663             0x0B5F,   // 0B5F..0B63; ORIYA
5664             0x0B64,   // 0B64..0B65; UNKNOWN
5665             0x0B66,   // 0B66..0B77; ORIYA
5666             0x0B78,   // 0B78..0B81; UNKNOWN
5667             0x0B82,   // 0B82..0B83; TAMIL
5668             0x0B84,   // 0B84      ; UNKNOWN
5669             0x0B85,   // 0B85..0B8A; TAMIL
5670             0x0B8B,   // 0B8B..0B8D; UNKNOWN
5671             0x0B8E,   // 0B8E..0B90; TAMIL
5672             0x0B91,   // 0B91      ; UNKNOWN
5673             0x0B92,   // 0B92..0B95; TAMIL
5674             0x0B96,   // 0B96..0B98; UNKNOWN
5675             0x0B99,   // 0B99..0B9A; TAMIL
5676             0x0B9B,   // 0B9B      ; UNKNOWN
5677             0x0B9C,   // 0B9C      ; TAMIL
5678             0x0B9D,   // 0B9D      ; UNKNOWN
5679             0x0B9E,   // 0B9E..0B9F; TAMIL
5680             0x0BA0,   // 0BA0..0BA2; UNKNOWN
5681             0x0BA3,   // 0BA3..0BA4; TAMIL
5682             0x0BA5,   // 0BA5..0BA7; UNKNOWN
5683             0x0BA8,   // 0BA8..0BAA; TAMIL
5684             0x0BAB,   // 0BAB..0BAD; UNKNOWN
5685             0x0BAE,   // 0BAE..0BB9; TAMIL
5686             0x0BBA,   // 0BBA..0BBD; UNKNOWN
5687             0x0BBE,   // 0BBE..0BC2; TAMIL
5688             0x0BC3,   // 0BC3..0BC5; UNKNOWN
5689             0x0BC6,   // 0BC6..0BC8; TAMIL
5690             0x0BC9,   // 0BC9      ; UNKNOWN
5691             0x0BCA,   // 0BCA..0BCD; TAMIL
5692             0x0BCE,   // 0BCE..0BCF; UNKNOWN
5693             0x0BD0,   // 0BD0      ; TAMIL
5694             0x0BD1,   // 0BD1..0BD6; UNKNOWN
5695             0x0BD7,   // 0BD7      ; TAMIL
5696             0x0BD8,   // 0BD8..0BE5; UNKNOWN
5697             0x0BE6,   // 0BE6..0BFA; TAMIL
5698             0x0BFB,   // 0BFB..0BFF; UNKNOWN
5699             0x0C00,   // 0C00..0C0C; TELUGU
5700             0x0C0D,   // 0C0D      ; UNKNOWN
5701             0x0C0E,   // 0C0E..0C10; TELUGU
5702             0x0C11,   // 0C11      ; UNKNOWN
5703             0x0C12,   // 0C12..0C28; TELUGU
5704             0x0C29,   // 0C29      ; UNKNOWN
5705             0x0C2A,   // 0C2A..0C39; TELUGU
5706             0x0C3A,   // 0C3A..0C3B; UNKNOWN
5707             0x0C3C,   // 0C3C..0C44; TELUGU
5708             0x0C45,   // 0C45      ; UNKNOWN
5709             0x0C46,   // 0C46..0C48; TELUGU
5710             0x0C49,   // 0C49      ; UNKNOWN
5711             0x0C4A,   // 0C4A..0C4D; TELUGU
5712             0x0C4E,   // 0C4E..0C54; UNKNOWN
5713             0x0C55,   // 0C55..0C56; TELUGU
5714             0x0C57,   // 0C57      ; UNKNOWN
5715             0x0C58,   // 0C58..0C5A; TELUGU
5716             0x0C5B,   // 0C5B..0C5C; UNKNOWN
5717             0x0C5D,   // 0C5D      ; TELUGU
5718             0x0C5E,   // 0C5E..0C5F; UNKNOWN
5719             0x0C60,   // 0C60..0C63; TELUGU
5720             0x0C64,   // 0C64..0C65; UNKNOWN
5721             0x0C66,   // 0C66..0C6F; TELUGU
5722             0x0C70,   // 0C70..0C76; UNKNOWN
5723             0x0C77,   // 0C77..0C7F; TELUGU
5724             0x0C80,   // 0C80..0C8C; KANNADA
5725             0x0C8D,   // 0C8D      ; UNKNOWN
5726             0x0C8E,   // 0C8E..0C90; KANNADA
5727             0x0C91,   // 0C91      ; UNKNOWN
5728             0x0C92,   // 0C92..0CA8; KANNADA
5729             0x0CA9,   // 0CA9      ; UNKNOWN
5730             0x0CAA,   // 0CAA..0CB3; KANNADA
5731             0x0CB4,   // 0CB4      ; UNKNOWN
5732             0x0CB5,   // 0CB5..0CB9; KANNADA
5733             0x0CBA,   // 0CBA..0CBB; UNKNOWN
5734             0x0CBC,   // 0CBC..0CC4; KANNADA
5735             0x0CC5,   // 0CC5      ; UNKNOWN
5736             0x0CC6,   // 0CC6..0CC8; KANNADA
5737             0x0CC9,   // 0CC9      ; UNKNOWN
5738             0x0CCA,   // 0CCA..0CCD; KANNADA
5739             0x0CCE,   // 0CCE..0CD4; UNKNOWN
5740             0x0CD5,   // 0CD5..0CD6; KANNADA
5741             0x0CD7,   // 0CD7..0CDC; UNKNOWN
5742             0x0CDD,   // 0CDD..0CDE; KANNADA
5743             0x0CDF,   // 0CDF      ; UNKNOWN
5744             0x0CE0,   // 0CE0..0CE3; KANNADA
5745             0x0CE4,   // 0CE4..0CE5; UNKNOWN
5746             0x0CE6,   // 0CE6..0CEF; KANNADA
5747             0x0CF0,   // 0CF0      ; UNKNOWN
5748             0x0CF1,   // 0CF1..0CF3; KANNADA
5749             0x0CF4,   // 0CF4..0CFF; UNKNOWN
5750             0x0D00,   // 0D00..0D0C; MALAYALAM
5751             0x0D0D,   // 0D0D      ; UNKNOWN
5752             0x0D0E,   // 0D0E..0D10; MALAYALAM
5753             0x0D11,   // 0D11      ; UNKNOWN
5754             0x0D12,   // 0D12..0D44; MALAYALAM
5755             0x0D45,   // 0D45      ; UNKNOWN
5756             0x0D46,   // 0D46..0D48; MALAYALAM
5757             0x0D49,   // 0D49      ; UNKNOWN
5758             0x0D4A,   // 0D4A..0D4F; MALAYALAM
5759             0x0D50,   // 0D50..0D53; UNKNOWN
5760             0x0D54,   // 0D54..0D63; MALAYALAM
5761             0x0D64,   // 0D64..0D65; UNKNOWN
5762             0x0D66,   // 0D66..0D7F; MALAYALAM
5763             0x0D80,   // 0D80      ; UNKNOWN
5764             0x0D81,   // 0D81..0D83; SINHALA
5765             0x0D84,   // 0D84      ; UNKNOWN
5766             0x0D85,   // 0D85..0D96; SINHALA
5767             0x0D97,   // 0D97..0D99; UNKNOWN
5768             0x0D9A,   // 0D9A..0DB1; SINHALA
5769             0x0DB2,   // 0DB2      ; UNKNOWN
5770             0x0DB3,   // 0DB3..0DBB; SINHALA
5771             0x0DBC,   // 0DBC      ; UNKNOWN
5772             0x0DBD,   // 0DBD      ; SINHALA
5773             0x0DBE,   // 0DBE..0DBF; UNKNOWN
5774             0x0DC0,   // 0DC0..0DC6; SINHALA
5775             0x0DC7,   // 0DC7..0DC9; UNKNOWN
5776             0x0DCA,   // 0DCA      ; SINHALA
5777             0x0DCB,   // 0DCB..0DCE; UNKNOWN
5778             0x0DCF,   // 0DCF..0DD4; SINHALA
5779             0x0DD5,   // 0DD5      ; UNKNOWN
5780             0x0DD6,   // 0DD6      ; SINHALA
5781             0x0DD7,   // 0DD7      ; UNKNOWN
5782             0x0DD8,   // 0DD8..0DDF; SINHALA
5783             0x0DE0,   // 0DE0..0DE5; UNKNOWN
5784             0x0DE6,   // 0DE6..0DEF; SINHALA
5785             0x0DF0,   // 0DF0..0DF1; UNKNOWN
5786             0x0DF2,   // 0DF2..0DF4; SINHALA
5787             0x0DF5,   // 0DF5..0E00; UNKNOWN
5788             0x0E01,   // 0E01..0E3A; THAI
5789             0x0E3B,   // 0E3B..0E3E; UNKNOWN
5790             0x0E3F,   // 0E3F      ; COMMON
5791             0x0E40,   // 0E40..0E5B; THAI
5792             0x0E5C,   // 0E5C..0E80; UNKNOWN
5793             0x0E81,   // 0E81..0E82; LAO
5794             0x0E83,   // 0E83      ; UNKNOWN
5795             0x0E84,   // 0E84      ; LAO
5796             0x0E85,   // 0E85      ; UNKNOWN
5797             0x0E86,   // 0E86..0E8A; LAO
5798             0x0E8B,   // 0E8B      ; UNKNOWN
5799             0x0E8C,   // 0E8C..0EA3; LAO
5800             0x0EA4,   // 0EA4      ; UNKNOWN
5801             0x0EA5,   // 0EA5      ; LAO
5802             0x0EA6,   // 0EA6      ; UNKNOWN
5803             0x0EA7,   // 0EA7..0EBD; LAO
5804             0x0EBE,   // 0EBE..0EBF; UNKNOWN
5805             0x0EC0,   // 0EC0..0EC4; LAO
5806             0x0EC5,   // 0EC5      ; UNKNOWN
5807             0x0EC6,   // 0EC6      ; LAO
5808             0x0EC7,   // 0EC7      ; UNKNOWN
5809             0x0EC8,   // 0EC8..0ECE; LAO
5810             0x0ECF,   // 0ECF      ; UNKNOWN
5811             0x0ED0,   // 0ED0..0ED9; LAO
5812             0x0EDA,   // 0EDA..0EDB; UNKNOWN
5813             0x0EDC,   // 0EDC..0EDF; LAO
5814             0x0EE0,   // 0EE0..0EFF; UNKNOWN
5815             0x0F00,   // 0F00..0F47; TIBETAN
5816             0x0F48,   // 0F48      ; UNKNOWN
5817             0x0F49,   // 0F49..0F6C; TIBETAN
5818             0x0F6D,   // 0F6D..0F70; UNKNOWN
5819             0x0F71,   // 0F71..0F97; TIBETAN
5820             0x0F98,   // 0F98      ; UNKNOWN
5821             0x0F99,   // 0F99..0FBC; TIBETAN
5822             0x0FBD,   // 0FBD      ; UNKNOWN
5823             0x0FBE,   // 0FBE..0FCC; TIBETAN
5824             0x0FCD,   // 0FCD      ; UNKNOWN
5825             0x0FCE,   // 0FCE..0FD4; TIBETAN
5826             0x0FD5,   // 0FD5..0FD8; COMMON
5827             0x0FD9,   // 0FD9..0FDA; TIBETAN
5828             0x0FDB,   // 0FDB..0FFF; UNKNOWN
5829             0x1000,   // 1000..109F; MYANMAR
5830             0x10A0,   // 10A0..10C5; GEORGIAN
5831             0x10C6,   // 10C6      ; UNKNOWN
5832             0x10C7,   // 10C7      ; GEORGIAN
5833             0x10C8,   // 10C8..10CC; UNKNOWN
5834             0x10CD,   // 10CD      ; GEORGIAN
5835             0x10CE,   // 10CE..10CF; UNKNOWN
5836             0x10D0,   // 10D0..10FA; GEORGIAN
5837             0x10FB,   // 10FB      ; COMMON
5838             0x10FC,   // 10FC..10FF; GEORGIAN
5839             0x1100,   // 1100..11FF; HANGUL
5840             0x1200,   // 1200..1248; ETHIOPIC
5841             0x1249,   // 1249      ; UNKNOWN
5842             0x124A,   // 124A..124D; ETHIOPIC
5843             0x124E,   // 124E..124F; UNKNOWN
5844             0x1250,   // 1250..1256; ETHIOPIC
5845             0x1257,   // 1257      ; UNKNOWN
5846             0x1258,   // 1258      ; ETHIOPIC
5847             0x1259,   // 1259      ; UNKNOWN
5848             0x125A,   // 125A..125D; ETHIOPIC
5849             0x125E,   // 125E..125F; UNKNOWN
5850             0x1260,   // 1260..1288; ETHIOPIC
5851             0x1289,   // 1289      ; UNKNOWN
5852             0x128A,   // 128A..128D; ETHIOPIC
5853             0x128E,   // 128E..128F; UNKNOWN
5854             0x1290,   // 1290..12B0; ETHIOPIC
5855             0x12B1,   // 12B1      ; UNKNOWN
5856             0x12B2,   // 12B2..12B5; ETHIOPIC
5857             0x12B6,   // 12B6..12B7; UNKNOWN
5858             0x12B8,   // 12B8..12BE; ETHIOPIC
5859             0x12BF,   // 12BF      ; UNKNOWN
5860             0x12C0,   // 12C0      ; ETHIOPIC
5861             0x12C1,   // 12C1      ; UNKNOWN
5862             0x12C2,   // 12C2..12C5; ETHIOPIC
5863             0x12C6,   // 12C6..12C7; UNKNOWN
5864             0x12C8,   // 12C8..12D6; ETHIOPIC
5865             0x12D7,   // 12D7      ; UNKNOWN
5866             0x12D8,   // 12D8..1310; ETHIOPIC
5867             0x1311,   // 1311      ; UNKNOWN
5868             0x1312,   // 1312..1315; ETHIOPIC
5869             0x1316,   // 1316..1317; UNKNOWN
5870             0x1318,   // 1318..135A; ETHIOPIC
5871             0x135B,   // 135B..135C; UNKNOWN
5872             0x135D,   // 135D..137C; ETHIOPIC
5873             0x137D,   // 137D..137F; UNKNOWN
5874             0x1380,   // 1380..1399; ETHIOPIC
5875             0x139A,   // 139A..139F; UNKNOWN
5876             0x13A0,   // 13A0..13F5; CHEROKEE
5877             0x13F6,   // 13F6..13F7; UNKNOWN
5878             0x13F8,   // 13F8..13FD; CHEROKEE
5879             0x13FE,   // 13FE..13FF; UNKNOWN
5880             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
5881             0x1680,   // 1680..169C; OGHAM
5882             0x169D,   // 169D..169F; UNKNOWN
5883             0x16A0,   // 16A0..16EA; RUNIC
5884             0x16EB,   // 16EB..16ED; COMMON
5885             0x16EE,   // 16EE..16F8; RUNIC
5886             0x16F9,   // 16F9..16FF; UNKNOWN
5887             0x1700,   // 1700..1715; TAGALOG
5888             0x1716,   // 1716..171E; UNKNOWN
5889             0x171F,   // 171F      ; TAGALOG
5890             0x1720,   // 1720..1734; HANUNOO
5891             0x1735,   // 1735..1736; COMMON
5892             0x1737,   // 1737..173F; UNKNOWN
5893             0x1740,   // 1740..1753; BUHID
5894             0x1754,   // 1754..175F; UNKNOWN
5895             0x1760,   // 1760..176C; TAGBANWA
5896             0x176D,   // 176D      ; UNKNOWN
5897             0x176E,   // 176E..1770; TAGBANWA
5898             0x1771,   // 1771      ; UNKNOWN
5899             0x1772,   // 1772..1773; TAGBANWA
5900             0x1774,   // 1774..177F; UNKNOWN
5901             0x1780,   // 1780..17DD; KHMER
5902             0x17DE,   // 17DE..17DF; UNKNOWN
5903             0x17E0,   // 17E0..17E9; KHMER
5904             0x17EA,   // 17EA..17EF; UNKNOWN
5905             0x17F0,   // 17F0..17F9; KHMER
5906             0x17FA,   // 17FA..17FF; UNKNOWN
5907             0x1800,   // 1800..1801; MONGOLIAN
5908             0x1802,   // 1802..1803; COMMON
5909             0x1804,   // 1804      ; MONGOLIAN
5910             0x1805,   // 1805      ; COMMON
5911             0x1806,   // 1806..1819; MONGOLIAN
5912             0x181A,   // 181A..181F; UNKNOWN
5913             0x1820,   // 1820..1878; MONGOLIAN
5914             0x1879,   // 1879..187F; UNKNOWN
5915             0x1880,   // 1880..18AA; MONGOLIAN
5916             0x18AB,   // 18AB..18AF; UNKNOWN
5917             0x18B0,   // 18B0..18F5; CANADIAN_ABORIGINAL
5918             0x18F6,   // 18F6..18FF; UNKNOWN
5919             0x1900,   // 1900..191E; LIMBU
5920             0x191F,   // 191F      ; UNKNOWN
5921             0x1920,   // 1920..192B; LIMBU
5922             0x192C,   // 192C..192F; UNKNOWN
5923             0x1930,   // 1930..193B; LIMBU
5924             0x193C,   // 193C..193F; UNKNOWN
5925             0x1940,   // 1940      ; LIMBU
5926             0x1941,   // 1941..1943; UNKNOWN
5927             0x1944,   // 1944..194F; LIMBU
5928             0x1950,   // 1950..196D; TAI_LE
5929             0x196E,   // 196E..196F; UNKNOWN
5930             0x1970,   // 1970..1974; TAI_LE
5931             0x1975,   // 1975..197F; UNKNOWN
5932             0x1980,   // 1980..19AB; NEW_TAI_LUE
5933             0x19AC,   // 19AC..19AF; UNKNOWN
5934             0x19B0,   // 19B0..19C9; NEW_TAI_LUE
5935             0x19CA,   // 19CA..19CF; UNKNOWN
5936             0x19D0,   // 19D0..19DA; NEW_TAI_LUE
5937             0x19DB,   // 19DB..19DD; UNKNOWN
5938             0x19DE,   // 19DE..19DF; NEW_TAI_LUE
5939             0x19E0,   // 19E0..19FF; KHMER
5940             0x1A00,   // 1A00..1A1B; BUGINESE
5941             0x1A1C,   // 1A1C..1A1D; UNKNOWN
5942             0x1A1E,   // 1A1E..1A1F; BUGINESE
5943             0x1A20,   // 1A20..1A5E; TAI_THAM
5944             0x1A5F,   // 1A5F      ; UNKNOWN
5945             0x1A60,   // 1A60..1A7C; TAI_THAM
5946             0x1A7D,   // 1A7D..1A7E; UNKNOWN
5947             0x1A7F,   // 1A7F..1A89; TAI_THAM
5948             0x1A8A,   // 1A8A..1A8F; UNKNOWN
5949             0x1A90,   // 1A90..1A99; TAI_THAM
5950             0x1A9A,   // 1A9A..1A9F; UNKNOWN
5951             0x1AA0,   // 1AA0..1AAD; TAI_THAM
5952             0x1AAE,   // 1AAE..1AAF; UNKNOWN
5953             0x1AB0,   // 1AB0..1ACE; INHERITED
5954             0x1ACF,   // 1ACF..1AFF; UNKNOWN
5955             0x1B00,   // 1B00..1B4C; BALINESE
5956             0x1B4D,   // 1B4D..1B4F; UNKNOWN
5957             0x1B50,   // 1B50..1B7E; BALINESE
5958             0x1B7F,   // 1B7F      ; UNKNOWN
5959             0x1B80,   // 1B80..1BBF; SUNDANESE
5960             0x1BC0,   // 1BC0..1BF3; BATAK
5961             0x1BF4,   // 1BF4..1BFB; UNKNOWN
5962             0x1BFC,   // 1BFC..1BFF; BATAK
5963             0x1C00,   // 1C00..1C37; LEPCHA
5964             0x1C38,   // 1C38..1C3A; UNKNOWN
5965             0x1C3B,   // 1C3B..1C49; LEPCHA
5966             0x1C4A,   // 1C4A..1C4C; UNKNOWN
5967             0x1C4D,   // 1C4D..1C4F; LEPCHA
5968             0x1C50,   // 1C50..1C7F; OL_CHIKI
5969             0x1C80,   // 1C80..1C88; CYRILLIC
5970             0x1C89,   // 1C89..1C8F; UNKNOWN
5971             0x1C90,   // 1C90..1CBA; GEORGIAN
5972             0x1CBB,   // 1CBB..1CBC; UNKNOWN
5973             0x1CBD,   // 1CBD..1CBF; GEORGIAN
5974             0x1CC0,   // 1CC0..1CC7; SUNDANESE
5975             0x1CC8,   // 1CC8..1CCF; UNKNOWN
5976             0x1CD0,   // 1CD0..1CD2; INHERITED
5977             0x1CD3,   // 1CD3      ; COMMON
5978             0x1CD4,   // 1CD4..1CE0; INHERITED
5979             0x1CE1,   // 1CE1      ; COMMON
5980             0x1CE2,   // 1CE2..1CE8; INHERITED
5981             0x1CE9,   // 1CE9..1CEC; COMMON
5982             0x1CED,   // 1CED      ; INHERITED
5983             0x1CEE,   // 1CEE..1CF3; COMMON
5984             0x1CF4,   // 1CF4      ; INHERITED
5985             0x1CF5,   // 1CF5..1CF7; COMMON
5986             0x1CF8,   // 1CF8..1CF9; INHERITED
5987             0x1CFA,   // 1CFA      ; COMMON
5988             0x1CFB,   // 1CFB..1CFF; UNKNOWN
5989             0x1D00,   // 1D00..1D25; LATIN
5990             0x1D26,   // 1D26..1D2A; GREEK
5991             0x1D2B,   // 1D2B      ; CYRILLIC
5992             0x1D2C,   // 1D2C..1D5C; LATIN
5993             0x1D5D,   // 1D5D..1D61; GREEK
5994             0x1D62,   // 1D62..1D65; LATIN
5995             0x1D66,   // 1D66..1D6A; GREEK
5996             0x1D6B,   // 1D6B..1D77; LATIN
5997             0x1D78,   // 1D78      ; CYRILLIC
5998             0x1D79,   // 1D79..1DBE; LATIN
5999             0x1DBF,   // 1DBF      ; GREEK
6000             0x1DC0,   // 1DC0..1DFF; INHERITED
6001             0x1E00,   // 1E00..1EFF; LATIN
6002             0x1F00,   // 1F00..1F15; GREEK
6003             0x1F16,   // 1F16..1F17; UNKNOWN
6004             0x1F18,   // 1F18..1F1D; GREEK
6005             0x1F1E,   // 1F1E..1F1F; UNKNOWN
6006             0x1F20,   // 1F20..1F45; GREEK
6007             0x1F46,   // 1F46..1F47; UNKNOWN
6008             0x1F48,   // 1F48..1F4D; GREEK
6009             0x1F4E,   // 1F4E..1F4F; UNKNOWN
6010             0x1F50,   // 1F50..1F57; GREEK
6011             0x1F58,   // 1F58      ; UNKNOWN
6012             0x1F59,   // 1F59      ; GREEK
6013             0x1F5A,   // 1F5A      ; UNKNOWN
6014             0x1F5B,   // 1F5B      ; GREEK
6015             0x1F5C,   // 1F5C      ; UNKNOWN
6016             0x1F5D,   // 1F5D      ; GREEK
6017             0x1F5E,   // 1F5E      ; UNKNOWN
6018             0x1F5F,   // 1F5F..1F7D; GREEK
6019             0x1F7E,   // 1F7E..1F7F; UNKNOWN
6020             0x1F80,   // 1F80..1FB4; GREEK
6021             0x1FB5,   // 1FB5      ; UNKNOWN
6022             0x1FB6,   // 1FB6..1FC4; GREEK
6023             0x1FC5,   // 1FC5      ; UNKNOWN
6024             0x1FC6,   // 1FC6..1FD3; GREEK
6025             0x1FD4,   // 1FD4..1FD5; UNKNOWN
6026             0x1FD6,   // 1FD6..1FDB; GREEK
6027             0x1FDC,   // 1FDC      ; UNKNOWN
6028             0x1FDD,   // 1FDD..1FEF; GREEK
6029             0x1FF0,   // 1FF0..1FF1; UNKNOWN
6030             0x1FF2,   // 1FF2..1FF4; GREEK
6031             0x1FF5,   // 1FF5      ; UNKNOWN
6032             0x1FF6,   // 1FF6..1FFE; GREEK
6033             0x1FFF,   // 1FFF      ; UNKNOWN
6034             0x2000,   // 2000..200B; COMMON
6035             0x200C,   // 200C..200D; INHERITED
6036             0x200E,   // 200E..2064; COMMON
6037             0x2065,   // 2065      ; UNKNOWN
6038             0x2066,   // 2066..2070; COMMON
6039             0x2071,   // 2071      ; LATIN
6040             0x2072,   // 2072..2073; UNKNOWN
6041             0x2074,   // 2074..207E; COMMON
6042             0x207F,   // 207F      ; LATIN
6043             0x2080,   // 2080..208E; COMMON
6044             0x208F,   // 208F      ; UNKNOWN
6045             0x2090,   // 2090..209C; LATIN
6046             0x209D,   // 209D..209F; UNKNOWN
6047             0x20A0,   // 20A0..20C0; COMMON
6048             0x20C1,   // 20C1..20CF; UNKNOWN
6049             0x20D0,   // 20D0..20F0; INHERITED
6050             0x20F1,   // 20F1..20FF; UNKNOWN
6051             0x2100,   // 2100..2125; COMMON
6052             0x2126,   // 2126      ; GREEK
6053             0x2127,   // 2127..2129; COMMON
6054             0x212A,   // 212A..212B; LATIN
6055             0x212C,   // 212C..2131; COMMON
6056             0x2132,   // 2132      ; LATIN
6057             0x2133,   // 2133..214D; COMMON
6058             0x214E,   // 214E      ; LATIN
6059             0x214F,   // 214F..215F; COMMON
6060             0x2160,   // 2160..2188; LATIN
6061             0x2189,   // 2189..218B; COMMON
6062             0x218C,   // 218C..218F; UNKNOWN
6063             0x2190,   // 2190..2426; COMMON
6064             0x2427,   // 2427..243F; UNKNOWN
6065             0x2440,   // 2440..244A; COMMON
6066             0x244B,   // 244B..245F; UNKNOWN
6067             0x2460,   // 2460..27FF; COMMON
6068             0x2800,   // 2800..28FF; BRAILLE
6069             0x2900,   // 2900..2B73; COMMON
6070             0x2B74,   // 2B74..2B75; UNKNOWN
6071             0x2B76,   // 2B76..2B95; COMMON
6072             0x2B96,   // 2B96      ; UNKNOWN
6073             0x2B97,   // 2B97..2BFF; COMMON
6074             0x2C00,   // 2C00..2C5F; GLAGOLITIC
6075             0x2C60,   // 2C60..2C7F; LATIN
6076             0x2C80,   // 2C80..2CF3; COPTIC
6077             0x2CF4,   // 2CF4..2CF8; UNKNOWN
6078             0x2CF9,   // 2CF9..2CFF; COPTIC
6079             0x2D00,   // 2D00..2D25; GEORGIAN
6080             0x2D26,   // 2D26      ; UNKNOWN
6081             0x2D27,   // 2D27      ; GEORGIAN
6082             0x2D28,   // 2D28..2D2C; UNKNOWN
6083             0x2D2D,   // 2D2D      ; GEORGIAN
6084             0x2D2E,   // 2D2E..2D2F; UNKNOWN
6085             0x2D30,   // 2D30..2D67; TIFINAGH
6086             0x2D68,   // 2D68..2D6E; UNKNOWN
6087             0x2D6F,   // 2D6F..2D70; TIFINAGH
6088             0x2D71,   // 2D71..2D7E; UNKNOWN
6089             0x2D7F,   // 2D7F      ; TIFINAGH
6090             0x2D80,   // 2D80..2D96; ETHIOPIC
6091             0x2D97,   // 2D97..2D9F; UNKNOWN
6092             0x2DA0,   // 2DA0..2DA6; ETHIOPIC
6093             0x2DA7,   // 2DA7      ; UNKNOWN
6094             0x2DA8,   // 2DA8..2DAE; ETHIOPIC
6095             0x2DAF,   // 2DAF      ; UNKNOWN
6096             0x2DB0,   // 2DB0..2DB6; ETHIOPIC
6097             0x2DB7,   // 2DB7      ; UNKNOWN
6098             0x2DB8,   // 2DB8..2DBE; ETHIOPIC
6099             0x2DBF,   // 2DBF      ; UNKNOWN
6100             0x2DC0,   // 2DC0..2DC6; ETHIOPIC
6101             0x2DC7,   // 2DC7      ; UNKNOWN
6102             0x2DC8,   // 2DC8..2DCE; ETHIOPIC
6103             0x2DCF,   // 2DCF      ; UNKNOWN
6104             0x2DD0,   // 2DD0..2DD6; ETHIOPIC
6105             0x2DD7,   // 2DD7      ; UNKNOWN
6106             0x2DD8,   // 2DD8..2DDE; ETHIOPIC
6107             0x2DDF,   // 2DDF      ; UNKNOWN
6108             0x2DE0,   // 2DE0..2DFF; CYRILLIC
6109             0x2E00,   // 2E00..2E5D; COMMON
6110             0x2E5E,   // 2E5E..2E7F; UNKNOWN
6111             0x2E80,   // 2E80..2E99; HAN
6112             0x2E9A,   // 2E9A      ; UNKNOWN
6113             0x2E9B,   // 2E9B..2EF3; HAN
6114             0x2EF4,   // 2EF4..2EFF; UNKNOWN
6115             0x2F00,   // 2F00..2FD5; HAN
6116             0x2FD6,   // 2FD6..2FEF; UNKNOWN
6117             0x2FF0,   // 2FF0..2FFB; COMMON
6118             0x2FFC,   // 2FFC..2FFF; UNKNOWN
6119             0x3000,   // 3000..3004; COMMON
6120             0x3005,   // 3005      ; HAN
6121             0x3006,   // 3006      ; COMMON
6122             0x3007,   // 3007      ; HAN
6123             0x3008,   // 3008..3020; COMMON
6124             0x3021,   // 3021..3029; HAN
6125             0x302A,   // 302A..302D; INHERITED
6126             0x302E,   // 302E..302F; HANGUL
6127             0x3030,   // 3030..3037; COMMON
6128             0x3038,   // 3038..303B; HAN
6129             0x303C,   // 303C..303F; COMMON
6130             0x3040,   // 3040      ; UNKNOWN
6131             0x3041,   // 3041..3096; HIRAGANA
6132             0x3097,   // 3097..3098; UNKNOWN
6133             0x3099,   // 3099..309A; INHERITED
6134             0x309B,   // 309B..309C; COMMON
6135             0x309D,   // 309D..309F; HIRAGANA
6136             0x30A0,   // 30A0      ; COMMON
6137             0x30A1,   // 30A1..30FA; KATAKANA
6138             0x30FB,   // 30FB..30FC; COMMON
6139             0x30FD,   // 30FD..30FF; KATAKANA
6140             0x3100,   // 3100..3104; UNKNOWN
6141             0x3105,   // 3105..312F; BOPOMOFO
6142             0x3130,   // 3130      ; UNKNOWN
6143             0x3131,   // 3131..318E; HANGUL
6144             0x318F,   // 318F      ; UNKNOWN
6145             0x3190,   // 3190..319F; COMMON
6146             0x31A0,   // 31A0..31BF; BOPOMOFO
6147             0x31C0,   // 31C0..31E3; COMMON
6148             0x31E4,   // 31E4..31EF; UNKNOWN
6149             0x31F0,   // 31F0..31FF; KATAKANA
6150             0x3200,   // 3200..321E; HANGUL
6151             0x321F,   // 321F      ; UNKNOWN
6152             0x3220,   // 3220..325F; COMMON
6153             0x3260,   // 3260..327E; HANGUL
6154             0x327F,   // 327F..32CF; COMMON
6155             0x32D0,   // 32D0..32FE; KATAKANA
6156             0x32FF,   // 32FF      ; COMMON
6157             0x3300,   // 3300..3357; KATAKANA
6158             0x3358,   // 3358..33FF; COMMON
6159             0x3400,   // 3400..4DBF; HAN
6160             0x4DC0,   // 4DC0..4DFF; COMMON
6161             0x4E00,   // 4E00..9FFF; HAN
6162             0xA000,   // A000..A48C; YI
6163             0xA48D,   // A48D..A48F; UNKNOWN
6164             0xA490,   // A490..A4C6; YI
6165             0xA4C7,   // A4C7..A4CF; UNKNOWN
6166             0xA4D0,   // A4D0..A4FF; LISU
6167             0xA500,   // A500..A62B; VAI
6168             0xA62C,   // A62C..A63F; UNKNOWN
6169             0xA640,   // A640..A69F; CYRILLIC
6170             0xA6A0,   // A6A0..A6F7; BAMUM
6171             0xA6F8,   // A6F8..A6FF; UNKNOWN
6172             0xA700,   // A700..A721; COMMON
6173             0xA722,   // A722..A787; LATIN
6174             0xA788,   // A788..A78A; COMMON
6175             0xA78B,   // A78B..A7CA; LATIN
6176             0xA7CB,   // A7CB..A7CF; UNKNOWN
6177             0xA7D0,   // A7D0..A7D1; LATIN
6178             0xA7D2,   // A7D2      ; UNKNOWN
6179             0xA7D3,   // A7D3      ; LATIN
6180             0xA7D4,   // A7D4      ; UNKNOWN
6181             0xA7D5,   // A7D5..A7D9; LATIN
6182             0xA7DA,   // A7DA..A7F1; UNKNOWN
6183             0xA7F2,   // A7F2..A7FF; LATIN
6184             0xA800,   // A800..A82C; SYLOTI_NAGRI
6185             0xA82D,   // A82D..A82F; UNKNOWN
6186             0xA830,   // A830..A839; COMMON
6187             0xA83A,   // A83A..A83F; UNKNOWN
6188             0xA840,   // A840..A877; PHAGS_PA
6189             0xA878,   // A878..A87F; UNKNOWN
6190             0xA880,   // A880..A8C5; SAURASHTRA
6191             0xA8C6,   // A8C6..A8CD; UNKNOWN
6192             0xA8CE,   // A8CE..A8D9; SAURASHTRA
6193             0xA8DA,   // A8DA..A8DF; UNKNOWN
6194             0xA8E0,   // A8E0..A8FF; DEVANAGARI
6195             0xA900,   // A900..A92D; KAYAH_LI
6196             0xA92E,   // A92E      ; COMMON
6197             0xA92F,   // A92F      ; KAYAH_LI
6198             0xA930,   // A930..A953; REJANG
6199             0xA954,   // A954..A95E; UNKNOWN
6200             0xA95F,   // A95F      ; REJANG
6201             0xA960,   // A960..A97C; HANGUL
6202             0xA97D,   // A97D..A97F; UNKNOWN
6203             0xA980,   // A980..A9CD; JAVANESE
6204             0xA9CE,   // A9CE      ; UNKNOWN
6205             0xA9CF,   // A9CF      ; COMMON
6206             0xA9D0,   // A9D0..A9D9; JAVANESE
6207             0xA9DA,   // A9DA..A9DD; UNKNOWN
6208             0xA9DE,   // A9DE..A9DF; JAVANESE
6209             0xA9E0,   // A9E0..A9FE; MYANMAR
6210             0xA9FF,   // A9FF      ; UNKNOWN
6211             0xAA00,   // AA00..AA36; CHAM
6212             0xAA37,   // AA37..AA3F; UNKNOWN
6213             0xAA40,   // AA40..AA4D; CHAM
6214             0xAA4E,   // AA4E..AA4F; UNKNOWN
6215             0xAA50,   // AA50..AA59; CHAM
6216             0xAA5A,   // AA5A..AA5B; UNKNOWN
6217             0xAA5C,   // AA5C..AA5F; CHAM
6218             0xAA60,   // AA60..AA7F; MYANMAR
6219             0xAA80,   // AA80..AAC2; TAI_VIET
6220             0xAAC3,   // AAC3..AADA; UNKNOWN
6221             0xAADB,   // AADB..AADF; TAI_VIET
6222             0xAAE0,   // AAE0..AAF6; MEETEI_MAYEK
6223             0xAAF7,   // AAF7..AB00; UNKNOWN
6224             0xAB01,   // AB01..AB06; ETHIOPIC
6225             0xAB07,   // AB07..AB08; UNKNOWN
6226             0xAB09,   // AB09..AB0E; ETHIOPIC
6227             0xAB0F,   // AB0F..AB10; UNKNOWN
6228             0xAB11,   // AB11..AB16; ETHIOPIC
6229             0xAB17,   // AB17..AB1F; UNKNOWN
6230             0xAB20,   // AB20..AB26; ETHIOPIC
6231             0xAB27,   // AB27      ; UNKNOWN
6232             0xAB28,   // AB28..AB2E; ETHIOPIC
6233             0xAB2F,   // AB2F      ; UNKNOWN
6234             0xAB30,   // AB30..AB5A; LATIN
6235             0xAB5B,   // AB5B      ; COMMON
6236             0xAB5C,   // AB5C..AB64; LATIN
6237             0xAB65,   // AB65      ; GREEK
6238             0xAB66,   // AB66..AB69; LATIN
6239             0xAB6A,   // AB6A..AB6B; COMMON
6240             0xAB6C,   // AB6C..AB6F; UNKNOWN
6241             0xAB70,   // AB70..ABBF; CHEROKEE
6242             0xABC0,   // ABC0..ABED; MEETEI_MAYEK
6243             0xABEE,   // ABEE..ABEF; UNKNOWN
6244             0xABF0,   // ABF0..ABF9; MEETEI_MAYEK
6245             0xABFA,   // ABFA..ABFF; UNKNOWN
6246             0xAC00,   // AC00..D7A3; HANGUL
6247             0xD7A4,   // D7A4..D7AF; UNKNOWN
6248             0xD7B0,   // D7B0..D7C6; HANGUL
6249             0xD7C7,   // D7C7..D7CA; UNKNOWN
6250             0xD7CB,   // D7CB..D7FB; HANGUL
6251             0xD7FC,   // D7FC..F8FF; UNKNOWN
6252             0xF900,   // F900..FA6D; HAN
6253             0xFA6E,   // FA6E..FA6F; UNKNOWN
6254             0xFA70,   // FA70..FAD9; HAN
6255             0xFADA,   // FADA..FAFF; UNKNOWN
6256             0xFB00,   // FB00..FB06; LATIN
6257             0xFB07,   // FB07..FB12; UNKNOWN
6258             0xFB13,   // FB13..FB17; ARMENIAN
6259             0xFB18,   // FB18..FB1C; UNKNOWN
6260             0xFB1D,   // FB1D..FB36; HEBREW
6261             0xFB37,   // FB37      ; UNKNOWN
6262             0xFB38,   // FB38..FB3C; HEBREW
6263             0xFB3D,   // FB3D      ; UNKNOWN
6264             0xFB3E,   // FB3E      ; HEBREW
6265             0xFB3F,   // FB3F      ; UNKNOWN
6266             0xFB40,   // FB40..FB41; HEBREW
6267             0xFB42,   // FB42      ; UNKNOWN
6268             0xFB43,   // FB43..FB44; HEBREW
6269             0xFB45,   // FB45      ; UNKNOWN
6270             0xFB46,   // FB46..FB4F; HEBREW
6271             0xFB50,   // FB50..FBC2; ARABIC
6272             0xFBC3,   // FBC3..FBD2; UNKNOWN
6273             0xFBD3,   // FBD3..FD3D; ARABIC
6274             0xFD3E,   // FD3E..FD3F; COMMON
6275             0xFD40,   // FD40..FD8F; ARABIC
6276             0xFD90,   // FD90..FD91; UNKNOWN
6277             0xFD92,   // FD92..FDC7; ARABIC
6278             0xFDC8,   // FDC8..FDCE; UNKNOWN
6279             0xFDCF,   // FDCF      ; ARABIC
6280             0xFDD0,   // FDD0..FDEF; UNKNOWN
6281             0xFDF0,   // FDF0..FDFF; ARABIC
6282             0xFE00,   // FE00..FE0F; INHERITED
6283             0xFE10,   // FE10..FE19; COMMON
6284             0xFE1A,   // FE1A..FE1F; UNKNOWN
6285             0xFE20,   // FE20..FE2D; INHERITED
6286             0xFE2E,   // FE2E..FE2F; CYRILLIC
6287             0xFE30,   // FE30..FE52; COMMON
6288             0xFE53,   // FE53      ; UNKNOWN
6289             0xFE54,   // FE54..FE66; COMMON
6290             0xFE67,   // FE67      ; UNKNOWN
6291             0xFE68,   // FE68..FE6B; COMMON
6292             0xFE6C,   // FE6C..FE6F; UNKNOWN
6293             0xFE70,   // FE70..FE74; ARABIC
6294             0xFE75,   // FE75      ; UNKNOWN
6295             0xFE76,   // FE76..FEFC; ARABIC
6296             0xFEFD,   // FEFD..FEFE; UNKNOWN
6297             0xFEFF,   // FEFF      ; COMMON
6298             0xFF00,   // FF00      ; UNKNOWN
6299             0xFF01,   // FF01..FF20; COMMON
6300             0xFF21,   // FF21..FF3A; LATIN
6301             0xFF3B,   // FF3B..FF40; COMMON
6302             0xFF41,   // FF41..FF5A; LATIN
6303             0xFF5B,   // FF5B..FF65; COMMON
6304             0xFF66,   // FF66..FF6F; KATAKANA
6305             0xFF70,   // FF70      ; COMMON
6306             0xFF71,   // FF71..FF9D; KATAKANA
6307             0xFF9E,   // FF9E..FF9F; COMMON
6308             0xFFA0,   // FFA0..FFBE; HANGUL
6309             0xFFBF,   // FFBF..FFC1; UNKNOWN
6310             0xFFC2,   // FFC2..FFC7; HANGUL
6311             0xFFC8,   // FFC8..FFC9; UNKNOWN
6312             0xFFCA,   // FFCA..FFCF; HANGUL
6313             0xFFD0,   // FFD0..FFD1; UNKNOWN
6314             0xFFD2,   // FFD2..FFD7; HANGUL
6315             0xFFD8,   // FFD8..FFD9; UNKNOWN
6316             0xFFDA,   // FFDA..FFDC; HANGUL
6317             0xFFDD,   // FFDD..FFDF; UNKNOWN
6318             0xFFE0,   // FFE0..FFE6; COMMON
6319             0xFFE7,   // FFE7      ; UNKNOWN
6320             0xFFE8,   // FFE8..FFEE; COMMON
6321             0xFFEF,   // FFEF..FFF8; UNKNOWN
6322             0xFFF9,   // FFF9..FFFD; COMMON
6323             0xFFFE,   // FFFE..FFFF; UNKNOWN
6324             0x10000,  // 10000..1000B; LINEAR_B
6325             0x1000C,  // 1000C       ; UNKNOWN
6326             0x1000D,  // 1000D..10026; LINEAR_B
6327             0x10027,  // 10027       ; UNKNOWN
6328             0x10028,  // 10028..1003A; LINEAR_B
6329             0x1003B,  // 1003B       ; UNKNOWN
6330             0x1003C,  // 1003C..1003D; LINEAR_B
6331             0x1003E,  // 1003E       ; UNKNOWN
6332             0x1003F,  // 1003F..1004D; LINEAR_B
6333             0x1004E,  // 1004E..1004F; UNKNOWN
6334             0x10050,  // 10050..1005D; LINEAR_B
6335             0x1005E,  // 1005E..1007F; UNKNOWN
6336             0x10080,  // 10080..100FA; LINEAR_B
6337             0x100FB,  // 100FB..100FF; UNKNOWN
6338             0x10100,  // 10100..10102; COMMON
6339             0x10103,  // 10103..10106; UNKNOWN
6340             0x10107,  // 10107..10133; COMMON
6341             0x10134,  // 10134..10136; UNKNOWN
6342             0x10137,  // 10137..1013F; COMMON
6343             0x10140,  // 10140..1018E; GREEK
6344             0x1018F,  // 1018F       ; UNKNOWN
6345             0x10190,  // 10190..1019C; COMMON
6346             0x1019D,  // 1019D..1019F; UNKNOWN
6347             0x101A0,  // 101A0       ; GREEK
6348             0x101A1,  // 101A1..101CF; UNKNOWN
6349             0x101D0,  // 101D0..101FC; COMMON
6350             0x101FD,  // 101FD       ; INHERITED
6351             0x101FE,  // 101FE..1027F; UNKNOWN
6352             0x10280,  // 10280..1029C; LYCIAN
6353             0x1029D,  // 1029D..1029F; UNKNOWN
6354             0x102A0,  // 102A0..102D0; CARIAN
6355             0x102D1,  // 102D1..102DF; UNKNOWN
6356             0x102E0,  // 102E0       ; INHERITED
6357             0x102E1,  // 102E1..102FB; COMMON
6358             0x102FC,  // 102FC..102FF; UNKNOWN
6359             0x10300,  // 10300..10323; OLD_ITALIC
6360             0x10324,  // 10324..1032C; UNKNOWN
6361             0x1032D,  // 1032D..1032F; OLD_ITALIC
6362             0x10330,  // 10330..1034A; GOTHIC
6363             0x1034B,  // 1034B..1034F; UNKNOWN
6364             0x10350,  // 10350..1037A; OLD_PERMIC
6365             0x1037B,  // 1037B..1037F; UNKNOWN
6366             0x10380,  // 10380..1039D; UGARITIC
6367             0x1039E,  // 1039E       ; UNKNOWN
6368             0x1039F,  // 1039F       ; UGARITIC
6369             0x103A0,  // 103A0..103C3; OLD_PERSIAN
6370             0x103C4,  // 103C4..103C7; UNKNOWN
6371             0x103C8,  // 103C8..103D5; OLD_PERSIAN
6372             0x103D6,  // 103D6..103FF; UNKNOWN
6373             0x10400,  // 10400..1044F; DESERET
6374             0x10450,  // 10450..1047F; SHAVIAN
6375             0x10480,  // 10480..1049D; OSMANYA
6376             0x1049E,  // 1049E..1049F; UNKNOWN
6377             0x104A0,  // 104A0..104A9; OSMANYA
6378             0x104AA,  // 104AA..104AF; UNKNOWN
6379             0x104B0,  // 104B0..104D3; OSAGE
6380             0x104D4,  // 104D4..104D7; UNKNOWN
6381             0x104D8,  // 104D8..104FB; OSAGE
6382             0x104FC,  // 104FC..104FF; UNKNOWN
6383             0x10500,  // 10500..10527; ELBASAN
6384             0x10528,  // 10528..1052F; UNKNOWN
6385             0x10530,  // 10530..10563; CAUCASIAN_ALBANIAN
6386             0x10564,  // 10564..1056E; UNKNOWN
6387             0x1056F,  // 1056F       ; CAUCASIAN_ALBANIAN
6388             0x10570,  // 10570..1057A; VITHKUQI
6389             0x1057B,  // 1057B       ; UNKNOWN
6390             0x1057C,  // 1057C..1058A; VITHKUQI
6391             0x1058B,  // 1058B       ; UNKNOWN
6392             0x1058C,  // 1058C..10592; VITHKUQI
6393             0x10593,  // 10593       ; UNKNOWN
6394             0x10594,  // 10594..10595; VITHKUQI
6395             0x10596,  // 10596       ; UNKNOWN
6396             0x10597,  // 10597..105A1; VITHKUQI
6397             0x105A2,  // 105A2       ; UNKNOWN
6398             0x105A3,  // 105A3..105B1; VITHKUQI
6399             0x105B2,  // 105B2       ; UNKNOWN
6400             0x105B3,  // 105B3..105B9; VITHKUQI
6401             0x105BA,  // 105BA       ; UNKNOWN
6402             0x105BB,  // 105BB..105BC; VITHKUQI
6403             0x105BD,  // 105BD..105FF; UNKNOWN
6404             0x10600,  // 10600..10736; LINEAR_A
6405             0x10737,  // 10737..1073F; UNKNOWN
6406             0x10740,  // 10740..10755; LINEAR_A
6407             0x10756,  // 10756..1075F; UNKNOWN
6408             0x10760,  // 10760..10767; LINEAR_A
6409             0x10768,  // 10768..1077F; UNKNOWN
6410             0x10780,  // 10780..10785; LATIN
6411             0x10786,  // 10786       ; UNKNOWN
6412             0x10787,  // 10787..107B0; LATIN
6413             0x107B1,  // 107B1       ; UNKNOWN
6414             0x107B2,  // 107B2..107BA; LATIN
6415             0x107BB,  // 107BB..107FF; UNKNOWN
6416             0x10800,  // 10800..10805; CYPRIOT
6417             0x10806,  // 10806..10807; UNKNOWN
6418             0x10808,  // 10808       ; CYPRIOT
6419             0x10809,  // 10809       ; UNKNOWN
6420             0x1080A,  // 1080A..10835; CYPRIOT
6421             0x10836,  // 10836       ; UNKNOWN
6422             0x10837,  // 10837..10838; CYPRIOT
6423             0x10839,  // 10839..1083B; UNKNOWN
6424             0x1083C,  // 1083C       ; CYPRIOT
6425             0x1083D,  // 1083D..1083E; UNKNOWN
6426             0x1083F,  // 1083F       ; CYPRIOT
6427             0x10840,  // 10840..10855; IMPERIAL_ARAMAIC
6428             0x10856,  // 10856       ; UNKNOWN
6429             0x10857,  // 10857..1085F; IMPERIAL_ARAMAIC
6430             0x10860,  // 10860..1087F; PALMYRENE
6431             0x10880,  // 10880..1089E; NABATAEAN
6432             0x1089F,  // 1089F..108A6; UNKNOWN
6433             0x108A7,  // 108A7..108AF; NABATAEAN
6434             0x108B0,  // 108B0..108DF; UNKNOWN
6435             0x108E0,  // 108E0..108F2; HATRAN
6436             0x108F3,  // 108F3       ; UNKNOWN
6437             0x108F4,  // 108F4..108F5; HATRAN
6438             0x108F6,  // 108F6..108FA; UNKNOWN
6439             0x108FB,  // 108FB..108FF; HATRAN
6440             0x10900,  // 10900..1091B; PHOENICIAN
6441             0x1091C,  // 1091C..1091E; UNKNOWN
6442             0x1091F,  // 1091F       ; PHOENICIAN
6443             0x10920,  // 10920..10939; LYDIAN
6444             0x1093A,  // 1093A..1093E; UNKNOWN
6445             0x1093F,  // 1093F       ; LYDIAN
6446             0x10940,  // 10940..1097F; UNKNOWN
6447             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
6448             0x109A0,  // 109A0..109B7; MEROITIC_CURSIVE
6449             0x109B8,  // 109B8..109BB; UNKNOWN
6450             0x109BC,  // 109BC..109CF; MEROITIC_CURSIVE
6451             0x109D0,  // 109D0..109D1; UNKNOWN
6452             0x109D2,  // 109D2..109FF; MEROITIC_CURSIVE
6453             0x10A00,  // 10A00..10A03; KHAROSHTHI
6454             0x10A04,  // 10A04       ; UNKNOWN
6455             0x10A05,  // 10A05..10A06; KHAROSHTHI
6456             0x10A07,  // 10A07..10A0B; UNKNOWN
6457             0x10A0C,  // 10A0C..10A13; KHAROSHTHI
6458             0x10A14,  // 10A14       ; UNKNOWN
6459             0x10A15,  // 10A15..10A17; KHAROSHTHI
6460             0x10A18,  // 10A18       ; UNKNOWN
6461             0x10A19,  // 10A19..10A35; KHAROSHTHI
6462             0x10A36,  // 10A36..10A37; UNKNOWN
6463             0x10A38,  // 10A38..10A3A; KHAROSHTHI
6464             0x10A3B,  // 10A3B..10A3E; UNKNOWN
6465             0x10A3F,  // 10A3F..10A48; KHAROSHTHI
6466             0x10A49,  // 10A49..10A4F; UNKNOWN
6467             0x10A50,  // 10A50..10A58; KHAROSHTHI
6468             0x10A59,  // 10A59..10A5F; UNKNOWN
6469             0x10A60,  // 10A60..10A7F; OLD_SOUTH_ARABIAN
6470             0x10A80,  // 10A80..10A9F; OLD_NORTH_ARABIAN
6471             0x10AA0,  // 10AA0..10ABF; UNKNOWN
6472             0x10AC0,  // 10AC0..10AE6; MANICHAEAN
6473             0x10AE7,  // 10AE7..10AEA; UNKNOWN
6474             0x10AEB,  // 10AEB..10AF6; MANICHAEAN
6475             0x10AF7,  // 10AF7..10AFF; UNKNOWN
6476             0x10B00,  // 10B00..10B35; AVESTAN
6477             0x10B36,  // 10B36..10B38; UNKNOWN
6478             0x10B39,  // 10B39..10B3F; AVESTAN
6479             0x10B40,  // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
6480             0x10B56,  // 10B56..10B57; UNKNOWN
6481             0x10B58,  // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
6482             0x10B60,  // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
6483             0x10B73,  // 10B73..10B77; UNKNOWN
6484             0x10B78,  // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
6485             0x10B80,  // 10B80..10B91; PSALTER_PAHLAVI
6486             0x10B92,  // 10B92..10B98; UNKNOWN
6487             0x10B99,  // 10B99..10B9C; PSALTER_PAHLAVI
6488             0x10B9D,  // 10B9D..10BA8; UNKNOWN
6489             0x10BA9,  // 10BA9..10BAF; PSALTER_PAHLAVI
6490             0x10BB0,  // 10BB0..10BFF; UNKNOWN
6491             0x10C00,  // 10C00..10C48; OLD_TURKIC
6492             0x10C49,  // 10C49..10C7F; UNKNOWN
6493             0x10C80,  // 10C80..10CB2; OLD_HUNGARIAN
6494             0x10CB3,  // 10CB3..10CBF; UNKNOWN
6495             0x10CC0,  // 10CC0..10CF2; OLD_HUNGARIAN
6496             0x10CF3,  // 10CF3..10CF9; UNKNOWN
6497             0x10CFA,  // 10CFA..10CFF; OLD_HUNGARIAN
6498             0x10D00,  // 10D00..10D27; HANIFI_ROHINGYA
6499             0x10D28,  // 10D28..10D2F; UNKNOWN
6500             0x10D30,  // 10D30..10D39; HANIFI_ROHINGYA
6501             0x10D3A,  // 10D3A..10E5F; UNKNOWN
6502             0x10E60,  // 10E60..10E7E; ARABIC
6503             0x10E7F,  // 10E7F       ; UNKNOWN
6504             0x10E80,  // 10E80..10EA9; YEZIDI
6505             0x10EAA,  // 10EAA       ; UNKNOWN
6506             0x10EAB,  // 10EAB..10EAD; YEZIDI
6507             0x10EAE,  // 10EAE..10EAF; UNKNOWN
6508             0x10EB0,  // 10EB0..10EB1; YEZIDI
6509             0x10EB2,  // 10EB2..10EFC; UNKNOWN
6510             0x10EFD,  // 10EFD..10EFF; ARABIC
6511             0x10F00,  // 10F00..10F27; OLD_SOGDIAN
6512             0x10F28,  // 10F28..10F2F; UNKNOWN
6513             0x10F30,  // 10F30..10F59; SOGDIAN
6514             0x10F5A,  // 10F5A..10F6F; UNKNOWN
6515             0x10F70,  // 10F70..10F89; OLD_UYGHUR
6516             0x10F8A,  // 10F8A..10FAF; UNKNOWN
6517             0x10FB0,  // 10FB0..10FCB; CHORASMIAN
6518             0x10FCC,  // 10FCC..10FDF; UNKNOWN
6519             0x10FE0,  // 10FE0..10FF6; ELYMAIC
6520             0x10FF7,  // 10FF7..10FFF; UNKNOWN
6521             0x11000,  // 11000..1104D; BRAHMI
6522             0x1104E,  // 1104E..11051; UNKNOWN
6523             0x11052,  // 11052..11075; BRAHMI
6524             0x11076,  // 11076..1107E; UNKNOWN
6525             0x1107F,  // 1107F       ; BRAHMI
6526             0x11080,  // 11080..110C2; KAITHI
6527             0x110C3,  // 110C3..110CC; UNKNOWN
6528             0x110CD,  // 110CD       ; KAITHI
6529             0x110CE,  // 110CE..110CF; UNKNOWN
6530             0x110D0,  // 110D0..110E8; SORA_SOMPENG
6531             0x110E9,  // 110E9..110EF; UNKNOWN
6532             0x110F0,  // 110F0..110F9; SORA_SOMPENG
6533             0x110FA,  // 110FA..110FF; UNKNOWN
6534             0x11100,  // 11100..11134; CHAKMA
6535             0x11135,  // 11135       ; UNKNOWN
6536             0x11136,  // 11136..11147; CHAKMA
6537             0x11148,  // 11148..1114F; UNKNOWN
6538             0x11150,  // 11150..11176; MAHAJANI
6539             0x11177,  // 11177..1117F; UNKNOWN
6540             0x11180,  // 11180..111DF; SHARADA
6541             0x111E0,  // 111E0       ; UNKNOWN
6542             0x111E1,  // 111E1..111F4; SINHALA
6543             0x111F5,  // 111F5..111FF; UNKNOWN
6544             0x11200,  // 11200..11211; KHOJKI
6545             0x11212,  // 11212       ; UNKNOWN
6546             0x11213,  // 11213..11241; KHOJKI
6547             0x11242,  // 11242..1127F; UNKNOWN
6548             0x11280,  // 11280..11286; MULTANI
6549             0x11287,  // 11287       ; UNKNOWN
6550             0x11288,  // 11288       ; MULTANI
6551             0x11289,  // 11289       ; UNKNOWN
6552             0x1128A,  // 1128A..1128D; MULTANI
6553             0x1128E,  // 1128E       ; UNKNOWN
6554             0x1128F,  // 1128F..1129D; MULTANI
6555             0x1129E,  // 1129E       ; UNKNOWN
6556             0x1129F,  // 1129F..112A9; MULTANI
6557             0x112AA,  // 112AA..112AF; UNKNOWN
6558             0x112B0,  // 112B0..112EA; KHUDAWADI
6559             0x112EB,  // 112EB..112EF; UNKNOWN
6560             0x112F0,  // 112F0..112F9; KHUDAWADI
6561             0x112FA,  // 112FA..112FF; UNKNOWN
6562             0x11300,  // 11300..11303; GRANTHA
6563             0x11304,  // 11304       ; UNKNOWN
6564             0x11305,  // 11305..1130C; GRANTHA
6565             0x1130D,  // 1130D..1130E; UNKNOWN
6566             0x1130F,  // 1130F..11310; GRANTHA
6567             0x11311,  // 11311..11312; UNKNOWN
6568             0x11313,  // 11313..11328; GRANTHA
6569             0x11329,  // 11329       ; UNKNOWN
6570             0x1132A,  // 1132A..11330; GRANTHA
6571             0x11331,  // 11331       ; UNKNOWN
6572             0x11332,  // 11332..11333; GRANTHA
6573             0x11334,  // 11334       ; UNKNOWN
6574             0x11335,  // 11335..11339; GRANTHA
6575             0x1133A,  // 1133A       ; UNKNOWN
6576             0x1133B,  // 1133B       ; INHERITED
6577             0x1133C,  // 1133C..11344; GRANTHA
6578             0x11345,  // 11345..11346; UNKNOWN
6579             0x11347,  // 11347..11348; GRANTHA
6580             0x11349,  // 11349..1134A; UNKNOWN
6581             0x1134B,  // 1134B..1134D; GRANTHA
6582             0x1134E,  // 1134E..1134F; UNKNOWN
6583             0x11350,  // 11350       ; GRANTHA
6584             0x11351,  // 11351..11356; UNKNOWN
6585             0x11357,  // 11357       ; GRANTHA
6586             0x11358,  // 11358..1135C; UNKNOWN
6587             0x1135D,  // 1135D..11363; GRANTHA
6588             0x11364,  // 11364..11365; UNKNOWN
6589             0x11366,  // 11366..1136C; GRANTHA
6590             0x1136D,  // 1136D..1136F; UNKNOWN
6591             0x11370,  // 11370..11374; GRANTHA
6592             0x11375,  // 11375..113FF; UNKNOWN
6593             0x11400,  // 11400..1145B; NEWA
6594             0x1145C,  // 1145C       ; UNKNOWN
6595             0x1145D,  // 1145D..11461; NEWA
6596             0x11462,  // 11462..1147F; UNKNOWN
6597             0x11480,  // 11480..114C7; TIRHUTA
6598             0x114C8,  // 114C8..114CF; UNKNOWN
6599             0x114D0,  // 114D0..114D9; TIRHUTA
6600             0x114DA,  // 114DA..1157F; UNKNOWN
6601             0x11580,  // 11580..115B5; SIDDHAM
6602             0x115B6,  // 115B6..115B7; UNKNOWN
6603             0x115B8,  // 115B8..115DD; SIDDHAM
6604             0x115DE,  // 115DE..115FF; UNKNOWN
6605             0x11600,  // 11600..11644; MODI
6606             0x11645,  // 11645..1164F; UNKNOWN
6607             0x11650,  // 11650..11659; MODI
6608             0x1165A,  // 1165A..1165F; UNKNOWN
6609             0x11660,  // 11660..1166C; MONGOLIAN
6610             0x1166D,  // 1166D..1167F; UNKNOWN
6611             0x11680,  // 11680..116B9; TAKRI
6612             0x116BA,  // 116BA..116BF; UNKNOWN
6613             0x116C0,  // 116C0..116C9; TAKRI
6614             0x116CA,  // 116CA..116FF; UNKNOWN
6615             0x11700,  // 11700..1171A; AHOM
6616             0x1171B,  // 1171B..1171C; UNKNOWN
6617             0x1171D,  // 1171D..1172B; AHOM
6618             0x1172C,  // 1172C..1172F; UNKNOWN
6619             0x11730,  // 11730..11746; AHOM
6620             0x11747,  // 11747..117FF; UNKNOWN
6621             0x11800,  // 11800..1183B; DOGRA
6622             0x1183C,  // 1183C..1189F; UNKNOWN
6623             0x118A0,  // 118A0..118F2; WARANG_CITI
6624             0x118F3,  // 118F3..118FE; UNKNOWN
6625             0x118FF,  // 118FF       ; WARANG_CITI
6626             0x11900,  // 11900..11906; DIVES_AKURU
6627             0x11907,  // 11907..11908; UNKNOWN
6628             0x11909,  // 11909       ; DIVES_AKURU
6629             0x1190A,  // 1190A..1190B; UNKNOWN
6630             0x1190C,  // 1190C..11913; DIVES_AKURU
6631             0x11914,  // 11914       ; UNKNOWN
6632             0x11915,  // 11915..11916; DIVES_AKURU
6633             0x11917,  // 11917       ; UNKNOWN
6634             0x11918,  // 11918..11935; DIVES_AKURU
6635             0x11936,  // 11936       ; UNKNOWN
6636             0x11937,  // 11937..11938; DIVES_AKURU
6637             0x11939,  // 11939..1193A; UNKNOWN
6638             0x1193B,  // 1193B..11946; DIVES_AKURU
6639             0x11947,  // 11947..1194F; UNKNOWN
6640             0x11950,  // 11950..11959; DIVES_AKURU
6641             0x1195A,  // 1195A..1199F; UNKNOWN
6642             0x119A0,  // 119A0..119A7; NANDINAGARI
6643             0x119A8,  // 119A8..119A9; UNKNOWN
6644             0x119AA,  // 119AA..119D7; NANDINAGARI
6645             0x119D8,  // 119D8..119D9; UNKNOWN
6646             0x119DA,  // 119DA..119E4; NANDINAGARI
6647             0x119E5,  // 119E5..119FF; UNKNOWN
6648             0x11A00,  // 11A00..11A47; ZANABAZAR_SQUARE
6649             0x11A48,  // 11A48..11A4F; UNKNOWN
6650             0x11A50,  // 11A50..11AA2; SOYOMBO
6651             0x11AA3,  // 11AA3..11AAF; UNKNOWN
6652             0x11AB0,  // 11AB0..11ABF; CANADIAN_ABORIGINAL
6653             0x11AC0,  // 11AC0..11AF8; PAU_CIN_HAU
6654             0x11AF9,  // 11AF9..11AFF; UNKNOWN
6655             0x11B00,  // 11B00..11B09; DEVANAGARI
6656             0x11B0A,  // 11B0A..11BFF; UNKNOWN
6657             0x11C00,  // 11C00..11C08; BHAIKSUKI
6658             0x11C09,  // 11C09       ; UNKNOWN
6659             0x11C0A,  // 11C0A..11C36; BHAIKSUKI
6660             0x11C37,  // 11C37       ; UNKNOWN
6661             0x11C38,  // 11C38..11C45; BHAIKSUKI
6662             0x11C46,  // 11C46..11C4F; UNKNOWN
6663             0x11C50,  // 11C50..11C6C; BHAIKSUKI
6664             0x11C6D,  // 11C6D..11C6F; UNKNOWN
6665             0x11C70,  // 11C70..11C8F; MARCHEN
6666             0x11C90,  // 11C90..11C91; UNKNOWN
6667             0x11C92,  // 11C92..11CA7; MARCHEN
6668             0x11CA8,  // 11CA8       ; UNKNOWN
6669             0x11CA9,  // 11CA9..11CB6; MARCHEN
6670             0x11CB7,  // 11CB7..11CFF; UNKNOWN
6671             0x11D00,  // 11D00..11D06; MASARAM_GONDI
6672             0x11D07,  // 11D07       ; UNKNOWN
6673             0x11D08,  // 11D08..11D09; MASARAM_GONDI
6674             0x11D0A,  // 11D0A       ; UNKNOWN
6675             0x11D0B,  // 11D0B..11D36; MASARAM_GONDI
6676             0x11D37,  // 11D37..11D39; UNKNOWN
6677             0x11D3A,  // 11D3A       ; MASARAM_GONDI
6678             0x11D3B,  // 11D3B       ; UNKNOWN
6679             0x11D3C,  // 11D3C..11D3D; MASARAM_GONDI
6680             0x11D3E,  // 11D3E       ; UNKNOWN
6681             0x11D3F,  // 11D3F..11D47; MASARAM_GONDI
6682             0x11D48,  // 11D48..11D4F; UNKNOWN
6683             0x11D50,  // 11D50..11D59; MASARAM_GONDI
6684             0x11D5A,  // 11D5A..11D5F; UNKNOWN
6685             0x11D60,  // 11D60..11D65; GUNJALA_GONDI
6686             0x11D66,  // 11D66       ; UNKNOWN
6687             0x11D67,  // 11D67..11D68; GUNJALA_GONDI
6688             0x11D69,  // 11D69       ; UNKNOWN
6689             0x11D6A,  // 11D6A..11D8E; GUNJALA_GONDI
6690             0x11D8F,  // 11D8F       ; UNKNOWN
6691             0x11D90,  // 11D90..11D91; GUNJALA_GONDI
6692             0x11D92,  // 11D92       ; UNKNOWN
6693             0x11D93,  // 11D93..11D98; GUNJALA_GONDI
6694             0x11D99,  // 11D99..11D9F; UNKNOWN
6695             0x11DA0,  // 11DA0..11DA9; GUNJALA_GONDI
6696             0x11DAA,  // 11DAA..11EDF; UNKNOWN
6697             0x11EE0,  // 11EE0..11EF8; MAKASAR
6698             0x11EF9,  // 11EF9..11EFF; UNKNOWN
6699             0x11F00,  // 11F00..11F10; KAWI
6700             0x11F11,  // 11F11       ; UNKNOWN
6701             0x11F12,  // 11F12..11F3A; KAWI
6702             0x11F3B,  // 11F3B..11F3D; UNKNOWN
6703             0x11F3E,  // 11F3E..11F59; KAWI
6704             0x11F5A,  // 11F5A..11FAF; UNKNOWN
6705             0x11FB0,  // 11FB0       ; LISU
6706             0x11FB1,  // 11FB1..11FBF; UNKNOWN
6707             0x11FC0,  // 11FC0..11FF1; TAMIL
6708             0x11FF2,  // 11FF2..11FFE; UNKNOWN
6709             0x11FFF,  // 11FFF       ; TAMIL
6710             0x12000,  // 12000..12399; CUNEIFORM
6711             0x1239A,  // 1239A..123FF; UNKNOWN
6712             0x12400,  // 12400..1246E; CUNEIFORM
6713             0x1246F,  // 1246F       ; UNKNOWN
6714             0x12470,  // 12470..12474; CUNEIFORM
6715             0x12475,  // 12475..1247F; UNKNOWN
6716             0x12480,  // 12480..12543; CUNEIFORM
6717             0x12544,  // 12544..12F8F; UNKNOWN
6718             0x12F90,  // 12F90..12FF2; CYPRO_MINOAN
6719             0x12FF3,  // 12FF3..12FFF; UNKNOWN
6720             0x13000,  // 13000..13455; EGYPTIAN_HIEROGLYPHS
6721             0x13456,  // 13456..143FF; UNKNOWN
6722             0x14400,  // 14400..14646; ANATOLIAN_HIEROGLYPHS
6723             0x14647,  // 14647..167FF; UNKNOWN
6724             0x16800,  // 16800..16A38; BAMUM
6725             0x16A39,  // 16A39..16A3F; UNKNOWN
6726             0x16A40,  // 16A40..16A5E; MRO
6727             0x16A5F,  // 16A5F       ; UNKNOWN
6728             0x16A60,  // 16A60..16A69; MRO
6729             0x16A6A,  // 16A6A..16A6D; UNKNOWN
6730             0x16A6E,  // 16A6E..16A6F; MRO
6731             0x16A70,  // 16A70..16ABE; TANGSA
6732             0x16ABF,  // 16ABF       ; UNKNOWN
6733             0x16AC0,  // 16AC0..16AC9; TANGSA
6734             0x16ACA,  // 16ACA..16ACF; UNKNOWN
6735             0x16AD0,  // 16AD0..16AED; BASSA_VAH
6736             0x16AEE,  // 16AEE..16AEF; UNKNOWN
6737             0x16AF0,  // 16AF0..16AF5; BASSA_VAH
6738             0x16AF6,  // 16AF6..16AFF; UNKNOWN
6739             0x16B00,  // 16B00..16B45; PAHAWH_HMONG
6740             0x16B46,  // 16B46..16B4F; UNKNOWN
6741             0x16B50,  // 16B50..16B59; PAHAWH_HMONG
6742             0x16B5A,  // 16B5A       ; UNKNOWN
6743             0x16B5B,  // 16B5B..16B61; PAHAWH_HMONG
6744             0x16B62,  // 16B62       ; UNKNOWN
6745             0x16B63,  // 16B63..16B77; PAHAWH_HMONG
6746             0x16B78,  // 16B78..16B7C; UNKNOWN
6747             0x16B7D,  // 16B7D..16B8F; PAHAWH_HMONG
6748             0x16B90,  // 16B90..16E3F; UNKNOWN
6749             0x16E40,  // 16E40..16E9A; MEDEFAIDRIN
6750             0x16E9B,  // 16E9B..16EFF; UNKNOWN
6751             0x16F00,  // 16F00..16F4A; MIAO
6752             0x16F4B,  // 16F4B..16F4E; UNKNOWN
6753             0x16F4F,  // 16F4F..16F87; MIAO
6754             0x16F88,  // 16F88..16F8E; UNKNOWN
6755             0x16F8F,  // 16F8F..16F9F; MIAO
6756             0x16FA0,  // 16FA0..16FDF; UNKNOWN
6757             0x16FE0,  // 16FE0       ; TANGUT
6758             0x16FE1,  // 16FE1       ; NUSHU
6759             0x16FE2,  // 16FE2..16FE3; HAN
6760             0x16FE4,  // 16FE4       ; KHITAN_SMALL_SCRIPT
6761             0x16FE5,  // 16FE5..16FEF; UNKNOWN
6762             0x16FF0,  // 16FF0..16FF1; HAN
6763             0x16FF2,  // 16FF2..16FFF; UNKNOWN
6764             0x17000,  // 17000..187F7; TANGUT
6765             0x187F8,  // 187F8..187FF; UNKNOWN
6766             0x18800,  // 18800..18AFF; TANGUT
6767             0x18B00,  // 18B00..18CD5; KHITAN_SMALL_SCRIPT
6768             0x18CD6,  // 18CD6..18CFF; UNKNOWN
6769             0x18D00,  // 18D00..18D08; TANGUT
6770             0x18D09,  // 18D09..1AFEF; UNKNOWN
6771             0x1AFF0,  // 1AFF0..1AFF3; KATAKANA
6772             0x1AFF4,  // 1AFF4       ; UNKNOWN
6773             0x1AFF5,  // 1AFF5..1AFFB; KATAKANA
6774             0x1AFFC,  // 1AFFC       ; UNKNOWN
6775             0x1AFFD,  // 1AFFD..1AFFE; KATAKANA
6776             0x1AFFF,  // 1AFFF       ; UNKNOWN
6777             0x1B000,  // 1B000       ; KATAKANA
6778             0x1B001,  // 1B001..1B11F; HIRAGANA
6779             0x1B120,  // 1B120..1B122; KATAKANA
6780             0x1B123,  // 1B123..1B131; UNKNOWN
6781             0x1B132,  // 1B132       ; HIRAGANA
6782             0x1B133,  // 1B133..1B14F; UNKNOWN
6783             0x1B150,  // 1B150..1B152; HIRAGANA
6784             0x1B153,  // 1B153..1B154; UNKNOWN
6785             0x1B155,  // 1B155       ; KATAKANA
6786             0x1B156,  // 1B156..1B163; UNKNOWN
6787             0x1B164,  // 1B164..1B167; KATAKANA
6788             0x1B168,  // 1B168..1B16F; UNKNOWN
6789             0x1B170,  // 1B170..1B2FB; NUSHU
6790             0x1B2FC,  // 1B2FC..1BBFF; UNKNOWN
6791             0x1BC00,  // 1BC00..1BC6A; DUPLOYAN
6792             0x1BC6B,  // 1BC6B..1BC6F; UNKNOWN
6793             0x1BC70,  // 1BC70..1BC7C; DUPLOYAN
6794             0x1BC7D,  // 1BC7D..1BC7F; UNKNOWN
6795             0x1BC80,  // 1BC80..1BC88; DUPLOYAN
6796             0x1BC89,  // 1BC89..1BC8F; UNKNOWN
6797             0x1BC90,  // 1BC90..1BC99; DUPLOYAN
6798             0x1BC9A,  // 1BC9A..1BC9B; UNKNOWN
6799             0x1BC9C,  // 1BC9C..1BC9F; DUPLOYAN
6800             0x1BCA0,  // 1BCA0..1BCA3; COMMON
6801             0x1BCA4,  // 1BCA4..1CEFF; UNKNOWN
6802             0x1CF00,  // 1CF00..1CF2D; INHERITED
6803             0x1CF2E,  // 1CF2E..1CF2F; UNKNOWN
6804             0x1CF30,  // 1CF30..1CF46; INHERITED
6805             0x1CF47,  // 1CF47..1CF4F; UNKNOWN
6806             0x1CF50,  // 1CF50..1CFC3; COMMON
6807             0x1CFC4,  // 1CFC4..1CFFF; UNKNOWN
6808             0x1D000,  // 1D000..1D0F5; COMMON
6809             0x1D0F6,  // 1D0F6..1D0FF; UNKNOWN
6810             0x1D100,  // 1D100..1D126; COMMON
6811             0x1D127,  // 1D127..1D128; UNKNOWN
6812             0x1D129,  // 1D129..1D166; COMMON
6813             0x1D167,  // 1D167..1D169; INHERITED
6814             0x1D16A,  // 1D16A..1D17A; COMMON
6815             0x1D17B,  // 1D17B..1D182; INHERITED
6816             0x1D183,  // 1D183..1D184; COMMON
6817             0x1D185,  // 1D185..1D18B; INHERITED
6818             0x1D18C,  // 1D18C..1D1A9; COMMON
6819             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
6820             0x1D1AE,  // 1D1AE..1D1EA; COMMON
6821             0x1D1EB,  // 1D1EB..1D1FF; UNKNOWN
6822             0x1D200,  // 1D200..1D245; GREEK
6823             0x1D246,  // 1D246..1D2BF; UNKNOWN
6824             0x1D2C0,  // 1D2C0..1D2D3; COMMON
6825             0x1D2D4,  // 1D2D4..1D2DF; UNKNOWN
6826             0x1D2E0,  // 1D2E0..1D2F3; COMMON
6827             0x1D2F4,  // 1D2F4..1D2FF; UNKNOWN
6828             0x1D300,  // 1D300..1D356; COMMON
6829             0x1D357,  // 1D357..1D35F; UNKNOWN
6830             0x1D360,  // 1D360..1D378; COMMON
6831             0x1D379,  // 1D379..1D3FF; UNKNOWN
6832             0x1D400,  // 1D400..1D454; COMMON
6833             0x1D455,  // 1D455       ; UNKNOWN
6834             0x1D456,  // 1D456..1D49C; COMMON
6835             0x1D49D,  // 1D49D       ; UNKNOWN
6836             0x1D49E,  // 1D49E..1D49F; COMMON
6837             0x1D4A0,  // 1D4A0..1D4A1; UNKNOWN
6838             0x1D4A2,  // 1D4A2       ; COMMON
6839             0x1D4A3,  // 1D4A3..1D4A4; UNKNOWN
6840             0x1D4A5,  // 1D4A5..1D4A6; COMMON
6841             0x1D4A7,  // 1D4A7..1D4A8; UNKNOWN
6842             0x1D4A9,  // 1D4A9..1D4AC; COMMON
6843             0x1D4AD,  // 1D4AD       ; UNKNOWN
6844             0x1D4AE,  // 1D4AE..1D4B9; COMMON
6845             0x1D4BA,  // 1D4BA       ; UNKNOWN
6846             0x1D4BB,  // 1D4BB       ; COMMON
6847             0x1D4BC,  // 1D4BC       ; UNKNOWN
6848             0x1D4BD,  // 1D4BD..1D4C3; COMMON
6849             0x1D4C4,  // 1D4C4       ; UNKNOWN
6850             0x1D4C5,  // 1D4C5..1D505; COMMON
6851             0x1D506,  // 1D506       ; UNKNOWN
6852             0x1D507,  // 1D507..1D50A; COMMON
6853             0x1D50B,  // 1D50B..1D50C; UNKNOWN
6854             0x1D50D,  // 1D50D..1D514; COMMON
6855             0x1D515,  // 1D515       ; UNKNOWN
6856             0x1D516,  // 1D516..1D51C; COMMON
6857             0x1D51D,  // 1D51D       ; UNKNOWN
6858             0x1D51E,  // 1D51E..1D539; COMMON
6859             0x1D53A,  // 1D53A       ; UNKNOWN
6860             0x1D53B,  // 1D53B..1D53E; COMMON
6861             0x1D53F,  // 1D53F       ; UNKNOWN
6862             0x1D540,  // 1D540..1D544; COMMON
6863             0x1D545,  // 1D545       ; UNKNOWN
6864             0x1D546,  // 1D546       ; COMMON
6865             0x1D547,  // 1D547..1D549; UNKNOWN
6866             0x1D54A,  // 1D54A..1D550; COMMON
6867             0x1D551,  // 1D551       ; UNKNOWN
6868             0x1D552,  // 1D552..1D6A5; COMMON
6869             0x1D6A6,  // 1D6A6..1D6A7; UNKNOWN
6870             0x1D6A8,  // 1D6A8..1D7CB; COMMON
6871             0x1D7CC,  // 1D7CC..1D7CD; UNKNOWN
6872             0x1D7CE,  // 1D7CE..1D7FF; COMMON
6873             0x1D800,  // 1D800..1DA8B; SIGNWRITING
6874             0x1DA8C,  // 1DA8C..1DA9A; UNKNOWN
6875             0x1DA9B,  // 1DA9B..1DA9F; SIGNWRITING
6876             0x1DAA0,  // 1DAA0       ; UNKNOWN
6877             0x1DAA1,  // 1DAA1..1DAAF; SIGNWRITING
6878             0x1DAB0,  // 1DAB0..1DEFF; UNKNOWN
6879             0x1DF00,  // 1DF00..1DF1E; LATIN
6880             0x1DF1F,  // 1DF1F..1DF24; UNKNOWN
6881             0x1DF25,  // 1DF25..1DF2A; LATIN
6882             0x1DF2B,  // 1DF2B..1DFFF; UNKNOWN
6883             0x1E000,  // 1E000..1E006; GLAGOLITIC
6884             0x1E007,  // 1E007       ; UNKNOWN
6885             0x1E008,  // 1E008..1E018; GLAGOLITIC
6886             0x1E019,  // 1E019..1E01A; UNKNOWN
6887             0x1E01B,  // 1E01B..1E021; GLAGOLITIC
6888             0x1E022,  // 1E022       ; UNKNOWN
6889             0x1E023,  // 1E023..1E024; GLAGOLITIC
6890             0x1E025,  // 1E025       ; UNKNOWN
6891             0x1E026,  // 1E026..1E02A; GLAGOLITIC
6892             0x1E02B,  // 1E02B..1E02F; UNKNOWN
6893             0x1E030,  // 1E030..1E06D; CYRILLIC
6894             0x1E06E,  // 1E06E..1E08E; UNKNOWN
6895             0x1E08F,  // 1E08F       ; CYRILLIC
6896             0x1E090,  // 1E090..1E0FF; UNKNOWN
6897             0x1E100,  // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG
6898             0x1E12D,  // 1E12D..1E12F; UNKNOWN
6899             0x1E130,  // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG
6900             0x1E13E,  // 1E13E..1E13F; UNKNOWN
6901             0x1E140,  // 1E140..1E149; NYIAKENG_PUACHUE_HMONG
6902             0x1E14A,  // 1E14A..1E14D; UNKNOWN
6903             0x1E14E,  // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG
6904             0x1E150,  // 1E150..1E28F; UNKNOWN
6905             0x1E290,  // 1E290..1E2AE; TOTO
6906             0x1E2AF,  // 1E2AF..1E2BF; UNKNOWN
6907             0x1E2C0,  // 1E2C0..1E2F9; WANCHO
6908             0x1E2FA,  // 1E2FA..1E2FE; UNKNOWN
6909             0x1E2FF,  // 1E2FF       ; WANCHO
6910             0x1E300,  // 1E300..1E4CF; UNKNOWN
6911             0x1E4D0,  // 1E4D0..1E4F9; NAG_MUNDARI
6912             0x1E4FA,  // 1E4FA..1E7DF; UNKNOWN
6913             0x1E7E0,  // 1E7E0..1E7E6; ETHIOPIC
6914             0x1E7E7,  // 1E7E7       ; UNKNOWN
6915             0x1E7E8,  // 1E7E8..1E7EB; ETHIOPIC
6916             0x1E7EC,  // 1E7EC       ; UNKNOWN
6917             0x1E7ED,  // 1E7ED..1E7EE; ETHIOPIC
6918             0x1E7EF,  // 1E7EF       ; UNKNOWN
6919             0x1E7F0,  // 1E7F0..1E7FE; ETHIOPIC
6920             0x1E7FF,  // 1E7FF       ; UNKNOWN
6921             0x1E800,  // 1E800..1E8C4; MENDE_KIKAKUI
6922             0x1E8C5,  // 1E8C5..1E8C6; UNKNOWN
6923             0x1E8C7,  // 1E8C7..1E8D6; MENDE_KIKAKUI
6924             0x1E8D7,  // 1E8D7..1E8FF; UNKNOWN
6925             0x1E900,  // 1E900..1E94B; ADLAM
6926             0x1E94C,  // 1E94C..1E94F; UNKNOWN
6927             0x1E950,  // 1E950..1E959; ADLAM
6928             0x1E95A,  // 1E95A..1E95D; UNKNOWN
6929             0x1E95E,  // 1E95E..1E95F; ADLAM
6930             0x1E960,  // 1E960..1EC70; UNKNOWN
6931             0x1EC71,  // 1EC71..1ECB4; COMMON
6932             0x1ECB5,  // 1ECB5..1ED00; UNKNOWN
6933             0x1ED01,  // 1ED01..1ED3D; COMMON
6934             0x1ED3E,  // 1ED3E..1EDFF; UNKNOWN
6935             0x1EE00,  // 1EE00..1EE03; ARABIC
6936             0x1EE04,  // 1EE04       ; UNKNOWN
6937             0x1EE05,  // 1EE05..1EE1F; ARABIC
6938             0x1EE20,  // 1EE20       ; UNKNOWN
6939             0x1EE21,  // 1EE21..1EE22; ARABIC
6940             0x1EE23,  // 1EE23       ; UNKNOWN
6941             0x1EE24,  // 1EE24       ; ARABIC
6942             0x1EE25,  // 1EE25..1EE26; UNKNOWN
6943             0x1EE27,  // 1EE27       ; ARABIC
6944             0x1EE28,  // 1EE28       ; UNKNOWN
6945             0x1EE29,  // 1EE29..1EE32; ARABIC
6946             0x1EE33,  // 1EE33       ; UNKNOWN
6947             0x1EE34,  // 1EE34..1EE37; ARABIC
6948             0x1EE38,  // 1EE38       ; UNKNOWN
6949             0x1EE39,  // 1EE39       ; ARABIC
6950             0x1EE3A,  // 1EE3A       ; UNKNOWN
6951             0x1EE3B,  // 1EE3B       ; ARABIC
6952             0x1EE3C,  // 1EE3C..1EE41; UNKNOWN
6953             0x1EE42,  // 1EE42       ; ARABIC
6954             0x1EE43,  // 1EE43..1EE46; UNKNOWN
6955             0x1EE47,  // 1EE47       ; ARABIC
6956             0x1EE48,  // 1EE48       ; UNKNOWN
6957             0x1EE49,  // 1EE49       ; ARABIC
6958             0x1EE4A,  // 1EE4A       ; UNKNOWN
6959             0x1EE4B,  // 1EE4B       ; ARABIC
6960             0x1EE4C,  // 1EE4C       ; UNKNOWN
6961             0x1EE4D,  // 1EE4D..1EE4F; ARABIC
6962             0x1EE50,  // 1EE50       ; UNKNOWN
6963             0x1EE51,  // 1EE51..1EE52; ARABIC
6964             0x1EE53,  // 1EE53       ; UNKNOWN
6965             0x1EE54,  // 1EE54       ; ARABIC
6966             0x1EE55,  // 1EE55..1EE56; UNKNOWN
6967             0x1EE57,  // 1EE57       ; ARABIC
6968             0x1EE58,  // 1EE58       ; UNKNOWN
6969             0x1EE59,  // 1EE59       ; ARABIC
6970             0x1EE5A,  // 1EE5A       ; UNKNOWN
6971             0x1EE5B,  // 1EE5B       ; ARABIC
6972             0x1EE5C,  // 1EE5C       ; UNKNOWN
6973             0x1EE5D,  // 1EE5D       ; ARABIC
6974             0x1EE5E,  // 1EE5E       ; UNKNOWN
6975             0x1EE5F,  // 1EE5F       ; ARABIC
6976             0x1EE60,  // 1EE60       ; UNKNOWN
6977             0x1EE61,  // 1EE61..1EE62; ARABIC
6978             0x1EE63,  // 1EE63       ; UNKNOWN
6979             0x1EE64,  // 1EE64       ; ARABIC
6980             0x1EE65,  // 1EE65..1EE66; UNKNOWN
6981             0x1EE67,  // 1EE67..1EE6A; ARABIC
6982             0x1EE6B,  // 1EE6B       ; UNKNOWN
6983             0x1EE6C,  // 1EE6C..1EE72; ARABIC
6984             0x1EE73,  // 1EE73       ; UNKNOWN
6985             0x1EE74,  // 1EE74..1EE77; ARABIC
6986             0x1EE78,  // 1EE78       ; UNKNOWN
6987             0x1EE79,  // 1EE79..1EE7C; ARABIC
6988             0x1EE7D,  // 1EE7D       ; UNKNOWN
6989             0x1EE7E,  // 1EE7E       ; ARABIC
6990             0x1EE7F,  // 1EE7F       ; UNKNOWN
6991             0x1EE80,  // 1EE80..1EE89; ARABIC
6992             0x1EE8A,  // 1EE8A       ; UNKNOWN
6993             0x1EE8B,  // 1EE8B..1EE9B; ARABIC
6994             0x1EE9C,  // 1EE9C..1EEA0; UNKNOWN
6995             0x1EEA1,  // 1EEA1..1EEA3; ARABIC
6996             0x1EEA4,  // 1EEA4       ; UNKNOWN
6997             0x1EEA5,  // 1EEA5..1EEA9; ARABIC
6998             0x1EEAA,  // 1EEAA       ; UNKNOWN
6999             0x1EEAB,  // 1EEAB..1EEBB; ARABIC
7000             0x1EEBC,  // 1EEBC..1EEEF; UNKNOWN
7001             0x1EEF0,  // 1EEF0..1EEF1; ARABIC
7002             0x1EEF2,  // 1EEF2..1EFFF; UNKNOWN
7003             0x1F000,  // 1F000..1F02B; COMMON
7004             0x1F02C,  // 1F02C..1F02F; UNKNOWN
7005             0x1F030,  // 1F030..1F093; COMMON
7006             0x1F094,  // 1F094..1F09F; UNKNOWN
7007             0x1F0A0,  // 1F0A0..1F0AE; COMMON
7008             0x1F0AF,  // 1F0AF..1F0B0; UNKNOWN
7009             0x1F0B1,  // 1F0B1..1F0BF; COMMON
7010             0x1F0C0,  // 1F0C0       ; UNKNOWN
7011             0x1F0C1,  // 1F0C1..1F0CF; COMMON
7012             0x1F0D0,  // 1F0D0       ; UNKNOWN
7013             0x1F0D1,  // 1F0D1..1F0F5; COMMON
7014             0x1F0F6,  // 1F0F6..1F0FF; UNKNOWN
7015             0x1F100,  // 1F100..1F1AD; COMMON
7016             0x1F1AE,  // 1F1AE..1F1E5; UNKNOWN
7017             0x1F1E6,  // 1F1E6..1F1FF; COMMON
7018             0x1F200,  // 1F200       ; HIRAGANA
7019             0x1F201,  // 1F201..1F202; COMMON
7020             0x1F203,  // 1F203..1F20F; UNKNOWN
7021             0x1F210,  // 1F210..1F23B; COMMON
7022             0x1F23C,  // 1F23C..1F23F; UNKNOWN
7023             0x1F240,  // 1F240..1F248; COMMON
7024             0x1F249,  // 1F249..1F24F; UNKNOWN
7025             0x1F250,  // 1F250..1F251; COMMON
7026             0x1F252,  // 1F252..1F25F; UNKNOWN
7027             0x1F260,  // 1F260..1F265; COMMON
7028             0x1F266,  // 1F266..1F2FF; UNKNOWN
7029             0x1F300,  // 1F300..1F6D7; COMMON
7030             0x1F6D8,  // 1F6D8..1F6DB; UNKNOWN
7031             0x1F6DC,  // 1F6DC..1F6EC; COMMON
7032             0x1F6ED,  // 1F6ED..1F6EF; UNKNOWN
7033             0x1F6F0,  // 1F6F0..1F6FC; COMMON
7034             0x1F6FD,  // 1F6FD..1F6FF; UNKNOWN
7035             0x1F700,  // 1F700..1F776; COMMON
7036             0x1F777,  // 1F777..1F77A; UNKNOWN
7037             0x1F77B,  // 1F77B..1F7D9; COMMON
7038             0x1F7DA,  // 1F7DA..1F7DF; UNKNOWN
7039             0x1F7E0,  // 1F7E0..1F7EB; COMMON
7040             0x1F7EC,  // 1F7EC..1F7EF; UNKNOWN
7041             0x1F7F0,  // 1F7F0       ; COMMON
7042             0x1F7F1,  // 1F7F1..1F7FF; UNKNOWN
7043             0x1F800,  // 1F800..1F80B; COMMON
7044             0x1F80C,  // 1F80C..1F80F; UNKNOWN
7045             0x1F810,  // 1F810..1F847; COMMON
7046             0x1F848,  // 1F848..1F84F; UNKNOWN
7047             0x1F850,  // 1F850..1F859; COMMON
7048             0x1F85A,  // 1F85A..1F85F; UNKNOWN
7049             0x1F860,  // 1F860..1F887; COMMON
7050             0x1F888,  // 1F888..1F88F; UNKNOWN
7051             0x1F890,  // 1F890..1F8AD; COMMON
7052             0x1F8AE,  // 1F8AE..1F8AF; UNKNOWN
7053             0x1F8B0,  // 1F8B0..1F8B1; COMMON
7054             0x1F8B2,  // 1F8B2..1F8FF; UNKNOWN
7055             0x1F900,  // 1F900..1FA53; COMMON
7056             0x1FA54,  // 1FA54..1FA5F; UNKNOWN
7057             0x1FA60,  // 1FA60..1FA6D; COMMON
7058             0x1FA6E,  // 1FA6E..1FA6F; UNKNOWN
7059             0x1FA70,  // 1FA70..1FA7C; COMMON
7060             0x1FA7D,  // 1FA7D..1FA7F; UNKNOWN
7061             0x1FA80,  // 1FA80..1FA88; COMMON
7062             0x1FA89,  // 1FA89..1FA8F; UNKNOWN
7063             0x1FA90,  // 1FA90..1FABD; COMMON
7064             0x1FABE,  // 1FABE       ; UNKNOWN
7065             0x1FABF,  // 1FABF..1FAC5; COMMON
7066             0x1FAC6,  // 1FAC6..1FACD; UNKNOWN
7067             0x1FACE,  // 1FACE..1FADB; COMMON
7068             0x1FADC,  // 1FADC..1FADF; UNKNOWN
7069             0x1FAE0,  // 1FAE0..1FAE8; COMMON
7070             0x1FAE9,  // 1FAE9..1FAEF; UNKNOWN
7071             0x1FAF0,  // 1FAF0..1FAF8; COMMON
7072             0x1FAF9,  // 1FAF9..1FAFF; UNKNOWN
7073             0x1FB00,  // 1FB00..1FB92; COMMON
7074             0x1FB93,  // 1FB93       ; UNKNOWN
7075             0x1FB94,  // 1FB94..1FBCA; COMMON
7076             0x1FBCB,  // 1FBCB..1FBEF; UNKNOWN
7077             0x1FBF0,  // 1FBF0..1FBF9; COMMON
7078             0x1FBFA,  // 1FBFA..1FFFF; UNKNOWN
7079             0x20000,  // 20000..2A6DF; HAN
7080             0x2A6E0,  // 2A6E0..2A6FF; UNKNOWN
7081             0x2A700,  // 2A700..2B739; HAN
7082             0x2B73A,  // 2B73A..2B73F; UNKNOWN
7083             0x2B740,  // 2B740..2B81D; HAN
7084             0x2B81E,  // 2B81E..2B81F; UNKNOWN
7085             0x2B820,  // 2B820..2CEA1; HAN
7086             0x2CEA2,  // 2CEA2..2CEAF; UNKNOWN
7087             0x2CEB0,  // 2CEB0..2EBE0; HAN
7088             0x2EBE1,  // 2EBE1..2F7FF; UNKNOWN
7089             0x2F800,  // 2F800..2FA1D; HAN
7090             0x2FA1E,  // 2FA1E..2FFFF; UNKNOWN
7091             0x30000,  // 30000..3134A; HAN
7092             0x3134B,  // 3134B..3134F; UNKNOWN
7093             0x31350,  // 31350..323AF; HAN
7094             0x323B0,  // 323B0..E0000; UNKNOWN
7095             0xE0001,  // E0001       ; COMMON
7096             0xE0002,  // E0002..E001F; UNKNOWN
7097             0xE0020,  // E0020..E007F; COMMON
7098             0xE0080,  // E0080..E00FF; UNKNOWN
7099             0xE0100,  // E0100..E01EF; INHERITED
7100             0xE01F0,  // E01F0..10FFFF; UNKNOWN
7101         };
7102 
7103         private static final UnicodeScript[] scripts = {
7104             COMMON,                   // 0000..0040
7105             LATIN,                    // 0041..005A
7106             COMMON,                   // 005B..0060
7107             LATIN,                    // 0061..007A
7108             COMMON,                   // 007B..00A9
7109             LATIN,                    // 00AA
7110             COMMON,                   // 00AB..00B9
7111             LATIN,                    // 00BA
7112             COMMON,                   // 00BB..00BF
7113             LATIN,                    // 00C0..00D6
7114             COMMON,                   // 00D7
7115             LATIN,                    // 00D8..00F6
7116             COMMON,                   // 00F7
7117             LATIN,                    // 00F8..02B8
7118             COMMON,                   // 02B9..02DF
7119             LATIN,                    // 02E0..02E4
7120             COMMON,                   // 02E5..02E9
7121             BOPOMOFO,                 // 02EA..02EB
7122             COMMON,                   // 02EC..02FF
7123             INHERITED,                // 0300..036F
7124             GREEK,                    // 0370..0373
7125             COMMON,                   // 0374
7126             GREEK,                    // 0375..0377
7127             UNKNOWN,                  // 0378..0379
7128             GREEK,                    // 037A..037D
7129             COMMON,                   // 037E
7130             GREEK,                    // 037F
7131             UNKNOWN,                  // 0380..0383
7132             GREEK,                    // 0384
7133             COMMON,                   // 0385
7134             GREEK,                    // 0386
7135             COMMON,                   // 0387
7136             GREEK,                    // 0388..038A
7137             UNKNOWN,                  // 038B
7138             GREEK,                    // 038C
7139             UNKNOWN,                  // 038D
7140             GREEK,                    // 038E..03A1
7141             UNKNOWN,                  // 03A2
7142             GREEK,                    // 03A3..03E1
7143             COPTIC,                   // 03E2..03EF
7144             GREEK,                    // 03F0..03FF
7145             CYRILLIC,                 // 0400..0484
7146             INHERITED,                // 0485..0486
7147             CYRILLIC,                 // 0487..052F
7148             UNKNOWN,                  // 0530
7149             ARMENIAN,                 // 0531..0556
7150             UNKNOWN,                  // 0557..0558
7151             ARMENIAN,                 // 0559..058A
7152             UNKNOWN,                  // 058B..058C
7153             ARMENIAN,                 // 058D..058F
7154             UNKNOWN,                  // 0590
7155             HEBREW,                   // 0591..05C7
7156             UNKNOWN,                  // 05C8..05CF
7157             HEBREW,                   // 05D0..05EA
7158             UNKNOWN,                  // 05EB..05EE
7159             HEBREW,                   // 05EF..05F4
7160             UNKNOWN,                  // 05F5..05FF
7161             ARABIC,                   // 0600..0604
7162             COMMON,                   // 0605
7163             ARABIC,                   // 0606..060B
7164             COMMON,                   // 060C
7165             ARABIC,                   // 060D..061A
7166             COMMON,                   // 061B
7167             ARABIC,                   // 061C..061E
7168             COMMON,                   // 061F
7169             ARABIC,                   // 0620..063F
7170             COMMON,                   // 0640
7171             ARABIC,                   // 0641..064A
7172             INHERITED,                // 064B..0655
7173             ARABIC,                   // 0656..066F
7174             INHERITED,                // 0670
7175             ARABIC,                   // 0671..06DC
7176             COMMON,                   // 06DD
7177             ARABIC,                   // 06DE..06FF
7178             SYRIAC,                   // 0700..070D
7179             UNKNOWN,                  // 070E
7180             SYRIAC,                   // 070F..074A
7181             UNKNOWN,                  // 074B..074C
7182             SYRIAC,                   // 074D..074F
7183             ARABIC,                   // 0750..077F
7184             THAANA,                   // 0780..07B1
7185             UNKNOWN,                  // 07B2..07BF
7186             NKO,                      // 07C0..07FA
7187             UNKNOWN,                  // 07FB..07FC
7188             NKO,                      // 07FD..07FF
7189             SAMARITAN,                // 0800..082D
7190             UNKNOWN,                  // 082E..082F
7191             SAMARITAN,                // 0830..083E
7192             UNKNOWN,                  // 083F
7193             MANDAIC,                  // 0840..085B
7194             UNKNOWN,                  // 085C..085D
7195             MANDAIC,                  // 085E
7196             UNKNOWN,                  // 085F
7197             SYRIAC,                   // 0860..086A
7198             UNKNOWN,                  // 086B..086F
7199             ARABIC,                   // 0870..088E
7200             UNKNOWN,                  // 088F
7201             ARABIC,                   // 0890..0891
7202             UNKNOWN,                  // 0892..0897
7203             ARABIC,                   // 0898..08E1
7204             COMMON,                   // 08E2
7205             ARABIC,                   // 08E3..08FF
7206             DEVANAGARI,               // 0900..0950
7207             INHERITED,                // 0951..0954
7208             DEVANAGARI,               // 0955..0963
7209             COMMON,                   // 0964..0965
7210             DEVANAGARI,               // 0966..097F
7211             BENGALI,                  // 0980..0983
7212             UNKNOWN,                  // 0984
7213             BENGALI,                  // 0985..098C
7214             UNKNOWN,                  // 098D..098E
7215             BENGALI,                  // 098F..0990
7216             UNKNOWN,                  // 0991..0992
7217             BENGALI,                  // 0993..09A8
7218             UNKNOWN,                  // 09A9
7219             BENGALI,                  // 09AA..09B0
7220             UNKNOWN,                  // 09B1
7221             BENGALI,                  // 09B2
7222             UNKNOWN,                  // 09B3..09B5
7223             BENGALI,                  // 09B6..09B9
7224             UNKNOWN,                  // 09BA..09BB
7225             BENGALI,                  // 09BC..09C4
7226             UNKNOWN,                  // 09C5..09C6
7227             BENGALI,                  // 09C7..09C8
7228             UNKNOWN,                  // 09C9..09CA
7229             BENGALI,                  // 09CB..09CE
7230             UNKNOWN,                  // 09CF..09D6
7231             BENGALI,                  // 09D7
7232             UNKNOWN,                  // 09D8..09DB
7233             BENGALI,                  // 09DC..09DD
7234             UNKNOWN,                  // 09DE
7235             BENGALI,                  // 09DF..09E3
7236             UNKNOWN,                  // 09E4..09E5
7237             BENGALI,                  // 09E6..09FE
7238             UNKNOWN,                  // 09FF..0A00
7239             GURMUKHI,                 // 0A01..0A03
7240             UNKNOWN,                  // 0A04
7241             GURMUKHI,                 // 0A05..0A0A
7242             UNKNOWN,                  // 0A0B..0A0E
7243             GURMUKHI,                 // 0A0F..0A10
7244             UNKNOWN,                  // 0A11..0A12
7245             GURMUKHI,                 // 0A13..0A28
7246             UNKNOWN,                  // 0A29
7247             GURMUKHI,                 // 0A2A..0A30
7248             UNKNOWN,                  // 0A31
7249             GURMUKHI,                 // 0A32..0A33
7250             UNKNOWN,                  // 0A34
7251             GURMUKHI,                 // 0A35..0A36
7252             UNKNOWN,                  // 0A37
7253             GURMUKHI,                 // 0A38..0A39
7254             UNKNOWN,                  // 0A3A..0A3B
7255             GURMUKHI,                 // 0A3C
7256             UNKNOWN,                  // 0A3D
7257             GURMUKHI,                 // 0A3E..0A42
7258             UNKNOWN,                  // 0A43..0A46
7259             GURMUKHI,                 // 0A47..0A48
7260             UNKNOWN,                  // 0A49..0A4A
7261             GURMUKHI,                 // 0A4B..0A4D
7262             UNKNOWN,                  // 0A4E..0A50
7263             GURMUKHI,                 // 0A51
7264             UNKNOWN,                  // 0A52..0A58
7265             GURMUKHI,                 // 0A59..0A5C
7266             UNKNOWN,                  // 0A5D
7267             GURMUKHI,                 // 0A5E
7268             UNKNOWN,                  // 0A5F..0A65
7269             GURMUKHI,                 // 0A66..0A76
7270             UNKNOWN,                  // 0A77..0A80
7271             GUJARATI,                 // 0A81..0A83
7272             UNKNOWN,                  // 0A84
7273             GUJARATI,                 // 0A85..0A8D
7274             UNKNOWN,                  // 0A8E
7275             GUJARATI,                 // 0A8F..0A91
7276             UNKNOWN,                  // 0A92
7277             GUJARATI,                 // 0A93..0AA8
7278             UNKNOWN,                  // 0AA9
7279             GUJARATI,                 // 0AAA..0AB0
7280             UNKNOWN,                  // 0AB1
7281             GUJARATI,                 // 0AB2..0AB3
7282             UNKNOWN,                  // 0AB4
7283             GUJARATI,                 // 0AB5..0AB9
7284             UNKNOWN,                  // 0ABA..0ABB
7285             GUJARATI,                 // 0ABC..0AC5
7286             UNKNOWN,                  // 0AC6
7287             GUJARATI,                 // 0AC7..0AC9
7288             UNKNOWN,                  // 0ACA
7289             GUJARATI,                 // 0ACB..0ACD
7290             UNKNOWN,                  // 0ACE..0ACF
7291             GUJARATI,                 // 0AD0
7292             UNKNOWN,                  // 0AD1..0ADF
7293             GUJARATI,                 // 0AE0..0AE3
7294             UNKNOWN,                  // 0AE4..0AE5
7295             GUJARATI,                 // 0AE6..0AF1
7296             UNKNOWN,                  // 0AF2..0AF8
7297             GUJARATI,                 // 0AF9..0AFF
7298             UNKNOWN,                  // 0B00
7299             ORIYA,                    // 0B01..0B03
7300             UNKNOWN,                  // 0B04
7301             ORIYA,                    // 0B05..0B0C
7302             UNKNOWN,                  // 0B0D..0B0E
7303             ORIYA,                    // 0B0F..0B10
7304             UNKNOWN,                  // 0B11..0B12
7305             ORIYA,                    // 0B13..0B28
7306             UNKNOWN,                  // 0B29
7307             ORIYA,                    // 0B2A..0B30
7308             UNKNOWN,                  // 0B31
7309             ORIYA,                    // 0B32..0B33
7310             UNKNOWN,                  // 0B34
7311             ORIYA,                    // 0B35..0B39
7312             UNKNOWN,                  // 0B3A..0B3B
7313             ORIYA,                    // 0B3C..0B44
7314             UNKNOWN,                  // 0B45..0B46
7315             ORIYA,                    // 0B47..0B48
7316             UNKNOWN,                  // 0B49..0B4A
7317             ORIYA,                    // 0B4B..0B4D
7318             UNKNOWN,                  // 0B4E..0B54
7319             ORIYA,                    // 0B55..0B57
7320             UNKNOWN,                  // 0B58..0B5B
7321             ORIYA,                    // 0B5C..0B5D
7322             UNKNOWN,                  // 0B5E
7323             ORIYA,                    // 0B5F..0B63
7324             UNKNOWN,                  // 0B64..0B65
7325             ORIYA,                    // 0B66..0B77
7326             UNKNOWN,                  // 0B78..0B81
7327             TAMIL,                    // 0B82..0B83
7328             UNKNOWN,                  // 0B84
7329             TAMIL,                    // 0B85..0B8A
7330             UNKNOWN,                  // 0B8B..0B8D
7331             TAMIL,                    // 0B8E..0B90
7332             UNKNOWN,                  // 0B91
7333             TAMIL,                    // 0B92..0B95
7334             UNKNOWN,                  // 0B96..0B98
7335             TAMIL,                    // 0B99..0B9A
7336             UNKNOWN,                  // 0B9B
7337             TAMIL,                    // 0B9C
7338             UNKNOWN,                  // 0B9D
7339             TAMIL,                    // 0B9E..0B9F
7340             UNKNOWN,                  // 0BA0..0BA2
7341             TAMIL,                    // 0BA3..0BA4
7342             UNKNOWN,                  // 0BA5..0BA7
7343             TAMIL,                    // 0BA8..0BAA
7344             UNKNOWN,                  // 0BAB..0BAD
7345             TAMIL,                    // 0BAE..0BB9
7346             UNKNOWN,                  // 0BBA..0BBD
7347             TAMIL,                    // 0BBE..0BC2
7348             UNKNOWN,                  // 0BC3..0BC5
7349             TAMIL,                    // 0BC6..0BC8
7350             UNKNOWN,                  // 0BC9
7351             TAMIL,                    // 0BCA..0BCD
7352             UNKNOWN,                  // 0BCE..0BCF
7353             TAMIL,                    // 0BD0
7354             UNKNOWN,                  // 0BD1..0BD6
7355             TAMIL,                    // 0BD7
7356             UNKNOWN,                  // 0BD8..0BE5
7357             TAMIL,                    // 0BE6..0BFA
7358             UNKNOWN,                  // 0BFB..0BFF
7359             TELUGU,                   // 0C00..0C0C
7360             UNKNOWN,                  // 0C0D
7361             TELUGU,                   // 0C0E..0C10
7362             UNKNOWN,                  // 0C11
7363             TELUGU,                   // 0C12..0C28
7364             UNKNOWN,                  // 0C29
7365             TELUGU,                   // 0C2A..0C39
7366             UNKNOWN,                  // 0C3A..0C3B
7367             TELUGU,                   // 0C3C..0C44
7368             UNKNOWN,                  // 0C45
7369             TELUGU,                   // 0C46..0C48
7370             UNKNOWN,                  // 0C49
7371             TELUGU,                   // 0C4A..0C4D
7372             UNKNOWN,                  // 0C4E..0C54
7373             TELUGU,                   // 0C55..0C56
7374             UNKNOWN,                  // 0C57
7375             TELUGU,                   // 0C58..0C5A
7376             UNKNOWN,                  // 0C5B..0C5C
7377             TELUGU,                   // 0C5D
7378             UNKNOWN,                  // 0C5E..0C5F
7379             TELUGU,                   // 0C60..0C63
7380             UNKNOWN,                  // 0C64..0C65
7381             TELUGU,                   // 0C66..0C6F
7382             UNKNOWN,                  // 0C70..0C76
7383             TELUGU,                   // 0C77..0C7F
7384             KANNADA,                  // 0C80..0C8C
7385             UNKNOWN,                  // 0C8D
7386             KANNADA,                  // 0C8E..0C90
7387             UNKNOWN,                  // 0C91
7388             KANNADA,                  // 0C92..0CA8
7389             UNKNOWN,                  // 0CA9
7390             KANNADA,                  // 0CAA..0CB3
7391             UNKNOWN,                  // 0CB4
7392             KANNADA,                  // 0CB5..0CB9
7393             UNKNOWN,                  // 0CBA..0CBB
7394             KANNADA,                  // 0CBC..0CC4
7395             UNKNOWN,                  // 0CC5
7396             KANNADA,                  // 0CC6..0CC8
7397             UNKNOWN,                  // 0CC9
7398             KANNADA,                  // 0CCA..0CCD
7399             UNKNOWN,                  // 0CCE..0CD4
7400             KANNADA,                  // 0CD5..0CD6
7401             UNKNOWN,                  // 0CD7..0CDC
7402             KANNADA,                  // 0CDD..0CDE
7403             UNKNOWN,                  // 0CDF
7404             KANNADA,                  // 0CE0..0CE3
7405             UNKNOWN,                  // 0CE4..0CE5
7406             KANNADA,                  // 0CE6..0CEF
7407             UNKNOWN,                  // 0CF0
7408             KANNADA,                  // 0CF1..0CF3
7409             UNKNOWN,                  // 0CF4..0CFF
7410             MALAYALAM,                // 0D00..0D0C
7411             UNKNOWN,                  // 0D0D
7412             MALAYALAM,                // 0D0E..0D10
7413             UNKNOWN,                  // 0D11
7414             MALAYALAM,                // 0D12..0D44
7415             UNKNOWN,                  // 0D45
7416             MALAYALAM,                // 0D46..0D48
7417             UNKNOWN,                  // 0D49
7418             MALAYALAM,                // 0D4A..0D4F
7419             UNKNOWN,                  // 0D50..0D53
7420             MALAYALAM,                // 0D54..0D63
7421             UNKNOWN,                  // 0D64..0D65
7422             MALAYALAM,                // 0D66..0D7F
7423             UNKNOWN,                  // 0D80
7424             SINHALA,                  // 0D81..0D83
7425             UNKNOWN,                  // 0D84
7426             SINHALA,                  // 0D85..0D96
7427             UNKNOWN,                  // 0D97..0D99
7428             SINHALA,                  // 0D9A..0DB1
7429             UNKNOWN,                  // 0DB2
7430             SINHALA,                  // 0DB3..0DBB
7431             UNKNOWN,                  // 0DBC
7432             SINHALA,                  // 0DBD
7433             UNKNOWN,                  // 0DBE..0DBF
7434             SINHALA,                  // 0DC0..0DC6
7435             UNKNOWN,                  // 0DC7..0DC9
7436             SINHALA,                  // 0DCA
7437             UNKNOWN,                  // 0DCB..0DCE
7438             SINHALA,                  // 0DCF..0DD4
7439             UNKNOWN,                  // 0DD5
7440             SINHALA,                  // 0DD6
7441             UNKNOWN,                  // 0DD7
7442             SINHALA,                  // 0DD8..0DDF
7443             UNKNOWN,                  // 0DE0..0DE5
7444             SINHALA,                  // 0DE6..0DEF
7445             UNKNOWN,                  // 0DF0..0DF1
7446             SINHALA,                  // 0DF2..0DF4
7447             UNKNOWN,                  // 0DF5..0E00
7448             THAI,                     // 0E01..0E3A
7449             UNKNOWN,                  // 0E3B..0E3E
7450             COMMON,                   // 0E3F
7451             THAI,                     // 0E40..0E5B
7452             UNKNOWN,                  // 0E5C..0E80
7453             LAO,                      // 0E81..0E82
7454             UNKNOWN,                  // 0E83
7455             LAO,                      // 0E84
7456             UNKNOWN,                  // 0E85
7457             LAO,                      // 0E86..0E8A
7458             UNKNOWN,                  // 0E8B
7459             LAO,                      // 0E8C..0EA3
7460             UNKNOWN,                  // 0EA4
7461             LAO,                      // 0EA5
7462             UNKNOWN,                  // 0EA6
7463             LAO,                      // 0EA7..0EBD
7464             UNKNOWN,                  // 0EBE..0EBF
7465             LAO,                      // 0EC0..0EC4
7466             UNKNOWN,                  // 0EC5
7467             LAO,                      // 0EC6
7468             UNKNOWN,                  // 0EC7
7469             LAO,                      // 0EC8..0ECE
7470             UNKNOWN,                  // 0ECF
7471             LAO,                      // 0ED0..0ED9
7472             UNKNOWN,                  // 0EDA..0EDB
7473             LAO,                      // 0EDC..0EDF
7474             UNKNOWN,                  // 0EE0..0EFF
7475             TIBETAN,                  // 0F00..0F47
7476             UNKNOWN,                  // 0F48
7477             TIBETAN,                  // 0F49..0F6C
7478             UNKNOWN,                  // 0F6D..0F70
7479             TIBETAN,                  // 0F71..0F97
7480             UNKNOWN,                  // 0F98
7481             TIBETAN,                  // 0F99..0FBC
7482             UNKNOWN,                  // 0FBD
7483             TIBETAN,                  // 0FBE..0FCC
7484             UNKNOWN,                  // 0FCD
7485             TIBETAN,                  // 0FCE..0FD4
7486             COMMON,                   // 0FD5..0FD8
7487             TIBETAN,                  // 0FD9..0FDA
7488             UNKNOWN,                  // 0FDB..0FFF
7489             MYANMAR,                  // 1000..109F
7490             GEORGIAN,                 // 10A0..10C5
7491             UNKNOWN,                  // 10C6
7492             GEORGIAN,                 // 10C7
7493             UNKNOWN,                  // 10C8..10CC
7494             GEORGIAN,                 // 10CD
7495             UNKNOWN,                  // 10CE..10CF
7496             GEORGIAN,                 // 10D0..10FA
7497             COMMON,                   // 10FB
7498             GEORGIAN,                 // 10FC..10FF
7499             HANGUL,                   // 1100..11FF
7500             ETHIOPIC,                 // 1200..1248
7501             UNKNOWN,                  // 1249
7502             ETHIOPIC,                 // 124A..124D
7503             UNKNOWN,                  // 124E..124F
7504             ETHIOPIC,                 // 1250..1256
7505             UNKNOWN,                  // 1257
7506             ETHIOPIC,                 // 1258
7507             UNKNOWN,                  // 1259
7508             ETHIOPIC,                 // 125A..125D
7509             UNKNOWN,                  // 125E..125F
7510             ETHIOPIC,                 // 1260..1288
7511             UNKNOWN,                  // 1289
7512             ETHIOPIC,                 // 128A..128D
7513             UNKNOWN,                  // 128E..128F
7514             ETHIOPIC,                 // 1290..12B0
7515             UNKNOWN,                  // 12B1
7516             ETHIOPIC,                 // 12B2..12B5
7517             UNKNOWN,                  // 12B6..12B7
7518             ETHIOPIC,                 // 12B8..12BE
7519             UNKNOWN,                  // 12BF
7520             ETHIOPIC,                 // 12C0
7521             UNKNOWN,                  // 12C1
7522             ETHIOPIC,                 // 12C2..12C5
7523             UNKNOWN,                  // 12C6..12C7
7524             ETHIOPIC,                 // 12C8..12D6
7525             UNKNOWN,                  // 12D7
7526             ETHIOPIC,                 // 12D8..1310
7527             UNKNOWN,                  // 1311
7528             ETHIOPIC,                 // 1312..1315
7529             UNKNOWN,                  // 1316..1317
7530             ETHIOPIC,                 // 1318..135A
7531             UNKNOWN,                  // 135B..135C
7532             ETHIOPIC,                 // 135D..137C
7533             UNKNOWN,                  // 137D..137F
7534             ETHIOPIC,                 // 1380..1399
7535             UNKNOWN,                  // 139A..139F
7536             CHEROKEE,                 // 13A0..13F5
7537             UNKNOWN,                  // 13F6..13F7
7538             CHEROKEE,                 // 13F8..13FD
7539             UNKNOWN,                  // 13FE..13FF
7540             CANADIAN_ABORIGINAL,      // 1400..167F
7541             OGHAM,                    // 1680..169C
7542             UNKNOWN,                  // 169D..169F
7543             RUNIC,                    // 16A0..16EA
7544             COMMON,                   // 16EB..16ED
7545             RUNIC,                    // 16EE..16F8
7546             UNKNOWN,                  // 16F9..16FF
7547             TAGALOG,                  // 1700..1715
7548             UNKNOWN,                  // 1716..171E
7549             TAGALOG,                  // 171F
7550             HANUNOO,                  // 1720..1734
7551             COMMON,                   // 1735..1736
7552             UNKNOWN,                  // 1737..173F
7553             BUHID,                    // 1740..1753
7554             UNKNOWN,                  // 1754..175F
7555             TAGBANWA,                 // 1760..176C
7556             UNKNOWN,                  // 176D
7557             TAGBANWA,                 // 176E..1770
7558             UNKNOWN,                  // 1771
7559             TAGBANWA,                 // 1772..1773
7560             UNKNOWN,                  // 1774..177F
7561             KHMER,                    // 1780..17DD
7562             UNKNOWN,                  // 17DE..17DF
7563             KHMER,                    // 17E0..17E9
7564             UNKNOWN,                  // 17EA..17EF
7565             KHMER,                    // 17F0..17F9
7566             UNKNOWN,                  // 17FA..17FF
7567             MONGOLIAN,                // 1800..1801
7568             COMMON,                   // 1802..1803
7569             MONGOLIAN,                // 1804
7570             COMMON,                   // 1805
7571             MONGOLIAN,                // 1806..1819
7572             UNKNOWN,                  // 181A..181F
7573             MONGOLIAN,                // 1820..1878
7574             UNKNOWN,                  // 1879..187F
7575             MONGOLIAN,                // 1880..18AA
7576             UNKNOWN,                  // 18AB..18AF
7577             CANADIAN_ABORIGINAL,      // 18B0..18F5
7578             UNKNOWN,                  // 18F6..18FF
7579             LIMBU,                    // 1900..191E
7580             UNKNOWN,                  // 191F
7581             LIMBU,                    // 1920..192B
7582             UNKNOWN,                  // 192C..192F
7583             LIMBU,                    // 1930..193B
7584             UNKNOWN,                  // 193C..193F
7585             LIMBU,                    // 1940
7586             UNKNOWN,                  // 1941..1943
7587             LIMBU,                    // 1944..194F
7588             TAI_LE,                   // 1950..196D
7589             UNKNOWN,                  // 196E..196F
7590             TAI_LE,                   // 1970..1974
7591             UNKNOWN,                  // 1975..197F
7592             NEW_TAI_LUE,              // 1980..19AB
7593             UNKNOWN,                  // 19AC..19AF
7594             NEW_TAI_LUE,              // 19B0..19C9
7595             UNKNOWN,                  // 19CA..19CF
7596             NEW_TAI_LUE,              // 19D0..19DA
7597             UNKNOWN,                  // 19DB..19DD
7598             NEW_TAI_LUE,              // 19DE..19DF
7599             KHMER,                    // 19E0..19FF
7600             BUGINESE,                 // 1A00..1A1B
7601             UNKNOWN,                  // 1A1C..1A1D
7602             BUGINESE,                 // 1A1E..1A1F
7603             TAI_THAM,                 // 1A20..1A5E
7604             UNKNOWN,                  // 1A5F
7605             TAI_THAM,                 // 1A60..1A7C
7606             UNKNOWN,                  // 1A7D..1A7E
7607             TAI_THAM,                 // 1A7F..1A89
7608             UNKNOWN,                  // 1A8A..1A8F
7609             TAI_THAM,                 // 1A90..1A99
7610             UNKNOWN,                  // 1A9A..1A9F
7611             TAI_THAM,                 // 1AA0..1AAD
7612             UNKNOWN,                  // 1AAE..1AAF
7613             INHERITED,                // 1AB0..1ACE
7614             UNKNOWN,                  // 1ACF..1AFF
7615             BALINESE,                 // 1B00..1B4C
7616             UNKNOWN,                  // 1B4D..1B4F
7617             BALINESE,                 // 1B50..1B7E
7618             UNKNOWN,                  // 1B7F
7619             SUNDANESE,                // 1B80..1BBF
7620             BATAK,                    // 1BC0..1BF3
7621             UNKNOWN,                  // 1BF4..1BFB
7622             BATAK,                    // 1BFC..1BFF
7623             LEPCHA,                   // 1C00..1C37
7624             UNKNOWN,                  // 1C38..1C3A
7625             LEPCHA,                   // 1C3B..1C49
7626             UNKNOWN,                  // 1C4A..1C4C
7627             LEPCHA,                   // 1C4D..1C4F
7628             OL_CHIKI,                 // 1C50..1C7F
7629             CYRILLIC,                 // 1C80..1C88
7630             UNKNOWN,                  // 1C89..1C8F
7631             GEORGIAN,                 // 1C90..1CBA
7632             UNKNOWN,                  // 1CBB..1CBC
7633             GEORGIAN,                 // 1CBD..1CBF
7634             SUNDANESE,                // 1CC0..1CC7
7635             UNKNOWN,                  // 1CC8..1CCF
7636             INHERITED,                // 1CD0..1CD2
7637             COMMON,                   // 1CD3
7638             INHERITED,                // 1CD4..1CE0
7639             COMMON,                   // 1CE1
7640             INHERITED,                // 1CE2..1CE8
7641             COMMON,                   // 1CE9..1CEC
7642             INHERITED,                // 1CED
7643             COMMON,                   // 1CEE..1CF3
7644             INHERITED,                // 1CF4
7645             COMMON,                   // 1CF5..1CF7
7646             INHERITED,                // 1CF8..1CF9
7647             COMMON,                   // 1CFA
7648             UNKNOWN,                  // 1CFB..1CFF
7649             LATIN,                    // 1D00..1D25
7650             GREEK,                    // 1D26..1D2A
7651             CYRILLIC,                 // 1D2B
7652             LATIN,                    // 1D2C..1D5C
7653             GREEK,                    // 1D5D..1D61
7654             LATIN,                    // 1D62..1D65
7655             GREEK,                    // 1D66..1D6A
7656             LATIN,                    // 1D6B..1D77
7657             CYRILLIC,                 // 1D78
7658             LATIN,                    // 1D79..1DBE
7659             GREEK,                    // 1DBF
7660             INHERITED,                // 1DC0..1DFF
7661             LATIN,                    // 1E00..1EFF
7662             GREEK,                    // 1F00..1F15
7663             UNKNOWN,                  // 1F16..1F17
7664             GREEK,                    // 1F18..1F1D
7665             UNKNOWN,                  // 1F1E..1F1F
7666             GREEK,                    // 1F20..1F45
7667             UNKNOWN,                  // 1F46..1F47
7668             GREEK,                    // 1F48..1F4D
7669             UNKNOWN,                  // 1F4E..1F4F
7670             GREEK,                    // 1F50..1F57
7671             UNKNOWN,                  // 1F58
7672             GREEK,                    // 1F59
7673             UNKNOWN,                  // 1F5A
7674             GREEK,                    // 1F5B
7675             UNKNOWN,                  // 1F5C
7676             GREEK,                    // 1F5D
7677             UNKNOWN,                  // 1F5E
7678             GREEK,                    // 1F5F..1F7D
7679             UNKNOWN,                  // 1F7E..1F7F
7680             GREEK,                    // 1F80..1FB4
7681             UNKNOWN,                  // 1FB5
7682             GREEK,                    // 1FB6..1FC4
7683             UNKNOWN,                  // 1FC5
7684             GREEK,                    // 1FC6..1FD3
7685             UNKNOWN,                  // 1FD4..1FD5
7686             GREEK,                    // 1FD6..1FDB
7687             UNKNOWN,                  // 1FDC
7688             GREEK,                    // 1FDD..1FEF
7689             UNKNOWN,                  // 1FF0..1FF1
7690             GREEK,                    // 1FF2..1FF4
7691             UNKNOWN,                  // 1FF5
7692             GREEK,                    // 1FF6..1FFE
7693             UNKNOWN,                  // 1FFF
7694             COMMON,                   // 2000..200B
7695             INHERITED,                // 200C..200D
7696             COMMON,                   // 200E..2064
7697             UNKNOWN,                  // 2065
7698             COMMON,                   // 2066..2070
7699             LATIN,                    // 2071
7700             UNKNOWN,                  // 2072..2073
7701             COMMON,                   // 2074..207E
7702             LATIN,                    // 207F
7703             COMMON,                   // 2080..208E
7704             UNKNOWN,                  // 208F
7705             LATIN,                    // 2090..209C
7706             UNKNOWN,                  // 209D..209F
7707             COMMON,                   // 20A0..20C0
7708             UNKNOWN,                  // 20C1..20CF
7709             INHERITED,                // 20D0..20F0
7710             UNKNOWN,                  // 20F1..20FF
7711             COMMON,                   // 2100..2125
7712             GREEK,                    // 2126
7713             COMMON,                   // 2127..2129
7714             LATIN,                    // 212A..212B
7715             COMMON,                   // 212C..2131
7716             LATIN,                    // 2132
7717             COMMON,                   // 2133..214D
7718             LATIN,                    // 214E
7719             COMMON,                   // 214F..215F
7720             LATIN,                    // 2160..2188
7721             COMMON,                   // 2189..218B
7722             UNKNOWN,                  // 218C..218F
7723             COMMON,                   // 2190..2426
7724             UNKNOWN,                  // 2427..243F
7725             COMMON,                   // 2440..244A
7726             UNKNOWN,                  // 244B..245F
7727             COMMON,                   // 2460..27FF
7728             BRAILLE,                  // 2800..28FF
7729             COMMON,                   // 2900..2B73
7730             UNKNOWN,                  // 2B74..2B75
7731             COMMON,                   // 2B76..2B95
7732             UNKNOWN,                  // 2B96
7733             COMMON,                   // 2B97..2BFF
7734             GLAGOLITIC,               // 2C00..2C5F
7735             LATIN,                    // 2C60..2C7F
7736             COPTIC,                   // 2C80..2CF3
7737             UNKNOWN,                  // 2CF4..2CF8
7738             COPTIC,                   // 2CF9..2CFF
7739             GEORGIAN,                 // 2D00..2D25
7740             UNKNOWN,                  // 2D26
7741             GEORGIAN,                 // 2D27
7742             UNKNOWN,                  // 2D28..2D2C
7743             GEORGIAN,                 // 2D2D
7744             UNKNOWN,                  // 2D2E..2D2F
7745             TIFINAGH,                 // 2D30..2D67
7746             UNKNOWN,                  // 2D68..2D6E
7747             TIFINAGH,                 // 2D6F..2D70
7748             UNKNOWN,                  // 2D71..2D7E
7749             TIFINAGH,                 // 2D7F
7750             ETHIOPIC,                 // 2D80..2D96
7751             UNKNOWN,                  // 2D97..2D9F
7752             ETHIOPIC,                 // 2DA0..2DA6
7753             UNKNOWN,                  // 2DA7
7754             ETHIOPIC,                 // 2DA8..2DAE
7755             UNKNOWN,                  // 2DAF
7756             ETHIOPIC,                 // 2DB0..2DB6
7757             UNKNOWN,                  // 2DB7
7758             ETHIOPIC,                 // 2DB8..2DBE
7759             UNKNOWN,                  // 2DBF
7760             ETHIOPIC,                 // 2DC0..2DC6
7761             UNKNOWN,                  // 2DC7
7762             ETHIOPIC,                 // 2DC8..2DCE
7763             UNKNOWN,                  // 2DCF
7764             ETHIOPIC,                 // 2DD0..2DD6
7765             UNKNOWN,                  // 2DD7
7766             ETHIOPIC,                 // 2DD8..2DDE
7767             UNKNOWN,                  // 2DDF
7768             CYRILLIC,                 // 2DE0..2DFF
7769             COMMON,                   // 2E00..2E5D
7770             UNKNOWN,                  // 2E5E..2E7F
7771             HAN,                      // 2E80..2E99
7772             UNKNOWN,                  // 2E9A
7773             HAN,                      // 2E9B..2EF3
7774             UNKNOWN,                  // 2EF4..2EFF
7775             HAN,                      // 2F00..2FD5
7776             UNKNOWN,                  // 2FD6..2FEF
7777             COMMON,                   // 2FF0..2FFB
7778             UNKNOWN,                  // 2FFC..2FFF
7779             COMMON,                   // 3000..3004
7780             HAN,                      // 3005
7781             COMMON,                   // 3006
7782             HAN,                      // 3007
7783             COMMON,                   // 3008..3020
7784             HAN,                      // 3021..3029
7785             INHERITED,                // 302A..302D
7786             HANGUL,                   // 302E..302F
7787             COMMON,                   // 3030..3037
7788             HAN,                      // 3038..303B
7789             COMMON,                   // 303C..303F
7790             UNKNOWN,                  // 3040
7791             HIRAGANA,                 // 3041..3096
7792             UNKNOWN,                  // 3097..3098
7793             INHERITED,                // 3099..309A
7794             COMMON,                   // 309B..309C
7795             HIRAGANA,                 // 309D..309F
7796             COMMON,                   // 30A0
7797             KATAKANA,                 // 30A1..30FA
7798             COMMON,                   // 30FB..30FC
7799             KATAKANA,                 // 30FD..30FF
7800             UNKNOWN,                  // 3100..3104
7801             BOPOMOFO,                 // 3105..312F
7802             UNKNOWN,                  // 3130
7803             HANGUL,                   // 3131..318E
7804             UNKNOWN,                  // 318F
7805             COMMON,                   // 3190..319F
7806             BOPOMOFO,                 // 31A0..31BF
7807             COMMON,                   // 31C0..31E3
7808             UNKNOWN,                  // 31E4..31EF
7809             KATAKANA,                 // 31F0..31FF
7810             HANGUL,                   // 3200..321E
7811             UNKNOWN,                  // 321F
7812             COMMON,                   // 3220..325F
7813             HANGUL,                   // 3260..327E
7814             COMMON,                   // 327F..32CF
7815             KATAKANA,                 // 32D0..32FE
7816             COMMON,                   // 32FF
7817             KATAKANA,                 // 3300..3357
7818             COMMON,                   // 3358..33FF
7819             HAN,                      // 3400..4DBF
7820             COMMON,                   // 4DC0..4DFF
7821             HAN,                      // 4E00..9FFF
7822             YI,                       // A000..A48C
7823             UNKNOWN,                  // A48D..A48F
7824             YI,                       // A490..A4C6
7825             UNKNOWN,                  // A4C7..A4CF
7826             LISU,                     // A4D0..A4FF
7827             VAI,                      // A500..A62B
7828             UNKNOWN,                  // A62C..A63F
7829             CYRILLIC,                 // A640..A69F
7830             BAMUM,                    // A6A0..A6F7
7831             UNKNOWN,                  // A6F8..A6FF
7832             COMMON,                   // A700..A721
7833             LATIN,                    // A722..A787
7834             COMMON,                   // A788..A78A
7835             LATIN,                    // A78B..A7CA
7836             UNKNOWN,                  // A7CB..A7CF
7837             LATIN,                    // A7D0..A7D1
7838             UNKNOWN,                  // A7D2
7839             LATIN,                    // A7D3
7840             UNKNOWN,                  // A7D4
7841             LATIN,                    // A7D5..A7D9
7842             UNKNOWN,                  // A7DA..A7F1
7843             LATIN,                    // A7F2..A7FF
7844             SYLOTI_NAGRI,             // A800..A82C
7845             UNKNOWN,                  // A82D..A82F
7846             COMMON,                   // A830..A839
7847             UNKNOWN,                  // A83A..A83F
7848             PHAGS_PA,                 // A840..A877
7849             UNKNOWN,                  // A878..A87F
7850             SAURASHTRA,               // A880..A8C5
7851             UNKNOWN,                  // A8C6..A8CD
7852             SAURASHTRA,               // A8CE..A8D9
7853             UNKNOWN,                  // A8DA..A8DF
7854             DEVANAGARI,               // A8E0..A8FF
7855             KAYAH_LI,                 // A900..A92D
7856             COMMON,                   // A92E
7857             KAYAH_LI,                 // A92F
7858             REJANG,                   // A930..A953
7859             UNKNOWN,                  // A954..A95E
7860             REJANG,                   // A95F
7861             HANGUL,                   // A960..A97C
7862             UNKNOWN,                  // A97D..A97F
7863             JAVANESE,                 // A980..A9CD
7864             UNKNOWN,                  // A9CE
7865             COMMON,                   // A9CF
7866             JAVANESE,                 // A9D0..A9D9
7867             UNKNOWN,                  // A9DA..A9DD
7868             JAVANESE,                 // A9DE..A9DF
7869             MYANMAR,                  // A9E0..A9FE
7870             UNKNOWN,                  // A9FF
7871             CHAM,                     // AA00..AA36
7872             UNKNOWN,                  // AA37..AA3F
7873             CHAM,                     // AA40..AA4D
7874             UNKNOWN,                  // AA4E..AA4F
7875             CHAM,                     // AA50..AA59
7876             UNKNOWN,                  // AA5A..AA5B
7877             CHAM,                     // AA5C..AA5F
7878             MYANMAR,                  // AA60..AA7F
7879             TAI_VIET,                 // AA80..AAC2
7880             UNKNOWN,                  // AAC3..AADA
7881             TAI_VIET,                 // AADB..AADF
7882             MEETEI_MAYEK,             // AAE0..AAF6
7883             UNKNOWN,                  // AAF7..AB00
7884             ETHIOPIC,                 // AB01..AB06
7885             UNKNOWN,                  // AB07..AB08
7886             ETHIOPIC,                 // AB09..AB0E
7887             UNKNOWN,                  // AB0F..AB10
7888             ETHIOPIC,                 // AB11..AB16
7889             UNKNOWN,                  // AB17..AB1F
7890             ETHIOPIC,                 // AB20..AB26
7891             UNKNOWN,                  // AB27
7892             ETHIOPIC,                 // AB28..AB2E
7893             UNKNOWN,                  // AB2F
7894             LATIN,                    // AB30..AB5A
7895             COMMON,                   // AB5B
7896             LATIN,                    // AB5C..AB64
7897             GREEK,                    // AB65
7898             LATIN,                    // AB66..AB69
7899             COMMON,                   // AB6A..AB6B
7900             UNKNOWN,                  // AB6C..AB6F
7901             CHEROKEE,                 // AB70..ABBF
7902             MEETEI_MAYEK,             // ABC0..ABED
7903             UNKNOWN,                  // ABEE..ABEF
7904             MEETEI_MAYEK,             // ABF0..ABF9
7905             UNKNOWN,                  // ABFA..ABFF
7906             HANGUL,                   // AC00..D7A3
7907             UNKNOWN,                  // D7A4..D7AF
7908             HANGUL,                   // D7B0..D7C6
7909             UNKNOWN,                  // D7C7..D7CA
7910             HANGUL,                   // D7CB..D7FB
7911             UNKNOWN,                  // D7FC..F8FF
7912             HAN,                      // F900..FA6D
7913             UNKNOWN,                  // FA6E..FA6F
7914             HAN,                      // FA70..FAD9
7915             UNKNOWN,                  // FADA..FAFF
7916             LATIN,                    // FB00..FB06
7917             UNKNOWN,                  // FB07..FB12
7918             ARMENIAN,                 // FB13..FB17
7919             UNKNOWN,                  // FB18..FB1C
7920             HEBREW,                   // FB1D..FB36
7921             UNKNOWN,                  // FB37
7922             HEBREW,                   // FB38..FB3C
7923             UNKNOWN,                  // FB3D
7924             HEBREW,                   // FB3E
7925             UNKNOWN,                  // FB3F
7926             HEBREW,                   // FB40..FB41
7927             UNKNOWN,                  // FB42
7928             HEBREW,                   // FB43..FB44
7929             UNKNOWN,                  // FB45
7930             HEBREW,                   // FB46..FB4F
7931             ARABIC,                   // FB50..FBC2
7932             UNKNOWN,                  // FBC3..FBD2
7933             ARABIC,                   // FBD3..FD3D
7934             COMMON,                   // FD3E..FD3F
7935             ARABIC,                   // FD40..FD8F
7936             UNKNOWN,                  // FD90..FD91
7937             ARABIC,                   // FD92..FDC7
7938             UNKNOWN,                  // FDC8..FDCE
7939             ARABIC,                   // FDCF
7940             UNKNOWN,                  // FDD0..FDEF
7941             ARABIC,                   // FDF0..FDFF
7942             INHERITED,                // FE00..FE0F
7943             COMMON,                   // FE10..FE19
7944             UNKNOWN,                  // FE1A..FE1F
7945             INHERITED,                // FE20..FE2D
7946             CYRILLIC,                 // FE2E..FE2F
7947             COMMON,                   // FE30..FE52
7948             UNKNOWN,                  // FE53
7949             COMMON,                   // FE54..FE66
7950             UNKNOWN,                  // FE67
7951             COMMON,                   // FE68..FE6B
7952             UNKNOWN,                  // FE6C..FE6F
7953             ARABIC,                   // FE70..FE74
7954             UNKNOWN,                  // FE75
7955             ARABIC,                   // FE76..FEFC
7956             UNKNOWN,                  // FEFD..FEFE
7957             COMMON,                   // FEFF
7958             UNKNOWN,                  // FF00
7959             COMMON,                   // FF01..FF20
7960             LATIN,                    // FF21..FF3A
7961             COMMON,                   // FF3B..FF40
7962             LATIN,                    // FF41..FF5A
7963             COMMON,                   // FF5B..FF65
7964             KATAKANA,                 // FF66..FF6F
7965             COMMON,                   // FF70
7966             KATAKANA,                 // FF71..FF9D
7967             COMMON,                   // FF9E..FF9F
7968             HANGUL,                   // FFA0..FFBE
7969             UNKNOWN,                  // FFBF..FFC1
7970             HANGUL,                   // FFC2..FFC7
7971             UNKNOWN,                  // FFC8..FFC9
7972             HANGUL,                   // FFCA..FFCF
7973             UNKNOWN,                  // FFD0..FFD1
7974             HANGUL,                   // FFD2..FFD7
7975             UNKNOWN,                  // FFD8..FFD9
7976             HANGUL,                   // FFDA..FFDC
7977             UNKNOWN,                  // FFDD..FFDF
7978             COMMON,                   // FFE0..FFE6
7979             UNKNOWN,                  // FFE7
7980             COMMON,                   // FFE8..FFEE
7981             UNKNOWN,                  // FFEF..FFF8
7982             COMMON,                   // FFF9..FFFD
7983             UNKNOWN,                  // FFFE..FFFF
7984             LINEAR_B,                 // 10000..1000B
7985             UNKNOWN,                  // 1000C
7986             LINEAR_B,                 // 1000D..10026
7987             UNKNOWN,                  // 10027
7988             LINEAR_B,                 // 10028..1003A
7989             UNKNOWN,                  // 1003B
7990             LINEAR_B,                 // 1003C..1003D
7991             UNKNOWN,                  // 1003E
7992             LINEAR_B,                 // 1003F..1004D
7993             UNKNOWN,                  // 1004E..1004F
7994             LINEAR_B,                 // 10050..1005D
7995             UNKNOWN,                  // 1005E..1007F
7996             LINEAR_B,                 // 10080..100FA
7997             UNKNOWN,                  // 100FB..100FF
7998             COMMON,                   // 10100..10102
7999             UNKNOWN,                  // 10103..10106
8000             COMMON,                   // 10107..10133
8001             UNKNOWN,                  // 10134..10136
8002             COMMON,                   // 10137..1013F
8003             GREEK,                    // 10140..1018E
8004             UNKNOWN,                  // 1018F
8005             COMMON,                   // 10190..1019C
8006             UNKNOWN,                  // 1019D..1019F
8007             GREEK,                    // 101A0
8008             UNKNOWN,                  // 101A1..101CF
8009             COMMON,                   // 101D0..101FC
8010             INHERITED,                // 101FD
8011             UNKNOWN,                  // 101FE..1027F
8012             LYCIAN,                   // 10280..1029C
8013             UNKNOWN,                  // 1029D..1029F
8014             CARIAN,                   // 102A0..102D0
8015             UNKNOWN,                  // 102D1..102DF
8016             INHERITED,                // 102E0
8017             COMMON,                   // 102E1..102FB
8018             UNKNOWN,                  // 102FC..102FF
8019             OLD_ITALIC,               // 10300..10323
8020             UNKNOWN,                  // 10324..1032C
8021             OLD_ITALIC,               // 1032D..1032F
8022             GOTHIC,                   // 10330..1034A
8023             UNKNOWN,                  // 1034B..1034F
8024             OLD_PERMIC,               // 10350..1037A
8025             UNKNOWN,                  // 1037B..1037F
8026             UGARITIC,                 // 10380..1039D
8027             UNKNOWN,                  // 1039E
8028             UGARITIC,                 // 1039F
8029             OLD_PERSIAN,              // 103A0..103C3
8030             UNKNOWN,                  // 103C4..103C7
8031             OLD_PERSIAN,              // 103C8..103D5
8032             UNKNOWN,                  // 103D6..103FF
8033             DESERET,                  // 10400..1044F
8034             SHAVIAN,                  // 10450..1047F
8035             OSMANYA,                  // 10480..1049D
8036             UNKNOWN,                  // 1049E..1049F
8037             OSMANYA,                  // 104A0..104A9
8038             UNKNOWN,                  // 104AA..104AF
8039             OSAGE,                    // 104B0..104D3
8040             UNKNOWN,                  // 104D4..104D7
8041             OSAGE,                    // 104D8..104FB
8042             UNKNOWN,                  // 104FC..104FF
8043             ELBASAN,                  // 10500..10527
8044             UNKNOWN,                  // 10528..1052F
8045             CAUCASIAN_ALBANIAN,       // 10530..10563
8046             UNKNOWN,                  // 10564..1056E
8047             CAUCASIAN_ALBANIAN,       // 1056F
8048             VITHKUQI,                 // 10570..1057A
8049             UNKNOWN,                  // 1057B
8050             VITHKUQI,                 // 1057C..1058A
8051             UNKNOWN,                  // 1058B
8052             VITHKUQI,                 // 1058C..10592
8053             UNKNOWN,                  // 10593
8054             VITHKUQI,                 // 10594..10595
8055             UNKNOWN,                  // 10596
8056             VITHKUQI,                 // 10597..105A1
8057             UNKNOWN,                  // 105A2
8058             VITHKUQI,                 // 105A3..105B1
8059             UNKNOWN,                  // 105B2
8060             VITHKUQI,                 // 105B3..105B9
8061             UNKNOWN,                  // 105BA
8062             VITHKUQI,                 // 105BB..105BC
8063             UNKNOWN,                  // 105BD..105FF
8064             LINEAR_A,                 // 10600..10736
8065             UNKNOWN,                  // 10737..1073F
8066             LINEAR_A,                 // 10740..10755
8067             UNKNOWN,                  // 10756..1075F
8068             LINEAR_A,                 // 10760..10767
8069             UNKNOWN,                  // 10768..1077F
8070             LATIN,                    // 10780..10785
8071             UNKNOWN,                  // 10786
8072             LATIN,                    // 10787..107B0
8073             UNKNOWN,                  // 107B1
8074             LATIN,                    // 107B2..107BA
8075             UNKNOWN,                  // 107BB..107FF
8076             CYPRIOT,                  // 10800..10805
8077             UNKNOWN,                  // 10806..10807
8078             CYPRIOT,                  // 10808
8079             UNKNOWN,                  // 10809
8080             CYPRIOT,                  // 1080A..10835
8081             UNKNOWN,                  // 10836
8082             CYPRIOT,                  // 10837..10838
8083             UNKNOWN,                  // 10839..1083B
8084             CYPRIOT,                  // 1083C
8085             UNKNOWN,                  // 1083D..1083E
8086             CYPRIOT,                  // 1083F
8087             IMPERIAL_ARAMAIC,         // 10840..10855
8088             UNKNOWN,                  // 10856
8089             IMPERIAL_ARAMAIC,         // 10857..1085F
8090             PALMYRENE,                // 10860..1087F
8091             NABATAEAN,                // 10880..1089E
8092             UNKNOWN,                  // 1089F..108A6
8093             NABATAEAN,                // 108A7..108AF
8094             UNKNOWN,                  // 108B0..108DF
8095             HATRAN,                   // 108E0..108F2
8096             UNKNOWN,                  // 108F3
8097             HATRAN,                   // 108F4..108F5
8098             UNKNOWN,                  // 108F6..108FA
8099             HATRAN,                   // 108FB..108FF
8100             PHOENICIAN,               // 10900..1091B
8101             UNKNOWN,                  // 1091C..1091E
8102             PHOENICIAN,               // 1091F
8103             LYDIAN,                   // 10920..10939
8104             UNKNOWN,                  // 1093A..1093E
8105             LYDIAN,                   // 1093F
8106             UNKNOWN,                  // 10940..1097F
8107             MEROITIC_HIEROGLYPHS,     // 10980..1099F
8108             MEROITIC_CURSIVE,         // 109A0..109B7
8109             UNKNOWN,                  // 109B8..109BB
8110             MEROITIC_CURSIVE,         // 109BC..109CF
8111             UNKNOWN,                  // 109D0..109D1
8112             MEROITIC_CURSIVE,         // 109D2..109FF
8113             KHAROSHTHI,               // 10A00..10A03
8114             UNKNOWN,                  // 10A04
8115             KHAROSHTHI,               // 10A05..10A06
8116             UNKNOWN,                  // 10A07..10A0B
8117             KHAROSHTHI,               // 10A0C..10A13
8118             UNKNOWN,                  // 10A14
8119             KHAROSHTHI,               // 10A15..10A17
8120             UNKNOWN,                  // 10A18
8121             KHAROSHTHI,               // 10A19..10A35
8122             UNKNOWN,                  // 10A36..10A37
8123             KHAROSHTHI,               // 10A38..10A3A
8124             UNKNOWN,                  // 10A3B..10A3E
8125             KHAROSHTHI,               // 10A3F..10A48
8126             UNKNOWN,                  // 10A49..10A4F
8127             KHAROSHTHI,               // 10A50..10A58
8128             UNKNOWN,                  // 10A59..10A5F
8129             OLD_SOUTH_ARABIAN,        // 10A60..10A7F
8130             OLD_NORTH_ARABIAN,        // 10A80..10A9F
8131             UNKNOWN,                  // 10AA0..10ABF
8132             MANICHAEAN,               // 10AC0..10AE6
8133             UNKNOWN,                  // 10AE7..10AEA
8134             MANICHAEAN,               // 10AEB..10AF6
8135             UNKNOWN,                  // 10AF7..10AFF
8136             AVESTAN,                  // 10B00..10B35
8137             UNKNOWN,                  // 10B36..10B38
8138             AVESTAN,                  // 10B39..10B3F
8139             INSCRIPTIONAL_PARTHIAN,   // 10B40..10B55
8140             UNKNOWN,                  // 10B56..10B57
8141             INSCRIPTIONAL_PARTHIAN,   // 10B58..10B5F
8142             INSCRIPTIONAL_PAHLAVI,    // 10B60..10B72
8143             UNKNOWN,                  // 10B73..10B77
8144             INSCRIPTIONAL_PAHLAVI,    // 10B78..10B7F
8145             PSALTER_PAHLAVI,          // 10B80..10B91
8146             UNKNOWN,                  // 10B92..10B98
8147             PSALTER_PAHLAVI,          // 10B99..10B9C
8148             UNKNOWN,                  // 10B9D..10BA8
8149             PSALTER_PAHLAVI,          // 10BA9..10BAF
8150             UNKNOWN,                  // 10BB0..10BFF
8151             OLD_TURKIC,               // 10C00..10C48
8152             UNKNOWN,                  // 10C49..10C7F
8153             OLD_HUNGARIAN,            // 10C80..10CB2
8154             UNKNOWN,                  // 10CB3..10CBF
8155             OLD_HUNGARIAN,            // 10CC0..10CF2
8156             UNKNOWN,                  // 10CF3..10CF9
8157             OLD_HUNGARIAN,            // 10CFA..10CFF
8158             HANIFI_ROHINGYA,          // 10D00..10D27
8159             UNKNOWN,                  // 10D28..10D2F
8160             HANIFI_ROHINGYA,          // 10D30..10D39
8161             UNKNOWN,                  // 10D3A..10E5F
8162             ARABIC,                   // 10E60..10E7E
8163             UNKNOWN,                  // 10E7F
8164             YEZIDI,                   // 10E80..10EA9
8165             UNKNOWN,                  // 10EAA
8166             YEZIDI,                   // 10EAB..10EAD
8167             UNKNOWN,                  // 10EAE..10EAF
8168             YEZIDI,                   // 10EB0..10EB1
8169             UNKNOWN,                  // 10EB2..10EFC
8170             ARABIC,                   // 10EFD..10EFF
8171             OLD_SOGDIAN,              // 10F00..10F27
8172             UNKNOWN,                  // 10F28..10F2F
8173             SOGDIAN,                  // 10F30..10F59
8174             UNKNOWN,                  // 10F5A..10F6F
8175             OLD_UYGHUR,               // 10F70..10F89
8176             UNKNOWN,                  // 10F8A..10FAF
8177             CHORASMIAN,               // 10FB0..10FCB
8178             UNKNOWN,                  // 10FCC..10FDF
8179             ELYMAIC,                  // 10FE0..10FF6
8180             UNKNOWN,                  // 10FF7..10FFF
8181             BRAHMI,                   // 11000..1104D
8182             UNKNOWN,                  // 1104E..11051
8183             BRAHMI,                   // 11052..11075
8184             UNKNOWN,                  // 11076..1107E
8185             BRAHMI,                   // 1107F
8186             KAITHI,                   // 11080..110C2
8187             UNKNOWN,                  // 110C3..110CC
8188             KAITHI,                   // 110CD
8189             UNKNOWN,                  // 110CE..110CF
8190             SORA_SOMPENG,             // 110D0..110E8
8191             UNKNOWN,                  // 110E9..110EF
8192             SORA_SOMPENG,             // 110F0..110F9
8193             UNKNOWN,                  // 110FA..110FF
8194             CHAKMA,                   // 11100..11134
8195             UNKNOWN,                  // 11135
8196             CHAKMA,                   // 11136..11147
8197             UNKNOWN,                  // 11148..1114F
8198             MAHAJANI,                 // 11150..11176
8199             UNKNOWN,                  // 11177..1117F
8200             SHARADA,                  // 11180..111DF
8201             UNKNOWN,                  // 111E0
8202             SINHALA,                  // 111E1..111F4
8203             UNKNOWN,                  // 111F5..111FF
8204             KHOJKI,                   // 11200..11211
8205             UNKNOWN,                  // 11212
8206             KHOJKI,                   // 11213..11241
8207             UNKNOWN,                  // 11242..1127F
8208             MULTANI,                  // 11280..11286
8209             UNKNOWN,                  // 11287
8210             MULTANI,                  // 11288
8211             UNKNOWN,                  // 11289
8212             MULTANI,                  // 1128A..1128D
8213             UNKNOWN,                  // 1128E
8214             MULTANI,                  // 1128F..1129D
8215             UNKNOWN,                  // 1129E
8216             MULTANI,                  // 1129F..112A9
8217             UNKNOWN,                  // 112AA..112AF
8218             KHUDAWADI,                // 112B0..112EA
8219             UNKNOWN,                  // 112EB..112EF
8220             KHUDAWADI,                // 112F0..112F9
8221             UNKNOWN,                  // 112FA..112FF
8222             GRANTHA,                  // 11300..11303
8223             UNKNOWN,                  // 11304
8224             GRANTHA,                  // 11305..1130C
8225             UNKNOWN,                  // 1130D..1130E
8226             GRANTHA,                  // 1130F..11310
8227             UNKNOWN,                  // 11311..11312
8228             GRANTHA,                  // 11313..11328
8229             UNKNOWN,                  // 11329
8230             GRANTHA,                  // 1132A..11330
8231             UNKNOWN,                  // 11331
8232             GRANTHA,                  // 11332..11333
8233             UNKNOWN,                  // 11334
8234             GRANTHA,                  // 11335..11339
8235             UNKNOWN,                  // 1133A
8236             INHERITED,                // 1133B
8237             GRANTHA,                  // 1133C..11344
8238             UNKNOWN,                  // 11345..11346
8239             GRANTHA,                  // 11347..11348
8240             UNKNOWN,                  // 11349..1134A
8241             GRANTHA,                  // 1134B..1134D
8242             UNKNOWN,                  // 1134E..1134F
8243             GRANTHA,                  // 11350
8244             UNKNOWN,                  // 11351..11356
8245             GRANTHA,                  // 11357
8246             UNKNOWN,                  // 11358..1135C
8247             GRANTHA,                  // 1135D..11363
8248             UNKNOWN,                  // 11364..11365
8249             GRANTHA,                  // 11366..1136C
8250             UNKNOWN,                  // 1136D..1136F
8251             GRANTHA,                  // 11370..11374
8252             UNKNOWN,                  // 11375..113FF
8253             NEWA,                     // 11400..1145B
8254             UNKNOWN,                  // 1145C
8255             NEWA,                     // 1145D..11461
8256             UNKNOWN,                  // 11462..1147F
8257             TIRHUTA,                  // 11480..114C7
8258             UNKNOWN,                  // 114C8..114CF
8259             TIRHUTA,                  // 114D0..114D9
8260             UNKNOWN,                  // 114DA..1157F
8261             SIDDHAM,                  // 11580..115B5
8262             UNKNOWN,                  // 115B6..115B7
8263             SIDDHAM,                  // 115B8..115DD
8264             UNKNOWN,                  // 115DE..115FF
8265             MODI,                     // 11600..11644
8266             UNKNOWN,                  // 11645..1164F
8267             MODI,                     // 11650..11659
8268             UNKNOWN,                  // 1165A..1165F
8269             MONGOLIAN,                // 11660..1166C
8270             UNKNOWN,                  // 1166D..1167F
8271             TAKRI,                    // 11680..116B9
8272             UNKNOWN,                  // 116BA..116BF
8273             TAKRI,                    // 116C0..116C9
8274             UNKNOWN,                  // 116CA..116FF
8275             AHOM,                     // 11700..1171A
8276             UNKNOWN,                  // 1171B..1171C
8277             AHOM,                     // 1171D..1172B
8278             UNKNOWN,                  // 1172C..1172F
8279             AHOM,                     // 11730..11746
8280             UNKNOWN,                  // 11747..117FF
8281             DOGRA,                    // 11800..1183B
8282             UNKNOWN,                  // 1183C..1189F
8283             WARANG_CITI,              // 118A0..118F2
8284             UNKNOWN,                  // 118F3..118FE
8285             WARANG_CITI,              // 118FF
8286             DIVES_AKURU,              // 11900..11906
8287             UNKNOWN,                  // 11907..11908
8288             DIVES_AKURU,              // 11909
8289             UNKNOWN,                  // 1190A..1190B
8290             DIVES_AKURU,              // 1190C..11913
8291             UNKNOWN,                  // 11914
8292             DIVES_AKURU,              // 11915..11916
8293             UNKNOWN,                  // 11917
8294             DIVES_AKURU,              // 11918..11935
8295             UNKNOWN,                  // 11936
8296             DIVES_AKURU,              // 11937..11938
8297             UNKNOWN,                  // 11939..1193A
8298             DIVES_AKURU,              // 1193B..11946
8299             UNKNOWN,                  // 11947..1194F
8300             DIVES_AKURU,              // 11950..11959
8301             UNKNOWN,                  // 1195A..1199F
8302             NANDINAGARI,              // 119A0..119A7
8303             UNKNOWN,                  // 119A8..119A9
8304             NANDINAGARI,              // 119AA..119D7
8305             UNKNOWN,                  // 119D8..119D9
8306             NANDINAGARI,              // 119DA..119E4
8307             UNKNOWN,                  // 119E5..119FF
8308             ZANABAZAR_SQUARE,         // 11A00..11A47
8309             UNKNOWN,                  // 11A48..11A4F
8310             SOYOMBO,                  // 11A50..11AA2
8311             UNKNOWN,                  // 11AA3..11AAF
8312             CANADIAN_ABORIGINAL,      // 11AB0..11ABF
8313             PAU_CIN_HAU,              // 11AC0..11AF8
8314             UNKNOWN,                  // 11AF9..11AFF
8315             DEVANAGARI,               // 11B00..11B09
8316             UNKNOWN,                  // 11B0A..11BFF
8317             BHAIKSUKI,                // 11C00..11C08
8318             UNKNOWN,                  // 11C09
8319             BHAIKSUKI,                // 11C0A..11C36
8320             UNKNOWN,                  // 11C37
8321             BHAIKSUKI,                // 11C38..11C45
8322             UNKNOWN,                  // 11C46..11C4F
8323             BHAIKSUKI,                // 11C50..11C6C
8324             UNKNOWN,                  // 11C6D..11C6F
8325             MARCHEN,                  // 11C70..11C8F
8326             UNKNOWN,                  // 11C90..11C91
8327             MARCHEN,                  // 11C92..11CA7
8328             UNKNOWN,                  // 11CA8
8329             MARCHEN,                  // 11CA9..11CB6
8330             UNKNOWN,                  // 11CB7..11CFF
8331             MASARAM_GONDI,            // 11D00..11D06
8332             UNKNOWN,                  // 11D07
8333             MASARAM_GONDI,            // 11D08..11D09
8334             UNKNOWN,                  // 11D0A
8335             MASARAM_GONDI,            // 11D0B..11D36
8336             UNKNOWN,                  // 11D37..11D39
8337             MASARAM_GONDI,            // 11D3A
8338             UNKNOWN,                  // 11D3B
8339             MASARAM_GONDI,            // 11D3C..11D3D
8340             UNKNOWN,                  // 11D3E
8341             MASARAM_GONDI,            // 11D3F..11D47
8342             UNKNOWN,                  // 11D48..11D4F
8343             MASARAM_GONDI,            // 11D50..11D59
8344             UNKNOWN,                  // 11D5A..11D5F
8345             GUNJALA_GONDI,            // 11D60..11D65
8346             UNKNOWN,                  // 11D66
8347             GUNJALA_GONDI,            // 11D67..11D68
8348             UNKNOWN,                  // 11D69
8349             GUNJALA_GONDI,            // 11D6A..11D8E
8350             UNKNOWN,                  // 11D8F
8351             GUNJALA_GONDI,            // 11D90..11D91
8352             UNKNOWN,                  // 11D92
8353             GUNJALA_GONDI,            // 11D93..11D98
8354             UNKNOWN,                  // 11D99..11D9F
8355             GUNJALA_GONDI,            // 11DA0..11DA9
8356             UNKNOWN,                  // 11DAA..11EDF
8357             MAKASAR,                  // 11EE0..11EF8
8358             UNKNOWN,                  // 11EF9..11EFF
8359             KAWI,                     // 11F00..11F10
8360             UNKNOWN,                  // 11F11
8361             KAWI,                     // 11F12..11F3A
8362             UNKNOWN,                  // 11F3B..11F3D
8363             KAWI,                     // 11F3E..11F59
8364             UNKNOWN,                  // 11F5A..11FAF
8365             LISU,                     // 11FB0
8366             UNKNOWN,                  // 11FB1..11FBF
8367             TAMIL,                    // 11FC0..11FF1
8368             UNKNOWN,                  // 11FF2..11FFE
8369             TAMIL,                    // 11FFF
8370             CUNEIFORM,                // 12000..12399
8371             UNKNOWN,                  // 1239A..123FF
8372             CUNEIFORM,                // 12400..1246E
8373             UNKNOWN,                  // 1246F
8374             CUNEIFORM,                // 12470..12474
8375             UNKNOWN,                  // 12475..1247F
8376             CUNEIFORM,                // 12480..12543
8377             UNKNOWN,                  // 12544..12F8F
8378             CYPRO_MINOAN,             // 12F90..12FF2
8379             UNKNOWN,                  // 12FF3..12FFF
8380             EGYPTIAN_HIEROGLYPHS,     // 13000..13455
8381             UNKNOWN,                  // 13456..143FF
8382             ANATOLIAN_HIEROGLYPHS,    // 14400..14646
8383             UNKNOWN,                  // 14647..167FF
8384             BAMUM,                    // 16800..16A38
8385             UNKNOWN,                  // 16A39..16A3F
8386             MRO,                      // 16A40..16A5E
8387             UNKNOWN,                  // 16A5F
8388             MRO,                      // 16A60..16A69
8389             UNKNOWN,                  // 16A6A..16A6D
8390             MRO,                      // 16A6E..16A6F
8391             TANGSA,                   // 16A70..16ABE
8392             UNKNOWN,                  // 16ABF
8393             TANGSA,                   // 16AC0..16AC9
8394             UNKNOWN,                  // 16ACA..16ACF
8395             BASSA_VAH,                // 16AD0..16AED
8396             UNKNOWN,                  // 16AEE..16AEF
8397             BASSA_VAH,                // 16AF0..16AF5
8398             UNKNOWN,                  // 16AF6..16AFF
8399             PAHAWH_HMONG,             // 16B00..16B45
8400             UNKNOWN,                  // 16B46..16B4F
8401             PAHAWH_HMONG,             // 16B50..16B59
8402             UNKNOWN,                  // 16B5A
8403             PAHAWH_HMONG,             // 16B5B..16B61
8404             UNKNOWN,                  // 16B62
8405             PAHAWH_HMONG,             // 16B63..16B77
8406             UNKNOWN,                  // 16B78..16B7C
8407             PAHAWH_HMONG,             // 16B7D..16B8F
8408             UNKNOWN,                  // 16B90..16E3F
8409             MEDEFAIDRIN,              // 16E40..16E9A
8410             UNKNOWN,                  // 16E9B..16EFF
8411             MIAO,                     // 16F00..16F4A
8412             UNKNOWN,                  // 16F4B..16F4E
8413             MIAO,                     // 16F4F..16F87
8414             UNKNOWN,                  // 16F88..16F8E
8415             MIAO,                     // 16F8F..16F9F
8416             UNKNOWN,                  // 16FA0..16FDF
8417             TANGUT,                   // 16FE0
8418             NUSHU,                    // 16FE1
8419             HAN,                      // 16FE2..16FE3
8420             KHITAN_SMALL_SCRIPT,      // 16FE4
8421             UNKNOWN,                  // 16FE5..16FEF
8422             HAN,                      // 16FF0..16FF1
8423             UNKNOWN,                  // 16FF2..16FFF
8424             TANGUT,                   // 17000..187F7
8425             UNKNOWN,                  // 187F8..187FF
8426             TANGUT,                   // 18800..18AFF
8427             KHITAN_SMALL_SCRIPT,      // 18B00..18CD5
8428             UNKNOWN,                  // 18CD6..18CFF
8429             TANGUT,                   // 18D00..18D08
8430             UNKNOWN,                  // 18D09..1AFEF
8431             KATAKANA,                 // 1AFF0..1AFF3
8432             UNKNOWN,                  // 1AFF4
8433             KATAKANA,                 // 1AFF5..1AFFB
8434             UNKNOWN,                  // 1AFFC
8435             KATAKANA,                 // 1AFFD..1AFFE
8436             UNKNOWN,                  // 1AFFF
8437             KATAKANA,                 // 1B000
8438             HIRAGANA,                 // 1B001..1B11F
8439             KATAKANA,                 // 1B120..1B122
8440             UNKNOWN,                  // 1B123..1B131
8441             HIRAGANA,                 // 1B132
8442             UNKNOWN,                  // 1B133..1B14F
8443             HIRAGANA,                 // 1B150..1B152
8444             UNKNOWN,                  // 1B153..1B154
8445             KATAKANA,                 // 1B155
8446             UNKNOWN,                  // 1B156..1B163
8447             KATAKANA,                 // 1B164..1B167
8448             UNKNOWN,                  // 1B168..1B16F
8449             NUSHU,                    // 1B170..1B2FB
8450             UNKNOWN,                  // 1B2FC..1BBFF
8451             DUPLOYAN,                 // 1BC00..1BC6A
8452             UNKNOWN,                  // 1BC6B..1BC6F
8453             DUPLOYAN,                 // 1BC70..1BC7C
8454             UNKNOWN,                  // 1BC7D..1BC7F
8455             DUPLOYAN,                 // 1BC80..1BC88
8456             UNKNOWN,                  // 1BC89..1BC8F
8457             DUPLOYAN,                 // 1BC90..1BC99
8458             UNKNOWN,                  // 1BC9A..1BC9B
8459             DUPLOYAN,                 // 1BC9C..1BC9F
8460             COMMON,                   // 1BCA0..1BCA3
8461             UNKNOWN,                  // 1BCA4..1CEFF
8462             INHERITED,                // 1CF00..1CF2D
8463             UNKNOWN,                  // 1CF2E..1CF2F
8464             INHERITED,                // 1CF30..1CF46
8465             UNKNOWN,                  // 1CF47..1CF4F
8466             COMMON,                   // 1CF50..1CFC3
8467             UNKNOWN,                  // 1CFC4..1CFFF
8468             COMMON,                   // 1D000..1D0F5
8469             UNKNOWN,                  // 1D0F6..1D0FF
8470             COMMON,                   // 1D100..1D126
8471             UNKNOWN,                  // 1D127..1D128
8472             COMMON,                   // 1D129..1D166
8473             INHERITED,                // 1D167..1D169
8474             COMMON,                   // 1D16A..1D17A
8475             INHERITED,                // 1D17B..1D182
8476             COMMON,                   // 1D183..1D184
8477             INHERITED,                // 1D185..1D18B
8478             COMMON,                   // 1D18C..1D1A9
8479             INHERITED,                // 1D1AA..1D1AD
8480             COMMON,                   // 1D1AE..1D1EA
8481             UNKNOWN,                  // 1D1EB..1D1FF
8482             GREEK,                    // 1D200..1D245
8483             UNKNOWN,                  // 1D246..1D2BF
8484             COMMON,                   // 1D2C0..1D2D3
8485             UNKNOWN,                  // 1D2D4..1D2DF
8486             COMMON,                   // 1D2E0..1D2F3
8487             UNKNOWN,                  // 1D2F4..1D2FF
8488             COMMON,                   // 1D300..1D356
8489             UNKNOWN,                  // 1D357..1D35F
8490             COMMON,                   // 1D360..1D378
8491             UNKNOWN,                  // 1D379..1D3FF
8492             COMMON,                   // 1D400..1D454
8493             UNKNOWN,                  // 1D455
8494             COMMON,                   // 1D456..1D49C
8495             UNKNOWN,                  // 1D49D
8496             COMMON,                   // 1D49E..1D49F
8497             UNKNOWN,                  // 1D4A0..1D4A1
8498             COMMON,                   // 1D4A2
8499             UNKNOWN,                  // 1D4A3..1D4A4
8500             COMMON,                   // 1D4A5..1D4A6
8501             UNKNOWN,                  // 1D4A7..1D4A8
8502             COMMON,                   // 1D4A9..1D4AC
8503             UNKNOWN,                  // 1D4AD
8504             COMMON,                   // 1D4AE..1D4B9
8505             UNKNOWN,                  // 1D4BA
8506             COMMON,                   // 1D4BB
8507             UNKNOWN,                  // 1D4BC
8508             COMMON,                   // 1D4BD..1D4C3
8509             UNKNOWN,                  // 1D4C4
8510             COMMON,                   // 1D4C5..1D505
8511             UNKNOWN,                  // 1D506
8512             COMMON,                   // 1D507..1D50A
8513             UNKNOWN,                  // 1D50B..1D50C
8514             COMMON,                   // 1D50D..1D514
8515             UNKNOWN,                  // 1D515
8516             COMMON,                   // 1D516..1D51C
8517             UNKNOWN,                  // 1D51D
8518             COMMON,                   // 1D51E..1D539
8519             UNKNOWN,                  // 1D53A
8520             COMMON,                   // 1D53B..1D53E
8521             UNKNOWN,                  // 1D53F
8522             COMMON,                   // 1D540..1D544
8523             UNKNOWN,                  // 1D545
8524             COMMON,                   // 1D546
8525             UNKNOWN,                  // 1D547..1D549
8526             COMMON,                   // 1D54A..1D550
8527             UNKNOWN,                  // 1D551
8528             COMMON,                   // 1D552..1D6A5
8529             UNKNOWN,                  // 1D6A6..1D6A7
8530             COMMON,                   // 1D6A8..1D7CB
8531             UNKNOWN,                  // 1D7CC..1D7CD
8532             COMMON,                   // 1D7CE..1D7FF
8533             SIGNWRITING,              // 1D800..1DA8B
8534             UNKNOWN,                  // 1DA8C..1DA9A
8535             SIGNWRITING,              // 1DA9B..1DA9F
8536             UNKNOWN,                  // 1DAA0
8537             SIGNWRITING,              // 1DAA1..1DAAF
8538             UNKNOWN,                  // 1DAB0..1DEFF
8539             LATIN,                    // 1DF00..1DF1E
8540             UNKNOWN,                  // 1DF1F..1DF24
8541             LATIN,                    // 1DF25..1DF2A
8542             UNKNOWN,                  // 1DF2B..1DFFF
8543             GLAGOLITIC,               // 1E000..1E006
8544             UNKNOWN,                  // 1E007
8545             GLAGOLITIC,               // 1E008..1E018
8546             UNKNOWN,                  // 1E019..1E01A
8547             GLAGOLITIC,               // 1E01B..1E021
8548             UNKNOWN,                  // 1E022
8549             GLAGOLITIC,               // 1E023..1E024
8550             UNKNOWN,                  // 1E025
8551             GLAGOLITIC,               // 1E026..1E02A
8552             UNKNOWN,                  // 1E02B..1E02F
8553             CYRILLIC,                 // 1E030..1E06D
8554             UNKNOWN,                  // 1E06E..1E08E
8555             CYRILLIC,                 // 1E08F
8556             UNKNOWN,                  // 1E090..1E0FF
8557             NYIAKENG_PUACHUE_HMONG,   // 1E100..1E12C
8558             UNKNOWN,                  // 1E12D..1E12F
8559             NYIAKENG_PUACHUE_HMONG,   // 1E130..1E13D
8560             UNKNOWN,                  // 1E13E..1E13F
8561             NYIAKENG_PUACHUE_HMONG,   // 1E140..1E149
8562             UNKNOWN,                  // 1E14A..1E14D
8563             NYIAKENG_PUACHUE_HMONG,   // 1E14E..1E14F
8564             UNKNOWN,                  // 1E150..1E28F
8565             TOTO,                     // 1E290..1E2AE
8566             UNKNOWN,                  // 1E2AF..1E2BF
8567             WANCHO,                   // 1E2C0..1E2F9
8568             UNKNOWN,                  // 1E2FA..1E2FE
8569             WANCHO,                   // 1E2FF
8570             UNKNOWN,                  // 1E300..1E4CF
8571             NAG_MUNDARI,              // 1E4D0..1E4F9
8572             UNKNOWN,                  // 1E4FA..1E7DF
8573             ETHIOPIC,                 // 1E7E0..1E7E6
8574             UNKNOWN,                  // 1E7E7
8575             ETHIOPIC,                 // 1E7E8..1E7EB
8576             UNKNOWN,                  // 1E7EC
8577             ETHIOPIC,                 // 1E7ED..1E7EE
8578             UNKNOWN,                  // 1E7EF
8579             ETHIOPIC,                 // 1E7F0..1E7FE
8580             UNKNOWN,                  // 1E7FF
8581             MENDE_KIKAKUI,            // 1E800..1E8C4
8582             UNKNOWN,                  // 1E8C5..1E8C6
8583             MENDE_KIKAKUI,            // 1E8C7..1E8D6
8584             UNKNOWN,                  // 1E8D7..1E8FF
8585             ADLAM,                    // 1E900..1E94B
8586             UNKNOWN,                  // 1E94C..1E94F
8587             ADLAM,                    // 1E950..1E959
8588             UNKNOWN,                  // 1E95A..1E95D
8589             ADLAM,                    // 1E95E..1E95F
8590             UNKNOWN,                  // 1E960..1EC70
8591             COMMON,                   // 1EC71..1ECB4
8592             UNKNOWN,                  // 1ECB5..1ED00
8593             COMMON,                   // 1ED01..1ED3D
8594             UNKNOWN,                  // 1ED3E..1EDFF
8595             ARABIC,                   // 1EE00..1EE03
8596             UNKNOWN,                  // 1EE04
8597             ARABIC,                   // 1EE05..1EE1F
8598             UNKNOWN,                  // 1EE20
8599             ARABIC,                   // 1EE21..1EE22
8600             UNKNOWN,                  // 1EE23
8601             ARABIC,                   // 1EE24
8602             UNKNOWN,                  // 1EE25..1EE26
8603             ARABIC,                   // 1EE27
8604             UNKNOWN,                  // 1EE28
8605             ARABIC,                   // 1EE29..1EE32
8606             UNKNOWN,                  // 1EE33
8607             ARABIC,                   // 1EE34..1EE37
8608             UNKNOWN,                  // 1EE38
8609             ARABIC,                   // 1EE39
8610             UNKNOWN,                  // 1EE3A
8611             ARABIC,                   // 1EE3B
8612             UNKNOWN,                  // 1EE3C..1EE41
8613             ARABIC,                   // 1EE42
8614             UNKNOWN,                  // 1EE43..1EE46
8615             ARABIC,                   // 1EE47
8616             UNKNOWN,                  // 1EE48
8617             ARABIC,                   // 1EE49
8618             UNKNOWN,                  // 1EE4A
8619             ARABIC,                   // 1EE4B
8620             UNKNOWN,                  // 1EE4C
8621             ARABIC,                   // 1EE4D..1EE4F
8622             UNKNOWN,                  // 1EE50
8623             ARABIC,                   // 1EE51..1EE52
8624             UNKNOWN,                  // 1EE53
8625             ARABIC,                   // 1EE54
8626             UNKNOWN,                  // 1EE55..1EE56
8627             ARABIC,                   // 1EE57
8628             UNKNOWN,                  // 1EE58
8629             ARABIC,                   // 1EE59
8630             UNKNOWN,                  // 1EE5A
8631             ARABIC,                   // 1EE5B
8632             UNKNOWN,                  // 1EE5C
8633             ARABIC,                   // 1EE5D
8634             UNKNOWN,                  // 1EE5E
8635             ARABIC,                   // 1EE5F
8636             UNKNOWN,                  // 1EE60
8637             ARABIC,                   // 1EE61..1EE62
8638             UNKNOWN,                  // 1EE63
8639             ARABIC,                   // 1EE64
8640             UNKNOWN,                  // 1EE65..1EE66
8641             ARABIC,                   // 1EE67..1EE6A
8642             UNKNOWN,                  // 1EE6B
8643             ARABIC,                   // 1EE6C..1EE72
8644             UNKNOWN,                  // 1EE73
8645             ARABIC,                   // 1EE74..1EE77
8646             UNKNOWN,                  // 1EE78
8647             ARABIC,                   // 1EE79..1EE7C
8648             UNKNOWN,                  // 1EE7D
8649             ARABIC,                   // 1EE7E
8650             UNKNOWN,                  // 1EE7F
8651             ARABIC,                   // 1EE80..1EE89
8652             UNKNOWN,                  // 1EE8A
8653             ARABIC,                   // 1EE8B..1EE9B
8654             UNKNOWN,                  // 1EE9C..1EEA0
8655             ARABIC,                   // 1EEA1..1EEA3
8656             UNKNOWN,                  // 1EEA4
8657             ARABIC,                   // 1EEA5..1EEA9
8658             UNKNOWN,                  // 1EEAA
8659             ARABIC,                   // 1EEAB..1EEBB
8660             UNKNOWN,                  // 1EEBC..1EEEF
8661             ARABIC,                   // 1EEF0..1EEF1
8662             UNKNOWN,                  // 1EEF2..1EFFF
8663             COMMON,                   // 1F000..1F02B
8664             UNKNOWN,                  // 1F02C..1F02F
8665             COMMON,                   // 1F030..1F093
8666             UNKNOWN,                  // 1F094..1F09F
8667             COMMON,                   // 1F0A0..1F0AE
8668             UNKNOWN,                  // 1F0AF..1F0B0
8669             COMMON,                   // 1F0B1..1F0BF
8670             UNKNOWN,                  // 1F0C0
8671             COMMON,                   // 1F0C1..1F0CF
8672             UNKNOWN,                  // 1F0D0
8673             COMMON,                   // 1F0D1..1F0F5
8674             UNKNOWN,                  // 1F0F6..1F0FF
8675             COMMON,                   // 1F100..1F1AD
8676             UNKNOWN,                  // 1F1AE..1F1E5
8677             COMMON,                   // 1F1E6..1F1FF
8678             HIRAGANA,                 // 1F200
8679             COMMON,                   // 1F201..1F202
8680             UNKNOWN,                  // 1F203..1F20F
8681             COMMON,                   // 1F210..1F23B
8682             UNKNOWN,                  // 1F23C..1F23F
8683             COMMON,                   // 1F240..1F248
8684             UNKNOWN,                  // 1F249..1F24F
8685             COMMON,                   // 1F250..1F251
8686             UNKNOWN,                  // 1F252..1F25F
8687             COMMON,                   // 1F260..1F265
8688             UNKNOWN,                  // 1F266..1F2FF
8689             COMMON,                   // 1F300..1F6D7
8690             UNKNOWN,                  // 1F6D8..1F6DB
8691             COMMON,                   // 1F6DC..1F6EC
8692             UNKNOWN,                  // 1F6ED..1F6EF
8693             COMMON,                   // 1F6F0..1F6FC
8694             UNKNOWN,                  // 1F6FD..1F6FF
8695             COMMON,                   // 1F700..1F776
8696             UNKNOWN,                  // 1F777..1F77A
8697             COMMON,                   // 1F77B..1F7D9
8698             UNKNOWN,                  // 1F7DA..1F7DF
8699             COMMON,                   // 1F7E0..1F7EB
8700             UNKNOWN,                  // 1F7EC..1F7EF
8701             COMMON,                   // 1F7F0
8702             UNKNOWN,                  // 1F7F1..1F7FF
8703             COMMON,                   // 1F800..1F80B
8704             UNKNOWN,                  // 1F80C..1F80F
8705             COMMON,                   // 1F810..1F847
8706             UNKNOWN,                  // 1F848..1F84F
8707             COMMON,                   // 1F850..1F859
8708             UNKNOWN,                  // 1F85A..1F85F
8709             COMMON,                   // 1F860..1F887
8710             UNKNOWN,                  // 1F888..1F88F
8711             COMMON,                   // 1F890..1F8AD
8712             UNKNOWN,                  // 1F8AE..1F8AF
8713             COMMON,                   // 1F8B0..1F8B1
8714             UNKNOWN,                  // 1F8B2..1F8FF
8715             COMMON,                   // 1F900..1FA53
8716             UNKNOWN,                  // 1FA54..1FA5F
8717             COMMON,                   // 1FA60..1FA6D
8718             UNKNOWN,                  // 1FA6E..1FA6F
8719             COMMON,                   // 1FA70..1FA7C
8720             UNKNOWN,                  // 1FA7D..1FA7F
8721             COMMON,                   // 1FA80..1FA88
8722             UNKNOWN,                  // 1FA89..1FA8F
8723             COMMON,                   // 1FA90..1FABD
8724             UNKNOWN,                  // 1FABE
8725             COMMON,                   // 1FABF..1FAC5
8726             UNKNOWN,                  // 1FAC6..1FACD
8727             COMMON,                   // 1FACE..1FADB
8728             UNKNOWN,                  // 1FADC..1FADF
8729             COMMON,                   // 1FAE0..1FAE8
8730             UNKNOWN,                  // 1FAE9..1FAEF
8731             COMMON,                   // 1FAF0..1FAF8
8732             UNKNOWN,                  // 1FAF9..1FAFF
8733             COMMON,                   // 1FB00..1FB92
8734             UNKNOWN,                  // 1FB93
8735             COMMON,                   // 1FB94..1FBCA
8736             UNKNOWN,                  // 1FBCB..1FBEF
8737             COMMON,                   // 1FBF0..1FBF9
8738             UNKNOWN,                  // 1FBFA..1FFFF
8739             HAN,                      // 20000..2A6DF
8740             UNKNOWN,                  // 2A6E0..2A6FF
8741             HAN,                      // 2A700..2B739
8742             UNKNOWN,                  // 2B73A..2B73F
8743             HAN,                      // 2B740..2B81D
8744             UNKNOWN,                  // 2B81E..2B81F
8745             HAN,                      // 2B820..2CEA1
8746             UNKNOWN,                  // 2CEA2..2CEAF
8747             HAN,                      // 2CEB0..2EBE0
8748             UNKNOWN,                  // 2EBE1..2F7FF
8749             HAN,                      // 2F800..2FA1D
8750             UNKNOWN,                  // 2FA1E..2FFFF
8751             HAN,                      // 30000..3134A
8752             UNKNOWN,                  // 3134B..3134F
8753             HAN,                      // 31350..323AF
8754             UNKNOWN,                  // 323B0..E0000
8755             COMMON,                   // E0001
8756             UNKNOWN,                  // E0002..E001F
8757             COMMON,                   // E0020..E007F
8758             UNKNOWN,                  // E0080..E00FF
8759             INHERITED,                // E0100..E01EF
8760             UNKNOWN,                  // E01F0..10FFFF
8761         };
8762 
8763         private static final HashMap<String, Character.UnicodeScript> aliases; // short alias code (e.g. "ARAB") -> script constant; filled by the static block that follows
8764         static {
8765             aliases = HashMap.newHashMap(UNKNOWN.ordinal() + 1);
8766             aliases.put("ADLM", ADLAM);
8767             aliases.put("AGHB", CAUCASIAN_ALBANIAN);
8768             aliases.put("AHOM", AHOM);
8769             aliases.put("ARAB", ARABIC);
8770             aliases.put("ARMI", IMPERIAL_ARAMAIC);
8771             aliases.put("ARMN", ARMENIAN);
8772             aliases.put("AVST", AVESTAN);
8773             aliases.put("BALI", BALINESE);
8774             aliases.put("BAMU", BAMUM);
8775             aliases.put("BASS", BASSA_VAH);
8776             aliases.put("BATK", BATAK);
8777             aliases.put("BENG", BENGALI);
8778             aliases.put("BHKS", BHAIKSUKI);
8779             aliases.put("BOPO", BOPOMOFO);
8780             aliases.put("BRAH", BRAHMI);
8781             aliases.put("BRAI", BRAILLE);
8782             aliases.put("BUGI", BUGINESE);
8783             aliases.put("BUHD", BUHID);
8784             aliases.put("CAKM", CHAKMA);
8785             aliases.put("CANS", CANADIAN_ABORIGINAL);
8786             aliases.put("CARI", CARIAN);
8787             aliases.put("CHAM", CHAM);
8788             aliases.put("CHER", CHEROKEE);
8789             aliases.put("CHRS", CHORASMIAN);
8790             aliases.put("COPT", COPTIC);
8791             aliases.put("CPMN", CYPRO_MINOAN);
8792             aliases.put("CPRT", CYPRIOT);
8793             aliases.put("CYRL", CYRILLIC);
8794             aliases.put("DEVA", DEVANAGARI);
8795             aliases.put("DIAK", DIVES_AKURU);
8796             aliases.put("DOGR", DOGRA);
8797             aliases.put("DSRT", DESERET);
8798             aliases.put("DUPL", DUPLOYAN);
8799             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
8800             aliases.put("ELBA", ELBASAN);
8801             aliases.put("ELYM", ELYMAIC);
8802             aliases.put("ETHI", ETHIOPIC);
8803             aliases.put("GEOR", GEORGIAN);
8804             aliases.put("GLAG", GLAGOLITIC);
8805             aliases.put("GONG", GUNJALA_GONDI);
8806             aliases.put("GONM", MASARAM_GONDI);
8807             aliases.put("GOTH", GOTHIC);
8808             aliases.put("GRAN", GRANTHA);
8809             aliases.put("GREK", GREEK);
8810             aliases.put("GUJR", GUJARATI);
8811             aliases.put("GURU", GURMUKHI);
8812             aliases.put("HANG", HANGUL);
8813             aliases.put("HANI", HAN);
8814             aliases.put("HANO", HANUNOO);
8815             aliases.put("HATR", HATRAN);
8816             aliases.put("HEBR", HEBREW);
8817             aliases.put("HIRA", HIRAGANA);
8818             aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
8819             aliases.put("HMNG", PAHAWH_HMONG);
8820             aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG);
8821             aliases.put("HUNG", OLD_HUNGARIAN);
8822             aliases.put("ITAL", OLD_ITALIC);
8823             aliases.put("JAVA", JAVANESE);
8824             aliases.put("KALI", KAYAH_LI);
8825             aliases.put("KANA", KATAKANA);
8826             aliases.put("KAWI", KAWI);
8827             aliases.put("KHAR", KHAROSHTHI);
8828             aliases.put("KHMR", KHMER);
8829             aliases.put("KHOJ", KHOJKI);
8830             aliases.put("KITS", KHITAN_SMALL_SCRIPT);
8831             aliases.put("KNDA", KANNADA);
8832             aliases.put("KTHI", KAITHI);
8833             aliases.put("LANA", TAI_THAM);
8834             aliases.put("LAOO", LAO);
8835             aliases.put("LATN", LATIN);
8836             aliases.put("LEPC", LEPCHA);
8837             aliases.put("LIMB", LIMBU);
8838             aliases.put("LINA", LINEAR_A);
8839             aliases.put("LINB", LINEAR_B);
8840             aliases.put("LISU", LISU);
8841             aliases.put("LYCI", LYCIAN);
8842             aliases.put("LYDI", LYDIAN);
8843             aliases.put("MAHJ", MAHAJANI);
8844             aliases.put("MAKA", MAKASAR);
8845             aliases.put("MAND", MANDAIC);
8846             aliases.put("MANI", MANICHAEAN);
8847             aliases.put("MARC", MARCHEN);
8848             aliases.put("MEDF", MEDEFAIDRIN);
8849             aliases.put("MEND", MENDE_KIKAKUI);
8850             aliases.put("MERC", MEROITIC_CURSIVE);
8851             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
8852             aliases.put("MLYM", MALAYALAM);
8853             aliases.put("MODI", MODI);
8854             aliases.put("MONG", MONGOLIAN);
8855             aliases.put("MROO", MRO);
8856             aliases.put("MTEI", MEETEI_MAYEK);
8857             aliases.put("MULT", MULTANI);
8858             aliases.put("MYMR", MYANMAR);
8859             aliases.put("NAGM", NAG_MUNDARI);
8860             aliases.put("NAND", NANDINAGARI);
8861             aliases.put("NARB", OLD_NORTH_ARABIAN);
8862             aliases.put("NBAT", NABATAEAN);
8863             aliases.put("NEWA", NEWA);
8864             aliases.put("NKOO", NKO);
8865             aliases.put("NSHU", NUSHU);
8866             aliases.put("OGAM", OGHAM);
8867             aliases.put("OLCK", OL_CHIKI);
8868             aliases.put("ORKH", OLD_TURKIC);
8869             aliases.put("ORYA", ORIYA);
8870             aliases.put("OSGE", OSAGE);
8871             aliases.put("OSMA", OSMANYA);
8872             aliases.put("OUGR", OLD_UYGHUR);
8873             aliases.put("PALM", PALMYRENE);
8874             aliases.put("PAUC", PAU_CIN_HAU);
8875             aliases.put("PERM", OLD_PERMIC);
8876             aliases.put("PHAG", PHAGS_PA);
8877             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
8878             aliases.put("PHLP", PSALTER_PAHLAVI);
8879             aliases.put("PHNX", PHOENICIAN);
8880             aliases.put("PLRD", MIAO);
8881             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
8882             aliases.put("RJNG", REJANG);
8883             aliases.put("ROHG", HANIFI_ROHINGYA);
8884             aliases.put("RUNR", RUNIC);
8885             aliases.put("SAMR", SAMARITAN);
8886             aliases.put("SARB", OLD_SOUTH_ARABIAN);
8887             aliases.put("SAUR", SAURASHTRA);
8888             aliases.put("SGNW", SIGNWRITING);
8889             aliases.put("SHAW", SHAVIAN);
8890             aliases.put("SHRD", SHARADA);
8891             aliases.put("SIDD", SIDDHAM);
8892             aliases.put("SIND", KHUDAWADI);
8893             aliases.put("SINH", SINHALA);
8894             aliases.put("SOGD", SOGDIAN);
8895             aliases.put("SOGO", OLD_SOGDIAN);
8896             aliases.put("SORA", SORA_SOMPENG);
8897             aliases.put("SOYO", SOYOMBO);
8898             aliases.put("SUND", SUNDANESE);
8899             aliases.put("SYLO", SYLOTI_NAGRI);
8900             aliases.put("SYRC", SYRIAC);
8901             aliases.put("TAGB", TAGBANWA);
8902             aliases.put("TAKR", TAKRI);
8903             aliases.put("TALE", TAI_LE);
8904             aliases.put("TALU", NEW_TAI_LUE);
8905             aliases.put("TAML", TAMIL);
8906             aliases.put("TANG", TANGUT);
8907             aliases.put("TAVT", TAI_VIET);
8908             aliases.put("TELU", TELUGU);
8909             aliases.put("TFNG", TIFINAGH);
8910             aliases.put("TGLG", TAGALOG);
8911             aliases.put("THAA", THAANA);
8912             aliases.put("THAI", THAI);
8913             aliases.put("TIBT", TIBETAN);
8914             aliases.put("TIRH", TIRHUTA);
8915             aliases.put("TNSA", TANGSA);
8916             aliases.put("TOTO", TOTO);
8917             aliases.put("UGAR", UGARITIC);
8918             aliases.put("VAII", VAI);
8919             aliases.put("VITH", VITHKUQI);
8920             aliases.put("WARA", WARANG_CITI);
8921             aliases.put("WCHO", WANCHO);
8922             aliases.put("XPEO", OLD_PERSIAN);
8923             aliases.put("XSUX", CUNEIFORM);
8924             aliases.put("YEZI", YEZIDI);
8925             aliases.put("YIII", YI);
8926             aliases.put("ZANB", ZANABAZAR_SQUARE);
8927             aliases.put("ZINH", INHERITED);
8928             aliases.put("ZYYY", COMMON);
8929             aliases.put("ZZZZ", UNKNOWN);
8930         }
8931 
8932         /**
8933          * Returns the enum constant representing the Unicode script of which
8934          * the given character (Unicode code point) is assigned to.
8935          *
8936          * @param   codePoint the character (Unicode code point) in question.
8937          * @return  The {@code UnicodeScript} constant representing the
8938          *          Unicode script of which this character is assigned to.
8939          *
8940          * @throws  IllegalArgumentException if the specified
8941          * {@code codePoint} is an invalid Unicode code point.
8942          * @see Character#isValidCodePoint(int)
8943          *
8944          */
of(int codePoint)8945         public static UnicodeScript of(int codePoint) {
8946             if (!isValidCodePoint(codePoint))
8947                 throw new IllegalArgumentException(
8948                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
8949             int type = getType(codePoint);
8950             // leave SURROGATE and PRIVATE_USE for table lookup
8951             if (type == UNASSIGNED)
8952                 return UNKNOWN;
8953             int index = Arrays.binarySearch(scriptStarts, codePoint);
8954             if (index < 0)
8955                 index = -index - 2;
8956             return scripts[index];
8957         }
8958 
8959         /**
8960          * Returns the UnicodeScript constant with the given Unicode script
8961          * name or the script name alias. Script names and their aliases are
8962          * determined by The Unicode Standard. The files {@code Scripts.txt}
8963          * and {@code PropertyValueAliases.txt} define script names
8964          * and the script name aliases for a particular version of the
8965          * standard. The {@link Character} class specifies the version of
8966          * the standard that it supports.
8967          * <p>
8968          * Character case is ignored for all of the valid script names.
8969          * The en_US locale's case mapping rules are used to provide
8970          * case-insensitive string comparisons for script name validation.
8971          *
8972          * @param scriptName A {@code UnicodeScript} name.
8973          * @return The {@code UnicodeScript} constant identified
8974          *         by {@code scriptName}
8975          * @throws IllegalArgumentException if {@code scriptName} is an
8976          *         invalid name
8977          * @throws NullPointerException if {@code scriptName} is null
8978          */
forName(String scriptName)8979         public static final UnicodeScript forName(String scriptName) {
8980             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
8981                                  //.replace(' ', '_'));
8982             UnicodeScript sc = aliases.get(scriptName);
8983             if (sc != null)
8984                 return sc;
8985             return valueOf(scriptName);
8986         }
8987     }
8988 
    /**
     * The value of the {@code Character}: the primitive {@code char}
     * wrapped by this object. The field is {@code final}, so it is
     * assigned exactly once.
     *
     * @serial
     */
    private final char value;

    /** use serialVersionUID from JDK 1.0.2 for interoperability */
    @java.io.Serial
    private static final long serialVersionUID = 3786198910865385080L;
8999 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value.
     * <p>
     * The constructor only stores the value; it performs no caching.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     *
     * @deprecated
     * It is rarely appropriate to use this constructor. The static factory
     * {@link #valueOf(char)} is generally a better choice, as it is
     * likely to yield significantly better space and time performance.
     */
    // Android-changed: not yet forRemoval on Android.
    @Deprecated(since="9"/*, forRemoval = true*/)
    public Character(char value) {
        // Record the wrapped primitive in the final instance field.
        this.value = value;
    }
9017 
9018     private static final class CharacterCache {
CharacterCache()9019         private CharacterCache(){}
9020 
9021         @Stable
9022         static final Character[] cache;
9023         static Character[] archivedCache;
9024 
9025         static {
9026             int size = 127 + 1;
9027 
9028             // Load and use the archived cache if it exists
9029             // Android-removed: CDS is not used on Android.
9030             // CDS.initializeFromArchive(CharacterCache.class);
9031             if (archivedCache == null || archivedCache.length != size) {
9032                 Character[] c = new Character[size];
9033                 for (int i = 0; i < size; i++) {
9034                     c[i] = new Character((char) i);
9035                 }
9036                 archivedCache = c;
9037             }
9038             cache = archivedCache;
9039         }
9040     }
9041 
9042     /**
9043      * Returns a {@code Character} instance representing the specified
9044      * {@code char} value.
9045      * If a new {@code Character} instance is not required, this method
9046      * should generally be used in preference to the constructor
9047      * {@link #Character(char)}, as this method is likely to yield
9048      * significantly better space and time performance by caching
9049      * frequently requested values.
9050      *
9051      * This method will always cache values in the range {@code
9052      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
9053      * cache other values outside of this range.
9054      *
9055      * @param  c a char value.
9056      * @return a {@code Character} instance representing {@code c}.
9057      * @since  1.5
9058      */
9059     @IntrinsicCandidate
valueOf(char c)9060     public static Character valueOf(char c) {
9061         if (c <= 127) { // must cache
9062             return CharacterCache.cache[(int)c];
9063         }
9064         return new Character(c);
9065     }
9066 
    /**
     * Returns the value of this {@code Character} object.
     *
     * @return  the primitive {@code char} value represented by
     *          this object.
     */
    @IntrinsicCandidate
    public char charValue() {
        // Plain read of the wrapped primitive.
        return value;
    }
9076 
    /**
     * Returns a hash code for this {@code Character}; equal to the result
     * of invoking {@code charValue()}.
     *
     * @return a hash code value for this {@code Character}
     */
    @Override
    public int hashCode() {
        // Delegate to the static variant so both always agree.
        return Character.hashCode(value);
    }
9087 
9088     /**
9089      * Returns a hash code for a {@code char} value; compatible with
9090      * {@code Character.hashCode()}.
9091      *
9092      * @since 1.8
9093      *
9094      * @param value The {@code char} for which to return a hash code.
9095      * @return a hash code value for a {@code char} value.
9096      */
hashCode(char value)9097     public static int hashCode(char value) {
9098         return (int)value;
9099     }
9100 
9101     /**
9102      * Compares this object against the specified object.
9103      * The result is {@code true} if and only if the argument is not
9104      * {@code null} and is a {@code Character} object that
9105      * represents the same {@code char} value as this object.
9106      *
9107      * @param   obj   the object to compare with.
9108      * @return  {@code true} if the objects are the same;
9109      *          {@code false} otherwise.
9110      */
equals(Object obj)9111     public boolean equals(Object obj) {
9112         if (obj instanceof Character) {
9113             return value == ((Character)obj).charValue();
9114         }
9115         return false;
9116     }
9117 
    /**
     * Returns a {@code String} object representing this
     * {@code Character}'s value.  The result is a string of
     * length 1 whose sole component is the primitive
     * {@code char} value represented by this
     * {@code Character} object.
     *
     * @return  a string representation of this object.
     */
    @Override
    public String toString() {
        // Conversion is delegated to String.valueOf(char).
        return String.valueOf(value);
    }
9131 
    // Android-removed: reference to Character.toString(int) in javadoc.
    /**
     * Returns a {@code String} object representing the
     * specified {@code char}.  The result is a string of length
     * 1 consisting solely of the specified {@code char}.
     *
     * @param c the {@code char} to be converted
     * @return the string representation of the specified {@code char}
     * @since 1.4
     */
    public static String toString(char c) {
        // Conversion is delegated to String.valueOf(char).
        return String.valueOf(c);
    }
9145 
    // Android-changed: toString(int) is exposed now that String.valueOfCodePoint() is imported.
    /**
     * Returns a {@code String} object representing the
     * specified character (Unicode code point).  The result is a string of
     * length 1 or 2, consisting solely of the specified {@code codePoint}.
     *
     * @param codePoint the {@code codePoint} to be converted
     * @return the string representation of the specified {@code codePoint}
     * @throws IllegalArgumentException if the specified
     *      {@code codePoint} is not a {@linkplain #isValidCodePoint
     *      valid Unicode code point}.
     * @since 11
     */
    public static String toString(int codePoint) {
        // No validation happens here; presumably String.valueOfCodePoint
        // rejects invalid code points -- confirm against String.
        return String.valueOfCodePoint(codePoint);
    }
9162 
9163     /**
9164      * Determines whether the specified code point is a valid
9165      * <a href="http://www.unicode.org/glossary/#code_point">
9166      * Unicode code point value</a>.
9167      *
9168      * @param  codePoint the Unicode code point to be tested
9169      * @return {@code true} if the specified code point value is between
9170      *         {@link #MIN_CODE_POINT} and
9171      *         {@link #MAX_CODE_POINT} inclusive;
9172      *         {@code false} otherwise.
9173      * @since  1.5
9174      */
isValidCodePoint(int codePoint)9175     public static boolean isValidCodePoint(int codePoint) {
9176         // Optimized form of:
9177         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
9178         int plane = codePoint >>> 16;
9179         return plane < ((MAX_CODE_POINT + 1) >>> 16);
9180     }
9181 
9182     /**
9183      * Determines whether the specified character (Unicode code point)
9184      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
9185      * Such code points can be represented using a single {@code char}.
9186      *
9187      * @param  codePoint the character (Unicode code point) to be to
9188      * @return {@code true} if the specified code point is between
9189      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
9190      *         {@code false} otherwise.
9191      * @since  1.7
9192      */
isBmpCodePoint(int codePoint)9193     public static boolean isBmpCodePoint(int codePoint) {
9194         return codePoint >>> 16 == 0;
9195         // Optimized form of:
9196         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
9197         // We consistently use logical shift (>>>) to facilitate
9198         // additional runtime optimizations.
9199     }
9200 
9201     /**
9202      * Determines whether the specified character (Unicode code point)
9203      * is in the <a href="#supplementary">supplementary character</a> range.
9204      *
9205      * @param  codePoint the character (Unicode code point) to be tested
9206      * @return {@code true} if the specified code point is between
9207      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
9208      *         {@link #MAX_CODE_POINT} inclusive;
9209      *         {@code false} otherwise.
9210      * @since  1.5
9211      */
isSupplementaryCodePoint(int codePoint)9212     public static boolean isSupplementaryCodePoint(int codePoint) {
9213         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
9214             && codePoint <  MAX_CODE_POINT + 1;
9215     }
9216 
9217     /**
9218      * Determines if the given {@code char} value is a
9219      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9220      * Unicode high-surrogate code unit</a>
9221      * (also known as <i>leading-surrogate code unit</i>).
9222      *
9223      * <p>Such values do not represent characters by themselves,
9224      * but are used in the representation of
9225      * <a href="#supplementary">supplementary characters</a>
9226      * in the UTF-16 encoding.
9227      *
9228      * @param  ch the {@code char} value to be tested.
9229      * @return {@code true} if the {@code char} value is between
9230      *         {@link #MIN_HIGH_SURROGATE} and
9231      *         {@link #MAX_HIGH_SURROGATE} inclusive;
9232      *         {@code false} otherwise.
9233      * @see    Character#isLowSurrogate(char)
9234      * @see    Character.UnicodeBlock#of(int)
9235      * @since  1.5
9236      */
isHighSurrogate(char ch)9237     public static boolean isHighSurrogate(char ch) {
9238         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
9239         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
9240     }
9241 
9242     /**
9243      * Determines if the given {@code char} value is a
9244      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
9245      * Unicode low-surrogate code unit</a>
9246      * (also known as <i>trailing-surrogate code unit</i>).
9247      *
9248      * <p>Such values do not represent characters by themselves,
9249      * but are used in the representation of
9250      * <a href="#supplementary">supplementary characters</a>
9251      * in the UTF-16 encoding.
9252      *
9253      * @param  ch the {@code char} value to be tested.
9254      * @return {@code true} if the {@code char} value is between
9255      *         {@link #MIN_LOW_SURROGATE} and
9256      *         {@link #MAX_LOW_SURROGATE} inclusive;
9257      *         {@code false} otherwise.
9258      * @see    Character#isHighSurrogate(char)
9259      * @since  1.5
9260      */
isLowSurrogate(char ch)9261     public static boolean isLowSurrogate(char ch) {
9262         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
9263     }
9264 
9265     /**
9266      * Determines if the given {@code char} value is a Unicode
9267      * <i>surrogate code unit</i>.
9268      *
9269      * <p>Such values do not represent characters by themselves,
9270      * but are used in the representation of
9271      * <a href="#supplementary">supplementary characters</a>
9272      * in the UTF-16 encoding.
9273      *
9274      * <p>A char value is a surrogate code unit if and only if it is either
9275      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
9276      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
9277      *
9278      * @param  ch the {@code char} value to be tested.
9279      * @return {@code true} if the {@code char} value is between
9280      *         {@link #MIN_SURROGATE} and
9281      *         {@link #MAX_SURROGATE} inclusive;
9282      *         {@code false} otherwise.
9283      * @since  1.7
9284      */
isSurrogate(char ch)9285     public static boolean isSurrogate(char ch) {
9286         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
9287     }
9288 
9289     /**
9290      * Determines whether the specified pair of {@code char}
9291      * values is a valid
9292      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9293      * Unicode surrogate pair</a>.
9294      *
9295      * <p>This method is equivalent to the expression:
9296      * <blockquote><pre>{@code
9297      * isHighSurrogate(high) && isLowSurrogate(low)
9298      * }</pre></blockquote>
9299      *
9300      * @param  high the high-surrogate code value to be tested
9301      * @param  low the low-surrogate code value to be tested
9302      * @return {@code true} if the specified high and
9303      * low-surrogate code values represent a valid surrogate pair;
9304      * {@code false} otherwise.
9305      * @since  1.5
9306      */
isSurrogatePair(char high, char low)9307     public static boolean isSurrogatePair(char high, char low) {
9308         return isHighSurrogate(high) && isLowSurrogate(low);
9309     }
9310 
9311     /**
9312      * Determines the number of {@code char} values needed to
9313      * represent the specified character (Unicode code point). If the
9314      * specified character is equal to or greater than 0x10000, then
9315      * the method returns 2. Otherwise, the method returns 1.
9316      *
9317      * <p>This method doesn't validate the specified character to be a
9318      * valid Unicode code point. The caller must validate the
9319      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
9320      * if necessary.
9321      *
9322      * @param   codePoint the character (Unicode code point) to be tested.
9323      * @return  2 if the character is a valid supplementary character; 1 otherwise.
9324      * @see     Character#isSupplementaryCodePoint(int)
9325      * @since   1.5
9326      */
charCount(int codePoint)9327     public static int charCount(int codePoint) {
9328         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
9329     }
9330 
9331     /**
9332      * Converts the specified surrogate pair to its supplementary code
9333      * point value. This method does not validate the specified
9334      * surrogate pair. The caller must validate it using {@link
9335      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
9336      *
9337      * @param  high the high-surrogate code unit
9338      * @param  low the low-surrogate code unit
9339      * @return the supplementary code point composed from the
9340      *         specified surrogate pair.
9341      * @since  1.5
9342      */
toCodePoint(char high, char low)9343     public static int toCodePoint(char high, char low) {
9344         // Optimized form of:
9345         // return ((high - MIN_HIGH_SURROGATE) << 10)
9346         //         + (low - MIN_LOW_SURROGATE)
9347         //         + MIN_SUPPLEMENTARY_CODE_POINT;
9348         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
9349                                        - (MIN_HIGH_SURROGATE << 10)
9350                                        - MIN_LOW_SURROGATE);
9351     }
9352 
9353     /**
9354      * Returns the code point at the given index of the
9355      * {@code CharSequence}. If the {@code char} value at
9356      * the given index in the {@code CharSequence} is in the
9357      * high-surrogate range, the following index is less than the
9358      * length of the {@code CharSequence}, and the
9359      * {@code char} value at the following index is in the
9360      * low-surrogate range, then the supplementary code point
9361      * corresponding to this surrogate pair is returned. Otherwise,
9362      * the {@code char} value at the given index is returned.
9363      *
9364      * @param seq a sequence of {@code char} values (Unicode code
9365      * units)
9366      * @param index the index to the {@code char} values (Unicode
9367      * code units) in {@code seq} to be converted
9368      * @return the Unicode code point at the given index
9369      * @throws NullPointerException if {@code seq} is null.
9370      * @throws IndexOutOfBoundsException if the value
9371      * {@code index} is negative or not less than
9372      * {@link CharSequence#length() seq.length()}.
9373      * @since  1.5
9374      */
codePointAt(CharSequence seq, int index)9375     public static int codePointAt(CharSequence seq, int index) {
9376         char c1 = seq.charAt(index);
9377         if (isHighSurrogate(c1) && ++index < seq.length()) {
9378             char c2 = seq.charAt(index);
9379             if (isLowSurrogate(c2)) {
9380                 return toCodePoint(c1, c2);
9381             }
9382         }
9383         return c1;
9384     }
9385 
9386     /**
9387      * Returns the code point at the given index of the
9388      * {@code char} array. If the {@code char} value at
9389      * the given index in the {@code char} array is in the
9390      * high-surrogate range, the following index is less than the
9391      * length of the {@code char} array, and the
9392      * {@code char} value at the following index is in the
9393      * low-surrogate range, then the supplementary code point
9394      * corresponding to this surrogate pair is returned. Otherwise,
9395      * the {@code char} value at the given index is returned.
9396      *
9397      * @param a the {@code char} array
9398      * @param index the index to the {@code char} values (Unicode
9399      * code units) in the {@code char} array to be converted
9400      * @return the Unicode code point at the given index
9401      * @throws NullPointerException if {@code a} is null.
9402      * @throws IndexOutOfBoundsException if the value
9403      * {@code index} is negative or not less than
9404      * the length of the {@code char} array.
9405      * @since  1.5
9406      */
codePointAt(char[] a, int index)9407     public static int codePointAt(char[] a, int index) {
9408         return codePointAtImpl(a, index, a.length);
9409     }
9410 
9411     /**
9412      * Returns the code point at the given index of the
9413      * {@code char} array, where only array elements with
9414      * {@code index} less than {@code limit} can be used. If
9415      * the {@code char} value at the given index in the
9416      * {@code char} array is in the high-surrogate range, the
9417      * following index is less than the {@code limit}, and the
9418      * {@code char} value at the following index is in the
9419      * low-surrogate range, then the supplementary code point
9420      * corresponding to this surrogate pair is returned. Otherwise,
9421      * the {@code char} value at the given index is returned.
9422      *
9423      * @param a the {@code char} array
9424      * @param index the index to the {@code char} values (Unicode
9425      * code units) in the {@code char} array to be converted
9426      * @param limit the index after the last array element that
9427      * can be used in the {@code char} array
9428      * @return the Unicode code point at the given index
9429      * @throws NullPointerException if {@code a} is null.
9430      * @throws IndexOutOfBoundsException if the {@code index}
9431      * argument is negative or not less than the {@code limit}
9432      * argument, or if the {@code limit} argument is negative or
9433      * greater than the length of the {@code char} array.
9434      * @since  1.5
9435      */
codePointAt(char[] a, int index, int limit)9436     public static int codePointAt(char[] a, int index, int limit) {
9437         if (index >= limit || index < 0 || limit > a.length) {
9438             throw new IndexOutOfBoundsException();
9439         }
9440         return codePointAtImpl(a, index, limit);
9441     }
9442 
9443     // throws ArrayIndexOutOfBoundsException if index out of bounds
codePointAtImpl(char[] a, int index, int limit)9444     static int codePointAtImpl(char[] a, int index, int limit) {
9445         char c1 = a[index];
9446         if (isHighSurrogate(c1) && ++index < limit) {
9447             char c2 = a[index];
9448             if (isLowSurrogate(c2)) {
9449                 return toCodePoint(c1, c2);
9450             }
9451         }
9452         return c1;
9453     }
9454 
9455     /**
9456      * Returns the code point preceding the given index of the
9457      * {@code CharSequence}. If the {@code char} value at
9458      * {@code (index - 1)} in the {@code CharSequence} is in
9459      * the low-surrogate range, {@code (index - 2)} is not
9460      * negative, and the {@code char} value at {@code (index - 2)}
9461      * in the {@code CharSequence} is in the
9462      * high-surrogate range, then the supplementary code point
9463      * corresponding to this surrogate pair is returned. Otherwise,
9464      * the {@code char} value at {@code (index - 1)} is
9465      * returned.
9466      *
9467      * @param seq the {@code CharSequence} instance
9468      * @param index the index following the code point that should be returned
9469      * @return the Unicode code point value before the given index.
9470      * @throws NullPointerException if {@code seq} is null.
9471      * @throws IndexOutOfBoundsException if the {@code index}
9472      * argument is less than 1 or greater than {@link
9473      * CharSequence#length() seq.length()}.
9474      * @since  1.5
9475      */
codePointBefore(CharSequence seq, int index)9476     public static int codePointBefore(CharSequence seq, int index) {
9477         char c2 = seq.charAt(--index);
9478         if (isLowSurrogate(c2) && index > 0) {
9479             char c1 = seq.charAt(--index);
9480             if (isHighSurrogate(c1)) {
9481                 return toCodePoint(c1, c2);
9482             }
9483         }
9484         return c2;
9485     }
9486 
9487     /**
9488      * Returns the code point preceding the given index of the
9489      * {@code char} array. If the {@code char} value at
9490      * {@code (index - 1)} in the {@code char} array is in
9491      * the low-surrogate range, {@code (index - 2)} is not
9492      * negative, and the {@code char} value at {@code (index - 2)}
9493      * in the {@code char} array is in the
9494      * high-surrogate range, then the supplementary code point
9495      * corresponding to this surrogate pair is returned. Otherwise,
9496      * the {@code char} value at {@code (index - 1)} is
9497      * returned.
9498      *
9499      * @param a the {@code char} array
9500      * @param index the index following the code point that should be returned
9501      * @return the Unicode code point value before the given index.
9502      * @throws NullPointerException if {@code a} is null.
9503      * @throws IndexOutOfBoundsException if the {@code index}
9504      * argument is less than 1 or greater than the length of the
9505      * {@code char} array
9506      * @since  1.5
9507      */
codePointBefore(char[] a, int index)9508     public static int codePointBefore(char[] a, int index) {
9509         return codePointBeforeImpl(a, index, 0);
9510     }
9511 
9512     /**
9513      * Returns the code point preceding the given index of the
9514      * {@code char} array, where only array elements with
9515      * {@code index} greater than or equal to {@code start}
9516      * can be used. If the {@code char} value at {@code (index - 1)}
9517      * in the {@code char} array is in the
9518      * low-surrogate range, {@code (index - 2)} is not less than
9519      * {@code start}, and the {@code char} value at
9520      * {@code (index - 2)} in the {@code char} array is in
9521      * the high-surrogate range, then the supplementary code point
9522      * corresponding to this surrogate pair is returned. Otherwise,
9523      * the {@code char} value at {@code (index - 1)} is
9524      * returned.
9525      *
9526      * @param a the {@code char} array
9527      * @param index the index following the code point that should be returned
9528      * @param start the index of the first array element in the
9529      * {@code char} array
9530      * @return the Unicode code point value before the given index.
9531      * @throws NullPointerException if {@code a} is null.
9532      * @throws IndexOutOfBoundsException if the {@code index}
9533      * argument is not greater than the {@code start} argument or
9534      * is greater than the length of the {@code char} array, or
9535      * if the {@code start} argument is negative or not less than
9536      * the length of the {@code char} array.
9537      * @since  1.5
9538      */
codePointBefore(char[] a, int index, int start)9539     public static int codePointBefore(char[] a, int index, int start) {
9540         if (index <= start || start < 0 || index > a.length) {
9541             throw new IndexOutOfBoundsException();
9542         }
9543         return codePointBeforeImpl(a, index, start);
9544     }
9545 
9546     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
codePointBeforeImpl(char[] a, int index, int start)9547     static int codePointBeforeImpl(char[] a, int index, int start) {
9548         char c2 = a[--index];
9549         if (isLowSurrogate(c2) && index > start) {
9550             char c1 = a[--index];
9551             if (isHighSurrogate(c1)) {
9552                 return toCodePoint(c1, c2);
9553             }
9554         }
9555         return c2;
9556     }
9557 
9558     /**
9559      * Returns the leading surrogate (a
9560      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9561      * high surrogate code unit</a>) of the
9562      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9563      * surrogate pair</a>
9564      * representing the specified supplementary character (Unicode
9565      * code point) in the UTF-16 encoding.  If the specified character
9566      * is not a
9567      * <a href="Character.html#supplementary">supplementary character</a>,
9568      * an unspecified {@code char} is returned.
9569      *
9570      * <p>If
9571      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9572      * is {@code true}, then
9573      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
9574      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
9575      * are also always {@code true}.
9576      *
9577      * @param   codePoint a supplementary character (Unicode code point)
9578      * @return  the leading surrogate code unit used to represent the
9579      *          character in the UTF-16 encoding
9580      * @since   1.7
9581      */
highSurrogate(int codePoint)9582     public static char highSurrogate(int codePoint) {
9583         return (char) ((codePoint >>> 10)
9584             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
9585     }
9586 
9587     /**
9588      * Returns the trailing surrogate (a
9589      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
9590      * low surrogate code unit</a>) of the
9591      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9592      * surrogate pair</a>
9593      * representing the specified supplementary character (Unicode
9594      * code point) in the UTF-16 encoding.  If the specified character
9595      * is not a
9596      * <a href="Character.html#supplementary">supplementary character</a>,
9597      * an unspecified {@code char} is returned.
9598      *
9599      * <p>If
9600      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9601      * is {@code true}, then
9602      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
9603      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
9604      * are also always {@code true}.
9605      *
9606      * @param   codePoint a supplementary character (Unicode code point)
9607      * @return  the trailing surrogate code unit used to represent the
9608      *          character in the UTF-16 encoding
9609      * @since   1.7
9610      */
lowSurrogate(int codePoint)9611     public static char lowSurrogate(int codePoint) {
9612         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
9613     }
9614 
9615     /**
9616      * Converts the specified character (Unicode code point) to its
9617      * UTF-16 representation. If the specified code point is a BMP
9618      * (Basic Multilingual Plane or Plane 0) value, the same value is
9619      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
9620      * specified code point is a supplementary character, its
9621      * surrogate values are stored in {@code dst[dstIndex]}
9622      * (high-surrogate) and {@code dst[dstIndex+1]}
9623      * (low-surrogate), and 2 is returned.
9624      *
9625      * @param  codePoint the character (Unicode code point) to be converted.
9626      * @param  dst an array of {@code char} in which the
9627      * {@code codePoint}'s UTF-16 value is stored.
9628      * @param dstIndex the start index into the {@code dst}
9629      * array where the converted value is stored.
9630      * @return 1 if the code point is a BMP code point, 2 if the
9631      * code point is a supplementary code point.
9632      * @throws IllegalArgumentException if the specified
9633      * {@code codePoint} is not a valid Unicode code point.
9634      * @throws NullPointerException if the specified {@code dst} is null.
9635      * @throws IndexOutOfBoundsException if {@code dstIndex}
9636      * is negative or not less than {@code dst.length}, or if
9637      * {@code dst} at {@code dstIndex} doesn't have enough
9638      * array element(s) to store the resulting {@code char}
9639      * value(s). (If {@code dstIndex} is equal to
9640      * {@code dst.length-1} and the specified
9641      * {@code codePoint} is a supplementary character, the
9642      * high-surrogate value is not stored in
9643      * {@code dst[dstIndex]}.)
9644      * @since  1.5
9645      */
toChars(int codePoint, char[] dst, int dstIndex)9646     public static int toChars(int codePoint, char[] dst, int dstIndex) {
9647         if (isBmpCodePoint(codePoint)) {
9648             dst[dstIndex] = (char) codePoint;
9649             return 1;
9650         } else if (isValidCodePoint(codePoint)) {
9651             toSurrogates(codePoint, dst, dstIndex);
9652             return 2;
9653         } else {
9654             throw new IllegalArgumentException(
9655                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9656         }
9657     }
9658 
9659     /**
9660      * Converts the specified character (Unicode code point) to its
9661      * UTF-16 representation stored in a {@code char} array. If
9662      * the specified code point is a BMP (Basic Multilingual Plane or
9663      * Plane 0) value, the resulting {@code char} array has
9664      * the same value as {@code codePoint}. If the specified code
9665      * point is a supplementary code point, the resulting
9666      * {@code char} array has the corresponding surrogate pair.
9667      *
9668      * @param  codePoint a Unicode code point
9669      * @return a {@code char} array having
9670      *         {@code codePoint}'s UTF-16 representation.
9671      * @throws IllegalArgumentException if the specified
9672      * {@code codePoint} is not a valid Unicode code point.
9673      * @since  1.5
9674      */
toChars(int codePoint)9675     public static char[] toChars(int codePoint) {
9676         if (isBmpCodePoint(codePoint)) {
9677             return new char[] { (char) codePoint };
9678         } else if (isValidCodePoint(codePoint)) {
9679             char[] result = new char[2];
9680             toSurrogates(codePoint, result, 0);
9681             return result;
9682         } else {
9683             throw new IllegalArgumentException(
9684                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9685         }
9686     }
9687 
toSurrogates(int codePoint, char[] dst, int index)9688     static void toSurrogates(int codePoint, char[] dst, int index) {
9689         // We write elements "backwards" to guarantee all-or-nothing
9690         dst[index+1] = lowSurrogate(codePoint);
9691         dst[index] = highSurrogate(codePoint);
9692     }
9693 
9694     /**
9695      * Returns the number of Unicode code points in the text range of
9696      * the specified char sequence. The text range begins at the
9697      * specified {@code beginIndex} and extends to the
9698      * {@code char} at index {@code endIndex - 1}. Thus the
9699      * length (in {@code char}s) of the text range is
9700      * {@code endIndex-beginIndex}. Unpaired surrogates within
9701      * the text range count as one code point each.
9702      *
9703      * @param seq the char sequence
9704      * @param beginIndex the index to the first {@code char} of
9705      * the text range.
9706      * @param endIndex the index after the last {@code char} of
9707      * the text range.
9708      * @return the number of Unicode code points in the specified text
9709      * range
9710      * @throws NullPointerException if {@code seq} is null.
9711      * @throws IndexOutOfBoundsException if the
9712      * {@code beginIndex} is negative, or {@code endIndex}
9713      * is larger than the length of the given sequence, or
9714      * {@code beginIndex} is larger than {@code endIndex}.
9715      * @since  1.5
9716      */
codePointCount(CharSequence seq, int beginIndex, int endIndex)9717     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
9718         Objects.checkFromToIndex(beginIndex, endIndex, seq.length());
9719         int n = endIndex - beginIndex;
9720         for (int i = beginIndex; i < endIndex; ) {
9721             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
9722                 isLowSurrogate(seq.charAt(i))) {
9723                 n--;
9724                 i++;
9725             }
9726         }
9727         return n;
9728     }
9729 
9730     /**
9731      * Returns the number of Unicode code points in a subarray of the
9732      * {@code char} array argument. The {@code offset}
9733      * argument is the index of the first {@code char} of the
9734      * subarray and the {@code count} argument specifies the
9735      * length of the subarray in {@code char}s. Unpaired
9736      * surrogates within the subarray count as one code point each.
9737      *
9738      * @param a the {@code char} array
9739      * @param offset the index of the first {@code char} in the
9740      * given {@code char} array
9741      * @param count the length of the subarray in {@code char}s
9742      * @return the number of Unicode code points in the specified subarray
9743      * @throws NullPointerException if {@code a} is null.
9744      * @throws IndexOutOfBoundsException if {@code offset} or
9745      * {@code count} is negative, or if {@code offset +
9746      * count} is larger than the length of the given array.
9747      * @since  1.5
9748      */
codePointCount(char[] a, int offset, int count)9749     public static int codePointCount(char[] a, int offset, int count) {
9750         Objects.checkFromIndexSize(offset, count, a.length);
9751         return codePointCountImpl(a, offset, count);
9752     }
9753 
codePointCountImpl(char[] a, int offset, int count)9754     static int codePointCountImpl(char[] a, int offset, int count) {
9755         int endIndex = offset + count;
9756         int n = count;
9757         for (int i = offset; i < endIndex; ) {
9758             if (isHighSurrogate(a[i++]) && i < endIndex &&
9759                 isLowSurrogate(a[i])) {
9760                 n--;
9761                 i++;
9762             }
9763         }
9764         return n;
9765     }
9766 
9767     /**
9768      * Returns the index within the given char sequence that is offset
9769      * from the given {@code index} by {@code codePointOffset}
9770      * code points. Unpaired surrogates within the text range given by
9771      * {@code index} and {@code codePointOffset} count as
9772      * one code point each.
9773      *
9774      * @param seq the char sequence
9775      * @param index the index to be offset
9776      * @param codePointOffset the offset in code points
9777      * @return the index within the char sequence
9778      * @throws NullPointerException if {@code seq} is null.
9779      * @throws IndexOutOfBoundsException if {@code index}
9780      *   is negative or larger than the length of the char sequence,
9781      *   or if {@code codePointOffset} is positive and the
9782      *   subsequence starting with {@code index} has fewer than
9783      *   {@code codePointOffset} code points, or if
9784      *   {@code codePointOffset} is negative and the subsequence
9785      *   before {@code index} has fewer than the absolute value
9786      *   of {@code codePointOffset} code points.
9787      * @since 1.5
9788      */
offsetByCodePoints(CharSequence seq, int index, int codePointOffset)9789     public static int offsetByCodePoints(CharSequence seq, int index,
9790                                          int codePointOffset) {
9791         int length = seq.length();
9792         if (index < 0 || index > length) {
9793             throw new IndexOutOfBoundsException();
9794         }
9795 
9796         int x = index;
9797         if (codePointOffset >= 0) {
9798             int i;
9799             for (i = 0; x < length && i < codePointOffset; i++) {
9800                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
9801                     isLowSurrogate(seq.charAt(x))) {
9802                     x++;
9803                 }
9804             }
9805             if (i < codePointOffset) {
9806                 throw new IndexOutOfBoundsException();
9807             }
9808         } else {
9809             int i;
9810             for (i = codePointOffset; x > 0 && i < 0; i++) {
9811                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
9812                     isHighSurrogate(seq.charAt(x-1))) {
9813                     x--;
9814                 }
9815             }
9816             if (i < 0) {
9817                 throw new IndexOutOfBoundsException();
9818             }
9819         }
9820         return x;
9821     }
9822 
9823     /**
9824      * Returns the index within the given {@code char} subarray
9825      * that is offset from the given {@code index} by
9826      * {@code codePointOffset} code points. The
9827      * {@code start} and {@code count} arguments specify a
9828      * subarray of the {@code char} array. Unpaired surrogates
9829      * within the text range given by {@code index} and
9830      * {@code codePointOffset} count as one code point each.
9831      *
9832      * @param a the {@code char} array
9833      * @param start the index of the first {@code char} of the
9834      * subarray
9835      * @param count the length of the subarray in {@code char}s
9836      * @param index the index to be offset
9837      * @param codePointOffset the offset in code points
9838      * @return the index within the subarray
9839      * @throws NullPointerException if {@code a} is null.
9840      * @throws IndexOutOfBoundsException
9841      *   if {@code start} or {@code count} is negative,
9842      *   or if {@code start + count} is larger than the length of
9843      *   the given array,
9844      *   or if {@code index} is less than {@code start} or
     *   larger than {@code start + count},
9846      *   or if {@code codePointOffset} is positive and the text range
9847      *   starting with {@code index} and ending with {@code start + count - 1}
9848      *   has fewer than {@code codePointOffset} code
9849      *   points,
9850      *   or if {@code codePointOffset} is negative and the text range
9851      *   starting with {@code start} and ending with {@code index - 1}
9852      *   has fewer than the absolute value of
9853      *   {@code codePointOffset} code points.
9854      * @since 1.5
9855      */
offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset)9856     public static int offsetByCodePoints(char[] a, int start, int count,
9857                                          int index, int codePointOffset) {
9858         if (count > a.length-start || start < 0 || count < 0
9859             || index < start || index > start+count) {
9860             throw new IndexOutOfBoundsException();
9861         }
9862         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
9863     }
9864 
offsetByCodePointsImpl(char[]a, int start, int count, int index, int codePointOffset)9865     static int offsetByCodePointsImpl(char[]a, int start, int count,
9866                                       int index, int codePointOffset) {
9867         int x = index;
9868         if (codePointOffset >= 0) {
9869             int limit = start + count;
9870             int i;
9871             for (i = 0; x < limit && i < codePointOffset; i++) {
9872                 if (isHighSurrogate(a[x++]) && x < limit &&
9873                     isLowSurrogate(a[x])) {
9874                     x++;
9875                 }
9876             }
9877             if (i < codePointOffset) {
9878                 throw new IndexOutOfBoundsException();
9879             }
9880         } else {
9881             int i;
9882             for (i = codePointOffset; x > start && i < 0; i++) {
9883                 if (isLowSurrogate(a[--x]) && x > start &&
9884                     isHighSurrogate(a[x-1])) {
9885                     x--;
9886                 }
9887             }
9888             if (i < 0) {
9889                 throw new IndexOutOfBoundsException();
9890             }
9891         }
9892         return x;
9893     }
9894 
9895     /**
9896      * Determines if the specified character is a lowercase character.
9897      * <p>
9898      * A character is lowercase if its general category type, provided
9899      * by {@code Character.getType(ch)}, is
9900      * {@code LOWERCASE_LETTER}, or it has contributory property
9901      * Other_Lowercase as defined by the Unicode Standard.
9902      * <p>
9903      * The following are examples of lowercase characters:
9904      * <blockquote><pre>
9905      * a b c d e f g h i j k l m n o p q r s t u v w x y z
9906      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
9907      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
9908      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
9909      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
9910      * </pre></blockquote>
9911      * <p> Many other Unicode characters are lowercase too.
9912      *
9913      * <p><b>Note:</b> This method cannot handle <a
9914      * href="#supplementary"> supplementary characters</a>. To support
9915      * all Unicode characters, including supplementary characters, use
9916      * the {@link #isLowerCase(int)} method.
9917      *
9918      * @param   ch   the character to be tested.
9919      * @return  {@code true} if the character is lowercase;
9920      *          {@code false} otherwise.
9921      * @see     Character#isLowerCase(char)
9922      * @see     Character#isTitleCase(char)
9923      * @see     Character#toLowerCase(char)
9924      * @see     Character#getType(char)
9925      */
isLowerCase(char ch)9926     public static boolean isLowerCase(char ch) {
9927         return isLowerCase((int)ch);
9928     }
9929 
9930     /**
9931      * Determines if the specified character (Unicode code point) is a
9932      * lowercase character.
9933      * <p>
9934      * A character is lowercase if its general category type, provided
9935      * by {@link Character#getType getType(codePoint)}, is
9936      * {@code LOWERCASE_LETTER}, or it has contributory property
9937      * Other_Lowercase as defined by the Unicode Standard.
9938      * <p>
9939      * The following are examples of lowercase characters:
9940      * <blockquote><pre>
9941      * a b c d e f g h i j k l m n o p q r s t u v w x y z
9942      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
9943      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
9944      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
9945      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
9946      * </pre></blockquote>
9947      * <p> Many other Unicode characters are lowercase too.
9948      *
9949      * @param   codePoint the character (Unicode code point) to be tested.
9950      * @return  {@code true} if the character is lowercase;
9951      *          {@code false} otherwise.
9952      * @see     Character#isLowerCase(int)
9953      * @see     Character#isTitleCase(int)
9954      * @see     Character#toLowerCase(int)
9955      * @see     Character#getType(int)
9956      * @since   1.5
9957      */
9958     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9959     /*
9960     public static boolean isLowerCase(int codePoint) {
9961         return CharacterData.of(codePoint).isLowerCase(codePoint);
9962     }
9963     */
isLowerCase(int codePoint)9964     public static boolean isLowerCase(int codePoint) {
9965         return isLowerCaseImpl(codePoint);
9966     }
9967 
9968     @FastNative
isLowerCaseImpl(int codePoint)9969     static native boolean isLowerCaseImpl(int codePoint);
9970     // END Android-changed: Reimplement methods natively on top of ICU4C.
9971 
9972     /**
9973      * Determines if the specified character is an uppercase character.
9974      * <p>
9975      * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
9977      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9978      * <p>
9979      * The following are examples of uppercase characters:
9980      * <blockquote><pre>
9981      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9982      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
9983      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
9984      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
9985      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
9986      * </pre></blockquote>
9987      * <p> Many other Unicode characters are uppercase too.
9988      *
9989      * <p><b>Note:</b> This method cannot handle <a
9990      * href="#supplementary"> supplementary characters</a>. To support
9991      * all Unicode characters, including supplementary characters, use
9992      * the {@link #isUpperCase(int)} method.
9993      *
9994      * @param   ch   the character to be tested.
9995      * @return  {@code true} if the character is uppercase;
9996      *          {@code false} otherwise.
9997      * @see     Character#isLowerCase(char)
9998      * @see     Character#isTitleCase(char)
9999      * @see     Character#toUpperCase(char)
10000      * @see     Character#getType(char)
10001      * @since   1.0
10002      */
isUpperCase(char ch)10003     public static boolean isUpperCase(char ch) {
10004         return isUpperCase((int)ch);
10005     }
10006 
10007     /**
10008      * Determines if the specified character (Unicode code point) is an uppercase character.
10009      * <p>
10010      * A character is uppercase if its general category type, provided by
10011      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
10012      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
10013      * <p>
10014      * The following are examples of uppercase characters:
10015      * <blockquote><pre>
10016      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
10017      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
10018      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
10019      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
10020      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
10021      * </pre></blockquote>
10022      * <p> Many other Unicode characters are uppercase too.
10023      *
10024      * @param   codePoint the character (Unicode code point) to be tested.
10025      * @return  {@code true} if the character is uppercase;
10026      *          {@code false} otherwise.
10027      * @see     Character#isLowerCase(int)
10028      * @see     Character#isTitleCase(int)
10029      * @see     Character#toUpperCase(int)
10030      * @see     Character#getType(int)
10031      * @since   1.5
10032      */
10033     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10034     /*
10035     public static boolean isUpperCase(int codePoint) {
10036         return CharacterData.of(codePoint).isUpperCase(codePoint);
10037     }
10038     */
isUpperCase(int codePoint)10039     public static boolean isUpperCase(int codePoint) {
10040         return isUpperCaseImpl(codePoint);
10041     }
10042 
10043     @FastNative
isUpperCaseImpl(int codePoint)10044     static native boolean isUpperCaseImpl(int codePoint);
10045     // END Android-changed: Reimplement methods natively on top of ICU4C.
10046 
10047     /**
10048      * Determines if the specified character is a titlecase character.
10049      * <p>
10050      * A character is a titlecase character if its general
10051      * category type, provided by {@code Character.getType(ch)},
10052      * is {@code TITLECASE_LETTER}.
10053      * <p>
10054      * Some characters look like pairs of Latin letters. For example, there
10055      * is an uppercase letter that looks like "LJ" and has a corresponding
10056      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
10057      * is the appropriate form to use when rendering a word in lowercase
10058      * with initial capitals, as for a book title.
10059      * <p>
10060      * These are some of the Unicode characters for which this method returns
10061      * {@code true}:
10062      * <ul>
10063      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
10064      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
10065      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
10066      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
10067      * </ul>
10068      * <p> Many other Unicode characters are titlecase too.
10069      *
10070      * <p><b>Note:</b> This method cannot handle <a
10071      * href="#supplementary"> supplementary characters</a>. To support
10072      * all Unicode characters, including supplementary characters, use
10073      * the {@link #isTitleCase(int)} method.
10074      *
10075      * @param   ch   the character to be tested.
10076      * @return  {@code true} if the character is titlecase;
10077      *          {@code false} otherwise.
10078      * @see     Character#isLowerCase(char)
10079      * @see     Character#isUpperCase(char)
10080      * @see     Character#toTitleCase(char)
10081      * @see     Character#getType(char)
10082      * @since   1.0.2
10083      */
isTitleCase(char ch)10084     public static boolean isTitleCase(char ch) {
10085         return isTitleCase((int)ch);
10086     }
10087 
10088     /**
10089      * Determines if the specified character (Unicode code point) is a titlecase character.
10090      * <p>
10091      * A character is a titlecase character if its general
10092      * category type, provided by {@link Character#getType(int) getType(codePoint)},
10093      * is {@code TITLECASE_LETTER}.
10094      * <p>
10095      * Some characters look like pairs of Latin letters. For example, there
10096      * is an uppercase letter that looks like "LJ" and has a corresponding
10097      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
10098      * is the appropriate form to use when rendering a word in lowercase
10099      * with initial capitals, as for a book title.
10100      * <p>
10101      * These are some of the Unicode characters for which this method returns
10102      * {@code true}:
10103      * <ul>
10104      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
10105      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
10106      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
10107      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
10108      * </ul>
10109      * <p> Many other Unicode characters are titlecase too.
10110      *
10111      * @param   codePoint the character (Unicode code point) to be tested.
10112      * @return  {@code true} if the character is titlecase;
10113      *          {@code false} otherwise.
10114      * @see     Character#isLowerCase(int)
10115      * @see     Character#isUpperCase(int)
10116      * @see     Character#toTitleCase(int)
10117      * @see     Character#getType(int)
10118      * @since   1.5
10119      */
10120     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10121     /*
10122     public static boolean isTitleCase(int codePoint) {
10123         return getType(codePoint) == Character.TITLECASE_LETTER;
10124     }
10125     */
isTitleCase(int codePoint)10126     public static boolean isTitleCase(int codePoint) {
10127         return isTitleCaseImpl(codePoint);
10128     }
10129 
10130     @FastNative
isTitleCaseImpl(int codePoint)10131     static native boolean isTitleCaseImpl(int codePoint);
10132     // END Android-changed: Reimplement methods natively on top of ICU4C.
10133 
10134     /**
10135      * Determines if the specified character is a digit.
10136      * <p>
10137      * A character is a digit if its general category type, provided
10138      * by {@code Character.getType(ch)}, is
10139      * {@code DECIMAL_DIGIT_NUMBER}.
10140      * <p>
10141      * Some Unicode character ranges that contain digits:
10142      * <ul>
10143      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
10144      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
10145      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
10146      *     Arabic-Indic digits
10147      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
10148      *     Extended Arabic-Indic digits
10149      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
10150      *     Devanagari digits
10151      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
10152      *     Fullwidth digits
10153      * </ul>
10154      *
10155      * Many other character ranges contain digits as well.
10156      *
10157      * <p><b>Note:</b> This method cannot handle <a
10158      * href="#supplementary"> supplementary characters</a>. To support
10159      * all Unicode characters, including supplementary characters, use
10160      * the {@link #isDigit(int)} method.
10161      *
10162      * @param   ch   the character to be tested.
10163      * @return  {@code true} if the character is a digit;
10164      *          {@code false} otherwise.
10165      * @see     Character#digit(char, int)
10166      * @see     Character#forDigit(int, int)
10167      * @see     Character#getType(char)
10168      */
isDigit(char ch)10169     public static boolean isDigit(char ch) {
10170         return isDigit((int)ch);
10171     }
10172 
10173     /**
10174      * Determines if the specified character (Unicode code point) is a digit.
10175      * <p>
10176      * A character is a digit if its general category type, provided
10177      * by {@link Character#getType(int) getType(codePoint)}, is
10178      * {@code DECIMAL_DIGIT_NUMBER}.
10179      * <p>
10180      * Some Unicode character ranges that contain digits:
10181      * <ul>
10182      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
10183      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
10184      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
10185      *     Arabic-Indic digits
10186      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
10187      *     Extended Arabic-Indic digits
10188      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
10189      *     Devanagari digits
10190      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
10191      *     Fullwidth digits
10192      * </ul>
10193      *
10194      * Many other character ranges contain digits as well.
10195      *
10196      * @param   codePoint the character (Unicode code point) to be tested.
10197      * @return  {@code true} if the character is a digit;
10198      *          {@code false} otherwise.
10199      * @see     Character#forDigit(int, int)
10200      * @see     Character#getType(int)
10201      * @since   1.5
10202      */
10203     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10204     /*
10205     public static boolean isDigit(int codePoint) {
10206         return CharacterData.of(codePoint).isDigit(codePoint);
10207     }
10208     */
isDigit(int codePoint)10209     public static boolean isDigit(int codePoint) {
10210         return isDigitImpl(codePoint);
10211     }
10212 
10213     @FastNative
isDigitImpl(int codePoint)10214     static native boolean isDigitImpl(int codePoint);
10215     // END Android-changed: Reimplement methods natively on top of ICU4C.
10216 
10217     /**
10218      * Determines if a character is defined in Unicode.
10219      * <p>
10220      * A character is defined if at least one of the following is true:
10221      * <ul>
10222      * <li>It has an entry in the UnicodeData file.
10223      * <li>It has a value in a range defined by the UnicodeData file.
10224      * </ul>
10225      *
10226      * <p><b>Note:</b> This method cannot handle <a
10227      * href="#supplementary"> supplementary characters</a>. To support
10228      * all Unicode characters, including supplementary characters, use
10229      * the {@link #isDefined(int)} method.
10230      *
10231      * @param   ch   the character to be tested
10232      * @return  {@code true} if the character has a defined meaning
10233      *          in Unicode; {@code false} otherwise.
10234      * @see     Character#isDigit(char)
10235      * @see     Character#isLetter(char)
10236      * @see     Character#isLetterOrDigit(char)
10237      * @see     Character#isLowerCase(char)
10238      * @see     Character#isTitleCase(char)
10239      * @see     Character#isUpperCase(char)
10240      * @since   1.0.2
10241      */
isDefined(char ch)10242     public static boolean isDefined(char ch) {
10243         return isDefined((int)ch);
10244     }
10245 
10246     /**
10247      * Determines if a character (Unicode code point) is defined in Unicode.
10248      * <p>
10249      * A character is defined if at least one of the following is true:
10250      * <ul>
10251      * <li>It has an entry in the UnicodeData file.
10252      * <li>It has a value in a range defined by the UnicodeData file.
10253      * </ul>
10254      *
10255      * @param   codePoint the character (Unicode code point) to be tested.
10256      * @return  {@code true} if the character has a defined meaning
10257      *          in Unicode; {@code false} otherwise.
10258      * @see     Character#isDigit(int)
10259      * @see     Character#isLetter(int)
10260      * @see     Character#isLetterOrDigit(int)
10261      * @see     Character#isLowerCase(int)
10262      * @see     Character#isTitleCase(int)
10263      * @see     Character#isUpperCase(int)
10264      * @since   1.5
10265      */
10266     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10267     /*
10268     public static boolean isDefined(int codePoint) {
10269         return getType(codePoint) != Character.UNASSIGNED;
10270     }
10271     */
isDefined(int codePoint)10272     public static boolean isDefined(int codePoint) {
10273         return isDefinedImpl(codePoint);
10274     }
10275 
10276     @FastNative
isDefinedImpl(int codePoint)10277     static native boolean isDefinedImpl(int codePoint);
10278     // END Android-changed: Reimplement methods natively on top of ICU4C.
10279 
10280     /**
10281      * Determines if the specified character is a letter.
10282      * <p>
10283      * A character is considered to be a letter if its general
10284      * category type, provided by {@code Character.getType(ch)},
10285      * is any of the following:
10286      * <ul>
10287      * <li> {@code UPPERCASE_LETTER}
10288      * <li> {@code LOWERCASE_LETTER}
10289      * <li> {@code TITLECASE_LETTER}
10290      * <li> {@code MODIFIER_LETTER}
10291      * <li> {@code OTHER_LETTER}
10292      * </ul>
10293      *
10294      * Not all letters have case. Many characters are
10295      * letters but are neither uppercase nor lowercase nor titlecase.
10296      *
10297      * <p><b>Note:</b> This method cannot handle <a
10298      * href="#supplementary"> supplementary characters</a>. To support
10299      * all Unicode characters, including supplementary characters, use
10300      * the {@link #isLetter(int)} method.
10301      *
10302      * @param   ch   the character to be tested.
10303      * @return  {@code true} if the character is a letter;
10304      *          {@code false} otherwise.
10305      * @see     Character#isDigit(char)
10306      * @see     Character#isJavaIdentifierStart(char)
10307      * @see     Character#isJavaLetter(char)
10308      * @see     Character#isJavaLetterOrDigit(char)
10309      * @see     Character#isLetterOrDigit(char)
10310      * @see     Character#isLowerCase(char)
10311      * @see     Character#isTitleCase(char)
10312      * @see     Character#isUnicodeIdentifierStart(char)
10313      * @see     Character#isUpperCase(char)
10314      */
isLetter(char ch)10315     public static boolean isLetter(char ch) {
10316         return isLetter((int)ch);
10317     }
10318 
10319     /**
10320      * Determines if the specified character (Unicode code point) is a letter.
10321      * <p>
10322      * A character is considered to be a letter if its general
10323      * category type, provided by {@link Character#getType(int) getType(codePoint)},
10324      * is any of the following:
10325      * <ul>
10326      * <li> {@code UPPERCASE_LETTER}
10327      * <li> {@code LOWERCASE_LETTER}
10328      * <li> {@code TITLECASE_LETTER}
10329      * <li> {@code MODIFIER_LETTER}
10330      * <li> {@code OTHER_LETTER}
10331      * </ul>
10332      *
10333      * Not all letters have case. Many characters are
10334      * letters but are neither uppercase nor lowercase nor titlecase.
10335      *
10336      * @param   codePoint the character (Unicode code point) to be tested.
10337      * @return  {@code true} if the character is a letter;
10338      *          {@code false} otherwise.
10339      * @see     Character#isDigit(int)
10340      * @see     Character#isJavaIdentifierStart(int)
10341      * @see     Character#isLetterOrDigit(int)
10342      * @see     Character#isLowerCase(int)
10343      * @see     Character#isTitleCase(int)
10344      * @see     Character#isUnicodeIdentifierStart(int)
10345      * @see     Character#isUpperCase(int)
10346      * @since   1.5
10347      */
10348     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10349     /*
10350     public static boolean isLetter(int codePoint) {
10351         return ((((1 << Character.UPPERCASE_LETTER) |
10352             (1 << Character.LOWERCASE_LETTER) |
10353             (1 << Character.TITLECASE_LETTER) |
10354             (1 << Character.MODIFIER_LETTER) |
10355             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
10356             != 0;
10357     }
10358     */
isLetter(int codePoint)10359     public static boolean isLetter(int codePoint) {
10360         return isLetterImpl(codePoint);
10361     }
10362 
10363     @FastNative
isLetterImpl(int codePoint)10364     static native boolean isLetterImpl(int codePoint);
10365     // END Android-changed: Reimplement methods natively on top of ICU4C.
10366 
10367     /**
10368      * Determines if the specified character is a letter or digit.
10369      * <p>
10370      * A character is considered to be a letter or digit if either
10371      * {@code Character.isLetter(char ch)} or
10372      * {@code Character.isDigit(char ch)} returns
10373      * {@code true} for the character.
10374      *
10375      * <p><b>Note:</b> This method cannot handle <a
10376      * href="#supplementary"> supplementary characters</a>. To support
10377      * all Unicode characters, including supplementary characters, use
10378      * the {@link #isLetterOrDigit(int)} method.
10379      *
10380      * @param   ch   the character to be tested.
10381      * @return  {@code true} if the character is a letter or digit;
10382      *          {@code false} otherwise.
10383      * @see     Character#isDigit(char)
10384      * @see     Character#isJavaIdentifierPart(char)
10385      * @see     Character#isJavaLetter(char)
10386      * @see     Character#isJavaLetterOrDigit(char)
10387      * @see     Character#isLetter(char)
10388      * @see     Character#isUnicodeIdentifierPart(char)
10389      * @since   1.0.2
10390      */
isLetterOrDigit(char ch)10391     public static boolean isLetterOrDigit(char ch) {
10392         return isLetterOrDigit((int)ch);
10393     }
10394 
10395     /**
10396      * Determines if the specified character (Unicode code point) is a letter or digit.
10397      * <p>
10398      * A character is considered to be a letter or digit if either
10399      * {@link #isLetter(int) isLetter(codePoint)} or
10400      * {@link #isDigit(int) isDigit(codePoint)} returns
10401      * {@code true} for the character.
10402      *
10403      * @param   codePoint the character (Unicode code point) to be tested.
10404      * @return  {@code true} if the character is a letter or digit;
10405      *          {@code false} otherwise.
10406      * @see     Character#isDigit(int)
10407      * @see     Character#isJavaIdentifierPart(int)
10408      * @see     Character#isLetter(int)
10409      * @see     Character#isUnicodeIdentifierPart(int)
10410      * @since   1.5
10411      */
10412     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10413     /*
10414     public static boolean isLetterOrDigit(int codePoint) {
10415         return ((((1 << Character.UPPERCASE_LETTER) |
10416             (1 << Character.LOWERCASE_LETTER) |
10417             (1 << Character.TITLECASE_LETTER) |
10418             (1 << Character.MODIFIER_LETTER) |
10419             (1 << Character.OTHER_LETTER) |
10420             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
10421             != 0;
10422     }
10423     */
isLetterOrDigit(int codePoint)10424     public static boolean isLetterOrDigit(int codePoint) {
10425         return isLetterOrDigitImpl(codePoint);
10426     }
10427 
10428     @FastNative
isLetterOrDigitImpl(int codePoint)10429     static native boolean isLetterOrDigitImpl(int codePoint);
10430     // END Android-changed: Reimplement methods natively on top of ICU4C.
10431 
10432     /**
10433      * Determines if the specified character is permissible as the first
10434      * character in a Java identifier.
10435      * <p>
10436      * A character may start a Java identifier if and only if
10437      * one of the following conditions is true:
10438      * <ul>
10439      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10440      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10441      * <li> {@code ch} is a currency symbol (such as {@code '$'})
10442      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10443      * </ul>
10444      *
10445      * @param   ch the character to be tested.
10446      * @return  {@code true} if the character may start a Java
10447      *          identifier; {@code false} otherwise.
10448      * @see     Character#isJavaLetterOrDigit(char)
10449      * @see     Character#isJavaIdentifierStart(char)
10450      * @see     Character#isJavaIdentifierPart(char)
10451      * @see     Character#isLetter(char)
10452      * @see     Character#isLetterOrDigit(char)
10453      * @see     Character#isUnicodeIdentifierStart(char)
10454      * @since   1.0.2
10455      * @deprecated Replaced by isJavaIdentifierStart(char).
10456      */
10457     @Deprecated(since="1.1")
isJavaLetter(char ch)10458     public static boolean isJavaLetter(char ch) {
10459         return isJavaIdentifierStart(ch);
10460     }
10461 
10462     /**
10463      * Determines if the specified character may be part of a Java
10464      * identifier as other than the first character.
10465      * <p>
10466      * A character may be part of a Java identifier if and only if one
10467      * of the following conditions is true:
10468      * <ul>
10469      * <li>  it is a letter
10470      * <li>  it is a currency symbol (such as {@code '$'})
10471      * <li>  it is a connecting punctuation character (such as {@code '_'})
10472      * <li>  it is a digit
10473      * <li>  it is a numeric letter (such as a Roman numeral character)
10474      * <li>  it is a combining mark
10475      * <li>  it is a non-spacing mark
10476      * <li> {@code isIdentifierIgnorable} returns
10477      * {@code true} for the character.
10478      * </ul>
10479      *
10480      * @param   ch the character to be tested.
10481      * @return  {@code true} if the character may be part of a
10482      *          Java identifier; {@code false} otherwise.
10483      * @see     Character#isJavaLetter(char)
10484      * @see     Character#isJavaIdentifierStart(char)
10485      * @see     Character#isJavaIdentifierPart(char)
10486      * @see     Character#isLetter(char)
10487      * @see     Character#isLetterOrDigit(char)
10488      * @see     Character#isUnicodeIdentifierPart(char)
10489      * @see     Character#isIdentifierIgnorable(char)
10490      * @since   1.0.2
10491      * @deprecated Replaced by isJavaIdentifierPart(char).
10492      */
10493     @Deprecated(since="1.1")
isJavaLetterOrDigit(char ch)10494     public static boolean isJavaLetterOrDigit(char ch) {
10495         return isJavaIdentifierPart(ch);
10496     }
10497 
10498     /**
10499      * Determines if the specified character (Unicode code point) is alphabetic.
10500      * <p>
10501      * A character is considered to be alphabetic if its general category type,
10502      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
10503      * the following:
10504      * <ul>
10505      * <li> {@code UPPERCASE_LETTER}
10506      * <li> {@code LOWERCASE_LETTER}
10507      * <li> {@code TITLECASE_LETTER}
10508      * <li> {@code MODIFIER_LETTER}
10509      * <li> {@code OTHER_LETTER}
10510      * <li> {@code LETTER_NUMBER}
10511      * </ul>
10512      * or it has contributory property Other_Alphabetic as defined by the
10513      * Unicode Standard.
10514      *
10515      * @param   codePoint the character (Unicode code point) to be tested.
10516      * @return  {@code true} if the character is a Unicode alphabet
10517      *          character, {@code false} otherwise.
10518      * @since   1.7
10519      */
10520     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10521     /*
10522     public static boolean isAlphabetic(int codePoint) {
10523         return (((((1 << Character.UPPERCASE_LETTER) |
10524             (1 << Character.LOWERCASE_LETTER) |
10525             (1 << Character.TITLECASE_LETTER) |
10526             (1 << Character.MODIFIER_LETTER) |
10527             (1 << Character.OTHER_LETTER) |
10528             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
10529             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
10530     }
10531     */
isAlphabetic(int codePoint)10532     public static boolean isAlphabetic(int codePoint) {
10533         return isAlphabeticImpl(codePoint);
10534     }
10535 
10536     @FastNative
isAlphabeticImpl(int codePoint)10537     static native boolean isAlphabeticImpl(int codePoint);
10538     // END Android-changed: Reimplement methods natively on top of ICU4C.
10539 
10540     /**
10541      * Determines if the specified character (Unicode code point) is a CJKV
10542      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
10543      * the Unicode Standard.
10544      *
10545      * @param   codePoint the character (Unicode code point) to be tested.
10546      * @return  {@code true} if the character is a Unicode ideograph
10547      *          character, {@code false} otherwise.
10548      * @since   1.7
10549      */
10550     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10551     /*
10552     public static boolean isIdeographic(int codePoint) {
10553         return CharacterData.of(codePoint).isIdeographic(codePoint);
10554     }
10555     */
isIdeographic(int codePoint)10556     public static boolean isIdeographic(int codePoint) {
10557         return isIdeographicImpl(codePoint);
10558     }
10559     @FastNative
isIdeographicImpl(int codePoint)10560     static native boolean isIdeographicImpl(int codePoint);
10561     // END Android-changed: Reimplement methods natively on top of ICU4C.
10562 
10563     // Android-changed: Removed @see tag (target does not exist on Android):
10564     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10565     /**
10566      * Determines if the specified character is
10567      * permissible as the first character in a Java identifier.
10568      * <p>
10569      * A character may start a Java identifier if and only if
10570      * one of the following conditions is true:
10571      * <ul>
10572      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10573      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10574      * <li> {@code ch} is a currency symbol (such as {@code '$'})
10575      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10576      * </ul>
10577      *
10578      * <p><b>Note:</b> This method cannot handle <a
10579      * href="#supplementary"> supplementary characters</a>. To support
10580      * all Unicode characters, including supplementary characters, use
10581      * the {@link #isJavaIdentifierStart(int)} method.
10582      *
10583      * @param   ch the character to be tested.
10584      * @return  {@code true} if the character may start a Java identifier;
10585      *          {@code false} otherwise.
10586      * @see     Character#isJavaIdentifierPart(char)
10587      * @see     Character#isLetter(char)
10588      * @see     Character#isUnicodeIdentifierStart(char)
10589      * @since   1.1
10590      */
10591     @SuppressWarnings("doclint:reference") // cross-module links
isJavaIdentifierStart(char ch)10592     public static boolean isJavaIdentifierStart(char ch) {
10593         return isJavaIdentifierStart((int)ch);
10594     }
10595 
10596     // Android-changed: Removed @see tag (target does not exist on Android):
10597     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10598     /**
10599      * Determines if the character (Unicode code point) is
10600      * permissible as the first character in a Java identifier.
10601      * <p>
10602      * A character may start a Java identifier if and only if
10603      * one of the following conditions is true:
10604      * <ul>
10605      * <li> {@link #isLetter(int) isLetter(codePoint)}
10606      *      returns {@code true}
10607      * <li> {@link #getType(int) getType(codePoint)}
10608      *      returns {@code LETTER_NUMBER}
10609      * <li> the referenced character is a currency symbol (such as {@code '$'})
10610      * <li> the referenced character is a connecting punctuation character
10611      *      (such as {@code '_'}).
10612      * </ul>
10613      *
10614      * @param   codePoint the character (Unicode code point) to be tested.
10615      * @return  {@code true} if the character may start a Java identifier;
10616      *          {@code false} otherwise.
10617      * @see     Character#isJavaIdentifierPart(int)
10618      * @see     Character#isLetter(int)
10619      * @see     Character#isUnicodeIdentifierStart(int)
10620      * @since   1.5
10621      */
10622     // BEGIN Android-changed: Use ICU.
10623     /*
10624     public static boolean isJavaIdentifierStart(int codePoint) {
10625         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
10626     }
10627     */
10628     @SuppressWarnings("doclint:reference") // cross-module links
isJavaIdentifierStart(int codePoint)10629     public static boolean isJavaIdentifierStart(int codePoint) {
10630         // Use precomputed bitmasks to optimize the ASCII range.
10631         if (codePoint < 64) {
10632             return (codePoint == '$'); // There's only one character in this range.
10633         } else if (codePoint < 128) {
10634             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
10635         }
10636         return ((1 << getType(codePoint))
10637                 & ((1 << UPPERCASE_LETTER)
10638                    | (1 << LOWERCASE_LETTER)
10639                    | (1  << TITLECASE_LETTER)
10640                    | (1  << MODIFIER_LETTER)
10641                    | (1  << OTHER_LETTER)
10642                    | (1  << CURRENCY_SYMBOL)
10643                    | (1  << CONNECTOR_PUNCTUATION)
10644                    | (1  << LETTER_NUMBER))) != 0;
10645     }
10646     // END Android-changed: Use ICU.
10647 
10648     // Android-changed: Removed @see tag (target does not exist on Android):
10649     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10650     /**
10651      * Determines if the specified character may be part of a Java
10652      * identifier as other than the first character.
10653      * <p>
10654      * A character may be part of a Java identifier if any of the following
10655      * conditions are true:
10656      * <ul>
10657      * <li>  it is a letter
10658      * <li>  it is a currency symbol (such as {@code '$'})
10659      * <li>  it is a connecting punctuation character (such as {@code '_'})
10660      * <li>  it is a digit
10661      * <li>  it is a numeric letter (such as a Roman numeral character)
10662      * <li>  it is a combining mark
10663      * <li>  it is a non-spacing mark
10664      * <li> {@code isIdentifierIgnorable} returns
10665      * {@code true} for the character
10666      * </ul>
10667      *
10668      * <p><b>Note:</b> This method cannot handle <a
10669      * href="#supplementary"> supplementary characters</a>. To support
10670      * all Unicode characters, including supplementary characters, use
10671      * the {@link #isJavaIdentifierPart(int)} method.
10672      *
10673      * @param   ch      the character to be tested.
10674      * @return {@code true} if the character may be part of a
10675      *          Java identifier; {@code false} otherwise.
10676      * @see     Character#isIdentifierIgnorable(char)
10677      * @see     Character#isJavaIdentifierStart(char)
10678      * @see     Character#isLetterOrDigit(char)
10679      * @see     Character#isUnicodeIdentifierPart(char)
10680      * @since   1.1
10681      */
10682     @SuppressWarnings("doclint:reference") // cross-module links
isJavaIdentifierPart(char ch)10683     public static boolean isJavaIdentifierPart(char ch) {
10684         return isJavaIdentifierPart((int)ch);
10685     }
10686 
10687     // Android-changed: Removed @see tag (target does not exist on Android):
10688     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10689     /**
10690      * Determines if the character (Unicode code point) may be part of a Java
10691      * identifier as other than the first character.
10692      * <p>
10693      * A character may be part of a Java identifier if any of the following
10694      * conditions are true:
10695      * <ul>
10696      * <li>  it is a letter
10697      * <li>  it is a currency symbol (such as {@code '$'})
10698      * <li>  it is a connecting punctuation character (such as {@code '_'})
10699      * <li>  it is a digit
10700      * <li>  it is a numeric letter (such as a Roman numeral character)
10701      * <li>  it is a combining mark
10702      * <li>  it is a non-spacing mark
10703      * <li> {@link #isIdentifierIgnorable(int)
10704      * isIdentifierIgnorable(codePoint)} returns {@code true} for
10705      * the code point
10706      * </ul>
10707      *
10708      * @param   codePoint the character (Unicode code point) to be tested.
10709      * @return {@code true} if the character may be part of a
10710      *          Java identifier; {@code false} otherwise.
10711      * @see     Character#isIdentifierIgnorable(int)
10712      * @see     Character#isJavaIdentifierStart(int)
10713      * @see     Character#isLetterOrDigit(int)
10714      * @see     Character#isUnicodeIdentifierPart(int)
10715      * @since   1.5
10716      */
10717     // BEGIN Android-changed: Use ICU.
10718     /*
10719     public static boolean isJavaIdentifierPart(int codePoint) {
10720         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
10721     }
10722     */
10723     @SuppressWarnings("doclint:reference") // cross-module links
isJavaIdentifierPart(int codePoint)10724     public static boolean isJavaIdentifierPart(int codePoint) {
10725         // Use precomputed bitmasks to optimize the ASCII range.
10726         if (codePoint < 64) {
10727             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
10728         } else if (codePoint < 128) {
10729             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
10730         }
10731         return ((1 << getType(codePoint))
10732                 & ((1 << UPPERCASE_LETTER)
10733                    | (1 << LOWERCASE_LETTER)
10734                    | (1 << TITLECASE_LETTER)
10735                    | (1 << MODIFIER_LETTER)
10736                    | (1 << OTHER_LETTER)
10737                    | (1 << CURRENCY_SYMBOL)
10738                    | (1 << CONNECTOR_PUNCTUATION)
10739                    | (1 << DECIMAL_DIGIT_NUMBER)
10740                    | (1 << LETTER_NUMBER)
10741                    | (1 << FORMAT)
10742                    | (1 << COMBINING_SPACING_MARK)
10743                    | (1 << NON_SPACING_MARK))) != 0
10744                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
10745                 || (codePoint >= 0x7f && codePoint <= 0x9f);
10746     }
10747     // END Android-changed: Use ICU.
10748 
10749     /**
10750      * Determines if the specified character is permissible as the
10751      * first character in a Unicode identifier.
10752      * <p>
10753      * A character may start a Unicode identifier if and only if
10754      * one of the following conditions is true:
10755      * <ul>
10756      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10757      * <li> {@link #getType(char) getType(ch)} returns
10758      *      {@code LETTER_NUMBER}.
10759      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10760      *      {@code Other_ID_Start}</a> character.
10761      * </ul>
10762      * <p>
10763      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10764      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10765      * with the following profile of UAX31:
10766      * <pre>
10767      * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
10768      * </pre>
10769      * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
10770      * compatibility.
10771      *
10772      * <p><b>Note:</b> This method cannot handle <a
10773      * href="#supplementary"> supplementary characters</a>. To support
10774      * all Unicode characters, including supplementary characters, use
10775      * the {@link #isUnicodeIdentifierStart(int)} method.
10776      *
10777      * @param   ch      the character to be tested.
10778      * @return  {@code true} if the character may start a Unicode
10779      *          identifier; {@code false} otherwise.
10780      *
10781      * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
10782      * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
10783      * @see     Character#isJavaIdentifierStart(char)
10784      * @see     Character#isLetter(char)
10785      * @see     Character#isUnicodeIdentifierPart(char)
10786      * @since   1.1
10787      */
isUnicodeIdentifierStart(char ch)10788     public static boolean isUnicodeIdentifierStart(char ch) {
10789         return isUnicodeIdentifierStart((int)ch);
10790     }
10791 
10792     /**
10793      * Determines if the specified character (Unicode code point) is permissible as the
10794      * first character in a Unicode identifier.
10795      * <p>
10796      * A character may start a Unicode identifier if and only if
10797      * one of the following conditions is true:
10798      * <ul>
10799      * <li> {@link #isLetter(int) isLetter(codePoint)}
10800      *      returns {@code true}
10801      * <li> {@link #getType(int) getType(codePoint)}
10802      *      returns {@code LETTER_NUMBER}.
10803      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10804      *      {@code Other_ID_Start}</a> character.
10805      * </ul>
10806      * <p>
10807      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10808      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10809      * with the following profile of UAX31:
10810      * <pre>
10811      * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
10812      * </pre>
10813      * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
10814      * compatibility.
10815      *
10816      * @param   codePoint the character (Unicode code point) to be tested.
10817      * @return  {@code true} if the character may start a Unicode
10818      *          identifier; {@code false} otherwise.
10819      *
10820      * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
10821      * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
10822      * @see     Character#isJavaIdentifierStart(int)
10823      * @see     Character#isLetter(int)
10824      * @see     Character#isUnicodeIdentifierPart(int)
10825      * @since   1.5
10826      */
10827     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10828     /*
10829     public static boolean isUnicodeIdentifierStart(int codePoint) {
10830         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
10831     }
10832     */
isUnicodeIdentifierStart(int codePoint)10833     public static boolean isUnicodeIdentifierStart(int codePoint) {
10834         return isUnicodeIdentifierStartImpl(codePoint);
10835     }
10836 
10837     @FastNative
isUnicodeIdentifierStartImpl(int codePoint)10838     static native boolean isUnicodeIdentifierStartImpl(int codePoint);
10839     // END Android-changed: Reimplement methods natively on top of ICU4C.
10840 
10841     /**
10842      * Determines if the specified character may be part of a Unicode
10843      * identifier as other than the first character.
10844      * <p>
10845      * A character may be part of a Unicode identifier if and only if
10846      * one of the following statements is true:
10847      * <ul>
10848      * <li>  it is a letter
10849      * <li>  it is a connecting punctuation character (such as {@code '_'})
10850      * <li>  it is a digit
10851      * <li>  it is a numeric letter (such as a Roman numeral character)
10852      * <li>  it is a combining mark
10853      * <li>  it is a non-spacing mark
10854      * <li> {@code isIdentifierIgnorable} returns
10855      * {@code true} for this character.
10856      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10857      *      {@code Other_ID_Start}</a> character.
10858      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10859      *      {@code Other_ID_Continue}</a> character.
10860      * </ul>
10861      * <p>
10862      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10863      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10864      * with the following profile of UAX31:
10865      * <pre>
10866      * Continue := Start + ID_Continue + ignorable
10867      * Medial := empty
10868      * ignorable := isIdentifierIgnorable(char) returns true for the character
10869      * </pre>
10870      * {@code ignorable} is added to {@code Continue} for backward
10871      * compatibility.
10872      *
10873      * <p><b>Note:</b> This method cannot handle <a
10874      * href="#supplementary"> supplementary characters</a>. To support
10875      * all Unicode characters, including supplementary characters, use
10876      * the {@link #isUnicodeIdentifierPart(int)} method.
10877      *
10878      * @param   ch      the character to be tested.
10879      * @return  {@code true} if the character may be part of a
10880      *          Unicode identifier; {@code false} otherwise.
10881      *
10882      * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
10883      * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
10884      * @see     Character#isIdentifierIgnorable(char)
10885      * @see     Character#isJavaIdentifierPart(char)
10886      * @see     Character#isLetterOrDigit(char)
10887      * @see     Character#isUnicodeIdentifierStart(char)
10888      * @since   1.1
10889      */
isUnicodeIdentifierPart(char ch)10890     public static boolean isUnicodeIdentifierPart(char ch) {
10891         return isUnicodeIdentifierPart((int)ch);
10892     }
10893 
10894     /**
10895      * Determines if the specified character (Unicode code point) may be part of a Unicode
10896      * identifier as other than the first character.
10897      * <p>
10898      * A character may be part of a Unicode identifier if and only if
10899      * one of the following statements is true:
10900      * <ul>
10901      * <li>  it is a letter
10902      * <li>  it is a connecting punctuation character (such as {@code '_'})
10903      * <li>  it is a digit
10904      * <li>  it is a numeric letter (such as a Roman numeral character)
10905      * <li>  it is a combining mark
10906      * <li>  it is a non-spacing mark
10907      * <li> {@code isIdentifierIgnorable} returns
10908      * {@code true} for this character.
10909      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10910      *      {@code Other_ID_Start}</a> character.
10911      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10912      *      {@code Other_ID_Continue}</a> character.
10913      * </ul>
10914      * <p>
10915      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10916      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10917      * with the following profile of UAX31:
10918      * <pre>
10919      * Continue := Start + ID_Continue + ignorable
10920      * Medial := empty
10921      * ignorable := isIdentifierIgnorable(int) returns true for the character
10922      * </pre>
10923      * {@code ignorable} is added to {@code Continue} for backward
10924      * compatibility.
10925      *
10926      * @param   codePoint the character (Unicode code point) to be tested.
10927      * @return  {@code true} if the character may be part of a
10928      *          Unicode identifier; {@code false} otherwise.
10929      *
10930      * @spec https://www.unicode.org/reports/tr44 Unicode Character Database
10931      * @spec https://www.unicode.org/reports/tr31 Unicode Identifier and Pattern Syntax
10932      * @see     Character#isIdentifierIgnorable(int)
10933      * @see     Character#isJavaIdentifierPart(int)
10934      * @see     Character#isLetterOrDigit(int)
10935      * @see     Character#isUnicodeIdentifierStart(int)
10936      * @since   1.5
10937      */
10938     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10939     /*
10940     public static boolean isUnicodeIdentifierPart(int codePoint) {
10941         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
10942     }
10943     */
isUnicodeIdentifierPart(int codePoint)10944     public static boolean isUnicodeIdentifierPart(int codePoint) {
10945         return isUnicodeIdentifierPartImpl(codePoint);
10946     }
10947 
10948     @FastNative
isUnicodeIdentifierPartImpl(int codePoint)10949     static native boolean isUnicodeIdentifierPartImpl(int codePoint);
10950     // END Android-changed: Reimplement methods natively on top of ICU4C.
10951 
10952     /**
10953      * Determines if the specified character should be regarded as
10954      * an ignorable character in a Java identifier or a Unicode identifier.
10955      * <p>
10956      * The following Unicode characters are ignorable in a Java identifier
10957      * or a Unicode identifier:
10958      * <ul>
10959      * <li>ISO control characters that are not whitespace
10960      * <ul>
10961      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
10962      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
10963      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
10964      * </ul>
10965      *
10966      * <li>all characters that have the {@code FORMAT} general
10967      * category value
10968      * </ul>
10969      *
10970      * <p><b>Note:</b> This method cannot handle <a
10971      * href="#supplementary"> supplementary characters</a>. To support
10972      * all Unicode characters, including supplementary characters, use
10973      * the {@link #isIdentifierIgnorable(int)} method.
10974      *
10975      * @param   ch      the character to be tested.
10976      * @return  {@code true} if the character is an ignorable control
10977      *          character that may be part of a Java or Unicode identifier;
10978      *           {@code false} otherwise.
10979      * @see     Character#isJavaIdentifierPart(char)
10980      * @see     Character#isUnicodeIdentifierPart(char)
10981      * @since   1.1
10982      */
isIdentifierIgnorable(char ch)10983     public static boolean isIdentifierIgnorable(char ch) {
10984         return isIdentifierIgnorable((int)ch);
10985     }
10986 
10987     /**
10988      * Determines if the specified character (Unicode code point) should be regarded as
10989      * an ignorable character in a Java identifier or a Unicode identifier.
10990      * <p>
10991      * The following Unicode characters are ignorable in a Java identifier
10992      * or a Unicode identifier:
10993      * <ul>
10994      * <li>ISO control characters that are not whitespace
10995      * <ul>
10996      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
10997      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
10998      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
10999      * </ul>
11000      *
11001      * <li>all characters that have the {@code FORMAT} general
11002      * category value
11003      * </ul>
11004      *
11005      * @param   codePoint the character (Unicode code point) to be tested.
11006      * @return  {@code true} if the character is an ignorable control
11007      *          character that may be part of a Java or Unicode identifier;
11008      *          {@code false} otherwise.
11009      * @see     Character#isJavaIdentifierPart(int)
11010      * @see     Character#isUnicodeIdentifierPart(int)
11011      * @since   1.5
11012      */
11013     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11014     /*
11015     public static boolean isIdentifierIgnorable(int codePoint) {
11016         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
11017     }
11018     */
isIdentifierIgnorable(int codePoint)11019     public static boolean isIdentifierIgnorable(int codePoint) {
11020         return isIdentifierIgnorableImpl(codePoint);
11021     }
11022 
11023     @FastNative
isIdentifierIgnorableImpl(int codePoint)11024     static native boolean isIdentifierIgnorableImpl(int codePoint);
11025     // END Android-changed: Reimplement methods natively on top of ICU4C.
11026 
11027     /**
11028      * Determines if the specified character (Unicode code point) is an Emoji.
11029      * <p>
11030      * A character is considered to be an Emoji if and only if it has the {@code Emoji}
11031      * property, defined in
11032      * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11033      * Unicode Emoji (Technical Standard #51)</a>.
11034      *
11035      * @param   codePoint the character (Unicode code point) to be tested.
11036      * @return  {@code true} if the character is an Emoji;
11037      *          {@code false} otherwise.
11038      * @since   21
11039      */
isEmoji(int codePoint)11040     public static boolean isEmoji(int codePoint) {
11041         // Android-changed: Use ICU.
11042         // return CharacterData.of(codePoint).isEmoji(codePoint);
11043         return ICU.hasBinaryProperty(codePoint, UProperty.EMOJI);
11044     }
11045 
11046     /**
11047      * Determines if the specified character (Unicode code point) has the
11048      * Emoji Presentation property by default.
11049      * <p>
11050      * A character is considered to have the Emoji Presentation property if and
11051      * only if it has the {@code Emoji_Presentation} property, defined in
11052      * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11053      * Unicode Emoji (Technical Standard #51)</a>.
11054      *
11055      * @param   codePoint the character (Unicode code point) to be tested.
11056      * @return  {@code true} if the character has the Emoji Presentation
11057      *          property; {@code false} otherwise.
11058      * @since   21
11059      */
isEmojiPresentation(int codePoint)11060     public static boolean isEmojiPresentation(int codePoint) {
11061         // Android-changed: Use ICU.
11062         // return CharacterData.of(codePoint).isEmojiPresentation(codePoint);
11063         return ICU.hasBinaryProperty(codePoint, UProperty.EMOJI_PRESENTATION);
11064     }
11065 
11066     /**
11067      * Determines if the specified character (Unicode code point) is an
11068      * Emoji Modifier.
11069      * <p>
11070      * A character is considered to be an Emoji Modifier if and only if it has
11071      * the {@code Emoji_Modifier} property, defined in
11072      * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11073      * Unicode Emoji (Technical Standard #51)</a>.
11074      *
11075      * @param   codePoint the character (Unicode code point) to be tested.
11076      * @return  {@code true} if the character is an Emoji Modifier;
11077      *          {@code false} otherwise.
11078      * @since   21
11079      */
isEmojiModifier(int codePoint)11080     public static boolean isEmojiModifier(int codePoint) {
11081         // Android-changed: Use ICU.
11082         // return CharacterData.of(codePoint).isEmojiModifier(codePoint);
11083         return ICU.hasBinaryProperty(codePoint, UProperty.EMOJI_MODIFIER);
11084     }
11085 
11086     /**
11087      * Determines if the specified character (Unicode code point) is an
11088      * Emoji Modifier Base.
11089      * <p>
11090      * A character is considered to be an Emoji Modifier Base if and only if it has
11091      * the {@code Emoji_Modifier_Base} property, defined in
11092      * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11093      * Unicode Emoji (Technical Standard #51)</a>.
11094      *
11095      * @param   codePoint the character (Unicode code point) to be tested.
11096      * @return  {@code true} if the character is an Emoji Modifier Base;
11097      *          {@code false} otherwise.
11098      * @since   21
11099      */
isEmojiModifierBase(int codePoint)11100     public static boolean isEmojiModifierBase(int codePoint) {
11101         // Android-changed: Use ICU.
11102         // return CharacterData.of(codePoint).isEmojiModifierBase(codePoint);
11103         return ICU.hasBinaryProperty(codePoint, UProperty.EMOJI_MODIFIER_BASE);
11104     }
11105 
11106     /**
11107      * Determines if the specified character (Unicode code point) is an
11108      * Emoji Component.
11109      * <p>
11110      * A character is considered to be an Emoji Component if and only if it has
11111      * the {@code Emoji_Component} property, defined in
11112      * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11113      * Unicode Emoji (Technical Standard #51)</a>.
11114      *
11115      * @param   codePoint the character (Unicode code point) to be tested.
11116      * @return  {@code true} if the character is an Emoji Component;
11117      *          {@code false} otherwise.
11118      * @since   21
11119      */
isEmojiComponent(int codePoint)11120     public static boolean isEmojiComponent(int codePoint) {
11121         // Android-changed: Use ICU.
11122         // return CharacterData.of(codePoint).isEmojiComponent(codePoint);
11123         return ICU.hasBinaryProperty(codePoint, UProperty.EMOJI_COMPONENT);
11124     }
11125 
11126     /**
11127      * Determines if the specified character (Unicode code point) is
11128      * an Extended Pictographic.
11129      * <p>
11130      * A character is considered to be an Extended Pictographic if and only if it has
11131      * the {@code Extended_Pictographic} property, defined in
11132      * <a href="https://unicode.org/reports/tr51/#Emoji_Properties_and_Data_Files">
11133      * Unicode Emoji (Technical Standard #51)</a>.
11134      *
11135      * @param   codePoint the character (Unicode code point) to be tested.
11136      * @return  {@code true} if the character is an Extended Pictographic;
11137      *          {@code false} otherwise.
11138      * @since   21
11139      */
isExtendedPictographic(int codePoint)11140     public static boolean isExtendedPictographic(int codePoint) {
11141         // Android-changed: Use ICU.
11142         // return CharacterData.of(codePoint).isExtendedPictographic(codePoint);
11143         return ICU.hasBinaryProperty(codePoint, UProperty.EXTENDED_PICTOGRAPHIC);
11144     }
11145 
11146     /**
11147      * Converts the character argument to lowercase using case
11148      * mapping information from the UnicodeData file.
11149      * <p>
11150      * Note that
11151      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
11152      * does not always return {@code true} for some ranges of
11153      * characters, particularly those that are symbols or ideographs.
11154      *
11155      * <p>In general, {@link String#toLowerCase()} should be used to map
11156      * characters to lowercase. {@code String} case mapping methods
11157      * have several benefits over {@code Character} case mapping methods.
11158      * {@code String} case mapping methods can perform locale-sensitive
11159      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11160      * the {@code Character} case mapping methods cannot.
11161      *
11162      * <p><b>Note:</b> This method cannot handle <a
11163      * href="#supplementary"> supplementary characters</a>. To support
11164      * all Unicode characters, including supplementary characters, use
11165      * the {@link #toLowerCase(int)} method.
11166      *
11167      * @param   ch   the character to be converted.
11168      * @return  the lowercase equivalent of the character, if any;
11169      *          otherwise, the character itself.
11170      * @see     Character#isLowerCase(char)
11171      * @see     String#toLowerCase()
11172      */
toLowerCase(char ch)11173     public static char toLowerCase(char ch) {
11174         return (char)toLowerCase((int)ch);
11175     }
11176 
11177     /**
11178      * Converts the character (Unicode code point) argument to
11179      * lowercase using case mapping information from the UnicodeData
11180      * file.
11181      *
11182      * <p> Note that
11183      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
11184      * does not always return {@code true} for some ranges of
11185      * characters, particularly those that are symbols or ideographs.
11186      *
11187      * <p>In general, {@link String#toLowerCase()} should be used to map
11188      * characters to lowercase. {@code String} case mapping methods
11189      * have several benefits over {@code Character} case mapping methods.
11190      * {@code String} case mapping methods can perform locale-sensitive
11191      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11192      * the {@code Character} case mapping methods cannot.
11193      *
11194      * @param   codePoint   the character (Unicode code point) to be converted.
11195      * @return  the lowercase equivalent of the character (Unicode code
11196      *          point), if any; otherwise, the character itself.
11197      * @see     Character#isLowerCase(int)
11198      * @see     String#toLowerCase()
11199      *
11200      * @since   1.5
11201      */
11202     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11203     /*
11204     public static int toLowerCase(int codePoint) {
11205         return CharacterData.of(codePoint).toLowerCase(codePoint);
11206     }
11207     */
toLowerCase(int codePoint)11208     public static int toLowerCase(int codePoint) {
11209         if (codePoint >= 'A' && codePoint <= 'Z') {
11210             return codePoint + ('a' - 'A');
11211         }
11212 
11213         // All ASCII codepoints except the ones above remain unchanged.
11214         if (codePoint < 0x80) {
11215             return codePoint;
11216         }
11217 
11218         return toLowerCaseImpl(codePoint);
11219     }
11220 
11221     @FastNative
toLowerCaseImpl(int codePoint)11222     static native int toLowerCaseImpl(int codePoint);
11223     // END Android-changed: Reimplement methods natively on top of ICU4C.
11224 
11225     /**
11226      * Converts the character argument to uppercase using case mapping
11227      * information from the UnicodeData file.
11228      * <p>
11229      * Note that
11230      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
11231      * does not always return {@code true} for some ranges of
11232      * characters, particularly those that are symbols or ideographs.
11233      *
11234      * <p>In general, {@link String#toUpperCase()} should be used to map
11235      * characters to uppercase. {@code String} case mapping methods
11236      * have several benefits over {@code Character} case mapping methods.
11237      * {@code String} case mapping methods can perform locale-sensitive
11238      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11239      * the {@code Character} case mapping methods cannot.
11240      *
11241      * <p><b>Note:</b> This method cannot handle <a
11242      * href="#supplementary"> supplementary characters</a>. To support
11243      * all Unicode characters, including supplementary characters, use
11244      * the {@link #toUpperCase(int)} method.
11245      *
11246      * @param   ch   the character to be converted.
11247      * @return  the uppercase equivalent of the character, if any;
11248      *          otherwise, the character itself.
11249      * @see     Character#isUpperCase(char)
11250      * @see     String#toUpperCase()
11251      */
toUpperCase(char ch)11252     public static char toUpperCase(char ch) {
11253         return (char)toUpperCase((int)ch);
11254     }
11255 
11256     /**
11257      * Converts the character (Unicode code point) argument to
11258      * uppercase using case mapping information from the UnicodeData
11259      * file.
11260      *
11261      * <p>Note that
11262      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
11263      * does not always return {@code true} for some ranges of
11264      * characters, particularly those that are symbols or ideographs.
11265      *
11266      * <p>In general, {@link String#toUpperCase()} should be used to map
11267      * characters to uppercase. {@code String} case mapping methods
11268      * have several benefits over {@code Character} case mapping methods.
11269      * {@code String} case mapping methods can perform locale-sensitive
11270      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
11271      * the {@code Character} case mapping methods cannot.
11272      *
11273      * @param   codePoint   the character (Unicode code point) to be converted.
11274      * @return  the uppercase equivalent of the character, if any;
11275      *          otherwise, the character itself.
11276      * @see     Character#isUpperCase(int)
11277      * @see     String#toUpperCase()
11278      *
11279      * @since   1.5
11280      */
11281     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11282     /*
11283     public static int toUpperCase(int codePoint) {
11284         return CharacterData.of(codePoint).toUpperCase(codePoint);
11285     }
11286     */
toUpperCase(int codePoint)11287     public static int toUpperCase(int codePoint) {
11288         if (codePoint >= 'a' && codePoint <= 'z') {
11289             return codePoint - ('a' - 'A');
11290         }
11291 
11292         // All ASCII codepoints except the ones above remain unchanged.
11293         if (codePoint < 0x80) {
11294             return codePoint;
11295         }
11296 
11297         return toUpperCaseImpl(codePoint);
11298     }
11299 
11300     @FastNative
toUpperCaseImpl(int codePoint)11301     static native int toUpperCaseImpl(int codePoint);
11302     // END Android-changed: Reimplement methods natively on top of ICU4C.
11303 
11304     /**
11305      * Converts the character argument to titlecase using case mapping
11306      * information from the UnicodeData file. If a character has no
11307      * explicit titlecase mapping and is not itself a titlecase char
11308      * according to UnicodeData, then the uppercase mapping is
11309      * returned as an equivalent titlecase mapping. If the
11310      * {@code char} argument is already a titlecase
11311      * {@code char}, the same {@code char} value will be
11312      * returned.
11313      * <p>
11314      * Note that
11315      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
11316      * does not always return {@code true} for some ranges of
11317      * characters.
11318      *
11319      * <p><b>Note:</b> This method cannot handle <a
11320      * href="#supplementary"> supplementary characters</a>. To support
11321      * all Unicode characters, including supplementary characters, use
11322      * the {@link #toTitleCase(int)} method.
11323      *
11324      * @param   ch   the character to be converted.
11325      * @return  the titlecase equivalent of the character, if any;
11326      *          otherwise, the character itself.
11327      * @see     Character#isTitleCase(char)
11328      * @see     Character#toLowerCase(char)
11329      * @see     Character#toUpperCase(char)
11330      * @since   1.0.2
11331      */
toTitleCase(char ch)11332     public static char toTitleCase(char ch) {
11333         return (char)toTitleCase((int)ch);
11334     }
11335 
11336     /**
11337      * Converts the character (Unicode code point) argument to titlecase using case mapping
11338      * information from the UnicodeData file. If a character has no
11339      * explicit titlecase mapping and is not itself a titlecase char
11340      * according to UnicodeData, then the uppercase mapping is
11341      * returned as an equivalent titlecase mapping. If the
11342      * character argument is already a titlecase
11343      * character, the same character value will be
11344      * returned.
11345      *
11346      * <p>Note that
11347      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
11348      * does not always return {@code true} for some ranges of
11349      * characters.
11350      *
11351      * @param   codePoint   the character (Unicode code point) to be converted.
11352      * @return  the titlecase equivalent of the character, if any;
11353      *          otherwise, the character itself.
11354      * @see     Character#isTitleCase(int)
11355      * @see     Character#toLowerCase(int)
11356      * @see     Character#toUpperCase(int)
11357      * @since   1.5
11358      */
11359     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11360     /*
11361     public static int toTitleCase(int codePoint) {
11362         return CharacterData.of(codePoint).toTitleCase(codePoint);
11363     }
11364     */
toTitleCase(int codePoint)11365     public static int toTitleCase(int codePoint) {
11366         return toTitleCaseImpl(codePoint);
11367     }
11368 
11369     @FastNative
toTitleCaseImpl(int codePoint)11370     static native int toTitleCaseImpl(int codePoint);
11371     // END Android-changed: Reimplement methods natively on top of ICU4C.
11372 
11373     /**
11374      * Returns the numeric value of the character {@code ch} in the
11375      * specified radix.
11376      * <p>
11377      * If the radix is not in the range {@code MIN_RADIX} &le;
11378      * {@code radix} &le; {@code MAX_RADIX} or if the
11379      * value of {@code ch} is not a valid digit in the specified
11380      * radix, {@code -1} is returned. A character is a valid digit
11381      * if at least one of the following is true:
11382      * <ul>
11383      * <li>The method {@code isDigit} is {@code true} of the character
11384      *     and the Unicode decimal digit value of the character (or its
11385      *     single-character decomposition) is less than the specified radix.
11386      *     In this case the decimal digit value is returned.
11387      * <li>The character is one of the uppercase Latin letters
11388      *     {@code 'A'} through {@code 'Z'} and its code is less than
11389      *     {@code radix + 'A' - 10}.
11390      *     In this case, {@code ch - 'A' + 10}
11391      *     is returned.
11392      * <li>The character is one of the lowercase Latin letters
11393      *     {@code 'a'} through {@code 'z'} and its code is less than
11394      *     {@code radix + 'a' - 10}.
11395      *     In this case, {@code ch - 'a' + 10}
11396      *     is returned.
11397      * <li>The character is one of the fullwidth uppercase Latin letters A
11398      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
11399      *     and its code is less than
11400      *     {@code radix + '\u005CuFF21' - 10}.
11401      *     In this case, {@code ch - '\u005CuFF21' + 10}
11402      *     is returned.
11403      * <li>The character is one of the fullwidth lowercase Latin letters a
11404      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
11405      *     and its code is less than
11406      *     {@code radix + '\u005CuFF41' - 10}.
11407      *     In this case, {@code ch - '\u005CuFF41' + 10}
11408      *     is returned.
11409      * </ul>
11410      *
11411      * <p><b>Note:</b> This method cannot handle <a
11412      * href="#supplementary"> supplementary characters</a>. To support
11413      * all Unicode characters, including supplementary characters, use
11414      * the {@link #digit(int, int)} method.
11415      *
11416      * @param   ch      the character to be converted.
11417      * @param   radix   the radix.
11418      * @return  the numeric value represented by the character in the
11419      *          specified radix.
11420      * @see     Character#forDigit(int, int)
11421      * @see     Character#isDigit(char)
11422      */
digit(char ch, int radix)11423     public static int digit(char ch, int radix) {
11424         return digit((int)ch, radix);
11425     }
11426 
11427     /**
11428      * Returns the numeric value of the specified character (Unicode
11429      * code point) in the specified radix.
11430      *
11431      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
11432      * {@code radix} &le; {@code MAX_RADIX} or if the
11433      * character is not a valid digit in the specified
11434      * radix, {@code -1} is returned. A character is a valid digit
11435      * if at least one of the following is true:
11436      * <ul>
11437      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
11438      *     and the Unicode decimal digit value of the character (or its
11439      *     single-character decomposition) is less than the specified radix.
11440      *     In this case the decimal digit value is returned.
11441      * <li>The character is one of the uppercase Latin letters
11442      *     {@code 'A'} through {@code 'Z'} and its code is less than
11443      *     {@code radix + 'A' - 10}.
11444      *     In this case, {@code codePoint - 'A' + 10}
11445      *     is returned.
11446      * <li>The character is one of the lowercase Latin letters
11447      *     {@code 'a'} through {@code 'z'} and its code is less than
11448      *     {@code radix + 'a' - 10}.
11449      *     In this case, {@code codePoint - 'a' + 10}
11450      *     is returned.
11451      * <li>The character is one of the fullwidth uppercase Latin letters A
11452      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
11453      *     and its code is less than
11454      *     {@code radix + '\u005CuFF21' - 10}.
11455      *     In this case,
11456      *     {@code codePoint - '\u005CuFF21' + 10}
11457      *     is returned.
11458      * <li>The character is one of the fullwidth lowercase Latin letters a
11459      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
11460      *     and its code is less than
11461      *     {@code radix + '\u005CuFF41' - 10}.
11462      *     In this case,
11463      *     {@code codePoint - '\u005CuFF41' + 10}
11464      *     is returned.
11465      * </ul>
11466      *
11467      * @param   codePoint the character (Unicode code point) to be converted.
11468      * @param   radix   the radix.
11469      * @return  the numeric value represented by the character in the
11470      *          specified radix.
11471      * @see     Character#forDigit(int, int)
11472      * @see     Character#isDigit(int)
11473      * @since   1.5
11474      */
11475     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11476     /*
11477     public static int digit(int codePoint, int radix) {
11478         return CharacterData.of(codePoint).digit(codePoint, radix);
11479     }
11480     */
digit(int codePoint, int radix)11481     public static int digit(int codePoint, int radix) {
11482         if (radix < MIN_RADIX || radix > MAX_RADIX) {
11483             return -1;
11484         }
11485         if (codePoint < 128) {
11486             // Optimized for ASCII
11487             int result = -1;
11488             if ('0' <= codePoint && codePoint <= '9') {
11489                 result = codePoint - '0';
11490             } else if ('a' <= codePoint && codePoint <= 'z') {
11491                 result = 10 + (codePoint - 'a');
11492             } else if ('A' <= codePoint && codePoint <= 'Z') {
11493                 result = 10 + (codePoint - 'A');
11494             }
11495             return result < radix ? result : -1;
11496         }
11497         return digitImpl(codePoint, radix);
11498     }
11499 
    // Native counterpart of digit(int, int). That method forwards here only after it has
    // validated the radix and handled every code point below 128 itself.
    @FastNative
    native static int digitImpl(int codePoint, int radix);
11502     // END Android-changed: Reimplement methods natively on top of ICU4C.
11503 
11504     /**
11505      * Returns the {@code int} value that the specified Unicode
11506      * character represents. For example, the character
11507      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
11508      * an int with a value of 50.
11509      * <p>
11510      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
11511      * {@code '\u005Cu005A'}), lowercase
11512      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
11513      * full width variant ({@code '\u005CuFF21'} through
11514      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
11515      * {@code '\u005CuFF5A'}) forms have numeric values from 10
11516      * through 35. This is independent of the Unicode specification,
11517      * which does not assign numeric values to these {@code char}
11518      * values.
11519      * <p>
11520      * If the character does not have a numeric value, then -1 is returned.
11521      * If the character has a numeric value that cannot be represented as a
11522      * nonnegative integer (for example, a fractional value), then -2
11523      * is returned.
11524      *
11525      * <p><b>Note:</b> This method cannot handle <a
11526      * href="#supplementary"> supplementary characters</a>. To support
11527      * all Unicode characters, including supplementary characters, use
11528      * the {@link #getNumericValue(int)} method.
11529      *
11530      * @param   ch      the character to be converted.
11531      * @return  the numeric value of the character, as a nonnegative {@code int}
11532      *          value; -2 if the character has a numeric value but the value
11533      *          can not be represented as a nonnegative {@code int} value;
11534      *          -1 if the character has no numeric value.
11535      * @see     Character#forDigit(int, int)
11536      * @see     Character#isDigit(char)
11537      * @since   1.1
11538      */
getNumericValue(char ch)11539     public static int getNumericValue(char ch) {
11540         return getNumericValue((int)ch);
11541     }
11542 
11543     /**
11544      * Returns the {@code int} value that the specified
11545      * character (Unicode code point) represents. For example, the character
11546      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
11547      * an {@code int} with a value of 50.
11548      * <p>
11549      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
11550      * {@code '\u005Cu005A'}), lowercase
11551      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
11552      * full width variant ({@code '\u005CuFF21'} through
11553      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
11554      * {@code '\u005CuFF5A'}) forms have numeric values from 10
11555      * through 35. This is independent of the Unicode specification,
11556      * which does not assign numeric values to these {@code char}
11557      * values.
11558      * <p>
11559      * If the character does not have a numeric value, then -1 is returned.
11560      * If the character has a numeric value that cannot be represented as a
11561      * nonnegative integer (for example, a fractional value), then -2
11562      * is returned.
11563      *
11564      * @param   codePoint the character (Unicode code point) to be converted.
11565      * @return  the numeric value of the character, as a nonnegative {@code int}
11566      *          value; -2 if the character has a numeric value but the value
11567      *          can not be represented as a nonnegative {@code int} value;
11568      *          -1 if the character has no numeric value.
11569      * @see     Character#forDigit(int, int)
11570      * @see     Character#isDigit(int)
11571      * @since   1.5
11572      */
11573     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11574     /*
11575     public static int getNumericValue(int codePoint) {
11576         return CharacterData.of(codePoint).getNumericValue(codePoint);
11577     }
11578     */
getNumericValue(int codePoint)11579     public static int getNumericValue(int codePoint) {
11580         // This is both an optimization and papers over differences between Java and ICU.
11581         if (codePoint < 128) {
11582             if (codePoint >= '0' && codePoint <= '9') {
11583                 return codePoint - '0';
11584             }
11585             if (codePoint >= 'a' && codePoint <= 'z') {
11586                 return codePoint - ('a' - 10);
11587             }
11588             if (codePoint >= 'A' && codePoint <= 'Z') {
11589                 return codePoint - ('A' - 10);
11590             }
11591             return -1;
11592         }
11593         // Full-width uppercase A-Z.
11594         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
11595             return codePoint - 0xff17;
11596         }
11597         // Full-width lowercase a-z.
11598         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
11599             return codePoint - 0xff37;
11600         }
11601         return getNumericValueImpl(codePoint);
11602     }
11603 
    // Native counterpart of getNumericValue(int). That method forwards here only for code
    // points at or above 128 that are not full-width Latin letters.
    @FastNative
    native static int getNumericValueImpl(int codePoint);
11606     // END Android-changed: Reimplement methods natively on top of ICU4C.
11607 
11608     /**
11609      * Determines if the specified character is ISO-LATIN-1 white space.
11610      * This method returns {@code true} for the following five
11611      * characters only:
11612      * <table class="striped">
11613      * <caption style="display:none">truechars</caption>
11614      * <thead>
11615      * <tr><th scope="col">Character
11616      *     <th scope="col">Code
11617      *     <th scope="col">Name
11618      * </thead>
11619      * <tbody>
11620      * <tr><th scope="row">{@code '\t'}</th>            <td>{@code U+0009}</td>
11621      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
11622      * <tr><th scope="row">{@code '\n'}</th>            <td>{@code U+000A}</td>
11623      *     <td>{@code NEW LINE}</td></tr>
11624      * <tr><th scope="row">{@code '\f'}</th>            <td>{@code U+000C}</td>
11625      *     <td>{@code FORM FEED}</td></tr>
11626      * <tr><th scope="row">{@code '\r'}</th>            <td>{@code U+000D}</td>
11627      *     <td>{@code CARRIAGE RETURN}</td></tr>
11628      * <tr><th scope="row">{@code ' '}</th>  <td>{@code U+0020}</td>
11629      *     <td>{@code SPACE}</td></tr>
11630      * </tbody>
11631      * </table>
11632      *
11633      * @param      ch   the character to be tested.
11634      * @return     {@code true} if the character is ISO-LATIN-1 white
11635      *             space; {@code false} otherwise.
11636      * @see        Character#isSpaceChar(char)
11637      * @see        Character#isWhitespace(char)
11638      * @deprecated Replaced by isWhitespace(char).
11639      */
11640     @Deprecated(since="1.1")
isSpace(char ch)11641     public static boolean isSpace(char ch) {
11642         return (ch <= 0x0020) &&
11643             (((((1L << 0x0009) |
11644             (1L << 0x000A) |
11645             (1L << 0x000C) |
11646             (1L << 0x000D) |
11647             (1L << 0x0020)) >> ch) & 1L) != 0);
11648     }
11649 
11650 
11651     /**
11652      * Determines if the specified character is a Unicode space character.
11653      * A character is considered to be a space character if and only if
11654      * it is specified to be a space character by the Unicode Standard. This
11655      * method returns true if the character's general category type is any of
11656      * the following:
11657      * <ul>
11658      * <li> {@code SPACE_SEPARATOR}
11659      * <li> {@code LINE_SEPARATOR}
11660      * <li> {@code PARAGRAPH_SEPARATOR}
11661      * </ul>
11662      *
11663      * <p><b>Note:</b> This method cannot handle <a
11664      * href="#supplementary"> supplementary characters</a>. To support
11665      * all Unicode characters, including supplementary characters, use
11666      * the {@link #isSpaceChar(int)} method.
11667      *
11668      * @param   ch      the character to be tested.
11669      * @return  {@code true} if the character is a space character;
11670      *          {@code false} otherwise.
11671      * @see     Character#isWhitespace(char)
11672      * @since   1.1
11673      */
isSpaceChar(char ch)11674     public static boolean isSpaceChar(char ch) {
11675         return isSpaceChar((int)ch);
11676     }
11677 
11678     /**
11679      * Determines if the specified character (Unicode code point) is a
11680      * Unicode space character.  A character is considered to be a
11681      * space character if and only if it is specified to be a space
11682      * character by the Unicode Standard. This method returns true if
11683      * the character's general category type is any of the following:
11684      *
11685      * <ul>
11686      * <li> {@link #SPACE_SEPARATOR}
11687      * <li> {@link #LINE_SEPARATOR}
11688      * <li> {@link #PARAGRAPH_SEPARATOR}
11689      * </ul>
11690      *
11691      * @param   codePoint the character (Unicode code point) to be tested.
11692      * @return  {@code true} if the character is a space character;
11693      *          {@code false} otherwise.
11694      * @see     Character#isWhitespace(int)
11695      * @since   1.5
11696      */
11697     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11698     /*
11699     public static boolean isSpaceChar(int codePoint) {
11700         return ((((1 << Character.SPACE_SEPARATOR) |
11701                   (1 << Character.LINE_SEPARATOR) |
11702                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
11703             != 0;
11704     }
11705     */
isSpaceChar(int codePoint)11706     public static boolean isSpaceChar(int codePoint) {
11707         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
11708         // SPACE or NO-BREAK SPACE?
11709         if (codePoint == 0x20 || codePoint == 0xa0) {
11710             return true;
11711         }
11712         if (codePoint < 0x1000) {
11713             return false;
11714         }
11715         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
11716         if (codePoint == 0x1680 || codePoint == 0x180e) {
11717             return true;
11718         }
11719         if (codePoint < 0x2000) {
11720             return false;
11721         }
11722         if (codePoint <= 0xffff) {
11723             // Other whitespace from General Punctuation...
11724             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
11725                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
11726         }
11727         // Let icu4c worry about non-BMP code points.
11728         return isSpaceCharImpl(codePoint);
11729     }
11730 
    // Native counterpart of isSpaceChar(int). That method forwards here only for code points
    // above 0xffff; everything else is decided in Java.
    @FastNative
    static native boolean isSpaceCharImpl(int codePoint);
11733     // END Android-changed: Reimplement methods natively on top of ICU4C.
11734 
11735     /**
11736      * Determines if the specified character is white space according to Java.
11737      * A character is a Java whitespace character if and only if it satisfies
11738      * one of the following criteria:
11739      * <ul>
11740      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
11741      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
11742      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
11743      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11744      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11745      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11746      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11747      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11748      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11749      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11750      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11751      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11752      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11753      * </ul>
11754      *
11755      * <p><b>Note:</b> This method cannot handle <a
11756      * href="#supplementary"> supplementary characters</a>. To support
11757      * all Unicode characters, including supplementary characters, use
11758      * the {@link #isWhitespace(int)} method.
11759      *
11760      * @param   ch the character to be tested.
11761      * @return  {@code true} if the character is a Java whitespace
11762      *          character; {@code false} otherwise.
11763      * @see     Character#isSpaceChar(char)
11764      * @since   1.1
11765      */
isWhitespace(char ch)11766     public static boolean isWhitespace(char ch) {
11767         return isWhitespace((int)ch);
11768     }
11769 
11770     /**
11771      * Determines if the specified character (Unicode code point) is
11772      * white space according to Java.  A character is a Java
11773      * whitespace character if and only if it satisfies one of the
11774      * following criteria:
11775      * <ul>
11776      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
11777      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
11778      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
11779      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11780      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11781      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11782      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11783      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11784      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11785      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11786      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11787      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11788      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11789      * </ul>
11790      *
11791      * @param   codePoint the character (Unicode code point) to be tested.
11792      * @return  {@code true} if the character is a Java whitespace
11793      *          character; {@code false} otherwise.
11794      * @see     Character#isSpaceChar(int)
11795      * @since   1.5
11796      */
11797     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11798     /*
11799     public static boolean isWhitespace(int codePoint) {
11800         return CharacterData.of(codePoint).isWhitespace(codePoint);
11801     }
11802     */
isWhitespace(int codePoint)11803     public static boolean isWhitespace(int codePoint) {
11804         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
11805         // Any ASCII whitespace character?
11806         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
11807             return true;
11808         }
11809         if (codePoint < 0x1000) {
11810             return false;
11811         }
11812         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
11813         if (codePoint == 0x1680 || codePoint == 0x180e) {
11814             return true;
11815         }
11816         if (codePoint < 0x2000) {
11817             return false;
11818         }
11819         // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
11820         if (codePoint == 0x2007 || codePoint == 0x202f) {
11821             return false;
11822         }
11823         if (codePoint <= 0xffff) {
11824             // Other whitespace from General Punctuation...
11825             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
11826                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
11827         }
11828         // Let icu4c worry about non-BMP code points.
11829         return isWhitespaceImpl(codePoint);
11830     }
11831 
    // Native counterpart of isWhitespace(int). That method forwards here only for code points
    // above 0xffff; everything else is decided in Java.
    @FastNative
    native static boolean isWhitespaceImpl(int codePoint);
11834     // END Android-changed: Reimplement methods natively on top of ICU4C.
11835 
11836     /**
11837      * Determines if the specified character is an ISO control
11838      * character.  A character is considered to be an ISO control
11839      * character if its code is in the range {@code '\u005Cu0000'}
11840      * through {@code '\u005Cu001F'} or in the range
11841      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11842      *
11843      * <p><b>Note:</b> This method cannot handle <a
11844      * href="#supplementary"> supplementary characters</a>. To support
11845      * all Unicode characters, including supplementary characters, use
11846      * the {@link #isISOControl(int)} method.
11847      *
11848      * @param   ch      the character to be tested.
11849      * @return  {@code true} if the character is an ISO control character;
11850      *          {@code false} otherwise.
11851      *
11852      * @see     Character#isSpaceChar(char)
11853      * @see     Character#isWhitespace(char)
11854      * @since   1.1
11855      */
isISOControl(char ch)11856     public static boolean isISOControl(char ch) {
11857         return isISOControl((int)ch);
11858     }
11859 
11860     /**
11861      * Determines if the referenced character (Unicode code point) is an ISO control
11862      * character.  A character is considered to be an ISO control
11863      * character if its code is in the range {@code '\u005Cu0000'}
11864      * through {@code '\u005Cu001F'} or in the range
11865      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11866      *
11867      * @param   codePoint the character (Unicode code point) to be tested.
11868      * @return  {@code true} if the character is an ISO control character;
11869      *          {@code false} otherwise.
11870      * @see     Character#isSpaceChar(int)
11871      * @see     Character#isWhitespace(int)
11872      * @since   1.5
11873      */
isISOControl(int codePoint)11874     public static boolean isISOControl(int codePoint) {
11875         // Optimized form of:
11876         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
11877         //     (codePoint >= 0x7F && codePoint <= 0x9F);
11878         return codePoint <= 0x9F &&
11879             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
11880     }
11881 
11882     /**
11883      * Returns a value indicating a character's general category.
11884      *
11885      * <p><b>Note:</b> This method cannot handle <a
11886      * href="#supplementary"> supplementary characters</a>. To support
11887      * all Unicode characters, including supplementary characters, use
11888      * the {@link #getType(int)} method.
11889      *
11890      * @param   ch      the character to be tested.
11891      * @return  a value of type {@code int} representing the
11892      *          character's general category.
11893      * @see     Character#COMBINING_SPACING_MARK
11894      * @see     Character#CONNECTOR_PUNCTUATION
11895      * @see     Character#CONTROL
11896      * @see     Character#CURRENCY_SYMBOL
11897      * @see     Character#DASH_PUNCTUATION
11898      * @see     Character#DECIMAL_DIGIT_NUMBER
11899      * @see     Character#ENCLOSING_MARK
11900      * @see     Character#END_PUNCTUATION
11901      * @see     Character#FINAL_QUOTE_PUNCTUATION
11902      * @see     Character#FORMAT
11903      * @see     Character#INITIAL_QUOTE_PUNCTUATION
11904      * @see     Character#LETTER_NUMBER
11905      * @see     Character#LINE_SEPARATOR
11906      * @see     Character#LOWERCASE_LETTER
11907      * @see     Character#MATH_SYMBOL
11908      * @see     Character#MODIFIER_LETTER
11909      * @see     Character#MODIFIER_SYMBOL
11910      * @see     Character#NON_SPACING_MARK
11911      * @see     Character#OTHER_LETTER
11912      * @see     Character#OTHER_NUMBER
11913      * @see     Character#OTHER_PUNCTUATION
11914      * @see     Character#OTHER_SYMBOL
11915      * @see     Character#PARAGRAPH_SEPARATOR
11916      * @see     Character#PRIVATE_USE
11917      * @see     Character#SPACE_SEPARATOR
11918      * @see     Character#START_PUNCTUATION
11919      * @see     Character#SURROGATE
11920      * @see     Character#TITLECASE_LETTER
11921      * @see     Character#UNASSIGNED
11922      * @see     Character#UPPERCASE_LETTER
11923      * @since   1.1
11924      */
getType(char ch)11925     public static int getType(char ch) {
11926         return getType((int)ch);
11927     }
11928 
11929     /**
11930      * Returns a value indicating a character's general category.
11931      *
11932      * @param   codePoint the character (Unicode code point) to be tested.
11933      * @return  a value of type {@code int} representing the
11934      *          character's general category.
11935      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
11936      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
11937      * @see     Character#CONTROL CONTROL
11938      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
11939      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
11940      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
11941      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
11942      * @see     Character#END_PUNCTUATION END_PUNCTUATION
11943      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
11944      * @see     Character#FORMAT FORMAT
11945      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
11946      * @see     Character#LETTER_NUMBER LETTER_NUMBER
11947      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
11948      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
11949      * @see     Character#MATH_SYMBOL MATH_SYMBOL
11950      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
11951      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
11952      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
11953      * @see     Character#OTHER_LETTER OTHER_LETTER
11954      * @see     Character#OTHER_NUMBER OTHER_NUMBER
11955      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
11956      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
11957      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
11958      * @see     Character#PRIVATE_USE PRIVATE_USE
11959      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
11960      * @see     Character#START_PUNCTUATION START_PUNCTUATION
11961      * @see     Character#SURROGATE SURROGATE
11962      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
11963      * @see     Character#UNASSIGNED UNASSIGNED
11964      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
11965      * @since   1.5
11966      */
11967     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11968     /*
11969     public static int getType(int codePoint) {
11970         return CharacterData.of(codePoint).getType(codePoint);
11971     }
11972     */
getType(int codePoint)11973     public static int getType(int codePoint) {
11974         int type = getTypeImpl(codePoint);
11975         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
11976         if (type <= Character.FORMAT) {
11977             return type;
11978         }
11979         return (type + 1);
11980     }
11981 
    // Native source of the raw type value used by getType(int), which returns it unchanged
    // up to Character.FORMAT and incremented by one above that.
    @FastNative
    static native int getTypeImpl(int codePoint);
11984     // END Android-changed: Reimplement methods natively on top of ICU4C.
11985 
11986     /**
11987      * Determines the character representation for a specific digit in
11988      * the specified radix. If the value of {@code radix} is not a
11989      * valid radix, or the value of {@code digit} is not a valid
11990      * digit in the specified radix, the null character
11991      * ({@code '\u005Cu0000'}) is returned.
11992      * <p>
11993      * The {@code radix} argument is valid if it is greater than or
11994      * equal to {@code MIN_RADIX} and less than or equal to
11995      * {@code MAX_RADIX}. The {@code digit} argument is valid if
11996      * {@code 0 <= digit < radix}.
11997      * <p>
11998      * If the digit is less than 10, then
11999      * {@code '0' + digit} is returned. Otherwise, the value
12000      * {@code 'a' + digit - 10} is returned.
12001      *
12002      * @param   digit   the number to convert to a character.
12003      * @param   radix   the radix.
12004      * @return  the {@code char} representation of the specified digit
12005      *          in the specified radix.
12006      * @see     Character#MIN_RADIX
12007      * @see     Character#MAX_RADIX
12008      * @see     Character#digit(char, int)
12009      */
forDigit(int digit, int radix)12010     public static char forDigit(int digit, int radix) {
12011         if ((digit >= radix) || (digit < 0)) {
12012             return '\0';
12013         }
12014         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
12015             return '\0';
12016         }
12017         if (digit < 10) {
12018             return (char)('0' + digit);
12019         }
12020         return (char)('a' - 10 + digit);
12021     }
12022 
12023     /**
12024      * Returns the Unicode directionality property for the given
12025      * character.  Character directionality is used to calculate the
12026      * visual ordering of text. The directionality value of undefined
12027      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
12028      *
12029      * <p><b>Note:</b> This method cannot handle <a
12030      * href="#supplementary"> supplementary characters</a>. To support
12031      * all Unicode characters, including supplementary characters, use
12032      * the {@link #getDirectionality(int)} method.
12033      *
12034      * @param  ch {@code char} for which the directionality property
12035      *            is requested.
12036      * @return the directionality property of the {@code char} value.
12037      *
12038      * @see Character#DIRECTIONALITY_UNDEFINED
12039      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
12040      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
12041      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
12042      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
12043      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
12044      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
12045      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
12046      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
12047      * @see Character#DIRECTIONALITY_NONSPACING_MARK
12048      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
12049      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
12050      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
12051      * @see Character#DIRECTIONALITY_WHITESPACE
12052      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
12053      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
12054      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
12055      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
12056      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
12057      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
12058      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
12059      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
12060      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
12061      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
12062      * @since 1.4
12063      */
getDirectionality(char ch)12064     public static byte getDirectionality(char ch) {
12065         return getDirectionality((int)ch);
12066     }
12067 
12068     /**
12069      * Returns the Unicode directionality property for the given
12070      * character (Unicode code point).  Character directionality is
12071      * used to calculate the visual ordering of text. The
12072      * directionality value of an undefined character is {@link
12073      * #DIRECTIONALITY_UNDEFINED}.
12074      *
12075      * @param   codePoint the character (Unicode code point) for which
12076      *          the directionality property is requested.
12077      * @return the directionality property of the character.
12078      *
12079      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
12080      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
12081      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
12082      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
12083      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
12084      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
12085      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
12086      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
12087      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
12088      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
12089      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
12090      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
12091      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
12092      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
12093      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
12094      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
12095      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
12096      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
12097      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
12098      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
12099      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
12100      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
12101      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
12102      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
12103      * @since    1.5
12104      */
12105     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
12106     /*
12107     public static byte getDirectionality(int codePoint) {
12108         return CharacterData.of(codePoint).getDirectionality(codePoint);
12109     }
12110     */
getDirectionality(int codePoint)12111     public static byte getDirectionality(int codePoint) {
12112         if (getType(codePoint) == Character.UNASSIGNED) {
12113             return Character.DIRECTIONALITY_UNDEFINED;
12114         }
12115 
12116         byte directionality = getDirectionalityImpl(codePoint);
12117         if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
12118             return DIRECTIONALITY[directionality];
12119         }
12120         return Character.DIRECTIONALITY_UNDEFINED;
12121     }
12122 
12123     @FastNative
getDirectionalityImpl(int codePoint)12124     native static byte getDirectionalityImpl(int codePoint);
12125     // END Android-changed: Reimplement methods natively on top of ICU4C.
12126 
12127     /**
12128      * Determines whether the character is mirrored according to the
12129      * Unicode specification.  Mirrored characters should have their
12130      * glyphs horizontally mirrored when displayed in text that is
12131      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
12132      * PARENTHESIS is semantically defined to be an <i>opening
12133      * parenthesis</i>.  This will appear as a "(" in text that is
12134      * left-to-right but as a ")" in text that is right-to-left.
12135      *
12136      * <p><b>Note:</b> This method cannot handle <a
12137      * href="#supplementary"> supplementary characters</a>. To support
12138      * all Unicode characters, including supplementary characters, use
12139      * the {@link #isMirrored(int)} method.
12140      *
12141      * @param  ch {@code char} for which the mirrored property is requested
12142      * @return {@code true} if the char is mirrored, {@code false}
12143      *         if the {@code char} is not mirrored or is not defined.
12144      * @since 1.4
12145      */
isMirrored(char ch)12146     public static boolean isMirrored(char ch) {
12147         return isMirrored((int)ch);
12148     }
12149 
12150     /**
12151      * Determines whether the specified character (Unicode code point)
12152      * is mirrored according to the Unicode specification.  Mirrored
12153      * characters should have their glyphs horizontally mirrored when
12154      * displayed in text that is right-to-left.  For example,
12155      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
12156      * defined to be an <i>opening parenthesis</i>.  This will appear
12157      * as a "(" in text that is left-to-right but as a ")" in text
12158      * that is right-to-left.
12159      *
12160      * @param   codePoint the character (Unicode code point) to be tested.
12161      * @return  {@code true} if the character is mirrored, {@code false}
12162      *          if the character is not mirrored or is not defined.
12163      * @since   1.5
12164      */
12165     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
12166     /*
12167     public static boolean isMirrored(int codePoint) {
12168         return CharacterData.of(codePoint).isMirrored(codePoint);
12169     }
12170     */
isMirrored(int codePoint)12171     public static boolean isMirrored(int codePoint) {
12172         return isMirroredImpl(codePoint);
12173     }
12174 
12175     @FastNative
isMirroredImpl(int codePoint)12176     native static boolean isMirroredImpl(int codePoint);
12177     // END Android-changed: Reimplement methods natively on top of ICU4C.
12178 
12179     /**
12180      * Compares two {@code Character} objects numerically.
12181      *
12182      * @param   anotherCharacter   the {@code Character} to be compared.
12183      * @return  the value {@code 0} if the argument {@code Character}
12184      *          is equal to this {@code Character}; a value less than
12185      *          {@code 0} if this {@code Character} is numerically less
12186      *          than the {@code Character} argument; and a value greater than
12187      *          {@code 0} if this {@code Character} is numerically greater
12188      *          than the {@code Character} argument (unsigned comparison).
12189      *          Note that this is strictly a numerical comparison; it is not
12190      *          locale-dependent.
12191      * @since   1.2
12192      */
compareTo(Character anotherCharacter)12193     public int compareTo(Character anotherCharacter) {
12194         return compare(this.value, anotherCharacter.value);
12195     }
12196 
12197     /**
12198      * Compares two {@code char} values numerically.
12199      * The value returned is identical to what would be returned by:
12200      * <pre>
12201      *    Character.valueOf(x).compareTo(Character.valueOf(y))
12202      * </pre>
12203      *
12204      * @param  x the first {@code char} to compare
12205      * @param  y the second {@code char} to compare
12206      * @return the value {@code 0} if {@code x == y};
12207      *         a value less than {@code 0} if {@code x < y}; and
12208      *         a value greater than {@code 0} if {@code x > y}
12209      * @since 1.7
12210      */
compare(char x, char y)12211     public static int compare(char x, char y) {
12212         return x - y;
12213     }
12214 
12215     // BEGIN Android-removed: Use ICU.
12216     /*
12217      * Converts the character (Unicode code point) argument to uppercase using
12218      * information from the UnicodeData file.
12219      *
12220      * @param   codePoint   the character (Unicode code point) to be converted.
12221      * @return  either the uppercase equivalent of the character, if
12222      *          any, or an error flag ({@code Character.ERROR})
12223      *          that indicates that a 1:M {@code char} mapping exists.
12224      * @see     Character#isLowerCase(char)
12225      * @see     Character#isUpperCase(char)
12226      * @see     Character#toLowerCase(char)
12227      * @see     Character#toTitleCase(char)
12228      * @since 1.4
12229      *
12230     static int toUpperCaseEx(int codePoint) {
12231         assert isValidCodePoint(codePoint);
12232         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
12233     }
12234 
12235     /**
12236      * Converts the character (Unicode code point) argument to uppercase using case
12237      * mapping information from the SpecialCasing file in the Unicode
12238      * specification. If a character has no explicit uppercase
12239      * mapping, then the {@code char} itself is returned in the
12240      * {@code char[]}.
12241      *
12242      * @param   codePoint   the character (Unicode code point) to be converted.
12243      * @return a {@code char[]} with the uppercased character.
12244      * @since 1.4
12245      *
12246     static char[] toUpperCaseCharArray(int codePoint) {
12247         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
12248         assert isBmpCodePoint(codePoint);
12249         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
12250     }
12251     */
12252     // END Android-removed: Use ICU.
12253 
12254     /**
12255      * The number of bits used to represent a {@code char} value in unsigned
12256      * binary form, constant {@code 16}.
12257      *
12258      * @since 1.5
12259      */
    // Width of a char in bits; BYTES is computed from this value.
    public static final int SIZE = 16;
12261 
12262     /**
12263      * The number of bytes used to represent a {@code char} value in unsigned
12264      * binary form.
12265      *
12266      * @since 1.8
12267      */
    // Computed as SIZE (bits per char) divided by Byte.SIZE (bits per byte).
    public static final int BYTES = SIZE / Byte.SIZE;
12269 
12270     /**
12271      * Returns the value obtained by reversing the order of the bytes in the
12272      * specified {@code char} value.
12273      *
12274      * @param ch The {@code char} of which to reverse the byte order.
12275      * @return the value obtained by reversing (or, equivalently, swapping)
12276      *     the bytes in the specified {@code char} value.
12277      * @since 1.5
12278      */
12279     @IntrinsicCandidate
reverseBytes(char ch)12280     public static char reverseBytes(char ch) {
12281         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
12282     }
12283 
12284     /**
12285      * Returns the name of the specified character
12286      * {@code codePoint}, or null if the code point is
12287      * {@link #UNASSIGNED unassigned}.
12288      * <p>
12289      * If the specified character is not assigned a name by
12290      * the <i>UnicodeData</i> file (part of the Unicode Character
12291      * Database maintained by the Unicode Consortium), the returned
12292      * name is the same as the result of the expression:
12293      *
12294      * <blockquote>{@code
12295      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
12296      *     + " "
12297      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12298      *
12299      * }</blockquote>
12300      *
12301      * For the {@code codePoint}s in the <i>UnicodeData</i> file, the name
12302      * returned by this method follows the naming scheme in the
12303      * "Unicode Name Property" section of the Unicode Standard. For other
12304      * code points, such as Hangul/Ideographs, the name generation rule above
12305      * differs from the one defined in the Unicode Standard.
12306      *
12307      * @param  codePoint the character (Unicode code point)
12308      *
12309      * @return the name of the specified character, or null if
12310      *         the code point is unassigned.
12311      *
12312      * @throws IllegalArgumentException if the specified
12313      *            {@code codePoint} is not a valid Unicode
12314      *            code point.
12315      *
12316      * @since 1.7
12317      */
getName(int codePoint)12318     public static String getName(int codePoint) {
12319         if (!isValidCodePoint(codePoint)) {
12320             throw new IllegalArgumentException(
12321                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
12322         }
12323         // Android-changed: Use ICU.
12324         // String name = CharacterName.get(codePoint);
12325         String name = getNameImpl(codePoint);
12326         if (name != null)
12327             return name;
12328         if (getType(codePoint) == UNASSIGNED)
12329             return null;
12330         UnicodeBlock block = UnicodeBlock.of(codePoint);
12331         if (block != null)
12332             return block.toString().replace('_', ' ') + " "
12333                    + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12334         // should never come here
12335         return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12336     }
12337 
12338     // BEGIN Android-removed: expose after CharacterName.getCodePoint() is imported.
12339     /**
12340      * Returns the code point value of the Unicode character specified by
12341      * the given character name.
12342      * <p>
12343      * If a character is not assigned a name by the <i>UnicodeData</i>
12344      * file (part of the Unicode Character Database maintained by the Unicode
12345      * Consortium), its name is defined as the result of the expression:
12346      *
12347      * <blockquote>{@code
12348      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
12349      *     + " "
12350      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
12351      *
12352      * }</blockquote>
12353      * <p>
12354      * The {@code name} matching is case insensitive, with any leading and
12355      * trailing whitespace character removed.
12356      *
12357      * For the code points in the <i>UnicodeData</i> file, this method
12358      * recognizes the name which conforms to the name defined in the
12359      * "Unicode Name Property" section in the Unicode Standard. For other
12360      * code points, this method recognizes the name generated with
12361      * {@link #getName(int)} method.
12362      *
12363      * @param  name the character name
12364      *
12365      * @return the code point value of the character specified by its name.
12366      *
12367      * @throws IllegalArgumentException if the specified {@code name}
12368      *         is not a valid character name.
12369      * @throws NullPointerException if {@code name} is {@code null}
12370      *
12371      * @since 9
12372      */
codePointOf(String name)12373     public static int codePointOf(String name) {
12374         name = name.trim().toUpperCase(Locale.ROOT);
12375         // Android-changed: Use ICU4C.
12376         // int cp = CharacterName.getInstance().getCodePoint(name);
12377         int cp = codePointOfImpl(name);
12378         if (cp != -1)
12379             return cp;
12380         try {
12381             int off = name.lastIndexOf(' ');
12382             if (off != -1) {
12383                 cp = Integer.parseInt(name, off + 1, name.length(), 16);
12384                 if (isValidCodePoint(cp) && name.equals(getName(cp)))
12385                     return cp;
12386             }
12387         } catch (Exception x) {}
12388         throw new IllegalArgumentException("Unrecognized character name :" + name);
12389     }
12390     // END Android-removed: expose after CharacterName.getCodePoint() is imported.
12391 
12392     // Android-added: Use ICU.
12393     // Implement getNameImpl() and codePointOfImpl() natively.
    // Native name lookup used by getName(int). When it returns null, getName
    // returns null for unassigned code points and otherwise falls back to a
    // name synthesized from the Unicode block.
    private static native String getNameImpl(int codePoint);
12395 
12396     @FastNative
    // Native name-to-code-point lookup used by codePointOf(String), which
    // passes an already trimmed and upper-cased name and treats a result of
    // -1 as "no match".
    private static native int codePointOfImpl(String name);
12398 }
12399