• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2002, 2021, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package java.lang;
27 
28 import dalvik.annotation.optimization.FastNative;
29 // Android-removed: CDS is not used on Android.
30 // import jdk.internal.misc.CDS;
31 import jdk.internal.vm.annotation.IntrinsicCandidate;
32 
33 import java.util.Arrays;
34 import java.util.HashMap;
35 import java.util.Locale;
36 import java.util.Map;
37 
38 // BEGIN Android-removed: dynamic constants not supported on Android.
39 /*
40 import java.lang.constant.Constable;
41 import java.lang.constant.DynamicConstantDesc;
42 import java.util.Optional;
43 import static java.lang.constant.ConstantDescs.BSM_EXPLICIT_CAST;
44 import static java.lang.constant.ConstantDescs.CD_char;
45 import static java.lang.constant.ConstantDescs.CD_int;
46 import static java.lang.constant.ConstantDescs.DEFAULT_NAME;
47 */
48 // END Android-removed: dynamic constants not supported on Android.
49 
50 // Android-changed: Remove reference to a specific unicode standard version
51 /**
52  * The {@code Character} class wraps a value of the primitive
53  * type {@code char} in an object. An object of class
54  * {@code Character} contains a single field whose type is
55  * {@code char}.
56  * <p>
57  * In addition, this class provides several methods for determining
58  * a character's category (lowercase letter, digit, etc.) and for converting
59  * characters from uppercase to lowercase and vice versa.
60  * <p>
 * Character information is based on the Unicode Standard.
62  * <p>
63  * The methods and data of class {@code Character} are defined by
64  * the information in the <i>UnicodeData</i> file that is part of the
65  * Unicode Character Database maintained by the Unicode
66  * Consortium. This file specifies various properties including name
67  * and general category for every defined Unicode code point or
68  * character range.
69  * <p>
70  * The file and its description are available from the Unicode Consortium at:
71  * <ul>
72  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
73  * </ul>
74  *
75  * <h2><a id="conformance">Unicode Conformance</a></h2>
76  * <p>
77  * The fields and methods of class {@code Character} are defined in terms
78  * of character information from the Unicode Standard, specifically the
79  * <i>UnicodeData</i> file that is part of the Unicode Character Database.
80  * This file specifies properties including name and category for every
81  * assigned Unicode code point or character range. The file is available
82  * from the Unicode Consortium at
83  * <a href="http://www.unicode.org">http://www.unicode.org</a>.
84  * <p>
85  * Character information is based on the Unicode Standard, version 13.0.
86  * <p>
87  * The Java platform has supported different versions of the Unicode
88  * Standard over time. Upgrades to newer versions of the Unicode Standard
89  * occurred in the following Java releases, each indicating the new version:
90  * <table class="striped">
91  * <caption style="display:none">Shows Java releases and supported Unicode versions</caption>
92  * <thead>
93  * <tr><th scope="col">Java release</th>
94  *     <th scope="col">Unicode version</th></tr>
95  * </thead>
96  * <tbody>
97  * <tr><td>Java SE 15</td>
98  *     <td>Unicode 13.0</td></tr>
99  * <tr><td>Java SE 13</td>
100  *     <td>Unicode 12.1</td></tr>
101  * <tr><td>Java SE 12</td>
102  *     <td>Unicode 11.0</td></tr>
103  * <tr><td>Java SE 11</td>
104  *     <td>Unicode 10.0</td></tr>
105  * <tr><td>Java SE 9</td>
106  *     <td>Unicode 8.0</td></tr>
107  * <tr><td>Java SE 8</td>
108  *     <td>Unicode 6.2</td></tr>
109  * <tr><td>Java SE 7</td>
110  *     <td>Unicode 6.0</td></tr>
111  * <tr><td>Java SE 5.0</td>
112  *     <td>Unicode 4.0</td></tr>
113  * <tr><td>Java SE 1.4</td>
114  *     <td>Unicode 3.0</td></tr>
115  * <tr><td>JDK 1.1</td>
116  *     <td>Unicode 2.0</td></tr>
117  * <tr><td>JDK 1.0.2</td>
118  *     <td>Unicode 1.1.5</td></tr>
119  * </tbody>
120  * </table>
121  * Variations from these base Unicode versions, such as recognized appendixes,
122  * are documented elsewhere.
123  * <h2><a id="unicode">Unicode Character Representations</a></h2>
124  *
125  * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) is based on the
127  * original Unicode specification, which defined characters as
128  * fixed-width 16-bit entities. The Unicode Standard has since been
129  * changed to allow for characters whose representation requires more
130  * than 16 bits.  The range of legal <em>code point</em>s is now
131  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
132  * (Refer to the <a
133  * href="http://www.unicode.org/reports/tr27/#notation"><i>
134  * definition</i></a> of the U+<i>n</i> notation in the Unicode
135  * Standard.)
136  *
137  * <p><a id="BMP">The set of characters from U+0000 to U+FFFF</a> is
138  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
139  * <a id="supplementary">Characters</a> whose code points are greater
140  * than U+FFFF are called <em>supplementary character</em>s.  The Java
141  * platform uses the UTF-16 representation in {@code char} arrays and
142  * in the {@code String} and {@code StringBuffer} classes. In
143  * this representation, supplementary characters are represented as a pair
144  * of {@code char} values, the first from the <em>high-surrogates</em>
145  * range, (&#92;uD800-&#92;uDBFF), the second from the
146  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
147  *
148  * <p>A {@code char} value, therefore, represents Basic
149  * Multilingual Plane (BMP) code points, including the surrogate
150  * code points, or code units of the UTF-16 encoding. An
151  * {@code int} value represents all Unicode code points,
152  * including supplementary code points. The lower (least significant)
153  * 21 bits of {@code int} are used to represent Unicode code
154  * points and the upper (most significant) 11 bits must be zero.
155  * Unless otherwise specified, the behavior with respect to
156  * supplementary characters and surrogate {@code char} values is
157  * as follows:
158  *
159  * <ul>
160  * <li>The methods that only accept a {@code char} value cannot support
161  * supplementary characters. They treat {@code char} values from the
162  * surrogate ranges as undefined characters. For example,
163  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
164  * this specific value if followed by any low-surrogate value in a string
165  * would represent a letter.
166  *
167  * <li>The methods that accept an {@code int} value support all
168  * Unicode characters, including supplementary characters. For
169  * example, {@code Character.isLetter(0x2F81A)} returns
170  * {@code true} because the code point value represents a letter
171  * (a CJK ideograph).
172  * </ul>
173  *
174  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
175  * used for character values in the range between U+0000 and U+10FFFF,
176  * and <em>Unicode code unit</em> is used for 16-bit
177  * {@code char} values that are code units of the <em>UTF-16</em>
178  * encoding. For more information on Unicode terminology, refer to the
179  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
180  *
181  * <!-- Android-removed: paragraph on ValueBased
182  * <p>This is a <a href="{@docRoot}/java.base/java/lang/doc-files/ValueBased.html">value-based</a>
183  * class; programmers should treat instances that are
184  * {@linkplain #equals(Object) equal} as interchangeable and should not
185  * use instances for synchronization, or unpredictable behavior may
186  * occur. For example, in a future release, synchronization may fail.
187  * -->
188  *
189  * @author  Lee Boynton
190  * @author  Guy Steele
191  * @author  Akira Tanaka
192  * @author  Martin Buchholz
193  * @author  Ulf Zibis
194  * @since   1.0
195  */
196 @jdk.internal.ValueBased
197 public final
198 class Character implements java.io.Serializable, Comparable<Character> {
199 // Android-removed: no Constable support.
200 // , Constable
201     /**
202      * The minimum radix available for conversion to and from strings.
203      * The constant value of this field is the smallest value permitted
204      * for the radix argument in radix-conversion methods such as the
205      * {@code digit} method, the {@code forDigit} method, and the
206      * {@code toString} method of class {@code Integer}.
207      *
208      * @see     Character#digit(char, int)
209      * @see     Character#forDigit(int, int)
210      * @see     Integer#toString(int, int)
211      * @see     Integer#valueOf(String)
212      */
213     public static final int MIN_RADIX = 2;
214 
215     /**
216      * The maximum radix available for conversion to and from strings.
217      * The constant value of this field is the largest value permitted
218      * for the radix argument in radix-conversion methods such as the
219      * {@code digit} method, the {@code forDigit} method, and the
220      * {@code toString} method of class {@code Integer}.
221      *
222      * @see     Character#digit(char, int)
223      * @see     Character#forDigit(int, int)
224      * @see     Integer#toString(int, int)
225      * @see     Integer#valueOf(String)
226      */
227     public static final int MAX_RADIX = 36;
228 
229     /**
230      * The constant value of this field is the smallest value of type
231      * {@code char}, {@code '\u005Cu0000'}.
232      *
233      * @since   1.0.2
234      */
235     public static final char MIN_VALUE = '\u0000';
236 
237     /**
238      * The constant value of this field is the largest value of type
239      * {@code char}, {@code '\u005CuFFFF'}.
240      *
241      * @since   1.0.2
242      */
243     public static final char MAX_VALUE = '\uFFFF';
244 
245     /**
246      * The {@code Class} instance representing the primitive type
247      * {@code char}.
248      *
249      * @since   1.1
250      */
251     @SuppressWarnings("unchecked")
252     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
253 
254     /*
255      * Normative general types
256      */
257 
258     /*
259      * General character types
260      */
261 
262     /**
263      * General category "Cn" in the Unicode specification.
264      * @since   1.1
265      */
266     public static final byte UNASSIGNED = 0;
267 
268     /**
269      * General category "Lu" in the Unicode specification.
270      * @since   1.1
271      */
272     public static final byte UPPERCASE_LETTER = 1;
273 
274     /**
275      * General category "Ll" in the Unicode specification.
276      * @since   1.1
277      */
278     public static final byte LOWERCASE_LETTER = 2;
279 
280     /**
281      * General category "Lt" in the Unicode specification.
282      * @since   1.1
283      */
284     public static final byte TITLECASE_LETTER = 3;
285 
286     /**
287      * General category "Lm" in the Unicode specification.
288      * @since   1.1
289      */
290     public static final byte MODIFIER_LETTER = 4;
291 
292     /**
293      * General category "Lo" in the Unicode specification.
294      * @since   1.1
295      */
296     public static final byte OTHER_LETTER = 5;
297 
298     /**
299      * General category "Mn" in the Unicode specification.
300      * @since   1.1
301      */
302     public static final byte NON_SPACING_MARK = 6;
303 
304     /**
305      * General category "Me" in the Unicode specification.
306      * @since   1.1
307      */
308     public static final byte ENCLOSING_MARK = 7;
309 
310     /**
311      * General category "Mc" in the Unicode specification.
312      * @since   1.1
313      */
314     public static final byte COMBINING_SPACING_MARK = 8;
315 
316     /**
317      * General category "Nd" in the Unicode specification.
318      * @since   1.1
319      */
320     public static final byte DECIMAL_DIGIT_NUMBER = 9;
321 
322     /**
323      * General category "Nl" in the Unicode specification.
324      * @since   1.1
325      */
326     public static final byte LETTER_NUMBER = 10;
327 
328     /**
329      * General category "No" in the Unicode specification.
330      * @since   1.1
331      */
332     public static final byte OTHER_NUMBER = 11;
333 
334     /**
335      * General category "Zs" in the Unicode specification.
336      * @since   1.1
337      */
338     public static final byte SPACE_SEPARATOR = 12;
339 
340     /**
341      * General category "Zl" in the Unicode specification.
342      * @since   1.1
343      */
344     public static final byte LINE_SEPARATOR = 13;
345 
346     /**
347      * General category "Zp" in the Unicode specification.
348      * @since   1.1
349      */
350     public static final byte PARAGRAPH_SEPARATOR = 14;
351 
352     /**
353      * General category "Cc" in the Unicode specification.
354      * @since   1.1
355      */
356     public static final byte CONTROL = 15;
357 
358     /**
359      * General category "Cf" in the Unicode specification.
360      * @since   1.1
361      */
362     public static final byte FORMAT = 16;
363 
364     /**
365      * General category "Co" in the Unicode specification.
366      * @since   1.1
367      */
368     public static final byte PRIVATE_USE = 18;
369 
370     /**
371      * General category "Cs" in the Unicode specification.
372      * @since   1.1
373      */
374     public static final byte SURROGATE = 19;
375 
376     /**
377      * General category "Pd" in the Unicode specification.
378      * @since   1.1
379      */
380     public static final byte DASH_PUNCTUATION = 20;
381 
382     /**
383      * General category "Ps" in the Unicode specification.
384      * @since   1.1
385      */
386     public static final byte START_PUNCTUATION = 21;
387 
388     /**
389      * General category "Pe" in the Unicode specification.
390      * @since   1.1
391      */
392     public static final byte END_PUNCTUATION = 22;
393 
394     /**
395      * General category "Pc" in the Unicode specification.
396      * @since   1.1
397      */
398     public static final byte CONNECTOR_PUNCTUATION = 23;
399 
400     /**
401      * General category "Po" in the Unicode specification.
402      * @since   1.1
403      */
404     public static final byte OTHER_PUNCTUATION = 24;
405 
406     /**
407      * General category "Sm" in the Unicode specification.
408      * @since   1.1
409      */
410     public static final byte MATH_SYMBOL = 25;
411 
412     /**
413      * General category "Sc" in the Unicode specification.
414      * @since   1.1
415      */
416     public static final byte CURRENCY_SYMBOL = 26;
417 
418     /**
419      * General category "Sk" in the Unicode specification.
420      * @since   1.1
421      */
422     public static final byte MODIFIER_SYMBOL = 27;
423 
424     /**
425      * General category "So" in the Unicode specification.
426      * @since   1.1
427      */
428     public static final byte OTHER_SYMBOL = 28;
429 
430     /**
431      * General category "Pi" in the Unicode specification.
432      * @since   1.4
433      */
434     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
435 
436     /**
437      * General category "Pf" in the Unicode specification.
438      * @since   1.4
439      */
440     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
441 
442     /**
443      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
444      */
445     static final int ERROR = 0xFFFFFFFF;
446 
447 
448     /**
449      * Undefined bidirectional character type. Undefined {@code char}
450      * values have undefined directionality in the Unicode specification.
451      * @since 1.4
452      */
453     public static final byte DIRECTIONALITY_UNDEFINED = -1;
454 
455     /**
456      * Strong bidirectional character type "L" in the Unicode specification.
457      * @since 1.4
458      */
459     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
460 
461     /**
462      * Strong bidirectional character type "R" in the Unicode specification.
463      * @since 1.4
464      */
465     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
466 
467     /**
468      * Strong bidirectional character type "AL" in the Unicode specification.
469      * @since 1.4
470      */
471     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
472 
473     /**
474      * Weak bidirectional character type "EN" in the Unicode specification.
475      * @since 1.4
476      */
477     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
478 
479     /**
480      * Weak bidirectional character type "ES" in the Unicode specification.
481      * @since 1.4
482      */
483     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
484 
485     /**
486      * Weak bidirectional character type "ET" in the Unicode specification.
487      * @since 1.4
488      */
489     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
490 
491     /**
492      * Weak bidirectional character type "AN" in the Unicode specification.
493      * @since 1.4
494      */
495     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
496 
497     /**
498      * Weak bidirectional character type "CS" in the Unicode specification.
499      * @since 1.4
500      */
501     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
502 
503     /**
504      * Weak bidirectional character type "NSM" in the Unicode specification.
505      * @since 1.4
506      */
507     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
508 
509     /**
510      * Weak bidirectional character type "BN" in the Unicode specification.
511      * @since 1.4
512      */
513     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
514 
515     /**
516      * Neutral bidirectional character type "B" in the Unicode specification.
517      * @since 1.4
518      */
519     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
520 
521     /**
522      * Neutral bidirectional character type "S" in the Unicode specification.
523      * @since 1.4
524      */
525     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
526 
527     /**
528      * Neutral bidirectional character type "WS" in the Unicode specification.
529      * @since 1.4
530      */
531     public static final byte DIRECTIONALITY_WHITESPACE = 12;
532 
533     /**
534      * Neutral bidirectional character type "ON" in the Unicode specification.
535      * @since 1.4
536      */
537     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
538 
539     /**
540      * Strong bidirectional character type "LRE" in the Unicode specification.
541      * @since 1.4
542      */
543     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
544 
545     /**
546      * Strong bidirectional character type "LRO" in the Unicode specification.
547      * @since 1.4
548      */
549     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
550 
551     /**
552      * Strong bidirectional character type "RLE" in the Unicode specification.
553      * @since 1.4
554      */
555     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
556 
557     /**
558      * Strong bidirectional character type "RLO" in the Unicode specification.
559      * @since 1.4
560      */
561     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
562 
563     /**
564      * Weak bidirectional character type "PDF" in the Unicode specification.
565      * @since 1.4
566      */
567     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
568 
569     /**
570      * Weak bidirectional character type "LRI" in the Unicode specification.
571      * @since 9
572      */
573     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE = 19;
574 
575     /**
576      * Weak bidirectional character type "RLI" in the Unicode specification.
577      * @since 9
578      */
579     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE = 20;
580 
581     /**
582      * Weak bidirectional character type "FSI" in the Unicode specification.
583      * @since 9
584      */
585     public static final byte DIRECTIONALITY_FIRST_STRONG_ISOLATE = 21;
586 
587     /**
588      * Weak bidirectional character type "PDI" in the Unicode specification.
589      * @since 9
590      */
591     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE = 22;
592 
593     /**
594      * The minimum value of a
595      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
596      * Unicode high-surrogate code unit</a>
597      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
598      * A high-surrogate is also known as a <i>leading-surrogate</i>.
599      *
600      * @since 1.5
601      */
602     public static final char MIN_HIGH_SURROGATE = '\uD800';
603 
604     /**
605      * The maximum value of a
606      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
607      * Unicode high-surrogate code unit</a>
608      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
609      * A high-surrogate is also known as a <i>leading-surrogate</i>.
610      *
611      * @since 1.5
612      */
613     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
614 
615     /**
616      * The minimum value of a
617      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
618      * Unicode low-surrogate code unit</a>
619      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
620      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
621      *
622      * @since 1.5
623      */
624     public static final char MIN_LOW_SURROGATE  = '\uDC00';
625 
626     /**
627      * The maximum value of a
628      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
629      * Unicode low-surrogate code unit</a>
630      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
631      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
632      *
633      * @since 1.5
634      */
635     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
636 
637     /**
638      * The minimum value of a Unicode surrogate code unit in the
639      * UTF-16 encoding, constant {@code '\u005CuD800'}.
640      *
641      * @since 1.5
642      */
643     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
644 
645     /**
646      * The maximum value of a Unicode surrogate code unit in the
647      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
648      *
649      * @since 1.5
650      */
651     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
652 
653     /**
654      * The minimum value of a
655      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
656      * Unicode supplementary code point</a>, constant {@code U+10000}.
657      *
658      * @since 1.5
659      */
660     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
661 
662     /**
663      * The minimum value of a
664      * <a href="http://www.unicode.org/glossary/#code_point">
665      * Unicode code point</a>, constant {@code U+0000}.
666      *
667      * @since 1.5
668      */
669     public static final int MIN_CODE_POINT = 0x000000;
670 
671     /**
672      * The maximum value of a
673      * <a href="http://www.unicode.org/glossary/#code_point">
674      * Unicode code point</a>, constant {@code U+10FFFF}.
675      *
676      * @since 1.5
677      */
678     public static final int MAX_CODE_POINT = 0X10FFFF;
679 
680     // BEGIN Android-added: Use ICU.
    // The indices in byte[] DIRECTIONALITY are based on icu4c's u_charDirection(),
682     // accessed via getDirectionalityImpl(), implemented in Character.cpp.
683     private static final byte[] DIRECTIONALITY = new byte[] {
684             DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
685             DIRECTIONALITY_EUROPEAN_NUMBER,
686             DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
687             DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
688             DIRECTIONALITY_ARABIC_NUMBER,
689             DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
690             DIRECTIONALITY_PARAGRAPH_SEPARATOR,
691             DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
692             DIRECTIONALITY_OTHER_NEUTRALS,
693             DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
694             DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
695             DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
696             DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
697             DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
698             DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
699             DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
700     // END Android-added: Use ICU.
701 
702     // BEGIN Android-removed: dynamic constants not supported on Android.
703     /**
704      * Returns an {@link Optional} containing the nominal descriptor for this
705      * instance.
706      *
707      * @return an {@link Optional} describing the {@linkplain Character} instance
708      * @since 15
709      *
710     @Override
711     public Optional<DynamicConstantDesc<Character>> describeConstable() {
712         return Optional.of(DynamicConstantDesc.ofNamed(BSM_EXPLICIT_CAST, DEFAULT_NAME, CD_char, (int) value));
713     }
714     */
715     // END Android-removed: dynamic constants not supported on Android.
716 
717     /**
718      * Instances of this class represent particular subsets of the Unicode
719      * character set.  The only family of subsets defined in the
720      * {@code Character} class is {@link Character.UnicodeBlock}.
721      * Other portions of the Java API may define other subsets for their
722      * own purposes.
723      *
724      * @since 1.2
725      */
726     public static class Subset  {
727 
728         private String name;
729 
730         /**
731          * Constructs a new {@code Subset} instance.
732          *
733          * @param  name  The name of this subset
734          * @throws NullPointerException if name is {@code null}
735          */
Subset(String name)736         protected Subset(String name) {
737             if (name == null) {
738                 throw new NullPointerException("name");
739             }
740             this.name = name;
741         }
742 
743         /**
744          * Compares two {@code Subset} objects for equality.
745          * This method returns {@code true} if and only if
746          * {@code this} and the argument refer to the same
747          * object; since this method is {@code final}, this
748          * guarantee holds for all subclasses.
749          */
equals(Object obj)750         public final boolean equals(Object obj) {
751             return (this == obj);
752         }
753 
754         /**
755          * Returns the standard hash code as defined by the
756          * {@link Object#hashCode} method.  This method
757          * is {@code final} in order to ensure that the
758          * {@code equals} and {@code hashCode} methods will
759          * be consistent in all subclasses.
760          */
hashCode()761         public final int hashCode() {
762             return super.hashCode();
763         }
764 
765         /**
766          * Returns the name of this subset.
767          */
toString()768         public final String toString() {
769             return name;
770         }
771     }
772 
773     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
774     // for the latest specification of Unicode Blocks.
775 
776     /**
777      * A family of character subsets representing the character blocks in the
778      * Unicode specification. Character blocks generally define characters
779      * used for a specific script or purpose. A character is contained by
780      * at most one Unicode block.
781      *
782      * @since 1.2
783      */
784     public static final class UnicodeBlock extends Subset {
785         /**
786          * 684 - the expected number of entities
787          * 0.75 - the default load factor of HashMap
788          */
789         private static final int NUM_ENTITIES = 684;
790         private static Map<String, UnicodeBlock> map =
791                 new HashMap<>((int)(NUM_ENTITIES / 0.75f + 1.0f));
792 
793         /**
794          * Creates a UnicodeBlock with the given identifier name.
795          * This name must be the same as the block identifier.
796          */
UnicodeBlock(String idName)797         private UnicodeBlock(String idName) {
798             super(idName);
799             map.put(idName, this);
800         }
801 
802         // BEGIN Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
803         // Add a (String, boolean) constructor for use by SURROGATES_AREA.
UnicodeBlock(String idName, boolean isMap)804         private UnicodeBlock(String idName, boolean isMap) {
805             super(idName);
806             if (isMap) {
807                 map.put(idName, this);
808             }
809         }
810         // END Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
811 
812         /**
813          * Creates a UnicodeBlock with the given identifier name and
814          * alias name.
815          */
UnicodeBlock(String idName, String alias)816         private UnicodeBlock(String idName, String alias) {
817             this(idName);
818             map.put(alias, this);
819         }
820 
821         /**
822          * Creates a UnicodeBlock with the given identifier name and
823          * alias names.
824          */
UnicodeBlock(String idName, String... aliases)825         private UnicodeBlock(String idName, String... aliases) {
826             this(idName);
827             for (String alias : aliases)
828                 map.put(alias, this);
829         }
830 
831         /**
832          * Constant for the "Basic Latin" Unicode character block.
833          * @since 1.2
834          */
835         public static final UnicodeBlock  BASIC_LATIN =
836             new UnicodeBlock("BASIC_LATIN",
837                              "BASIC LATIN",
838                              "BASICLATIN");
839 
840         /**
841          * Constant for the "Latin-1 Supplement" Unicode character block.
842          * @since 1.2
843          */
844         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
845             new UnicodeBlock("LATIN_1_SUPPLEMENT",
846                              "LATIN-1 SUPPLEMENT",
847                              "LATIN-1SUPPLEMENT");
848 
849         /**
850          * Constant for the "Latin Extended-A" Unicode character block.
851          * @since 1.2
852          */
853         public static final UnicodeBlock LATIN_EXTENDED_A =
854             new UnicodeBlock("LATIN_EXTENDED_A",
855                              "LATIN EXTENDED-A",
856                              "LATINEXTENDED-A");
857 
858         /**
859          * Constant for the "Latin Extended-B" Unicode character block.
860          * @since 1.2
861          */
862         public static final UnicodeBlock LATIN_EXTENDED_B =
863             new UnicodeBlock("LATIN_EXTENDED_B",
864                              "LATIN EXTENDED-B",
865                              "LATINEXTENDED-B");
866 
867         /**
868          * Constant for the "IPA Extensions" Unicode character block.
869          * @since 1.2
870          */
871         public static final UnicodeBlock IPA_EXTENSIONS =
872             new UnicodeBlock("IPA_EXTENSIONS",
873                              "IPA EXTENSIONS",
874                              "IPAEXTENSIONS");
875 
876         /**
877          * Constant for the "Spacing Modifier Letters" Unicode character block.
878          * @since 1.2
879          */
880         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
881             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
882                              "SPACING MODIFIER LETTERS",
883                              "SPACINGMODIFIERLETTERS");
884 
885         /**
886          * Constant for the "Combining Diacritical Marks" Unicode character block.
887          * @since 1.2
888          */
889         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
890             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
891                              "COMBINING DIACRITICAL MARKS",
892                              "COMBININGDIACRITICALMARKS");
893 
894         /**
895          * Constant for the "Greek and Coptic" Unicode character block.
896          * <p>
897          * This block was previously known as the "Greek" block.
898          *
899          * @since 1.2
900          */
901         public static final UnicodeBlock GREEK =
902             new UnicodeBlock("GREEK",
903                              "GREEK AND COPTIC",
904                              "GREEKANDCOPTIC");
905 
906         /**
907          * Constant for the "Cyrillic" Unicode character block.
908          * @since 1.2
909          */
910         public static final UnicodeBlock CYRILLIC =
911             new UnicodeBlock("CYRILLIC");
912 
913         /**
914          * Constant for the "Armenian" Unicode character block.
915          * @since 1.2
916          */
917         public static final UnicodeBlock ARMENIAN =
918             new UnicodeBlock("ARMENIAN");
919 
920         /**
921          * Constant for the "Hebrew" Unicode character block.
922          * @since 1.2
923          */
924         public static final UnicodeBlock HEBREW =
925             new UnicodeBlock("HEBREW");
926 
927         /**
928          * Constant for the "Arabic" Unicode character block.
929          * @since 1.2
930          */
931         public static final UnicodeBlock ARABIC =
932             new UnicodeBlock("ARABIC");
933 
934         /**
935          * Constant for the "Devanagari" Unicode character block.
936          * @since 1.2
937          */
938         public static final UnicodeBlock DEVANAGARI =
939             new UnicodeBlock("DEVANAGARI");
940 
941         /**
942          * Constant for the "Bengali" Unicode character block.
943          * @since 1.2
944          */
945         public static final UnicodeBlock BENGALI =
946             new UnicodeBlock("BENGALI");
947 
948         /**
949          * Constant for the "Gurmukhi" Unicode character block.
950          * @since 1.2
951          */
952         public static final UnicodeBlock GURMUKHI =
953             new UnicodeBlock("GURMUKHI");
954 
955         /**
956          * Constant for the "Gujarati" Unicode character block.
957          * @since 1.2
958          */
959         public static final UnicodeBlock GUJARATI =
960             new UnicodeBlock("GUJARATI");
961 
962         /**
963          * Constant for the "Oriya" Unicode character block.
964          * @since 1.2
965          */
966         public static final UnicodeBlock ORIYA =
967             new UnicodeBlock("ORIYA");
968 
969         /**
970          * Constant for the "Tamil" Unicode character block.
971          * @since 1.2
972          */
973         public static final UnicodeBlock TAMIL =
974             new UnicodeBlock("TAMIL");
975 
976         /**
977          * Constant for the "Telugu" Unicode character block.
978          * @since 1.2
979          */
980         public static final UnicodeBlock TELUGU =
981             new UnicodeBlock("TELUGU");
982 
983         /**
984          * Constant for the "Kannada" Unicode character block.
985          * @since 1.2
986          */
987         public static final UnicodeBlock KANNADA =
988             new UnicodeBlock("KANNADA");
989 
990         /**
991          * Constant for the "Malayalam" Unicode character block.
992          * @since 1.2
993          */
994         public static final UnicodeBlock MALAYALAM =
995             new UnicodeBlock("MALAYALAM");
996 
997         /**
998          * Constant for the "Thai" Unicode character block.
999          * @since 1.2
1000          */
1001         public static final UnicodeBlock THAI =
1002             new UnicodeBlock("THAI");
1003 
1004         /**
1005          * Constant for the "Lao" Unicode character block.
1006          * @since 1.2
1007          */
1008         public static final UnicodeBlock LAO =
1009             new UnicodeBlock("LAO");
1010 
1011         /**
1012          * Constant for the "Tibetan" Unicode character block.
1013          * @since 1.2
1014          */
1015         public static final UnicodeBlock TIBETAN =
1016             new UnicodeBlock("TIBETAN");
1017 
1018         /**
1019          * Constant for the "Georgian" Unicode character block.
1020          * @since 1.2
1021          */
1022         public static final UnicodeBlock GEORGIAN =
1023             new UnicodeBlock("GEORGIAN");
1024 
1025         /**
1026          * Constant for the "Hangul Jamo" Unicode character block.
1027          * @since 1.2
1028          */
1029         public static final UnicodeBlock HANGUL_JAMO =
1030             new UnicodeBlock("HANGUL_JAMO",
1031                              "HANGUL JAMO",
1032                              "HANGULJAMO");
1033 
1034         /**
1035          * Constant for the "Latin Extended Additional" Unicode character block.
1036          * @since 1.2
1037          */
1038         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
1039             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
1040                              "LATIN EXTENDED ADDITIONAL",
1041                              "LATINEXTENDEDADDITIONAL");
1042 
1043         /**
1044          * Constant for the "Greek Extended" Unicode character block.
1045          * @since 1.2
1046          */
1047         public static final UnicodeBlock GREEK_EXTENDED =
1048             new UnicodeBlock("GREEK_EXTENDED",
1049                              "GREEK EXTENDED",
1050                              "GREEKEXTENDED");
1051 
1052         /**
1053          * Constant for the "General Punctuation" Unicode character block.
1054          * @since 1.2
1055          */
1056         public static final UnicodeBlock GENERAL_PUNCTUATION =
1057             new UnicodeBlock("GENERAL_PUNCTUATION",
1058                              "GENERAL PUNCTUATION",
1059                              "GENERALPUNCTUATION");
1060 
1061         /**
1062          * Constant for the "Superscripts and Subscripts" Unicode character
1063          * block.
1064          * @since 1.2
1065          */
1066         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
1067             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
1068                              "SUPERSCRIPTS AND SUBSCRIPTS",
1069                              "SUPERSCRIPTSANDSUBSCRIPTS");
1070 
1071         /**
1072          * Constant for the "Currency Symbols" Unicode character block.
1073          * @since 1.2
1074          */
1075         public static final UnicodeBlock CURRENCY_SYMBOLS =
1076             new UnicodeBlock("CURRENCY_SYMBOLS",
1077                              "CURRENCY SYMBOLS",
1078                              "CURRENCYSYMBOLS");
1079 
1080         /**
1081          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
1082          * character block.
1083          * <p>
1084          * This block was previously known as "Combining Marks for Symbols".
1085          * @since 1.2
1086          */
1087         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
1088             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
1089                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
1090                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
1091                              "COMBINING MARKS FOR SYMBOLS",
1092                              "COMBININGMARKSFORSYMBOLS");
1093 
1094         /**
1095          * Constant for the "Letterlike Symbols" Unicode character block.
1096          * @since 1.2
1097          */
1098         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
1099             new UnicodeBlock("LETTERLIKE_SYMBOLS",
1100                              "LETTERLIKE SYMBOLS",
1101                              "LETTERLIKESYMBOLS");
1102 
1103         /**
1104          * Constant for the "Number Forms" Unicode character block.
1105          * @since 1.2
1106          */
1107         public static final UnicodeBlock NUMBER_FORMS =
1108             new UnicodeBlock("NUMBER_FORMS",
1109                              "NUMBER FORMS",
1110                              "NUMBERFORMS");
1111 
1112         /**
1113          * Constant for the "Arrows" Unicode character block.
1114          * @since 1.2
1115          */
1116         public static final UnicodeBlock ARROWS =
1117             new UnicodeBlock("ARROWS");
1118 
1119         /**
1120          * Constant for the "Mathematical Operators" Unicode character block.
1121          * @since 1.2
1122          */
1123         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1124             new UnicodeBlock("MATHEMATICAL_OPERATORS",
1125                              "MATHEMATICAL OPERATORS",
1126                              "MATHEMATICALOPERATORS");
1127 
1128         /**
1129          * Constant for the "Miscellaneous Technical" Unicode character block.
1130          * @since 1.2
1131          */
1132         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1133             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1134                              "MISCELLANEOUS TECHNICAL",
1135                              "MISCELLANEOUSTECHNICAL");
1136 
1137         /**
1138          * Constant for the "Control Pictures" Unicode character block.
1139          * @since 1.2
1140          */
1141         public static final UnicodeBlock CONTROL_PICTURES =
1142             new UnicodeBlock("CONTROL_PICTURES",
1143                              "CONTROL PICTURES",
1144                              "CONTROLPICTURES");
1145 
1146         /**
1147          * Constant for the "Optical Character Recognition" Unicode character block.
1148          * @since 1.2
1149          */
1150         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1151             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1152                              "OPTICAL CHARACTER RECOGNITION",
1153                              "OPTICALCHARACTERRECOGNITION");
1154 
1155         /**
1156          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1157          * @since 1.2
1158          */
1159         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1160             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1161                              "ENCLOSED ALPHANUMERICS",
1162                              "ENCLOSEDALPHANUMERICS");
1163 
1164         /**
1165          * Constant for the "Box Drawing" Unicode character block.
1166          * @since 1.2
1167          */
1168         public static final UnicodeBlock BOX_DRAWING =
1169             new UnicodeBlock("BOX_DRAWING",
1170                              "BOX DRAWING",
1171                              "BOXDRAWING");
1172 
1173         /**
1174          * Constant for the "Block Elements" Unicode character block.
1175          * @since 1.2
1176          */
1177         public static final UnicodeBlock BLOCK_ELEMENTS =
1178             new UnicodeBlock("BLOCK_ELEMENTS",
1179                              "BLOCK ELEMENTS",
1180                              "BLOCKELEMENTS");
1181 
1182         /**
1183          * Constant for the "Geometric Shapes" Unicode character block.
1184          * @since 1.2
1185          */
1186         public static final UnicodeBlock GEOMETRIC_SHAPES =
1187             new UnicodeBlock("GEOMETRIC_SHAPES",
1188                              "GEOMETRIC SHAPES",
1189                              "GEOMETRICSHAPES");
1190 
1191         /**
1192          * Constant for the "Miscellaneous Symbols" Unicode character block.
1193          * @since 1.2
1194          */
1195         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1196             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1197                              "MISCELLANEOUS SYMBOLS",
1198                              "MISCELLANEOUSSYMBOLS");
1199 
1200         /**
1201          * Constant for the "Dingbats" Unicode character block.
1202          * @since 1.2
1203          */
1204         public static final UnicodeBlock DINGBATS =
1205             new UnicodeBlock("DINGBATS");
1206 
1207         /**
1208          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1209          * @since 1.2
1210          */
1211         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1212             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1213                              "CJK SYMBOLS AND PUNCTUATION",
1214                              "CJKSYMBOLSANDPUNCTUATION");
1215 
1216         /**
1217          * Constant for the "Hiragana" Unicode character block.
1218          * @since 1.2
1219          */
1220         public static final UnicodeBlock HIRAGANA =
1221             new UnicodeBlock("HIRAGANA");
1222 
1223         /**
1224          * Constant for the "Katakana" Unicode character block.
1225          * @since 1.2
1226          */
1227         public static final UnicodeBlock KATAKANA =
1228             new UnicodeBlock("KATAKANA");
1229 
1230         /**
1231          * Constant for the "Bopomofo" Unicode character block.
1232          * @since 1.2
1233          */
1234         public static final UnicodeBlock BOPOMOFO =
1235             new UnicodeBlock("BOPOMOFO");
1236 
1237         /**
1238          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1239          * @since 1.2
1240          */
1241         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1242             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1243                              "HANGUL COMPATIBILITY JAMO",
1244                              "HANGULCOMPATIBILITYJAMO");
1245 
1246         /**
1247          * Constant for the "Kanbun" Unicode character block.
1248          * @since 1.2
1249          */
1250         public static final UnicodeBlock KANBUN =
1251             new UnicodeBlock("KANBUN");
1252 
1253         /**
1254          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1255          * @since 1.2
1256          */
1257         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1258             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1259                              "ENCLOSED CJK LETTERS AND MONTHS",
1260                              "ENCLOSEDCJKLETTERSANDMONTHS");
1261 
1262         /**
1263          * Constant for the "CJK Compatibility" Unicode character block.
1264          * @since 1.2
1265          */
1266         public static final UnicodeBlock CJK_COMPATIBILITY =
1267             new UnicodeBlock("CJK_COMPATIBILITY",
1268                              "CJK COMPATIBILITY",
1269                              "CJKCOMPATIBILITY");
1270 
1271         /**
1272          * Constant for the "CJK Unified Ideographs" Unicode character block.
1273          * @since 1.2
1274          */
1275         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1276             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1277                              "CJK UNIFIED IDEOGRAPHS",
1278                              "CJKUNIFIEDIDEOGRAPHS");
1279 
1280         /**
1281          * Constant for the "Hangul Syllables" Unicode character block.
1282          * @since 1.2
1283          */
1284         public static final UnicodeBlock HANGUL_SYLLABLES =
1285             new UnicodeBlock("HANGUL_SYLLABLES",
1286                              "HANGUL SYLLABLES",
1287                              "HANGULSYLLABLES");
1288 
1289         /**
1290          * Constant for the "Private Use Area" Unicode character block.
1291          * @since 1.2
1292          */
1293         public static final UnicodeBlock PRIVATE_USE_AREA =
1294             new UnicodeBlock("PRIVATE_USE_AREA",
1295                              "PRIVATE USE AREA",
1296                              "PRIVATEUSEAREA");
1297 
1298         /**
1299          * Constant for the "CJK Compatibility Ideographs" Unicode character
1300          * block.
1301          * @since 1.2
1302          */
1303         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1304             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1305                              "CJK COMPATIBILITY IDEOGRAPHS",
1306                              "CJKCOMPATIBILITYIDEOGRAPHS");
1307 
1308         /**
1309          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1310          * @since 1.2
1311          */
1312         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1313             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1314                              "ALPHABETIC PRESENTATION FORMS",
1315                              "ALPHABETICPRESENTATIONFORMS");
1316 
1317         /**
1318          * Constant for the "Arabic Presentation Forms-A" Unicode character
1319          * block.
1320          * @since 1.2
1321          */
1322         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1323             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1324                              "ARABIC PRESENTATION FORMS-A",
1325                              "ARABICPRESENTATIONFORMS-A");
1326 
1327         /**
1328          * Constant for the "Combining Half Marks" Unicode character block.
1329          * @since 1.2
1330          */
1331         public static final UnicodeBlock COMBINING_HALF_MARKS =
1332             new UnicodeBlock("COMBINING_HALF_MARKS",
1333                              "COMBINING HALF MARKS",
1334                              "COMBININGHALFMARKS");
1335 
1336         /**
1337          * Constant for the "CJK Compatibility Forms" Unicode character block.
1338          * @since 1.2
1339          */
1340         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1341             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1342                              "CJK COMPATIBILITY FORMS",
1343                              "CJKCOMPATIBILITYFORMS");
1344 
1345         /**
1346          * Constant for the "Small Form Variants" Unicode character block.
1347          * @since 1.2
1348          */
1349         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1350             new UnicodeBlock("SMALL_FORM_VARIANTS",
1351                              "SMALL FORM VARIANTS",
1352                              "SMALLFORMVARIANTS");
1353 
1354         /**
1355          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1356          * @since 1.2
1357          */
1358         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1359             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1360                              "ARABIC PRESENTATION FORMS-B",
1361                              "ARABICPRESENTATIONFORMS-B");
1362 
1363         /**
1364          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1365          * block.
1366          * @since 1.2
1367          */
1368         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1369             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1370                              "HALFWIDTH AND FULLWIDTH FORMS",
1371                              "HALFWIDTHANDFULLWIDTHFORMS");
1372 
1373         /**
1374          * Constant for the "Specials" Unicode character block.
1375          * @since 1.2
1376          */
1377         public static final UnicodeBlock SPECIALS =
1378             new UnicodeBlock("SPECIALS");
1379 
1380         /**
1381          * @deprecated
1382          * Instead of {@code SURROGATES_AREA}, use {@link #HIGH_SURROGATES},
1383          * {@link #HIGH_PRIVATE_USE_SURROGATES}, and {@link #LOW_SURROGATES}.
1384          * These constants match the block definitions of the Unicode Standard.
1385          * The {@link #of(char)} and {@link #of(int)} methods return the
1386          * standard constants.
1387          */
1388         @Deprecated(since="1.5")
1389         public static final UnicodeBlock SURROGATES_AREA =
1390             // Android-changed: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
1391             // new UnicodeBlock("SURROGATES_AREA");
1392             new UnicodeBlock("SURROGATES_AREA", false);
1393 
1394         /**
1395          * Constant for the "Syriac" Unicode character block.
1396          * @since 1.4
1397          */
1398         public static final UnicodeBlock SYRIAC =
1399             new UnicodeBlock("SYRIAC");
1400 
1401         /**
1402          * Constant for the "Thaana" Unicode character block.
1403          * @since 1.4
1404          */
1405         public static final UnicodeBlock THAANA =
1406             new UnicodeBlock("THAANA");
1407 
1408         /**
1409          * Constant for the "Sinhala" Unicode character block.
1410          * @since 1.4
1411          */
1412         public static final UnicodeBlock SINHALA =
1413             new UnicodeBlock("SINHALA");
1414 
1415         /**
1416          * Constant for the "Myanmar" Unicode character block.
1417          * @since 1.4
1418          */
1419         public static final UnicodeBlock MYANMAR =
1420             new UnicodeBlock("MYANMAR");
1421 
1422         /**
1423          * Constant for the "Ethiopic" Unicode character block.
1424          * @since 1.4
1425          */
1426         public static final UnicodeBlock ETHIOPIC =
1427             new UnicodeBlock("ETHIOPIC");
1428 
1429         /**
1430          * Constant for the "Cherokee" Unicode character block.
1431          * @since 1.4
1432          */
1433         public static final UnicodeBlock CHEROKEE =
1434             new UnicodeBlock("CHEROKEE");
1435 
1436         /**
1437          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1438          * @since 1.4
1439          */
1440         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1441             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1442                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1443                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1444 
1445         /**
1446          * Constant for the "Ogham" Unicode character block.
1447          * @since 1.4
1448          */
1449         public static final UnicodeBlock OGHAM =
1450             new UnicodeBlock("OGHAM");
1451 
1452         /**
1453          * Constant for the "Runic" Unicode character block.
1454          * @since 1.4
1455          */
1456         public static final UnicodeBlock RUNIC =
1457             new UnicodeBlock("RUNIC");
1458 
1459         /**
1460          * Constant for the "Khmer" Unicode character block.
1461          * @since 1.4
1462          */
1463         public static final UnicodeBlock KHMER =
1464             new UnicodeBlock("KHMER");
1465 
1466         /**
1467          * Constant for the "Mongolian" Unicode character block.
1468          * @since 1.4
1469          */
1470         public static final UnicodeBlock MONGOLIAN =
1471             new UnicodeBlock("MONGOLIAN");
1472 
1473         /**
1474          * Constant for the "Braille Patterns" Unicode character block.
1475          * @since 1.4
1476          */
1477         public static final UnicodeBlock BRAILLE_PATTERNS =
1478             new UnicodeBlock("BRAILLE_PATTERNS",
1479                              "BRAILLE PATTERNS",
1480                              "BRAILLEPATTERNS");
1481 
1482         /**
1483          * Constant for the "CJK Radicals Supplement" Unicode character block.
1484          * @since 1.4
1485          */
1486         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1487             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1488                              "CJK RADICALS SUPPLEMENT",
1489                              "CJKRADICALSSUPPLEMENT");
1490 
1491         /**
1492          * Constant for the "Kangxi Radicals" Unicode character block.
1493          * @since 1.4
1494          */
1495         public static final UnicodeBlock KANGXI_RADICALS =
1496             new UnicodeBlock("KANGXI_RADICALS",
1497                              "KANGXI RADICALS",
1498                              "KANGXIRADICALS");
1499 
1500         /**
1501          * Constant for the "Ideographic Description Characters" Unicode character block.
1502          * @since 1.4
1503          */
1504         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1505             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1506                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1507                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1508 
1509         /**
1510          * Constant for the "Bopomofo Extended" Unicode character block.
1511          * @since 1.4
1512          */
1513         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1514             new UnicodeBlock("BOPOMOFO_EXTENDED",
1515                              "BOPOMOFO EXTENDED",
1516                              "BOPOMOFOEXTENDED");
1517 
1518         /**
1519          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1520          * @since 1.4
1521          */
1522         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1523             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1524                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1525                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1526 
1527         /**
1528          * Constant for the "Yi Syllables" Unicode character block.
1529          * @since 1.4
1530          */
1531         public static final UnicodeBlock YI_SYLLABLES =
1532             new UnicodeBlock("YI_SYLLABLES",
1533                              "YI SYLLABLES",
1534                              "YISYLLABLES");
1535 
1536         /**
1537          * Constant for the "Yi Radicals" Unicode character block.
1538          * @since 1.4
1539          */
1540         public static final UnicodeBlock YI_RADICALS =
1541             new UnicodeBlock("YI_RADICALS",
1542                              "YI RADICALS",
1543                              "YIRADICALS");
1544 
1545         /**
1546          * Constant for the "Cyrillic Supplement" Unicode character block.
1547          * This block was previously known as the "Cyrillic Supplementary" block.
1548          * @since 1.5
1549          */
1550         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1551             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1552                              "CYRILLIC SUPPLEMENTARY",
1553                              "CYRILLICSUPPLEMENTARY",
1554                              "CYRILLIC SUPPLEMENT",
1555                              "CYRILLICSUPPLEMENT");
1556 
1557         /**
1558          * Constant for the "Tagalog" Unicode character block.
1559          * @since 1.5
1560          */
1561         public static final UnicodeBlock TAGALOG =
1562             new UnicodeBlock("TAGALOG");
1563 
1564         /**
1565          * Constant for the "Hanunoo" Unicode character block.
1566          * @since 1.5
1567          */
1568         public static final UnicodeBlock HANUNOO =
1569             new UnicodeBlock("HANUNOO");
1570 
1571         /**
1572          * Constant for the "Buhid" Unicode character block.
1573          * @since 1.5
1574          */
1575         public static final UnicodeBlock BUHID =
1576             new UnicodeBlock("BUHID");
1577 
1578         /**
1579          * Constant for the "Tagbanwa" Unicode character block.
1580          * @since 1.5
1581          */
1582         public static final UnicodeBlock TAGBANWA =
1583             new UnicodeBlock("TAGBANWA");
1584 
1585         /**
1586          * Constant for the "Limbu" Unicode character block.
1587          * @since 1.5
1588          */
1589         public static final UnicodeBlock LIMBU =
1590             new UnicodeBlock("LIMBU");
1591 
1592         /**
1593          * Constant for the "Tai Le" Unicode character block.
1594          * @since 1.5
1595          */
1596         public static final UnicodeBlock TAI_LE =
1597             new UnicodeBlock("TAI_LE",
1598                              "TAI LE",
1599                              "TAILE");
1600 
1601         /**
1602          * Constant for the "Khmer Symbols" Unicode character block.
1603          * @since 1.5
1604          */
1605         public static final UnicodeBlock KHMER_SYMBOLS =
1606             new UnicodeBlock("KHMER_SYMBOLS",
1607                              "KHMER SYMBOLS",
1608                              "KHMERSYMBOLS");
1609 
1610         /**
1611          * Constant for the "Phonetic Extensions" Unicode character block.
1612          * @since 1.5
1613          */
1614         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1615             new UnicodeBlock("PHONETIC_EXTENSIONS",
1616                              "PHONETIC EXTENSIONS",
1617                              "PHONETICEXTENSIONS");
1618 
1619         /**
1620          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1621          * @since 1.5
1622          */
1623         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1624             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1625                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1626                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1627 
1628         /**
1629          * Constant for the "Supplemental Arrows-A" Unicode character block.
1630          * @since 1.5
1631          */
1632         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1633             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1634                              "SUPPLEMENTAL ARROWS-A",
1635                              "SUPPLEMENTALARROWS-A");
1636 
1637         /**
1638          * Constant for the "Supplemental Arrows-B" Unicode character block.
1639          * @since 1.5
1640          */
1641         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1642             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1643                              "SUPPLEMENTAL ARROWS-B",
1644                              "SUPPLEMENTALARROWS-B");
1645 
1646         /**
1647          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1648          * character block.
1649          * @since 1.5
1650          */
1651         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1652             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1653                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1654                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1655 
1656         /**
1657          * Constant for the "Supplemental Mathematical Operators" Unicode
1658          * character block.
1659          * @since 1.5
1660          */
1661         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1662             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1663                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1664                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1665 
1666         /**
1667          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1668          * block.
1669          * @since 1.5
1670          */
1671         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1672             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1673                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1674                              "MISCELLANEOUSSYMBOLSANDARROWS");
1675 
1676         /**
1677          * Constant for the "Katakana Phonetic Extensions" Unicode character
1678          * block.
1679          * @since 1.5
1680          */
1681         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1682             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1683                              "KATAKANA PHONETIC EXTENSIONS",
1684                              "KATAKANAPHONETICEXTENSIONS");
1685 
1686         /**
1687          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1688          * @since 1.5
1689          */
1690         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1691             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1692                              "YIJING HEXAGRAM SYMBOLS",
1693                              "YIJINGHEXAGRAMSYMBOLS");
1694 
1695         /**
1696          * Constant for the "Variation Selectors" Unicode character block.
1697          * @since 1.5
1698          */
1699         public static final UnicodeBlock VARIATION_SELECTORS =
1700             new UnicodeBlock("VARIATION_SELECTORS",
1701                              "VARIATION SELECTORS",
1702                              "VARIATIONSELECTORS");
1703 
1704         /**
1705          * Constant for the "Linear B Syllabary" Unicode character block.
1706          * @since 1.5
1707          */
1708         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1709             new UnicodeBlock("LINEAR_B_SYLLABARY",
1710                              "LINEAR B SYLLABARY",
1711                              "LINEARBSYLLABARY");
1712 
1713         /**
1714          * Constant for the "Linear B Ideograms" Unicode character block.
1715          * @since 1.5
1716          */
1717         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1718             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1719                              "LINEAR B IDEOGRAMS",
1720                              "LINEARBIDEOGRAMS");
1721 
1722         /**
1723          * Constant for the "Aegean Numbers" Unicode character block.
1724          * @since 1.5
1725          */
1726         public static final UnicodeBlock AEGEAN_NUMBERS =
1727             new UnicodeBlock("AEGEAN_NUMBERS",
1728                              "AEGEAN NUMBERS",
1729                              "AEGEANNUMBERS");
1730 
1731         /**
1732          * Constant for the "Old Italic" Unicode character block.
1733          * @since 1.5
1734          */
1735         public static final UnicodeBlock OLD_ITALIC =
1736             new UnicodeBlock("OLD_ITALIC",
1737                              "OLD ITALIC",
1738                              "OLDITALIC");
1739 
1740         /**
1741          * Constant for the "Gothic" Unicode character block.
1742          * @since 1.5
1743          */
1744         public static final UnicodeBlock GOTHIC =
1745             new UnicodeBlock("GOTHIC");
1746 
1747         /**
1748          * Constant for the "Ugaritic" Unicode character block.
1749          * @since 1.5
1750          */
1751         public static final UnicodeBlock UGARITIC =
1752             new UnicodeBlock("UGARITIC");
1753 
1754         /**
1755          * Constant for the "Deseret" Unicode character block.
1756          * @since 1.5
1757          */
1758         public static final UnicodeBlock DESERET =
1759             new UnicodeBlock("DESERET");
1760 
1761         /**
1762          * Constant for the "Shavian" Unicode character block.
1763          * @since 1.5
1764          */
1765         public static final UnicodeBlock SHAVIAN =
1766             new UnicodeBlock("SHAVIAN");
1767 
1768         /**
1769          * Constant for the "Osmanya" Unicode character block.
1770          * @since 1.5
1771          */
1772         public static final UnicodeBlock OSMANYA =
1773             new UnicodeBlock("OSMANYA");
1774 
1775         /**
1776          * Constant for the "Cypriot Syllabary" Unicode character block.
1777          * @since 1.5
1778          */
1779         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1780             new UnicodeBlock("CYPRIOT_SYLLABARY",
1781                              "CYPRIOT SYLLABARY",
1782                              "CYPRIOTSYLLABARY");
1783 
1784         /**
1785          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1786          * @since 1.5
1787          */
1788         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1789             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",
1790                              "BYZANTINE MUSICAL SYMBOLS",
1791                              "BYZANTINEMUSICALSYMBOLS");
1792 
1793         /**
1794          * Constant for the "Musical Symbols" Unicode character block.
1795          * @since 1.5
1796          */
1797         public static final UnicodeBlock MUSICAL_SYMBOLS =
1798             new UnicodeBlock("MUSICAL_SYMBOLS",
1799                              "MUSICAL SYMBOLS",
1800                              "MUSICALSYMBOLS");
1801 
1802         /**
1803          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1804          * @since 1.5
1805          */
1806         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1807             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",
1808                              "TAI XUAN JING SYMBOLS",
1809                              "TAIXUANJINGSYMBOLS");
1810 
1811         /**
1812          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1813          * character block.
1814          * @since 1.5
1815          */
1816         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1817             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1818                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",
1819                              "MATHEMATICALALPHANUMERICSYMBOLS");
1820 
1821         /**
1822          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1823          * character block.
1824          * @since 1.5
1825          */
1826         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1827             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1828                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",
1829                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");
1830 
1831         /**
1832          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1833          * @since 1.5
1834          */
1835         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1836             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1837                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",
1838                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");
1839 
1840         /**
1841          * Constant for the "Tags" Unicode character block.
1842          * @since 1.5
1843          */
1844         public static final UnicodeBlock TAGS =
1845             new UnicodeBlock("TAGS");
1846 
1847         /**
1848          * Constant for the "Variation Selectors Supplement" Unicode character
1849          * block.
1850          * @since 1.5
1851          */
1852         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1853             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",
1854                              "VARIATION SELECTORS SUPPLEMENT",
1855                              "VARIATIONSELECTORSSUPPLEMENT");
1856 
1857         /**
1858          * Constant for the "Supplementary Private Use Area-A" Unicode character
1859          * block.
1860          * @since 1.5
1861          */
1862         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1863             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1864                              "SUPPLEMENTARY PRIVATE USE AREA-A",
1865                              "SUPPLEMENTARYPRIVATEUSEAREA-A");
1866 
1867         /**
1868          * Constant for the "Supplementary Private Use Area-B" Unicode character
1869          * block.
1870          * @since 1.5
1871          */
1872         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1873             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1874                              "SUPPLEMENTARY PRIVATE USE AREA-B",
1875                              "SUPPLEMENTARYPRIVATEUSEAREA-B");
1876 
1877         /**
1878          * Constant for the "High Surrogates" Unicode character block.
1879          * This block represents code point values in the high surrogate
1880          * range: U+D800 through U+DB7F.
1881          *
1882          * @since 1.5
1883          */
1884         public static final UnicodeBlock HIGH_SURROGATES =
1885             new UnicodeBlock("HIGH_SURROGATES",
1886                              "HIGH SURROGATES",
1887                              "HIGHSURROGATES");
1888 
1889         /**
1890          * Constant for the "High Private Use Surrogates" Unicode character
1891          * block.
1892          * This block represents code point values in the private use high
1893          * surrogate range: U+DB80 through U+DBFF.
1894          *
1895          * @since 1.5
1896          */
1897         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1898             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",
1899                              "HIGH PRIVATE USE SURROGATES",
1900                              "HIGHPRIVATEUSESURROGATES");
1901 
1902         /**
1903          * Constant for the "Low Surrogates" Unicode character block.
1904          * This block represents code point values in the low surrogate
1905          * range: U+DC00 through U+DFFF.
1906          *
1907          * @since 1.5
1908          */
1909         public static final UnicodeBlock LOW_SURROGATES =
1910             new UnicodeBlock("LOW_SURROGATES",
1911                              "LOW SURROGATES",
1912                              "LOWSURROGATES");
1913 
1914         /**
1915          * Constant for the "Arabic Supplement" Unicode character block.
1916          * @since 1.7
1917          */
1918         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1919             new UnicodeBlock("ARABIC_SUPPLEMENT",
1920                              "ARABIC SUPPLEMENT",
1921                              "ARABICSUPPLEMENT");
1922 
1923         /**
1924          * Constant for the "NKo" Unicode character block.
1925          * @since 1.7
1926          */
1927         public static final UnicodeBlock NKO =
1928             new UnicodeBlock("NKO");
1929 
1930         /**
1931          * Constant for the "Samaritan" Unicode character block.
1932          * @since 1.7
1933          */
1934         public static final UnicodeBlock SAMARITAN =
1935             new UnicodeBlock("SAMARITAN");
1936 
1937         /**
1938          * Constant for the "Mandaic" Unicode character block.
1939          * @since 1.7
1940          */
1941         public static final UnicodeBlock MANDAIC =
1942             new UnicodeBlock("MANDAIC");
1943 
1944         /**
1945          * Constant for the "Ethiopic Supplement" Unicode character block.
1946          * @since 1.7
1947          */
1948         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1949             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",
1950                              "ETHIOPIC SUPPLEMENT",
1951                              "ETHIOPICSUPPLEMENT");
1952 
1953         /**
1954          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1955          * Unicode character block.
1956          * @since 1.7
1957          */
1958         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1959             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1960                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",
1961                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");
1962 
1963         /**
1964          * Constant for the "New Tai Lue" Unicode character block.
1965          * @since 1.7
1966          */
1967         public static final UnicodeBlock NEW_TAI_LUE =
1968             new UnicodeBlock("NEW_TAI_LUE",
1969                              "NEW TAI LUE",
1970                              "NEWTAILUE");
1971 
1972         /**
1973          * Constant for the "Buginese" Unicode character block.
1974          * @since 1.7
1975          */
1976         public static final UnicodeBlock BUGINESE =
1977             new UnicodeBlock("BUGINESE");
1978 
1979         /**
1980          * Constant for the "Tai Tham" Unicode character block.
1981          * @since 1.7
1982          */
1983         public static final UnicodeBlock TAI_THAM =
1984             new UnicodeBlock("TAI_THAM",
1985                              "TAI THAM",
1986                              "TAITHAM");
1987 
1988         /**
1989          * Constant for the "Balinese" Unicode character block.
1990          * @since 1.7
1991          */
1992         public static final UnicodeBlock BALINESE =
1993             new UnicodeBlock("BALINESE");
1994 
1995         /**
1996          * Constant for the "Sundanese" Unicode character block.
1997          * @since 1.7
1998          */
1999         public static final UnicodeBlock SUNDANESE =
2000             new UnicodeBlock("SUNDANESE");
2001 
2002         /**
2003          * Constant for the "Batak" Unicode character block.
2004          * @since 1.7
2005          */
2006         public static final UnicodeBlock BATAK =
2007             new UnicodeBlock("BATAK");
2008 
2009         /**
2010          * Constant for the "Lepcha" Unicode character block.
2011          * @since 1.7
2012          */
2013         public static final UnicodeBlock LEPCHA =
2014             new UnicodeBlock("LEPCHA");
2015 
2016         /**
2017          * Constant for the "Ol Chiki" Unicode character block.
2018          * @since 1.7
2019          */
2020         public static final UnicodeBlock OL_CHIKI =
2021             new UnicodeBlock("OL_CHIKI",
2022                              "OL CHIKI",
2023                              "OLCHIKI");
2024 
2025         /**
2026          * Constant for the "Vedic Extensions" Unicode character block.
2027          * @since 1.7
2028          */
2029         public static final UnicodeBlock VEDIC_EXTENSIONS =
2030             new UnicodeBlock("VEDIC_EXTENSIONS",
2031                              "VEDIC EXTENSIONS",
2032                              "VEDICEXTENSIONS");
2033 
2034         /**
2035          * Constant for the "Phonetic Extensions Supplement" Unicode character
2036          * block.
2037          * @since 1.7
2038          */
2039         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
2040             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
2041                              "PHONETIC EXTENSIONS SUPPLEMENT",
2042                              "PHONETICEXTENSIONSSUPPLEMENT");
2043 
2044         /**
2045          * Constant for the "Combining Diacritical Marks Supplement" Unicode
2046          * character block.
2047          * @since 1.7
2048          */
2049         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
2050             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
2051                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",
2052                              "COMBININGDIACRITICALMARKSSUPPLEMENT");
2053 
2054         /**
2055          * Constant for the "Glagolitic" Unicode character block.
2056          * @since 1.7
2057          */
2058         public static final UnicodeBlock GLAGOLITIC =
2059             new UnicodeBlock("GLAGOLITIC");
2060 
2061         /**
2062          * Constant for the "Latin Extended-C" Unicode character block.
2063          * @since 1.7
2064          */
2065         public static final UnicodeBlock LATIN_EXTENDED_C =
2066             new UnicodeBlock("LATIN_EXTENDED_C",
2067                              "LATIN EXTENDED-C",
2068                              "LATINEXTENDED-C");
2069 
2070         /**
2071          * Constant for the "Coptic" Unicode character block.
2072          * @since 1.7
2073          */
2074         public static final UnicodeBlock COPTIC =
2075             new UnicodeBlock("COPTIC");
2076 
2077         /**
2078          * Constant for the "Georgian Supplement" Unicode character block.
2079          * @since 1.7
2080          */
2081         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
2082             new UnicodeBlock("GEORGIAN_SUPPLEMENT",
2083                              "GEORGIAN SUPPLEMENT",
2084                              "GEORGIANSUPPLEMENT");
2085 
2086         /**
2087          * Constant for the "Tifinagh" Unicode character block.
2088          * @since 1.7
2089          */
2090         public static final UnicodeBlock TIFINAGH =
2091             new UnicodeBlock("TIFINAGH");
2092 
2093         /**
2094          * Constant for the "Ethiopic Extended" Unicode character block.
2095          * @since 1.7
2096          */
2097         public static final UnicodeBlock ETHIOPIC_EXTENDED =
2098             new UnicodeBlock("ETHIOPIC_EXTENDED",
2099                              "ETHIOPIC EXTENDED",
2100                              "ETHIOPICEXTENDED");
2101 
2102         /**
2103          * Constant for the "Cyrillic Extended-A" Unicode character block.
2104          * @since 1.7
2105          */
2106         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2107             new UnicodeBlock("CYRILLIC_EXTENDED_A",
2108                              "CYRILLIC EXTENDED-A",
2109                              "CYRILLICEXTENDED-A");
2110 
2111         /**
2112          * Constant for the "Supplemental Punctuation" Unicode character block.
2113          * @since 1.7
2114          */
2115         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2116             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",
2117                              "SUPPLEMENTAL PUNCTUATION",
2118                              "SUPPLEMENTALPUNCTUATION");
2119 
2120         /**
2121          * Constant for the "CJK Strokes" Unicode character block.
2122          * @since 1.7
2123          */
2124         public static final UnicodeBlock CJK_STROKES =
2125             new UnicodeBlock("CJK_STROKES",
2126                              "CJK STROKES",
2127                              "CJKSTROKES");
2128 
2129         /**
2130          * Constant for the "Lisu" Unicode character block.
2131          * @since 1.7
2132          */
2133         public static final UnicodeBlock LISU =
2134             new UnicodeBlock("LISU");
2135 
2136         /**
2137          * Constant for the "Vai" Unicode character block.
2138          * @since 1.7
2139          */
2140         public static final UnicodeBlock VAI =
2141             new UnicodeBlock("VAI");
2142 
2143         /**
2144          * Constant for the "Cyrillic Extended-B" Unicode character block.
2145          * @since 1.7
2146          */
2147         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2148             new UnicodeBlock("CYRILLIC_EXTENDED_B",
2149                              "CYRILLIC EXTENDED-B",
2150                              "CYRILLICEXTENDED-B");
2151 
2152         /**
2153          * Constant for the "Bamum" Unicode character block.
2154          * @since 1.7
2155          */
2156         public static final UnicodeBlock BAMUM =
2157             new UnicodeBlock("BAMUM");
2158 
2159         /**
2160          * Constant for the "Modifier Tone Letters" Unicode character block.
2161          * @since 1.7
2162          */
2163         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2164             new UnicodeBlock("MODIFIER_TONE_LETTERS",
2165                              "MODIFIER TONE LETTERS",
2166                              "MODIFIERTONELETTERS");
2167 
2168         /**
2169          * Constant for the "Latin Extended-D" Unicode character block.
2170          * @since 1.7
2171          */
2172         public static final UnicodeBlock LATIN_EXTENDED_D =
2173             new UnicodeBlock("LATIN_EXTENDED_D",
2174                              "LATIN EXTENDED-D",
2175                              "LATINEXTENDED-D");
2176 
2177         /**
2178          * Constant for the "Syloti Nagri" Unicode character block.
2179          * @since 1.7
2180          */
2181         public static final UnicodeBlock SYLOTI_NAGRI =
2182             new UnicodeBlock("SYLOTI_NAGRI",
2183                              "SYLOTI NAGRI",
2184                              "SYLOTINAGRI");
2185 
2186         /**
2187          * Constant for the "Common Indic Number Forms" Unicode character block.
2188          * @since 1.7
2189          */
2190         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2191             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",
2192                              "COMMON INDIC NUMBER FORMS",
2193                              "COMMONINDICNUMBERFORMS");
2194 
2195         /**
2196          * Constant for the "Phags-pa" Unicode character block.
2197          * @since 1.7
2198          */
2199         public static final UnicodeBlock PHAGS_PA =
2200             new UnicodeBlock("PHAGS_PA",
2201                              "PHAGS-PA"); // name has no spaces: a space-stripped alias would be identical
2202 
2203         /**
2204          * Constant for the "Saurashtra" Unicode character block.
2205          * @since 1.7
2206          */
2207         public static final UnicodeBlock SAURASHTRA =
2208             new UnicodeBlock("SAURASHTRA");
2209 
2210         /**
2211          * Constant for the "Devanagari Extended" Unicode character block.
2212          * @since 1.7
2213          */
2214         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2215             new UnicodeBlock("DEVANAGARI_EXTENDED",
2216                              "DEVANAGARI EXTENDED",
2217                              "DEVANAGARIEXTENDED");
2218 
2219         /**
2220          * Constant for the "Kayah Li" Unicode character block.
2221          * @since 1.7
2222          */
2223         public static final UnicodeBlock KAYAH_LI =
2224             new UnicodeBlock("KAYAH_LI",
2225                              "KAYAH LI",
2226                              "KAYAHLI");
2227 
2228         /**
2229          * Constant for the "Rejang" Unicode character block.
2230          * @since 1.7
2231          */
2232         public static final UnicodeBlock REJANG =
2233             new UnicodeBlock("REJANG");
2234 
2235         /**
2236          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2237          * @since 1.7
2238          */
2239         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2240             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",
2241                              "HANGUL JAMO EXTENDED-A",
2242                              "HANGULJAMOEXTENDED-A");
2243 
2244         /**
2245          * Constant for the "Javanese" Unicode character block.
2246          * @since 1.7
2247          */
2248         public static final UnicodeBlock JAVANESE =
2249             new UnicodeBlock("JAVANESE");
2250 
2251         /**
2252          * Constant for the "Cham" Unicode character block.
2253          * @since 1.7
2254          */
2255         public static final UnicodeBlock CHAM =
2256             new UnicodeBlock("CHAM");
2257 
2258         /**
2259          * Constant for the "Myanmar Extended-A" Unicode character block.
2260          * @since 1.7
2261          */
2262         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2263             new UnicodeBlock("MYANMAR_EXTENDED_A",
2264                              "MYANMAR EXTENDED-A",
2265                              "MYANMAREXTENDED-A");
2266 
2267         /**
2268          * Constant for the "Tai Viet" Unicode character block.
2269          * @since 1.7
2270          */
2271         public static final UnicodeBlock TAI_VIET =
2272             new UnicodeBlock("TAI_VIET",
2273                              "TAI VIET",
2274                              "TAIVIET");
2275 
2276         /**
2277          * Constant for the "Ethiopic Extended-A" Unicode character block.
2278          * @since 1.7
2279          */
2280         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2281             new UnicodeBlock("ETHIOPIC_EXTENDED_A",
2282                              "ETHIOPIC EXTENDED-A",
2283                              "ETHIOPICEXTENDED-A");
2284 
2285         /**
2286          * Constant for the "Meetei Mayek" Unicode character block.
2287          * @since 1.7
2288          */
2289         public static final UnicodeBlock MEETEI_MAYEK =
2290             new UnicodeBlock("MEETEI_MAYEK",
2291                              "MEETEI MAYEK",
2292                              "MEETEIMAYEK");
2293 
2294         /**
2295          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2296          * @since 1.7
2297          */
2298         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2299             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",
2300                              "HANGUL JAMO EXTENDED-B",
2301                              "HANGULJAMOEXTENDED-B");
2302 
2303         /**
2304          * Constant for the "Vertical Forms" Unicode character block.
2305          * @since 1.7
2306          */
2307         public static final UnicodeBlock VERTICAL_FORMS =
2308             new UnicodeBlock("VERTICAL_FORMS",
2309                              "VERTICAL FORMS",
2310                              "VERTICALFORMS");
2311 
2312         /**
2313          * Constant for the "Ancient Greek Numbers" Unicode character block.
2314          * @since 1.7
2315          */
2316         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2317             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",
2318                              "ANCIENT GREEK NUMBERS",
2319                              "ANCIENTGREEKNUMBERS");
2320 
2321         /**
2322          * Constant for the "Ancient Symbols" Unicode character block.
2323          * @since 1.7
2324          */
2325         public static final UnicodeBlock ANCIENT_SYMBOLS =
2326             new UnicodeBlock("ANCIENT_SYMBOLS",
2327                              "ANCIENT SYMBOLS",
2328                              "ANCIENTSYMBOLS");
2329 
2330         /**
2331          * Constant for the "Phaistos Disc" Unicode character block.
2332          * @since 1.7
2333          */
2334         public static final UnicodeBlock PHAISTOS_DISC =
2335             new UnicodeBlock("PHAISTOS_DISC",
2336                              "PHAISTOS DISC",
2337                              "PHAISTOSDISC");
2338 
2339         /**
2340          * Constant for the "Lycian" Unicode character block.
2341          * @since 1.7
2342          */
2343         public static final UnicodeBlock LYCIAN =
2344             new UnicodeBlock("LYCIAN");
2345 
2346         /**
2347          * Constant for the "Carian" Unicode character block.
2348          * @since 1.7
2349          */
2350         public static final UnicodeBlock CARIAN =
2351             new UnicodeBlock("CARIAN");
2352 
2353         /**
2354          * Constant for the "Old Persian" Unicode character block.
2355          * @since 1.7
2356          */
2357         public static final UnicodeBlock OLD_PERSIAN =
2358             new UnicodeBlock("OLD_PERSIAN",
2359                              "OLD PERSIAN",
2360                              "OLDPERSIAN");
2361 
2362         /**
2363          * Constant for the "Imperial Aramaic" Unicode character block.
2364          * @since 1.7
2365          */
2366         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2367             new UnicodeBlock("IMPERIAL_ARAMAIC",
2368                              "IMPERIAL ARAMAIC",
2369                              "IMPERIALARAMAIC");
2370 
2371         /**
2372          * Constant for the "Phoenician" Unicode character block.
2373          * @since 1.7
2374          */
2375         public static final UnicodeBlock PHOENICIAN =
2376             new UnicodeBlock("PHOENICIAN");
2377 
2378         /**
2379          * Constant for the "Lydian" Unicode character block.
2380          * @since 1.7
2381          */
2382         public static final UnicodeBlock LYDIAN =
2383             new UnicodeBlock("LYDIAN");
2384 
2385         /**
2386          * Constant for the "Kharoshthi" Unicode character block.
2387          * @since 1.7
2388          */
2389         public static final UnicodeBlock KHAROSHTHI =
2390             new UnicodeBlock("KHAROSHTHI");
2391 
2392         /**
2393          * Constant for the "Old South Arabian" Unicode character block.
2394          * @since 1.7
2395          */
2396         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2397             new UnicodeBlock("OLD_SOUTH_ARABIAN",
2398                              "OLD SOUTH ARABIAN",
2399                              "OLDSOUTHARABIAN");
2400 
2401         /**
2402          * Constant for the "Avestan" Unicode character block.
2403          * @since 1.7
2404          */
2405         public static final UnicodeBlock AVESTAN =
2406             new UnicodeBlock("AVESTAN");
2407 
2408         /**
2409          * Constant for the "Inscriptional Parthian" Unicode character block.
2410          * @since 1.7
2411          */
2412         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2413             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",
2414                              "INSCRIPTIONAL PARTHIAN",
2415                              "INSCRIPTIONALPARTHIAN");
2416 
2417         /**
2418          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2419          * @since 1.7
2420          */
2421         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2422             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",
2423                              "INSCRIPTIONAL PAHLAVI",
2424                              "INSCRIPTIONALPAHLAVI");
2425 
2426         /**
2427          * Constant for the "Old Turkic" Unicode character block.
2428          * @since 1.7
2429          */
2430         public static final UnicodeBlock OLD_TURKIC =
2431             new UnicodeBlock("OLD_TURKIC",
2432                              "OLD TURKIC",
2433                              "OLDTURKIC");
2434 
2435         /**
2436          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2437          * @since 1.7
2438          */
2439         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2440             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",
2441                              "RUMI NUMERAL SYMBOLS",
2442                              "RUMINUMERALSYMBOLS");
2443 
2444         /**
2445          * Constant for the "Brahmi" Unicode character block.
2446          * @since 1.7
2447          */
2448         public static final UnicodeBlock BRAHMI =
2449             new UnicodeBlock("BRAHMI");
2450 
2451         /**
2452          * Constant for the "Kaithi" Unicode character block.
2453          * @since 1.7
2454          */
2455         public static final UnicodeBlock KAITHI =
2456             new UnicodeBlock("KAITHI");
2457 
2458         /**
2459          * Constant for the "Cuneiform" Unicode character block.
2460          * @since 1.7
2461          */
2462         public static final UnicodeBlock CUNEIFORM =
2463             new UnicodeBlock("CUNEIFORM");
2464 
2465         /**
2466          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2467          * character block.
2468          * @since 1.7
2469          */
2470         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2471             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2472                              "CUNEIFORM NUMBERS AND PUNCTUATION",
2473                              "CUNEIFORMNUMBERSANDPUNCTUATION");
2474 
2475         /**
2476          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2477          * @since 1.7
2478          */
2479         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2480             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",
2481                              "EGYPTIAN HIEROGLYPHS",
2482                              "EGYPTIANHIEROGLYPHS");
2483 
2484         /**
2485          * Constant for the "Bamum Supplement" Unicode character block.
2486          * @since 1.7
2487          */
2488         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2489             new UnicodeBlock("BAMUM_SUPPLEMENT",
2490                              "BAMUM SUPPLEMENT",
2491                              "BAMUMSUPPLEMENT");
2492 
2493         /**
2494          * Constant for the "Kana Supplement" Unicode character block.
2495          * @since 1.7
2496          */
2497         public static final UnicodeBlock KANA_SUPPLEMENT =
2498             new UnicodeBlock("KANA_SUPPLEMENT",
2499                              "KANA SUPPLEMENT",
2500                              "KANASUPPLEMENT");
2501 
2502         /**
2503          * Constant for the "Ancient Greek Musical Notation" Unicode character
2504          * block.
2505          * @since 1.7
2506          */
2507         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2508             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2509                              "ANCIENT GREEK MUSICAL NOTATION",
2510                              "ANCIENTGREEKMUSICALNOTATION");
2511 
2512         /**
2513          * Constant for the "Counting Rod Numerals" Unicode character block.
2514          * @since 1.7
2515          */
2516         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2517             new UnicodeBlock("COUNTING_ROD_NUMERALS",
2518                              "COUNTING ROD NUMERALS",
2519                              "COUNTINGRODNUMERALS");
2520 
2521         /**
2522          * Constant for the "Mahjong Tiles" Unicode character block.
2523          * @since 1.7
2524          */
2525         public static final UnicodeBlock MAHJONG_TILES =
2526             new UnicodeBlock("MAHJONG_TILES",
2527                              "MAHJONG TILES",
2528                              "MAHJONGTILES");
2529 
2530         /**
2531          * Constant for the "Domino Tiles" Unicode character block.
2532          * @since 1.7
2533          */
2534         public static final UnicodeBlock DOMINO_TILES =
2535             new UnicodeBlock("DOMINO_TILES",
2536                              "DOMINO TILES",
2537                              "DOMINOTILES");
2538 
2539         /**
2540          * Constant for the "Playing Cards" Unicode character block.
2541          * @since 1.7
2542          */
2543         public static final UnicodeBlock PLAYING_CARDS =
2544             new UnicodeBlock("PLAYING_CARDS",
2545                              "PLAYING CARDS",
2546                              "PLAYINGCARDS");
2547 
2548         /**
2549          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2550          * block.
2551          * @since 1.7
2552          */
2553         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2554             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2555                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",
2556                              "ENCLOSEDALPHANUMERICSUPPLEMENT");
2557 
2558         /**
2559          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2560          * block.
2561          * @since 1.7
2562          */
2563         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2564             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2565                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2566                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2567 
2568         /**
2569          * Constant for the "Miscellaneous Symbols and Pictographs" Unicode
2570          * character block.
2571          * @since 1.7
2572          */
2573         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2574             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2575                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2576                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2577 
2578         /**
2579          * Constant for the "Emoticons" Unicode character block.
2580          * @since 1.7
2581          */
2582         public static final UnicodeBlock EMOTICONS =
2583             new UnicodeBlock("EMOTICONS");
2584 
2585         /**
2586          * Constant for the "Transport and Map Symbols" Unicode character block.
2587          * @since 1.7
2588          */
2589         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2590             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2591                              "TRANSPORT AND MAP SYMBOLS",
2592                              "TRANSPORTANDMAPSYMBOLS");
2593 
2594         /**
2595          * Constant for the "Alchemical Symbols" Unicode character block.
2596          * @since 1.7
2597          */
2598         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2599             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2600                              "ALCHEMICAL SYMBOLS",
2601                              "ALCHEMICALSYMBOLS");
2602 
2603         /**
2604          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2605          * character block.
2606          * @since 1.7
2607          */
2608         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2609             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2610                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2611                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2612 
2613         /**
2614          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2615          * character block.
2616          * @since 1.7
2617          */
2618         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2619             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2620                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2621                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2622 
2623         /**
2624          * Constant for the "Arabic Extended-A" Unicode character block.
2625          * @since 1.8
2626          */
2627         public static final UnicodeBlock ARABIC_EXTENDED_A =
2628             new UnicodeBlock("ARABIC_EXTENDED_A",
2629                              "ARABIC EXTENDED-A",
2630                              "ARABICEXTENDED-A");
2631 
2632         /**
2633          * Constant for the "Sundanese Supplement" Unicode character block.
2634          * @since 1.8
2635          */
2636         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2637             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2638                              "SUNDANESE SUPPLEMENT",
2639                              "SUNDANESESUPPLEMENT");
2640 
2641         /**
2642          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2643          * @since 1.8
2644          */
2645         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2646             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2647                              "MEETEI MAYEK EXTENSIONS",
2648                              "MEETEIMAYEKEXTENSIONS");
2649 
2650         /**
2651          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2652          * @since 1.8
2653          */
2654         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2655             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2656                              "MEROITIC HIEROGLYPHS",
2657                              "MEROITICHIEROGLYPHS");
2658 
2659         /**
2660          * Constant for the "Meroitic Cursive" Unicode character block.
2661          * @since 1.8
2662          */
2663         public static final UnicodeBlock MEROITIC_CURSIVE =
2664             new UnicodeBlock("MEROITIC_CURSIVE",
2665                              "MEROITIC CURSIVE",
2666                              "MEROITICCURSIVE");
2667 
2668         /**
2669          * Constant for the "Sora Sompeng" Unicode character block.
2670          * @since 1.8
2671          */
2672         public static final UnicodeBlock SORA_SOMPENG =
2673             new UnicodeBlock("SORA_SOMPENG",
2674                              "SORA SOMPENG",
2675                              "SORASOMPENG");
2676 
2677         /**
2678          * Constant for the "Chakma" Unicode character block.
2679          * @since 1.8
2680          */
2681         public static final UnicodeBlock CHAKMA =
2682             new UnicodeBlock("CHAKMA");
2683 
2684         /**
2685          * Constant for the "Sharada" Unicode character block.
2686          * @since 1.8
2687          */
2688         public static final UnicodeBlock SHARADA =
2689             new UnicodeBlock("SHARADA");
2690 
2691         /**
2692          * Constant for the "Takri" Unicode character block.
2693          * @since 1.8
2694          */
2695         public static final UnicodeBlock TAKRI =
2696             new UnicodeBlock("TAKRI");
2697 
2698         /**
2699          * Constant for the "Miao" Unicode character block.
2700          * @since 1.8
2701          */
2702         public static final UnicodeBlock MIAO =
2703             new UnicodeBlock("MIAO");
2704 
2705         /**
2706          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2707          * character block.
2708          * @since 1.8
2709          */
2710         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2711             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2712                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2713                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2714 
2715         /**
2716          * Constant for the "Combining Diacritical Marks Extended" Unicode
2717          * character block.
2718          * @since 9
2719          */
2720         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2721             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED",
2722                              "COMBINING DIACRITICAL MARKS EXTENDED",
2723                              "COMBININGDIACRITICALMARKSEXTENDED");
2724 
2725         /**
2726          * Constant for the "Myanmar Extended-B" Unicode character block.
2727          * @since 9
2728          */
2729         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2730             new UnicodeBlock("MYANMAR_EXTENDED_B",
2731                              "MYANMAR EXTENDED-B",
2732                              "MYANMAREXTENDED-B");
2733 
2734         /**
2735          * Constant for the "Latin Extended-E" Unicode character block.
2736          * @since 9
2737          */
2738         public static final UnicodeBlock LATIN_EXTENDED_E =
2739             new UnicodeBlock("LATIN_EXTENDED_E",
2740                              "LATIN EXTENDED-E",
2741                              "LATINEXTENDED-E");
2742 
2743         /**
2744          * Constant for the "Coptic Epact Numbers" Unicode character block.
2745          * @since 9
2746          */
2747         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2748             new UnicodeBlock("COPTIC_EPACT_NUMBERS",
2749                              "COPTIC EPACT NUMBERS",
2750                              "COPTICEPACTNUMBERS");
2751 
2752         /**
2753          * Constant for the "Old Permic" Unicode character block.
2754          * @since 9
2755          */
2756         public static final UnicodeBlock OLD_PERMIC =
2757             new UnicodeBlock("OLD_PERMIC",
2758                              "OLD PERMIC",
2759                              "OLDPERMIC");
2760 
2761         /**
2762          * Constant for the "Elbasan" Unicode character block.
2763          * @since 9
2764          */
2765         public static final UnicodeBlock ELBASAN =
2766             new UnicodeBlock("ELBASAN");
2767 
2768         /**
2769          * Constant for the "Caucasian Albanian" Unicode character block.
2770          * @since 9
2771          */
2772         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2773             new UnicodeBlock("CAUCASIAN_ALBANIAN",
2774                              "CAUCASIAN ALBANIAN",
2775                              "CAUCASIANALBANIAN");
2776 
2777         /**
2778          * Constant for the "Linear A" Unicode character block.
2779          * @since 9
2780          */
2781         public static final UnicodeBlock LINEAR_A =
2782             new UnicodeBlock("LINEAR_A",
2783                              "LINEAR A",
2784                              "LINEARA");
2785 
2786         /**
2787          * Constant for the "Palmyrene" Unicode character block.
2788          * @since 9
2789          */
2790         public static final UnicodeBlock PALMYRENE =
2791             new UnicodeBlock("PALMYRENE");
2792 
2793         /**
2794          * Constant for the "Nabataean" Unicode character block.
2795          * @since 9
2796          */
2797         public static final UnicodeBlock NABATAEAN =
2798             new UnicodeBlock("NABATAEAN");
2799 
2800         /**
2801          * Constant for the "Old North Arabian" Unicode character block.
2802          * @since 9
2803          */
2804         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2805             new UnicodeBlock("OLD_NORTH_ARABIAN",
2806                              "OLD NORTH ARABIAN",
2807                              "OLDNORTHARABIAN");
2808 
2809         /**
2810          * Constant for the "Manichaean" Unicode character block.
2811          * @since 9
2812          */
2813         public static final UnicodeBlock MANICHAEAN =
2814             new UnicodeBlock("MANICHAEAN");
2815 
2816         /**
2817          * Constant for the "Psalter Pahlavi" Unicode character block.
2818          * @since 9
2819          */
2820         public static final UnicodeBlock PSALTER_PAHLAVI =
2821             new UnicodeBlock("PSALTER_PAHLAVI",
2822                              "PSALTER PAHLAVI",
2823                              "PSALTERPAHLAVI");
2824 
2825         /**
2826          * Constant for the "Mahajani" Unicode character block.
2827          * @since 9
2828          */
2829         public static final UnicodeBlock MAHAJANI =
2830             new UnicodeBlock("MAHAJANI");
2831 
2832         /**
2833          * Constant for the "Sinhala Archaic Numbers" Unicode character block.
2834          * @since 9
2835          */
2836         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2837             new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS",
2838                              "SINHALA ARCHAIC NUMBERS",
2839                              "SINHALAARCHAICNUMBERS");
2840 
2841         /**
2842          * Constant for the "Khojki" Unicode character block.
2843          * @since 9
2844          */
2845         public static final UnicodeBlock KHOJKI =
2846             new UnicodeBlock("KHOJKI");
2847 
2848         /**
2849          * Constant for the "Khudawadi" Unicode character block.
2850          * @since 9
2851          */
2852         public static final UnicodeBlock KHUDAWADI =
2853             new UnicodeBlock("KHUDAWADI");
2854 
2855         /**
2856          * Constant for the "Grantha" Unicode character block.
2857          * @since 9
2858          */
2859         public static final UnicodeBlock GRANTHA =
2860             new UnicodeBlock("GRANTHA");
2861 
2862         /**
2863          * Constant for the "Tirhuta" Unicode character block.
2864          * @since 9
2865          */
2866         public static final UnicodeBlock TIRHUTA =
2867             new UnicodeBlock("TIRHUTA");
2868 
2869         /**
2870          * Constant for the "Siddham" Unicode character block.
2871          * @since 9
2872          */
2873         public static final UnicodeBlock SIDDHAM =
2874             new UnicodeBlock("SIDDHAM");
2875 
2876         /**
2877          * Constant for the "Modi" Unicode character block.
2878          * @since 9
2879          */
2880         public static final UnicodeBlock MODI =
2881             new UnicodeBlock("MODI");
2882 
2883         /**
2884          * Constant for the "Warang Citi" Unicode character block.
2885          * @since 9
2886          */
2887         public static final UnicodeBlock WARANG_CITI =
2888             new UnicodeBlock("WARANG_CITI",
2889                              "WARANG CITI",
2890                              "WARANGCITI");
2891 
2892         /**
2893          * Constant for the "Pau Cin Hau" Unicode character block.
2894          * @since 9
2895          */
2896         public static final UnicodeBlock PAU_CIN_HAU =
2897             new UnicodeBlock("PAU_CIN_HAU",
2898                              "PAU CIN HAU",
2899                              "PAUCINHAU");
2900 
2901         /**
2902          * Constant for the "Mro" Unicode character block.
2903          * @since 9
2904          */
2905         public static final UnicodeBlock MRO =
2906             new UnicodeBlock("MRO");
2907 
2908         /**
2909          * Constant for the "Bassa Vah" Unicode character block.
2910          * @since 9
2911          */
2912         public static final UnicodeBlock BASSA_VAH =
2913             new UnicodeBlock("BASSA_VAH",
2914                              "BASSA VAH",
2915                              "BASSAVAH");
2916 
2917         /**
2918          * Constant for the "Pahawh Hmong" Unicode character block.
2919          * @since 9
2920          */
2921         public static final UnicodeBlock PAHAWH_HMONG =
2922             new UnicodeBlock("PAHAWH_HMONG",
2923                              "PAHAWH HMONG",
2924                              "PAHAWHHMONG");
2925 
2926         /**
2927          * Constant for the "Duployan" Unicode character block.
2928          * @since 9
2929          */
2930         public static final UnicodeBlock DUPLOYAN =
2931             new UnicodeBlock("DUPLOYAN");
2932 
2933         /**
2934          * Constant for the "Shorthand Format Controls" Unicode character block.
2935          * @since 9
2936          */
2937         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2938             new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS",
2939                              "SHORTHAND FORMAT CONTROLS",
2940                              "SHORTHANDFORMATCONTROLS");
2941 
2942         /**
2943          * Constant for the "Mende Kikakui" Unicode character block.
2944          * @since 9
2945          */
2946         public static final UnicodeBlock MENDE_KIKAKUI =
2947             new UnicodeBlock("MENDE_KIKAKUI",
2948                              "MENDE KIKAKUI",
2949                              "MENDEKIKAKUI");
2950 
2951         /**
2952          * Constant for the "Ornamental Dingbats" Unicode character block.
2953          * @since 9
2954          */
2955         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2956             new UnicodeBlock("ORNAMENTAL_DINGBATS",
2957                              "ORNAMENTAL DINGBATS",
2958                              "ORNAMENTALDINGBATS");
2959 
2960         /**
2961          * Constant for the "Geometric Shapes Extended" Unicode character block.
2962          * @since 9
2963          */
2964         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2965             new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED",
2966                              "GEOMETRIC SHAPES EXTENDED",
2967                              "GEOMETRICSHAPESEXTENDED");
2968 
2969         /**
2970          * Constant for the "Supplemental Arrows-C" Unicode character block.
2971          * @since 9
2972          */
2973         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2974             new UnicodeBlock("SUPPLEMENTAL_ARROWS_C",
2975                              "SUPPLEMENTAL ARROWS-C",
2976                              "SUPPLEMENTALARROWS-C");
2977 
2978         /**
2979          * Constant for the "Cherokee Supplement" Unicode character block.
2980          * @since 9
2981          */
2982         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2983             new UnicodeBlock("CHEROKEE_SUPPLEMENT",
2984                              "CHEROKEE SUPPLEMENT",
2985                              "CHEROKEESUPPLEMENT");
2986 
2987         /**
2988          * Constant for the "Hatran" Unicode character block.
2989          * @since 9
2990          */
2991         public static final UnicodeBlock HATRAN =
2992             new UnicodeBlock("HATRAN");
2993 
2994         /**
2995          * Constant for the "Old Hungarian" Unicode character block.
2996          * @since 9
2997          */
2998         public static final UnicodeBlock OLD_HUNGARIAN =
2999             new UnicodeBlock("OLD_HUNGARIAN",
3000                              "OLD HUNGARIAN",
3001                              "OLDHUNGARIAN");
3002 
3003         /**
3004          * Constant for the "Multani" Unicode character block.
3005          * @since 9
3006          */
3007         public static final UnicodeBlock MULTANI =
3008             new UnicodeBlock("MULTANI");
3009 
3010         /**
3011          * Constant for the "Ahom" Unicode character block.
3012          * @since 9
3013          */
3014         public static final UnicodeBlock AHOM =
3015             new UnicodeBlock("AHOM");
3016 
3017         /**
3018          * Constant for the "Early Dynastic Cuneiform" Unicode character block.
3019          * @since 9
3020          */
3021         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
3022             new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM",
3023                              "EARLY DYNASTIC CUNEIFORM",
3024                              "EARLYDYNASTICCUNEIFORM");
3025 
3026         /**
3027          * Constant for the "Anatolian Hieroglyphs" Unicode character block.
3028          * @since 9
3029          */
3030         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
3031             new UnicodeBlock("ANATOLIAN_HIEROGLYPHS",
3032                              "ANATOLIAN HIEROGLYPHS",
3033                              "ANATOLIANHIEROGLYPHS");
3034 
3035         /**
3036          * Constant for the "Sutton SignWriting" Unicode character block.
3037          * @since 9
3038          */
3039         public static final UnicodeBlock SUTTON_SIGNWRITING =
3040             new UnicodeBlock("SUTTON_SIGNWRITING",
3041                              "SUTTON SIGNWRITING",
3042                              "SUTTONSIGNWRITING");
3043 
3044         /**
3045          * Constant for the "Supplemental Symbols and Pictographs" Unicode
3046          * character block.
3047          * @since 9
3048          */
3049         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
3050             new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
3051                              "SUPPLEMENTAL SYMBOLS AND PICTOGRAPHS",
3052                              "SUPPLEMENTALSYMBOLSANDPICTOGRAPHS");
3053 
3054         /**
3055          * Constant for the "CJK Unified Ideographs Extension E" Unicode
3056          * character block.
3057          * @since 9
3058          */
3059         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
3060             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
3061                              "CJK UNIFIED IDEOGRAPHS EXTENSION E",
3062                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONE");
3063 
3064         /**
3065          * Constant for the "Syriac Supplement" Unicode
3066          * character block.
3067          * @since 11
3068          */
3069         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
3070             new UnicodeBlock("SYRIAC_SUPPLEMENT",
3071                              "SYRIAC SUPPLEMENT",
3072                              "SYRIACSUPPLEMENT");
3073 
3074         /**
3075          * Constant for the "Cyrillic Extended-C" Unicode
3076          * character block.
3077          * @since 11
3078          */
3079         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
3080             new UnicodeBlock("CYRILLIC_EXTENDED_C",
3081                              "CYRILLIC EXTENDED-C",
3082                              "CYRILLICEXTENDED-C");
3083 
3084         /**
3085          * Constant for the "Osage" Unicode
3086          * character block.
3087          * @since 11
3088          */
3089         public static final UnicodeBlock OSAGE =
3090             new UnicodeBlock("OSAGE");
3091 
3092         /**
3093          * Constant for the "Newa" Unicode
3094          * character block.
3095          * @since 11
3096          */
3097         public static final UnicodeBlock NEWA =
3098             new UnicodeBlock("NEWA");
3099 
3100         /**
3101          * Constant for the "Mongolian Supplement" Unicode
3102          * character block.
3103          * @since 11
3104          */
3105         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
3106             new UnicodeBlock("MONGOLIAN_SUPPLEMENT",
3107                              "MONGOLIAN SUPPLEMENT",
3108                              "MONGOLIANSUPPLEMENT");
3109 
3110         /**
3111          * Constant for the "Marchen" Unicode
3112          * character block.
3113          * @since 11
3114          */
3115         public static final UnicodeBlock MARCHEN =
3116             new UnicodeBlock("MARCHEN");
3117 
3118         /**
3119          * Constant for the "Ideographic Symbols and Punctuation" Unicode
3120          * character block.
3121          * @since 11
3122          */
3123         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
3124             new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION",
3125                              "IDEOGRAPHIC SYMBOLS AND PUNCTUATION",
3126                              "IDEOGRAPHICSYMBOLSANDPUNCTUATION");
3127 
3128         /**
3129          * Constant for the "Tangut" Unicode
3130          * character block.
3131          * @since 11
3132          */
3133         public static final UnicodeBlock TANGUT =
3134             new UnicodeBlock("TANGUT");
3135 
3136         /**
3137          * Constant for the "Tangut Components" Unicode
3138          * character block.
3139          * @since 11
3140          */
3141         public static final UnicodeBlock TANGUT_COMPONENTS =
3142             new UnicodeBlock("TANGUT_COMPONENTS",
3143                              "TANGUT COMPONENTS",
3144                              "TANGUTCOMPONENTS");
3145 
3146         /**
3147          * Constant for the "Kana Extended-A" Unicode
3148          * character block.
3149          * @since 11
3150          */
3151         public static final UnicodeBlock KANA_EXTENDED_A =
3152             new UnicodeBlock("KANA_EXTENDED_A",
3153                              "KANA EXTENDED-A",
3154                              "KANAEXTENDED-A");
3155         /**
3156          * Constant for the "Glagolitic Supplement" Unicode
3157          * character block.
3158          * @since 11
3159          */
3160         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
3161             new UnicodeBlock("GLAGOLITIC_SUPPLEMENT",
3162                              "GLAGOLITIC SUPPLEMENT",
3163                              "GLAGOLITICSUPPLEMENT");
3164         /**
3165          * Constant for the "Adlam" Unicode
3166          * character block.
3167          * @since 11
3168          */
3169         public static final UnicodeBlock ADLAM =
3170             new UnicodeBlock("ADLAM");
3171 
3172         /**
3173          * Constant for the "Masaram Gondi" Unicode
3174          * character block.
3175          * @since 11
3176          */
3177         public static final UnicodeBlock MASARAM_GONDI =
3178             new UnicodeBlock("MASARAM_GONDI",
3179                              "MASARAM GONDI",
3180                              "MASARAMGONDI");
3181 
3182         /**
3183          * Constant for the "Zanabazar Square" Unicode
3184          * character block.
3185          * @since 11
3186          */
3187         public static final UnicodeBlock ZANABAZAR_SQUARE =
3188             new UnicodeBlock("ZANABAZAR_SQUARE",
3189                              "ZANABAZAR SQUARE",
3190                              "ZANABAZARSQUARE");
3191 
3192         /**
3193          * Constant for the "Nushu" Unicode
3194          * character block.
3195          * @since 11
3196          */
3197         public static final UnicodeBlock NUSHU =
3198             new UnicodeBlock("NUSHU");
3199 
3200         /**
3201          * Constant for the "Soyombo" Unicode
3202          * character block.
3203          * @since 11
3204          */
3205         public static final UnicodeBlock SOYOMBO =
3206             new UnicodeBlock("SOYOMBO");
3207 
3208         /**
3209          * Constant for the "Bhaiksuki" Unicode
3210          * character block.
3211          * @since 11
3212          */
3213         public static final UnicodeBlock BHAIKSUKI =
3214             new UnicodeBlock("BHAIKSUKI");
3215 
3216         /**
3217          * Constant for the "CJK Unified Ideographs Extension F" Unicode
3218          * character block.
3219          * @since 11
3220          */
3221         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
3222             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F",
3223                              "CJK UNIFIED IDEOGRAPHS EXTENSION F",
3224                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONF");
3225         /**
3226          * Constant for the "Georgian Extended" Unicode
3227          * character block.
3228          * @since 12
3229          */
3230         public static final UnicodeBlock GEORGIAN_EXTENDED =
3231             new UnicodeBlock("GEORGIAN_EXTENDED",
3232                              "GEORGIAN EXTENDED",
3233                              "GEORGIANEXTENDED");
3234 
3235         /**
3236          * Constant for the "Hanifi Rohingya" Unicode
3237          * character block (U+10D00..U+10D3F).
3238          * @since 12
3239          */
3240         public static final UnicodeBlock HANIFI_ROHINGYA =
3241             new UnicodeBlock("HANIFI_ROHINGYA",
3242                              "HANIFI ROHINGYA",   // name with spaces; presumably an alias (confirm)
3243                              "HANIFIROHINGYA");   // name without separators; presumably an alias (confirm)
3244 
3245         /**
3246          * Constant for the "Old Sogdian" Unicode
3247          * character block (U+10F00..U+10F2F).
3248          * @since 12
3249          */
3250         public static final UnicodeBlock OLD_SOGDIAN =
3251             new UnicodeBlock("OLD_SOGDIAN",
3252                              "OLD SOGDIAN",   // name with spaces; presumably an alias (confirm)
3253                              "OLDSOGDIAN");   // name without separators; presumably an alias (confirm)
3254 
3255         /**
3256          * Constant for the "Sogdian" Unicode
3257          * character block (U+10F30..U+10F6F).
3258          * @since 12
3259          */
3260         public static final UnicodeBlock SOGDIAN =
3261             new UnicodeBlock("SOGDIAN");
3262 
3263         /**
3264          * Constant for the "Dogra" Unicode
3265          * character block (U+11800..U+1184F).
3266          * @since 12
3267          */
3268         public static final UnicodeBlock DOGRA =
3269             new UnicodeBlock("DOGRA");
3270 
3271         /**
3272          * Constant for the "Gunjala Gondi" Unicode
3273          * character block (U+11D60..U+11DAF).
3274          * @since 12
3275          */
3276         public static final UnicodeBlock GUNJALA_GONDI =
3277             new UnicodeBlock("GUNJALA_GONDI",
3278                              "GUNJALA GONDI",   // name with spaces; presumably an alias (confirm)
3279                              "GUNJALAGONDI");   // name without separators; presumably an alias (confirm)
3280 
3281         /**
3282          * Constant for the "Makasar" Unicode
3283          * character block (U+11EE0..U+11EFF).
3284          * @since 12
3285          */
3286         public static final UnicodeBlock MAKASAR =
3287             new UnicodeBlock("MAKASAR");
3288 
3289         /**
3290          * Constant for the "Medefaidrin" Unicode
3291          * character block (U+16E40..U+16E9F).
3292          * @since 12
3293          */
3294         public static final UnicodeBlock MEDEFAIDRIN =
3295             new UnicodeBlock("MEDEFAIDRIN");
3296 
3297         /**
3298          * Constant for the "Mayan Numerals" Unicode
3299          * character block (U+1D2E0..U+1D2FF).
3300          * @since 12
3301          */
3302         public static final UnicodeBlock MAYAN_NUMERALS =
3303             new UnicodeBlock("MAYAN_NUMERALS",
3304                              "MAYAN NUMERALS",   // name with spaces; presumably an alias (confirm)
3305                              "MAYANNUMERALS");   // name without separators; presumably an alias (confirm)
3306 
3307         /**
3308          * Constant for the "Indic Siyaq Numbers" Unicode
3309          * character block (U+1EC70..U+1ECBF).
3310          * @since 12
3311          */
3312         public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
3313             new UnicodeBlock("INDIC_SIYAQ_NUMBERS",
3314                              "INDIC SIYAQ NUMBERS",   // name with spaces; presumably an alias (confirm)
3315                              "INDICSIYAQNUMBERS");   // name without separators; presumably an alias (confirm)
3316 
3317         /**
3318          * Constant for the "Chess Symbols" Unicode
3319          * character block (U+1FA00..U+1FA6F).
3320          * @since 12
3321          */
3322         public static final UnicodeBlock CHESS_SYMBOLS =
3323             new UnicodeBlock("CHESS_SYMBOLS",
3324                              "CHESS SYMBOLS",   // name with spaces; presumably an alias (confirm)
3325                              "CHESSSYMBOLS");   // name without separators; presumably an alias (confirm)
3326 
3327         /**
3328          * Constant for the "Elymaic" Unicode
3329          * character block (U+10FE0..U+10FFF).
3330          * @since 13
3331          */
3332         public static final UnicodeBlock ELYMAIC =
3333             new UnicodeBlock("ELYMAIC");
3334 
3335         /**
3336          * Constant for the "Nandinagari" Unicode
3337          * character block (U+119A0..U+119FF).
3338          * @since 13
3339          */
3340         public static final UnicodeBlock NANDINAGARI =
3341             new UnicodeBlock("NANDINAGARI");
3342 
3343         /**
3344          * Constant for the "Tamil Supplement" Unicode
3345          * character block (U+11FC0..U+11FFF).
3346          * @since 13
3347          */
3348         public static final UnicodeBlock TAMIL_SUPPLEMENT =
3349             new UnicodeBlock("TAMIL_SUPPLEMENT",
3350                              "TAMIL SUPPLEMENT",   // name with spaces; presumably an alias (confirm)
3351                              "TAMILSUPPLEMENT");   // name without separators; presumably an alias (confirm)
3352 
3353         /**
3354          * Constant for the "Egyptian Hieroglyph Format Controls" Unicode
3355          * character block (U+13430..U+1343F).
3356          * @since 13
3357          */
3358         public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
3359             new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS",
3360                              "EGYPTIAN HIEROGLYPH FORMAT CONTROLS",   // name with spaces; presumably an alias (confirm)
3361                              "EGYPTIANHIEROGLYPHFORMATCONTROLS");   // name without separators; presumably an alias (confirm)
3362 
3363         /**
3364          * Constant for the "Small Kana Extension" Unicode
3365          * character block (U+1B130..U+1B16F).
3366          * @since 13
3367          */
3368         public static final UnicodeBlock SMALL_KANA_EXTENSION =
3369             new UnicodeBlock("SMALL_KANA_EXTENSION",
3370                              "SMALL KANA EXTENSION",   // name with spaces; presumably an alias (confirm)
3371                              "SMALLKANAEXTENSION");   // name without separators; presumably an alias (confirm)
3372 
3373         /**
3374          * Constant for the "Nyiakeng Puachue Hmong" Unicode
3375          * character block (U+1E100..U+1E14F).
3376          * @since 13
3377          */
3378         public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
3379             new UnicodeBlock("NYIAKENG_PUACHUE_HMONG",
3380                              "NYIAKENG PUACHUE HMONG",   // name with spaces; presumably an alias (confirm)
3381                              "NYIAKENGPUACHUEHMONG");   // name without separators; presumably an alias (confirm)
3382 
3383         /**
3384          * Constant for the "Wancho" Unicode
3385          * character block (U+1E2C0..U+1E2FF).
3386          * @since 13
3387          */
3388         public static final UnicodeBlock WANCHO =
3389             new UnicodeBlock("WANCHO");
3390 
3391         /**
3392          * Constant for the "Ottoman Siyaq Numbers" Unicode
3393          * character block (U+1ED00..U+1ED4F).
3394          * @since 13
3395          */
3396         public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
3397             new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS",
3398                              "OTTOMAN SIYAQ NUMBERS",   // name with spaces; presumably an alias (confirm)
3399                              "OTTOMANSIYAQNUMBERS");   // name without separators; presumably an alias (confirm)
3400 
3401         /**
3402          * Constant for the "Symbols and Pictographs Extended-A" Unicode
3403          * character block (U+1FA70..U+1FAFF).
3404          * @since 13
3405          */
3406         public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
3407             new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A",
3408                              "SYMBOLS AND PICTOGRAPHS EXTENDED-A",   // spaces, plus a hyphen before "A"; presumably an alias (confirm)
3409                              "SYMBOLSANDPICTOGRAPHSEXTENDED-A");   // spaces removed, hyphen kept; presumably an alias (confirm)
3410 
3411         /**
3412          * Constant for the "Yezidi" Unicode
3413          * character block (U+10E80..U+10EBF).
3414          * @since 15
3415          */
3416         public static final UnicodeBlock YEZIDI =
3417             new UnicodeBlock("YEZIDI");
3418 
3419         /**
3420          * Constant for the "Chorasmian" Unicode
3421          * character block (U+10FB0..U+10FDF).
3422          * @since 15
3423          */
3424         public static final UnicodeBlock CHORASMIAN =
3425             new UnicodeBlock("CHORASMIAN");
3426 
3427         /**
3428          * Constant for the "Dives Akuru" Unicode
3429          * character block (U+11900..U+1195F).
3430          * @since 15
3431          */
3432         public static final UnicodeBlock DIVES_AKURU =
3433             new UnicodeBlock("DIVES_AKURU",
3434                              "DIVES AKURU",   // name with spaces; presumably an alias (confirm)
3435                              "DIVESAKURU");   // name without separators; presumably an alias (confirm)
3436 
3437         /**
3438          * Constant for the "Lisu Supplement" Unicode
3439          * character block (U+11FB0..U+11FBF).
3440          * @since 15
3441          */
3442         public static final UnicodeBlock LISU_SUPPLEMENT =
3443             new UnicodeBlock("LISU_SUPPLEMENT",
3444                              "LISU SUPPLEMENT",   // name with spaces; presumably an alias (confirm)
3445                              "LISUSUPPLEMENT");   // name without separators; presumably an alias (confirm)
3446 
3447         /**
3448          * Constant for the "Khitan Small Script" Unicode
3449          * character block (U+18B00..U+18CFF).
3450          * @since 15
3451          */
3452         public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
3453             new UnicodeBlock("KHITAN_SMALL_SCRIPT",
3454                              "KHITAN SMALL SCRIPT",   // name with spaces; presumably an alias (confirm)
3455                              "KHITANSMALLSCRIPT");   // name without separators; presumably an alias (confirm)
3456 
3457         /**
3458          * Constant for the "Tangut Supplement" Unicode
3459          * character block (U+18D00..U+18D8F).
3460          * @since 15
3461          */
3462         public static final UnicodeBlock TANGUT_SUPPLEMENT =
3463             new UnicodeBlock("TANGUT_SUPPLEMENT",
3464                              "TANGUT SUPPLEMENT",   // name with spaces; presumably an alias (confirm)
3465                              "TANGUTSUPPLEMENT");   // name without separators; presumably an alias (confirm)
3466 
3467         /**
3468          * Constant for the "Symbols for Legacy Computing" Unicode
3469          * character block (U+1FB00..U+1FBFF).
3470          * @since 15
3471          */
3472         public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
3473             new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING",
3474                              "SYMBOLS FOR LEGACY COMPUTING",   // name with spaces; presumably an alias (confirm)
3475                              "SYMBOLSFORLEGACYCOMPUTING");   // name without separators; presumably an alias (confirm)
3476 
3477         /**
3478          * Constant for the "CJK Unified Ideographs Extension G" Unicode
3479          * character block (U+30000..U+3134F).
3480          * @since 15
3481          */
3482         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
3483             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
3484                              "CJK UNIFIED IDEOGRAPHS EXTENSION G",   // name with spaces; presumably an alias (confirm)
3485                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONG");   // name without separators; presumably an alias (confirm)
3486 
3487         private static final int[] blockStarts = {
                 // First code point of each range, listed in ascending order; the
                 // trailing comment on a row gives the range and its Unicode block
                 // name, and each range ends just before the next row's value.
                 // Rows marked "unassigned" are gaps that belong to no block.
                 // Rows pair index-for-index with the blocks[] table declared just
                 // below (e.g. the 0x0870 gap row lines up with a null entry there).
                 // NOTE(review): the code that searches these tables is outside this
                 // excerpt; presumably both arrays must be edited together - confirm.
3488             0x0000,   // 0000..007F; Basic Latin
3489             0x0080,   // 0080..00FF; Latin-1 Supplement
3490             0x0100,   // 0100..017F; Latin Extended-A
3491             0x0180,   // 0180..024F; Latin Extended-B
3492             0x0250,   // 0250..02AF; IPA Extensions
3493             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
3494             0x0300,   // 0300..036F; Combining Diacritical Marks
3495             0x0370,   // 0370..03FF; Greek and Coptic
3496             0x0400,   // 0400..04FF; Cyrillic
3497             0x0500,   // 0500..052F; Cyrillic Supplement
3498             0x0530,   // 0530..058F; Armenian
3499             0x0590,   // 0590..05FF; Hebrew
3500             0x0600,   // 0600..06FF; Arabic
3501             0x0700,   // 0700..074F; Syriac
3502             0x0750,   // 0750..077F; Arabic Supplement
3503             0x0780,   // 0780..07BF; Thaana
3504             0x07C0,   // 07C0..07FF; NKo
3505             0x0800,   // 0800..083F; Samaritan
3506             0x0840,   // 0840..085F; Mandaic
3507             0x0860,   // 0860..086F; Syriac Supplement
3508             0x0870,   //             unassigned
3509             0x08A0,   // 08A0..08FF; Arabic Extended-A
3510             0x0900,   // 0900..097F; Devanagari
3511             0x0980,   // 0980..09FF; Bengali
3512             0x0A00,   // 0A00..0A7F; Gurmukhi
3513             0x0A80,   // 0A80..0AFF; Gujarati
3514             0x0B00,   // 0B00..0B7F; Oriya
3515             0x0B80,   // 0B80..0BFF; Tamil
3516             0x0C00,   // 0C00..0C7F; Telugu
3517             0x0C80,   // 0C80..0CFF; Kannada
3518             0x0D00,   // 0D00..0D7F; Malayalam
3519             0x0D80,   // 0D80..0DFF; Sinhala
3520             0x0E00,   // 0E00..0E7F; Thai
3521             0x0E80,   // 0E80..0EFF; Lao
3522             0x0F00,   // 0F00..0FFF; Tibetan
3523             0x1000,   // 1000..109F; Myanmar
3524             0x10A0,   // 10A0..10FF; Georgian
3525             0x1100,   // 1100..11FF; Hangul Jamo
3526             0x1200,   // 1200..137F; Ethiopic
3527             0x1380,   // 1380..139F; Ethiopic Supplement
3528             0x13A0,   // 13A0..13FF; Cherokee
3529             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
3530             0x1680,   // 1680..169F; Ogham
3531             0x16A0,   // 16A0..16FF; Runic
3532             0x1700,   // 1700..171F; Tagalog
3533             0x1720,   // 1720..173F; Hanunoo
3534             0x1740,   // 1740..175F; Buhid
3535             0x1760,   // 1760..177F; Tagbanwa
3536             0x1780,   // 1780..17FF; Khmer
3537             0x1800,   // 1800..18AF; Mongolian
3538             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
3539             0x1900,   // 1900..194F; Limbu
3540             0x1950,   // 1950..197F; Tai Le
3541             0x1980,   // 1980..19DF; New Tai Lue
3542             0x19E0,   // 19E0..19FF; Khmer Symbols
3543             0x1A00,   // 1A00..1A1F; Buginese
3544             0x1A20,   // 1A20..1AAF; Tai Tham
3545             0x1AB0,   // 1AB0..1AFF; Combining Diacritical Marks Extended
3546             0x1B00,   // 1B00..1B7F; Balinese
3547             0x1B80,   // 1B80..1BBF; Sundanese
3548             0x1BC0,   // 1BC0..1BFF; Batak
3549             0x1C00,   // 1C00..1C4F; Lepcha
3550             0x1C50,   // 1C50..1C7F; Ol Chiki
3551             0x1C80,   // 1C80..1C8F; Cyrillic Extended-C
3552             0x1C90,   // 1C90..1CBF; Georgian Extended
3553             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
3554             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
3555             0x1D00,   // 1D00..1D7F; Phonetic Extensions
3556             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
3557             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
3558             0x1E00,   // 1E00..1EFF; Latin Extended Additional
3559             0x1F00,   // 1F00..1FFF; Greek Extended
3560             0x2000,   // 2000..206F; General Punctuation
3561             0x2070,   // 2070..209F; Superscripts and Subscripts
3562             0x20A0,   // 20A0..20CF; Currency Symbols
3563             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
3564             0x2100,   // 2100..214F; Letterlike Symbols
3565             0x2150,   // 2150..218F; Number Forms
3566             0x2190,   // 2190..21FF; Arrows
3567             0x2200,   // 2200..22FF; Mathematical Operators
3568             0x2300,   // 2300..23FF; Miscellaneous Technical
3569             0x2400,   // 2400..243F; Control Pictures
3570             0x2440,   // 2440..245F; Optical Character Recognition
3571             0x2460,   // 2460..24FF; Enclosed Alphanumerics
3572             0x2500,   // 2500..257F; Box Drawing
3573             0x2580,   // 2580..259F; Block Elements
3574             0x25A0,   // 25A0..25FF; Geometric Shapes
3575             0x2600,   // 2600..26FF; Miscellaneous Symbols
3576             0x2700,   // 2700..27BF; Dingbats
3577             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
3578             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
3579             0x2800,   // 2800..28FF; Braille Patterns
3580             0x2900,   // 2900..297F; Supplemental Arrows-B
3581             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
3582             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
3583             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
3584             0x2C00,   // 2C00..2C5F; Glagolitic
3585             0x2C60,   // 2C60..2C7F; Latin Extended-C
3586             0x2C80,   // 2C80..2CFF; Coptic
3587             0x2D00,   // 2D00..2D2F; Georgian Supplement
3588             0x2D30,   // 2D30..2D7F; Tifinagh
3589             0x2D80,   // 2D80..2DDF; Ethiopic Extended
3590             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
3591             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
3592             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
3593             0x2F00,   // 2F00..2FDF; Kangxi Radicals
3594             0x2FE0,   //             unassigned
3595             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
3596             0x3000,   // 3000..303F; CJK Symbols and Punctuation
3597             0x3040,   // 3040..309F; Hiragana
3598             0x30A0,   // 30A0..30FF; Katakana
3599             0x3100,   // 3100..312F; Bopomofo
3600             0x3130,   // 3130..318F; Hangul Compatibility Jamo
3601             0x3190,   // 3190..319F; Kanbun
3602             0x31A0,   // 31A0..31BF; Bopomofo Extended
3603             0x31C0,   // 31C0..31EF; CJK Strokes
3604             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
3605             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
3606             0x3300,   // 3300..33FF; CJK Compatibility
3607             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
3608             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
3609             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
3610             0xA000,   // A000..A48F; Yi Syllables
3611             0xA490,   // A490..A4CF; Yi Radicals
3612             0xA4D0,   // A4D0..A4FF; Lisu
3613             0xA500,   // A500..A63F; Vai
3614             0xA640,   // A640..A69F; Cyrillic Extended-B
3615             0xA6A0,   // A6A0..A6FF; Bamum
3616             0xA700,   // A700..A71F; Modifier Tone Letters
3617             0xA720,   // A720..A7FF; Latin Extended-D
3618             0xA800,   // A800..A82F; Syloti Nagri
3619             0xA830,   // A830..A83F; Common Indic Number Forms
3620             0xA840,   // A840..A87F; Phags-pa
3621             0xA880,   // A880..A8DF; Saurashtra
3622             0xA8E0,   // A8E0..A8FF; Devanagari Extended
3623             0xA900,   // A900..A92F; Kayah Li
3624             0xA930,   // A930..A95F; Rejang
3625             0xA960,   // A960..A97F; Hangul Jamo Extended-A
3626             0xA980,   // A980..A9DF; Javanese
3627             0xA9E0,   // A9E0..A9FF; Myanmar Extended-B
3628             0xAA00,   // AA00..AA5F; Cham
3629             0xAA60,   // AA60..AA7F; Myanmar Extended-A
3630             0xAA80,   // AA80..AADF; Tai Viet
3631             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
3632             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
3633             0xAB30,   // AB30..AB6F; Latin Extended-E
3634             0xAB70,   // AB70..ABBF; Cherokee Supplement
3635             0xABC0,   // ABC0..ABFF; Meetei Mayek
3636             0xAC00,   // AC00..D7AF; Hangul Syllables
3637             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
3638             0xD800,   // D800..DB7F; High Surrogates
3639             0xDB80,   // DB80..DBFF; High Private Use Surrogates
3640             0xDC00,   // DC00..DFFF; Low Surrogates
3641             0xE000,   // E000..F8FF; Private Use Area
3642             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
3643             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
3644             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
3645             0xFE00,   // FE00..FE0F; Variation Selectors
3646             0xFE10,   // FE10..FE1F; Vertical Forms
3647             0xFE20,   // FE20..FE2F; Combining Half Marks
3648             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
3649             0xFE50,   // FE50..FE6F; Small Form Variants
3650             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
3651             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
3652             0xFFF0,   // FFF0..FFFF; Specials
3653             0x10000,  // 10000..1007F; Linear B Syllabary
3654             0x10080,  // 10080..100FF; Linear B Ideograms
3655             0x10100,  // 10100..1013F; Aegean Numbers
3656             0x10140,  // 10140..1018F; Ancient Greek Numbers
3657             0x10190,  // 10190..101CF; Ancient Symbols
3658             0x101D0,  // 101D0..101FF; Phaistos Disc
3659             0x10200,  //               unassigned
3660             0x10280,  // 10280..1029F; Lycian
3661             0x102A0,  // 102A0..102DF; Carian
3662             0x102E0,  // 102E0..102FF; Coptic Epact Numbers
3663             0x10300,  // 10300..1032F; Old Italic
3664             0x10330,  // 10330..1034F; Gothic
3665             0x10350,  // 10350..1037F; Old Permic
3666             0x10380,  // 10380..1039F; Ugaritic
3667             0x103A0,  // 103A0..103DF; Old Persian
3668             0x103E0,  //               unassigned
3669             0x10400,  // 10400..1044F; Deseret
3670             0x10450,  // 10450..1047F; Shavian
3671             0x10480,  // 10480..104AF; Osmanya
3672             0x104B0,  // 104B0..104FF; Osage
3673             0x10500,  // 10500..1052F; Elbasan
3674             0x10530,  // 10530..1056F; Caucasian Albanian
3675             0x10570,  //               unassigned
3676             0x10600,  // 10600..1077F; Linear A
3677             0x10780,  //               unassigned
3678             0x10800,  // 10800..1083F; Cypriot Syllabary
3679             0x10840,  // 10840..1085F; Imperial Aramaic
3680             0x10860,  // 10860..1087F; Palmyrene
3681             0x10880,  // 10880..108AF; Nabataean
3682             0x108B0,  //               unassigned
3683             0x108E0,  // 108E0..108FF; Hatran
3684             0x10900,  // 10900..1091F; Phoenician
3685             0x10920,  // 10920..1093F; Lydian
3686             0x10940,  //               unassigned
3687             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
3688             0x109A0,  // 109A0..109FF; Meroitic Cursive
3689             0x10A00,  // 10A00..10A5F; Kharoshthi
3690             0x10A60,  // 10A60..10A7F; Old South Arabian
3691             0x10A80,  // 10A80..10A9F; Old North Arabian
3692             0x10AA0,  //               unassigned
3693             0x10AC0,  // 10AC0..10AFF; Manichaean
3694             0x10B00,  // 10B00..10B3F; Avestan
3695             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
3696             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
3697             0x10B80,  // 10B80..10BAF; Psalter Pahlavi
3698             0x10BB0,  //               unassigned
3699             0x10C00,  // 10C00..10C4F; Old Turkic
3700             0x10C50,  //               unassigned
3701             0x10C80,  // 10C80..10CFF; Old Hungarian
3702             0x10D00,  // 10D00..10D3F; Hanifi Rohingya
3703             0x10D40,  //               unassigned
3704             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
3705             0x10E80,  // 10E80..10EBF; Yezidi
3706             0x10EC0,  //               unassigned
3707             0x10F00,  // 10F00..10F2F; Old Sogdian
3708             0x10F30,  // 10F30..10F6F; Sogdian
3709             0x10F70,  //               unassigned
3710             0x10FB0,  // 10FB0..10FDF; Chorasmian
3711             0x10FE0,  // 10FE0..10FFF; Elymaic
3712             0x11000,  // 11000..1107F; Brahmi
3713             0x11080,  // 11080..110CF; Kaithi
3714             0x110D0,  // 110D0..110FF; Sora Sompeng
3715             0x11100,  // 11100..1114F; Chakma
3716             0x11150,  // 11150..1117F; Mahajani
3717             0x11180,  // 11180..111DF; Sharada
3718             0x111E0,  // 111E0..111FF; Sinhala Archaic Numbers
3719             0x11200,  // 11200..1124F; Khojki
3720             0x11250,  //               unassigned
3721             0x11280,  // 11280..112AF; Multani
3722             0x112B0,  // 112B0..112FF; Khudawadi
3723             0x11300,  // 11300..1137F; Grantha
3724             0x11380,  //               unassigned
3725             0x11400,  // 11400..1147F; Newa
3726             0x11480,  // 11480..114DF; Tirhuta
3727             0x114E0,  //               unassigned
3728             0x11580,  // 11580..115FF; Siddham
3729             0x11600,  // 11600..1165F; Modi
3730             0x11660,  // 11660..1167F; Mongolian Supplement
3731             0x11680,  // 11680..116CF; Takri
3732             0x116D0,  //               unassigned
3733             0x11700,  // 11700..1173F; Ahom
3734             0x11740,  //               unassigned
3735             0x11800,  // 11800..1184F; Dogra
3736             0x11850,  //               unassigned
3737             0x118A0,  // 118A0..118FF; Warang Citi
3738             0x11900,  // 11900..1195F; Dives Akuru
3739             0x11960,  //               unassigned
3740             0x119A0,  // 119A0..119FF; Nandinagari
3741             0x11A00,  // 11A00..11A4F; Zanabazar Square
3742             0x11A50,  // 11A50..11AAF; Soyombo
3743             0x11AB0,  //               unassigned
3744             0x11AC0,  // 11AC0..11AFF; Pau Cin Hau
3745             0x11B00,  //               unassigned
3746             0x11C00,  // 11C00..11C6F; Bhaiksuki
3747             0x11C70,  // 11C70..11CBF; Marchen
3748             0x11CC0,  //               unassigned
3749             0x11D00,  // 11D00..11D5F; Masaram Gondi
3750             0x11D60,  // 11D60..11DAF; Gunjala Gondi
3751             0x11DB0,  //               unassigned
3752             0x11EE0,  // 11EE0..11EFF; Makasar
3753             0x11F00,  //               unassigned
3754             0x11FB0,  // 11FB0..11FBF; Lisu Supplement
3755             0x11FC0,  // 11FC0..11FFF; Tamil Supplement
3756             0x12000,  // 12000..123FF; Cuneiform
3757             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
3758             0x12480,  // 12480..1254F; Early Dynastic Cuneiform
3759             0x12550,  //               unassigned
3760             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
3761             0x13430,  // 13430..1343F; Egyptian Hieroglyph Format Controls
3762             0x13440,  //               unassigned
3763             0x14400,  // 14400..1467F; Anatolian Hieroglyphs
3764             0x14680,  //               unassigned
3765             0x16800,  // 16800..16A3F; Bamum Supplement
3766             0x16A40,  // 16A40..16A6F; Mro
3767             0x16A70,  //               unassigned
3768             0x16AD0,  // 16AD0..16AFF; Bassa Vah
3769             0x16B00,  // 16B00..16B8F; Pahawh Hmong
3770             0x16B90,  //               unassigned
3771             0x16E40,  // 16E40..16E9F; Medefaidrin
3772             0x16EA0,  //               unassigned
3773             0x16F00,  // 16F00..16F9F; Miao
3774             0x16FA0,  //               unassigned
3775             0x16FE0,  // 16FE0..16FFF; Ideographic Symbols and Punctuation
3776             0x17000,  // 17000..187FF; Tangut
3777             0x18800,  // 18800..18AFF; Tangut Components
3778             0x18B00,  // 18B00..18CFF; Khitan Small Script
3779             0x18D00,  // 18D00..18D8F; Tangut Supplement
3780             0x18D90,  //               unassigned
3781             0x1B000,  // 1B000..1B0FF; Kana Supplement
3782             0x1B100,  // 1B100..1B12F; Kana Extended-A
3783             0x1B130,  // 1B130..1B16F; Small Kana Extension
3784             0x1B170,  // 1B170..1B2FF; Nushu
3785             0x1B300,  //               unassigned
3786             0x1BC00,  // 1BC00..1BC9F; Duployan
3787             0x1BCA0,  // 1BCA0..1BCAF; Shorthand Format Controls
3788             0x1BCB0,  //               unassigned
3789             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
3790             0x1D100,  // 1D100..1D1FF; Musical Symbols
3791             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
3792             0x1D250,  //               unassigned
3793             0x1D2E0,  // 1D2E0..1D2FF; Mayan Numerals
3794             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
3795             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
3796             0x1D380,  //               unassigned
3797             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
3798             0x1D800,  // 1D800..1DAAF; Sutton SignWriting
3799             0x1DAB0,  //               unassigned
3800             0x1E000,  // 1E000..1E02F; Glagolitic Supplement
3801             0x1E030,  //               unassigned
3802             0x1E100,  // 1E100..1E14F; Nyiakeng Puachue Hmong
3803             0x1E150,  //               unassigned
3804             0x1E2C0,  // 1E2C0..1E2FF; Wancho
3805             0x1E300,  //               unassigned
3806             0x1E800,  // 1E800..1E8DF; Mende Kikakui
3807             0x1E8E0,  //               unassigned
3808             0x1E900,  // 1E900..1E95F; Adlam
3809             0x1E960,  //               unassigned
3810             0x1EC70,  // 1EC70..1ECBF; Indic Siyaq Numbers
3811             0x1ECC0,  //               unassigned
3812             0x1ED00,  // 1ED00..1ED4F; Ottoman Siyaq Numbers
3813             0x1ED50,  //               unassigned
3814             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
3815             0x1EF00,  //               unassigned
3816             0x1F000,  // 1F000..1F02F; Mahjong Tiles
3817             0x1F030,  // 1F030..1F09F; Domino Tiles
3818             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
3819             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
3820             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
3821             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols and Pictographs
3822             0x1F600,  // 1F600..1F64F; Emoticons
3823             0x1F650,  // 1F650..1F67F; Ornamental Dingbats
3824             0x1F680,  // 1F680..1F6FF; Transport and Map Symbols
3825             0x1F700,  // 1F700..1F77F; Alchemical Symbols
3826             0x1F780,  // 1F780..1F7FF; Geometric Shapes Extended
3827             0x1F800,  // 1F800..1F8FF; Supplemental Arrows-C
3828             0x1F900,  // 1F900..1F9FF; Supplemental Symbols and Pictographs
3829             0x1FA00,  // 1FA00..1FA6F; Chess Symbols
3830             0x1FA70,  // 1FA70..1FAFF; Symbols and Pictographs Extended-A
3831             0x1FB00,  // 1FB00..1FBFF; Symbols for Legacy Computing
3832             0x1FC00,  //               unassigned
3833             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
3834             0x2A6E0,  //               unassigned
3835             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
3836             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
3837             0x2B820,  // 2B820..2CEAF; CJK Unified Ideographs Extension E
3838             0x2CEB0,  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
3839             0x2EBF0,  //               unassigned
3840             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
3841             0x2FA20,  //               unassigned
3842             0x30000,  // 30000..3134F; CJK Unified Ideographs Extension G
3843             0x31350,  //               unassigned
3844             0xE0000,  // E0000..E007F; Tags
3845             0xE0080,  //               unassigned
3846             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
3847             0xE01F0,  //               unassigned
3848             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
3849             0x100000, // 100000..10FFFF; Supplementary Private Use Area-B
3850         };
3851 
3852         private static final UnicodeBlock[] blocks = { // Parallel to blockStarts: of(int) locates an index in blockStarts and returns the entry at that same index here.
3853             BASIC_LATIN,
3854             LATIN_1_SUPPLEMENT,
3855             LATIN_EXTENDED_A,
3856             LATIN_EXTENDED_B,
3857             IPA_EXTENSIONS,
3858             SPACING_MODIFIER_LETTERS,
3859             COMBINING_DIACRITICAL_MARKS,
3860             GREEK,
3861             CYRILLIC,
3862             CYRILLIC_SUPPLEMENTARY,
3863             ARMENIAN,
3864             HEBREW,
3865             ARABIC,
3866             SYRIAC,
3867             ARABIC_SUPPLEMENT,
3868             THAANA,
3869             NKO,
3870             SAMARITAN,
3871             MANDAIC,
3872             SYRIAC_SUPPLEMENT,
3873             null, // A null entry is what of(int) returns for code points that fall in the range starting at the matching blockStarts entry.
3874             ARABIC_EXTENDED_A,
3875             DEVANAGARI,
3876             BENGALI,
3877             GURMUKHI,
3878             GUJARATI,
3879             ORIYA,
3880             TAMIL,
3881             TELUGU,
3882             KANNADA,
3883             MALAYALAM,
3884             SINHALA,
3885             THAI,
3886             LAO,
3887             TIBETAN,
3888             MYANMAR,
3889             GEORGIAN,
3890             HANGUL_JAMO,
3891             ETHIOPIC,
3892             ETHIOPIC_SUPPLEMENT,
3893             CHEROKEE,
3894             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
3895             OGHAM,
3896             RUNIC,
3897             TAGALOG,
3898             HANUNOO,
3899             BUHID,
3900             TAGBANWA,
3901             KHMER,
3902             MONGOLIAN,
3903             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
3904             LIMBU,
3905             TAI_LE,
3906             NEW_TAI_LUE,
3907             KHMER_SYMBOLS,
3908             BUGINESE,
3909             TAI_THAM,
3910             COMBINING_DIACRITICAL_MARKS_EXTENDED,
3911             BALINESE,
3912             SUNDANESE,
3913             BATAK,
3914             LEPCHA,
3915             OL_CHIKI,
3916             CYRILLIC_EXTENDED_C,
3917             GEORGIAN_EXTENDED,
3918             SUNDANESE_SUPPLEMENT,
3919             VEDIC_EXTENSIONS,
3920             PHONETIC_EXTENSIONS,
3921             PHONETIC_EXTENSIONS_SUPPLEMENT,
3922             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
3923             LATIN_EXTENDED_ADDITIONAL,
3924             GREEK_EXTENDED,
3925             GENERAL_PUNCTUATION,
3926             SUPERSCRIPTS_AND_SUBSCRIPTS,
3927             CURRENCY_SYMBOLS,
3928             COMBINING_MARKS_FOR_SYMBOLS,
3929             LETTERLIKE_SYMBOLS,
3930             NUMBER_FORMS,
3931             ARROWS,
3932             MATHEMATICAL_OPERATORS,
3933             MISCELLANEOUS_TECHNICAL,
3934             CONTROL_PICTURES,
3935             OPTICAL_CHARACTER_RECOGNITION,
3936             ENCLOSED_ALPHANUMERICS,
3937             BOX_DRAWING,
3938             BLOCK_ELEMENTS,
3939             GEOMETRIC_SHAPES,
3940             MISCELLANEOUS_SYMBOLS,
3941             DINGBATS,
3942             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
3943             SUPPLEMENTAL_ARROWS_A,
3944             BRAILLE_PATTERNS,
3945             SUPPLEMENTAL_ARROWS_B,
3946             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
3947             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
3948             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
3949             GLAGOLITIC,
3950             LATIN_EXTENDED_C,
3951             COPTIC,
3952             GEORGIAN_SUPPLEMENT,
3953             TIFINAGH,
3954             ETHIOPIC_EXTENDED,
3955             CYRILLIC_EXTENDED_A,
3956             SUPPLEMENTAL_PUNCTUATION,
3957             CJK_RADICALS_SUPPLEMENT,
3958             KANGXI_RADICALS,
3959             null,
3960             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
3961             CJK_SYMBOLS_AND_PUNCTUATION,
3962             HIRAGANA,
3963             KATAKANA,
3964             BOPOMOFO,
3965             HANGUL_COMPATIBILITY_JAMO,
3966             KANBUN,
3967             BOPOMOFO_EXTENDED,
3968             CJK_STROKES,
3969             KATAKANA_PHONETIC_EXTENSIONS,
3970             ENCLOSED_CJK_LETTERS_AND_MONTHS,
3971             CJK_COMPATIBILITY,
3972             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
3973             YIJING_HEXAGRAM_SYMBOLS,
3974             CJK_UNIFIED_IDEOGRAPHS,
3975             YI_SYLLABLES,
3976             YI_RADICALS,
3977             LISU,
3978             VAI,
3979             CYRILLIC_EXTENDED_B,
3980             BAMUM,
3981             MODIFIER_TONE_LETTERS,
3982             LATIN_EXTENDED_D,
3983             SYLOTI_NAGRI,
3984             COMMON_INDIC_NUMBER_FORMS,
3985             PHAGS_PA,
3986             SAURASHTRA,
3987             DEVANAGARI_EXTENDED,
3988             KAYAH_LI,
3989             REJANG,
3990             HANGUL_JAMO_EXTENDED_A,
3991             JAVANESE,
3992             MYANMAR_EXTENDED_B,
3993             CHAM,
3994             MYANMAR_EXTENDED_A,
3995             TAI_VIET,
3996             MEETEI_MAYEK_EXTENSIONS,
3997             ETHIOPIC_EXTENDED_A,
3998             LATIN_EXTENDED_E,
3999             CHEROKEE_SUPPLEMENT,
4000             MEETEI_MAYEK,
4001             HANGUL_SYLLABLES,
4002             HANGUL_JAMO_EXTENDED_B,
4003             HIGH_SURROGATES,
4004             HIGH_PRIVATE_USE_SURROGATES,
4005             LOW_SURROGATES,
4006             PRIVATE_USE_AREA,
4007             CJK_COMPATIBILITY_IDEOGRAPHS,
4008             ALPHABETIC_PRESENTATION_FORMS,
4009             ARABIC_PRESENTATION_FORMS_A,
4010             VARIATION_SELECTORS,
4011             VERTICAL_FORMS,
4012             COMBINING_HALF_MARKS,
4013             CJK_COMPATIBILITY_FORMS,
4014             SMALL_FORM_VARIANTS,
4015             ARABIC_PRESENTATION_FORMS_B,
4016             HALFWIDTH_AND_FULLWIDTH_FORMS,
4017             SPECIALS,
4018             LINEAR_B_SYLLABARY,
4019             LINEAR_B_IDEOGRAMS,
4020             AEGEAN_NUMBERS,
4021             ANCIENT_GREEK_NUMBERS,
4022             ANCIENT_SYMBOLS,
4023             PHAISTOS_DISC,
4024             null,
4025             LYCIAN,
4026             CARIAN,
4027             COPTIC_EPACT_NUMBERS,
4028             OLD_ITALIC,
4029             GOTHIC,
4030             OLD_PERMIC,
4031             UGARITIC,
4032             OLD_PERSIAN,
4033             null,
4034             DESERET,
4035             SHAVIAN,
4036             OSMANYA,
4037             OSAGE,
4038             ELBASAN,
4039             CAUCASIAN_ALBANIAN,
4040             null,
4041             LINEAR_A,
4042             null,
4043             CYPRIOT_SYLLABARY,
4044             IMPERIAL_ARAMAIC,
4045             PALMYRENE,
4046             NABATAEAN,
4047             null,
4048             HATRAN,
4049             PHOENICIAN,
4050             LYDIAN,
4051             null,
4052             MEROITIC_HIEROGLYPHS,
4053             MEROITIC_CURSIVE,
4054             KHAROSHTHI,
4055             OLD_SOUTH_ARABIAN,
4056             OLD_NORTH_ARABIAN,
4057             null,
4058             MANICHAEAN,
4059             AVESTAN,
4060             INSCRIPTIONAL_PARTHIAN,
4061             INSCRIPTIONAL_PAHLAVI,
4062             PSALTER_PAHLAVI,
4063             null,
4064             OLD_TURKIC,
4065             null,
4066             OLD_HUNGARIAN,
4067             HANIFI_ROHINGYA,
4068             null,
4069             RUMI_NUMERAL_SYMBOLS,
4070             YEZIDI,
4071             null,
4072             OLD_SOGDIAN,
4073             SOGDIAN,
4074             null,
4075             CHORASMIAN,
4076             ELYMAIC,
4077             BRAHMI,
4078             KAITHI,
4079             SORA_SOMPENG,
4080             CHAKMA,
4081             MAHAJANI,
4082             SHARADA,
4083             SINHALA_ARCHAIC_NUMBERS,
4084             KHOJKI,
4085             null,
4086             MULTANI,
4087             KHUDAWADI,
4088             GRANTHA,
4089             null,
4090             NEWA,
4091             TIRHUTA,
4092             null,
4093             SIDDHAM,
4094             MODI,
4095             MONGOLIAN_SUPPLEMENT,
4096             TAKRI,
4097             null,
4098             AHOM,
4099             null,
4100             DOGRA,
4101             null,
4102             WARANG_CITI,
4103             DIVES_AKURU,
4104             null,
4105             NANDINAGARI,
4106             ZANABAZAR_SQUARE,
4107             SOYOMBO,
4108             null,
4109             PAU_CIN_HAU,
4110             null,
4111             BHAIKSUKI,
4112             MARCHEN,
4113             null,
4114             MASARAM_GONDI,
4115             GUNJALA_GONDI,
4116             null,
4117             MAKASAR,
4118             null,
4119             LISU_SUPPLEMENT,
4120             TAMIL_SUPPLEMENT,
4121             CUNEIFORM,
4122             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
4123             EARLY_DYNASTIC_CUNEIFORM,
4124             null,
4125             EGYPTIAN_HIEROGLYPHS,
4126             EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS,
4127             null,
4128             ANATOLIAN_HIEROGLYPHS,
4129             null,
4130             BAMUM_SUPPLEMENT,
4131             MRO,
4132             null,
4133             BASSA_VAH,
4134             PAHAWH_HMONG,
4135             null,
4136             MEDEFAIDRIN,
4137             null,
4138             MIAO,
4139             null,
4140             IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION,
4141             TANGUT,
4142             TANGUT_COMPONENTS,
4143             KHITAN_SMALL_SCRIPT,
4144             TANGUT_SUPPLEMENT,
4145             null,
4146             KANA_SUPPLEMENT,
4147             KANA_EXTENDED_A,
4148             SMALL_KANA_EXTENSION,
4149             NUSHU,
4150             null,
4151             DUPLOYAN,
4152             SHORTHAND_FORMAT_CONTROLS,
4153             null,
4154             BYZANTINE_MUSICAL_SYMBOLS,
4155             MUSICAL_SYMBOLS,
4156             ANCIENT_GREEK_MUSICAL_NOTATION,
4157             null,
4158             MAYAN_NUMERALS,
4159             TAI_XUAN_JING_SYMBOLS,
4160             COUNTING_ROD_NUMERALS,
4161             null,
4162             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
4163             SUTTON_SIGNWRITING,
4164             null,
4165             GLAGOLITIC_SUPPLEMENT,
4166             null,
4167             NYIAKENG_PUACHUE_HMONG,
4168             null,
4169             WANCHO,
4170             null,
4171             MENDE_KIKAKUI,
4172             null,
4173             ADLAM,
4174             null,
4175             INDIC_SIYAQ_NUMBERS,
4176             null,
4177             OTTOMAN_SIYAQ_NUMBERS,
4178             null,
4179             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
4180             null,
4181             MAHJONG_TILES,
4182             DOMINO_TILES,
4183             PLAYING_CARDS,
4184             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
4185             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
4186             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
4187             EMOTICONS,
4188             ORNAMENTAL_DINGBATS,
4189             TRANSPORT_AND_MAP_SYMBOLS,
4190             ALCHEMICAL_SYMBOLS,
4191             GEOMETRIC_SHAPES_EXTENDED,
4192             SUPPLEMENTAL_ARROWS_C,
4193             SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS,
4194             CHESS_SYMBOLS,
4195             SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A,
4196             SYMBOLS_FOR_LEGACY_COMPUTING,
4197             null,
4198             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
4199             null,
4200             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
4201             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
4202             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E,
4203             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F,
4204             null,
4205             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
4206             null,
4207             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G,
4208             null,
4209             TAGS,
4210             null,
4211             VARIATION_SELECTORS_SUPPLEMENT,
4212             null,
4213             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
4214             SUPPLEMENTARY_PRIVATE_USE_AREA_B,
4215         };
4216 
4217 
4218         /**
4219          * Returns the object representing the Unicode block containing the
4220          * given character, or {@code null} if the character is not a
4221          * member of a defined block.
4222          *
4223          * <p><b>Note:</b> This method cannot handle
4224          * <a href="Character.html#supplementary"> supplementary
4225          * characters</a>.  To support all Unicode characters, including
4226          * supplementary characters, use the {@link #of(int)} method.
4227          *
4228          * @param   c  The character in question
4229          * @return  The {@code UnicodeBlock} instance representing the
4230          *          Unicode block of which this character is a member, or
4231          *          {@code null} if the character is not a member of any
4232          *          Unicode block
4233          */
of(char c)4234         public static UnicodeBlock of(char c) {
4235             return of((int)c);
4236         }
4237 
4238         /**
4239          * Returns the object representing the Unicode block
4240          * containing the given character (Unicode code point), or
4241          * {@code null} if the character is not a member of a
4242          * defined block.
4243          *
4244          * @param   codePoint the character (Unicode code point) in question.
4245          * @return  The {@code UnicodeBlock} instance representing the
4246          *          Unicode block of which this character is a member, or
4247          *          {@code null} if the character is not a member of any
4248          *          Unicode block
4249          * @throws  IllegalArgumentException if the specified
4250          * {@code codePoint} is an invalid Unicode code point.
4251          * @see Character#isValidCodePoint(int)
4252          * @since   1.5
4253          */
of(int codePoint)4254         public static UnicodeBlock of(int codePoint) {
4255             if (!isValidCodePoint(codePoint)) {
4256                 throw new IllegalArgumentException(
4257                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
4258             }
4259 
4260             int top, bottom, current;
4261             bottom = 0;
4262             top = blockStarts.length;
4263             current = top/2;
4264 
4265             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
4266             while (top - bottom > 1) {
4267                 if (codePoint >= blockStarts[current]) {
4268                     bottom = current;
4269                 } else {
4270                     top = current;
4271                 }
4272                 current = (top + bottom) / 2;
4273             }
4274             return blocks[current];
4275         }
4276 
4277         /**
4278          * Returns the UnicodeBlock with the given name. Block
4279          * names are determined by The Unicode Standard. The file
4280          * {@code Blocks-<version>.txt} defines blocks for a particular
4281          * version of the standard. The {@link Character} class specifies
4282          * the version of the standard that it supports.
4283          * <p>
4284          * This method accepts block names in the following forms:
4285          * <ol>
4286          * <li> Canonical block names as defined by the Unicode Standard.
4287          * For example, the standard defines a "Basic Latin" block. Therefore, this
4288          * method accepts "Basic Latin" as a valid block name. The documentation of
4289          * each UnicodeBlock provides the canonical name.
4290          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
4291          * is a valid block name for the "Basic Latin" block.
4292          * <li>The text representation of each constant UnicodeBlock identifier.
4293          * For example, this method will return the {@link #BASIC_LATIN} block if
4294          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
4295          * hyphens in the canonical name with underscores.
4296          * </ol>
4297          * Finally, character case is ignored for all of the valid block name forms.
4298          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
4299          * The en_US locale's case mapping rules are used to provide case-insensitive
4300          * string comparisons for block name validation.
4301          * <p>
4302          * If the Unicode Standard changes block names, both the previous and
4303          * current names will be accepted.
4304          *
4305          * @param blockName A {@code UnicodeBlock} name.
4306          * @return The {@code UnicodeBlock} instance identified
4307          *         by {@code blockName}
4308          * @throws IllegalArgumentException if {@code blockName} is an
4309          *         invalid name
4310          * @throws NullPointerException if {@code blockName} is null
4311          * @since 1.5
4312          */
forName(String blockName)4313         public static final UnicodeBlock forName(String blockName) {
4314             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
4315             if (block == null) {
4316                 throw new IllegalArgumentException("Not a valid block name: "
4317                             + blockName);
4318             }
4319             return block;
4320         }
4321     }
4322 
4323 
4324     /**
4325      * A family of character subsets representing the character scripts
4326      * defined in the <a href="http://www.unicode.org/reports/tr24/">
4327      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
4328      * character is assigned to a single Unicode script, either a specific
4329      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
4330      * one of the following three special values,
4331      * {@link Character.UnicodeScript#INHERITED Inherited},
4332      * {@link Character.UnicodeScript#COMMON Common} or
4333      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
4334      *
4335      * @since 1.7
4336      */
4337     public static enum UnicodeScript {
4338         /**
4339          * Unicode script "Common".
4340          */
4341         COMMON,
4342 
4343         /**
4344          * Unicode script "Latin".
4345          */
4346         LATIN,
4347 
4348         /**
4349          * Unicode script "Greek".
4350          */
4351         GREEK,
4352 
4353         /**
4354          * Unicode script "Cyrillic".
4355          */
4356         CYRILLIC,
4357 
4358         /**
4359          * Unicode script "Armenian".
4360          */
4361         ARMENIAN,
4362 
4363         /**
4364          * Unicode script "Hebrew".
4365          */
4366         HEBREW,
4367 
4368         /**
4369          * Unicode script "Arabic".
4370          */
4371         ARABIC,
4372 
4373         /**
4374          * Unicode script "Syriac".
4375          */
4376         SYRIAC,
4377 
4378         /**
4379          * Unicode script "Thaana".
4380          */
4381         THAANA,
4382 
4383         /**
4384          * Unicode script "Devanagari".
4385          */
4386         DEVANAGARI,
4387 
4388         /**
4389          * Unicode script "Bengali".
4390          */
4391         BENGALI,
4392 
4393         /**
4394          * Unicode script "Gurmukhi".
4395          */
4396         GURMUKHI,
4397 
4398         /**
4399          * Unicode script "Gujarati".
4400          */
4401         GUJARATI,
4402 
4403         /**
4404          * Unicode script "Oriya".
4405          */
4406         ORIYA,
4407 
4408         /**
4409          * Unicode script "Tamil".
4410          */
4411         TAMIL,
4412 
4413         /**
4414          * Unicode script "Telugu".
4415          */
4416         TELUGU,
4417 
4418         /**
4419          * Unicode script "Kannada".
4420          */
4421         KANNADA,
4422 
4423         /**
4424          * Unicode script "Malayalam".
4425          */
4426         MALAYALAM,
4427 
4428         /**
4429          * Unicode script "Sinhala".
4430          */
4431         SINHALA,
4432 
4433         /**
4434          * Unicode script "Thai".
4435          */
4436         THAI,
4437 
4438         /**
4439          * Unicode script "Lao".
4440          */
4441         LAO,
4442 
4443         /**
4444          * Unicode script "Tibetan".
4445          */
4446         TIBETAN,
4447 
4448         /**
4449          * Unicode script "Myanmar".
4450          */
4451         MYANMAR,
4452 
4453         /**
4454          * Unicode script "Georgian".
4455          */
4456         GEORGIAN,
4457 
4458         /**
4459          * Unicode script "Hangul".
4460          */
4461         HANGUL,
4462 
4463         /**
4464          * Unicode script "Ethiopic".
4465          */
4466         ETHIOPIC,
4467 
4468         /**
4469          * Unicode script "Cherokee".
4470          */
4471         CHEROKEE,
4472 
4473         /**
4474          * Unicode script "Canadian_Aboriginal".
4475          */
4476         CANADIAN_ABORIGINAL,
4477 
4478         /**
4479          * Unicode script "Ogham".
4480          */
4481         OGHAM,
4482 
4483         /**
4484          * Unicode script "Runic".
4485          */
4486         RUNIC,
4487 
4488         /**
4489          * Unicode script "Khmer".
4490          */
4491         KHMER,
4492 
4493         /**
4494          * Unicode script "Mongolian".
4495          */
4496         MONGOLIAN,
4497 
4498         /**
4499          * Unicode script "Hiragana".
4500          */
4501         HIRAGANA,
4502 
4503         /**
4504          * Unicode script "Katakana".
4505          */
4506         KATAKANA,
4507 
4508         /**
4509          * Unicode script "Bopomofo".
4510          */
4511         BOPOMOFO,
4512 
4513         /**
4514          * Unicode script "Han".
4515          */
4516         HAN,
4517 
4518         /**
4519          * Unicode script "Yi".
4520          */
4521         YI,
4522 
4523         /**
4524          * Unicode script "Old_Italic".
4525          */
4526         OLD_ITALIC,
4527 
4528         /**
4529          * Unicode script "Gothic".
4530          */
4531         GOTHIC,
4532 
4533         /**
4534          * Unicode script "Deseret".
4535          */
4536         DESERET,
4537 
4538         /**
4539          * Unicode script "Inherited".
4540          */
4541         INHERITED,
4542 
4543         /**
4544          * Unicode script "Tagalog".
4545          */
4546         TAGALOG,
4547 
4548         /**
4549          * Unicode script "Hanunoo".
4550          */
4551         HANUNOO,
4552 
4553         /**
4554          * Unicode script "Buhid".
4555          */
4556         BUHID,
4557 
4558         /**
4559          * Unicode script "Tagbanwa".
4560          */
4561         TAGBANWA,
4562 
4563         /**
4564          * Unicode script "Limbu".
4565          */
4566         LIMBU,
4567 
4568         /**
4569          * Unicode script "Tai_Le".
4570          */
4571         TAI_LE,
4572 
4573         /**
4574          * Unicode script "Linear_B".
4575          */
4576         LINEAR_B,
4577 
4578         /**
4579          * Unicode script "Ugaritic".
4580          */
4581         UGARITIC,
4582 
4583         /**
4584          * Unicode script "Shavian".
4585          */
4586         SHAVIAN,
4587 
4588         /**
4589          * Unicode script "Osmanya".
4590          */
4591         OSMANYA,
4592 
4593         /**
4594          * Unicode script "Cypriot".
4595          */
4596         CYPRIOT,
4597 
4598         /**
4599          * Unicode script "Braille".
4600          */
4601         BRAILLE,
4602 
4603         /**
4604          * Unicode script "Buginese".
4605          */
4606         BUGINESE,
4607 
4608         /**
4609          * Unicode script "Coptic".
4610          */
4611         COPTIC,
4612 
4613         /**
4614          * Unicode script "New_Tai_Lue".
4615          */
4616         NEW_TAI_LUE,
4617 
4618         /**
4619          * Unicode script "Glagolitic".
4620          */
4621         GLAGOLITIC,
4622 
4623         /**
4624          * Unicode script "Tifinagh".
4625          */
4626         TIFINAGH,
4627 
4628         /**
4629          * Unicode script "Syloti_Nagri".
4630          */
4631         SYLOTI_NAGRI,
4632 
4633         /**
4634          * Unicode script "Old_Persian".
4635          */
4636         OLD_PERSIAN,
4637 
4638         /**
4639          * Unicode script "Kharoshthi".
4640          */
4641         KHAROSHTHI,
4642 
4643         /**
4644          * Unicode script "Balinese".
4645          */
4646         BALINESE,
4647 
4648         /**
4649          * Unicode script "Cuneiform".
4650          */
4651         CUNEIFORM,
4652 
4653         /**
4654          * Unicode script "Phoenician".
4655          */
4656         PHOENICIAN,
4657 
4658         /**
4659          * Unicode script "Phags_Pa".
4660          */
4661         PHAGS_PA,
4662 
4663         /**
4664          * Unicode script "Nko".
4665          */
4666         NKO,
4667 
4668         /**
4669          * Unicode script "Sundanese".
4670          */
4671         SUNDANESE,
4672 
4673         /**
4674          * Unicode script "Batak".
4675          */
4676         BATAK,
4677 
4678         /**
4679          * Unicode script "Lepcha".
4680          */
4681         LEPCHA,
4682 
4683         /**
4684          * Unicode script "Ol_Chiki".
4685          */
4686         OL_CHIKI,
4687 
4688         /**
4689          * Unicode script "Vai".
4690          */
4691         VAI,
4692 
4693         /**
4694          * Unicode script "Saurashtra".
4695          */
4696         SAURASHTRA,
4697 
4698         /**
4699          * Unicode script "Kayah_Li".
4700          */
4701         KAYAH_LI,
4702 
4703         /**
4704          * Unicode script "Rejang".
4705          */
4706         REJANG,
4707 
4708         /**
4709          * Unicode script "Lycian".
4710          */
4711         LYCIAN,
4712 
4713         /**
4714          * Unicode script "Carian".
4715          */
4716         CARIAN,
4717 
4718         /**
4719          * Unicode script "Lydian".
4720          */
4721         LYDIAN,
4722 
4723         /**
4724          * Unicode script "Cham".
4725          */
4726         CHAM,
4727 
4728         /**
4729          * Unicode script "Tai_Tham".
4730          */
4731         TAI_THAM,
4732 
4733         /**
4734          * Unicode script "Tai_Viet".
4735          */
4736         TAI_VIET,
4737 
4738         /**
4739          * Unicode script "Avestan".
4740          */
4741         AVESTAN,
4742 
4743         /**
4744          * Unicode script "Egyptian_Hieroglyphs".
4745          */
4746         EGYPTIAN_HIEROGLYPHS,
4747 
4748         /**
4749          * Unicode script "Samaritan".
4750          */
4751         SAMARITAN,
4752 
4753         /**
4754          * Unicode script "Mandaic".
4755          */
4756         MANDAIC,
4757 
4758         /**
4759          * Unicode script "Lisu".
4760          */
4761         LISU,
4762 
4763         /**
4764          * Unicode script "Bamum".
4765          */
4766         BAMUM,
4767 
4768         /**
4769          * Unicode script "Javanese".
4770          */
4771         JAVANESE,
4772 
4773         /**
4774          * Unicode script "Meetei_Mayek".
4775          */
4776         MEETEI_MAYEK,
4777 
4778         /**
4779          * Unicode script "Imperial_Aramaic".
4780          */
4781         IMPERIAL_ARAMAIC,
4782 
4783         /**
4784          * Unicode script "Old_South_Arabian".
4785          */
4786         OLD_SOUTH_ARABIAN,
4787 
4788         /**
4789          * Unicode script "Inscriptional_Parthian".
4790          */
4791         INSCRIPTIONAL_PARTHIAN,
4792 
4793         /**
4794          * Unicode script "Inscriptional_Pahlavi".
4795          */
4796         INSCRIPTIONAL_PAHLAVI,
4797 
4798         /**
4799          * Unicode script "Old_Turkic".
4800          */
4801         OLD_TURKIC,
4802 
4803         /**
4804          * Unicode script "Brahmi".
4805          */
4806         BRAHMI,
4807 
4808         /**
4809          * Unicode script "Kaithi".
4810          */
4811         KAITHI,
4812 
4813         /**
4814          * Unicode script "Meroitic Hieroglyphs".
4815          * @since 1.8
4816          */
4817         MEROITIC_HIEROGLYPHS,
4818 
4819         /**
4820          * Unicode script "Meroitic Cursive".
4821          * @since 1.8
4822          */
4823         MEROITIC_CURSIVE,
4824 
4825         /**
4826          * Unicode script "Sora Sompeng".
4827          * @since 1.8
4828          */
4829         SORA_SOMPENG,
4830 
4831         /**
4832          * Unicode script "Chakma".
4833          * @since 1.8
4834          */
4835         CHAKMA,
4836 
4837         /**
4838          * Unicode script "Sharada".
4839          * @since 1.8
4840          */
4841         SHARADA,
4842 
4843         /**
4844          * Unicode script "Takri".
4845          * @since 1.8
4846          */
4847         TAKRI,
4848 
4849         /**
4850          * Unicode script "Miao".
4851          * @since 1.8
4852          */
4853         MIAO,
4854 
4855         /**
4856          * Unicode script "Caucasian Albanian".
4857          * @since 9
4858          */
4859         CAUCASIAN_ALBANIAN,
4860 
4861         /**
4862          * Unicode script "Bassa Vah".
4863          * @since 9
4864          */
4865         BASSA_VAH,
4866 
4867         /**
4868          * Unicode script "Duployan".
4869          * @since 9
4870          */
4871         DUPLOYAN,
4872 
4873         /**
4874          * Unicode script "Elbasan".
4875          * @since 9
4876          */
4877         ELBASAN,
4878 
4879         /**
4880          * Unicode script "Grantha".
4881          * @since 9
4882          */
4883         GRANTHA,
4884 
4885         /**
4886          * Unicode script "Pahawh Hmong".
4887          * @since 9
4888          */
4889         PAHAWH_HMONG,
4890 
4891         /**
4892          * Unicode script "Khojki".
4893          * @since 9
4894          */
4895         KHOJKI,
4896 
4897         /**
4898          * Unicode script "Linear A".
4899          * @since 9
4900          */
4901         LINEAR_A,
4902 
4903         /**
4904          * Unicode script "Mahajani".
4905          * @since 9
4906          */
4907         MAHAJANI,
4908 
4909         /**
4910          * Unicode script "Manichaean".
4911          * @since 9
4912          */
4913         MANICHAEAN,
4914 
4915         /**
4916          * Unicode script "Mende Kikakui".
4917          * @since 9
4918          */
4919         MENDE_KIKAKUI,
4920 
4921         /**
4922          * Unicode script "Modi".
4923          * @since 9
4924          */
4925         MODI,
4926 
4927         /**
4928          * Unicode script "Mro".
4929          * @since 9
4930          */
4931         MRO,
4932 
4933         /**
4934          * Unicode script "Old North Arabian".
4935          * @since 9
4936          */
4937         OLD_NORTH_ARABIAN,
4938 
4939         /**
4940          * Unicode script "Nabataean".
4941          * @since 9
4942          */
4943         NABATAEAN,
4944 
4945         /**
4946          * Unicode script "Palmyrene".
4947          * @since 9
4948          */
4949         PALMYRENE,
4950 
4951         /**
4952          * Unicode script "Pau Cin Hau".
4953          * @since 9
4954          */
4955         PAU_CIN_HAU,
4956 
4957         /**
4958          * Unicode script "Old Permic".
4959          * @since 9
4960          */
4961         OLD_PERMIC,
4962 
4963         /**
4964          * Unicode script "Psalter Pahlavi".
4965          * @since 9
4966          */
4967         PSALTER_PAHLAVI,
4968 
4969         /**
4970          * Unicode script "Siddham".
4971          * @since 9
4972          */
4973         SIDDHAM,
4974 
4975         /**
4976          * Unicode script "Khudawadi".
4977          * @since 9
4978          */
4979         KHUDAWADI,
4980 
4981         /**
4982          * Unicode script "Tirhuta".
4983          * @since 9
4984          */
4985         TIRHUTA,
4986 
4987         /**
4988          * Unicode script "Warang Citi".
4989          * @since 9
4990          */
4991         WARANG_CITI,
4992 
4993         /**
4994          * Unicode script "Ahom".
4995          * @since 9
4996          */
4997         AHOM,
4998 
4999         /**
5000          * Unicode script "Anatolian Hieroglyphs".
5001          * @since 9
5002          */
5003         ANATOLIAN_HIEROGLYPHS,
5004 
5005         /**
5006          * Unicode script "Hatran".
5007          * @since 9
5008          */
5009         HATRAN,
5010 
5011         /**
5012          * Unicode script "Multani".
5013          * @since 9
5014          */
5015         MULTANI,
5016 
5017         /**
5018          * Unicode script "Old Hungarian".
5019          * @since 9
5020          */
5021         OLD_HUNGARIAN,
5022 
5023         /**
5024          * Unicode script "SignWriting".
5025          * @since 9
5026          */
5027         SIGNWRITING,
5028 
5029         /**
5030          * Unicode script "Adlam".
5031          * @since 11
5032          */
5033         ADLAM,
5034 
5035         /**
5036          * Unicode script "Bhaiksuki".
5037          * @since 11
5038          */
5039         BHAIKSUKI,
5040 
5041         /**
5042          * Unicode script "Marchen".
5043          * @since 11
5044          */
5045         MARCHEN,
5046 
5047         /**
5048          * Unicode script "Newa".
5049          * @since 11
5050          */
5051         NEWA,
5052 
5053         /**
5054          * Unicode script "Osage".
5055          * @since 11
5056          */
5057         OSAGE,
5058 
5059         /**
5060          * Unicode script "Tangut".
5061          * @since 11
5062          */
5063         TANGUT,
5064 
5065         /**
5066          * Unicode script "Masaram Gondi".
5067          * @since 11
5068          */
5069         MASARAM_GONDI,
5070 
5071         /**
5072          * Unicode script "Nushu".
5073          * @since 11
5074          */
5075         NUSHU,
5076 
5077         /**
5078          * Unicode script "Soyombo".
5079          * @since 11
5080          */
5081         SOYOMBO,
5082 
5083         /**
5084          * Unicode script "Zanabazar Square".
5085          * @since 11
5086          */
5087         ZANABAZAR_SQUARE,
5088 
5089         /**
5090          * Unicode script "Hanifi Rohingya".
5091          * @since 12
5092          */
5093         HANIFI_ROHINGYA,
5094 
5095         /**
5096          * Unicode script "Old Sogdian".
5097          * @since 12
5098          */
5099         OLD_SOGDIAN,
5100 
5101         /**
5102          * Unicode script "Sogdian".
5103          * @since 12
5104          */
5105         SOGDIAN,
5106 
5107         /**
5108          * Unicode script "Dogra".
5109          * @since 12
5110          */
5111         DOGRA,
5112 
5113         /**
5114          * Unicode script "Gunjala Gondi".
5115          * @since 12
5116          */
5117         GUNJALA_GONDI,
5118 
5119         /**
5120          * Unicode script "Makasar".
5121          * @since 12
5122          */
5123         MAKASAR,
5124 
5125         /**
5126          * Unicode script "Medefaidrin".
5127          * @since 12
5128          */
5129         MEDEFAIDRIN,
5130 
5131         /**
5132          * Unicode script "Elymaic".
5133          * @since 13
5134          */
5135         ELYMAIC,
5136 
5137         /**
5138          * Unicode script "Nandinagari".
5139          * @since 13
5140          */
5141         NANDINAGARI,
5142 
5143         /**
5144          * Unicode script "Nyiakeng Puachue Hmong".
5145          * @since 13
5146          */
5147         NYIAKENG_PUACHUE_HMONG,
5148 
5149         /**
5150          * Unicode script "Wancho".
5151          * @since 13
5152          */
5153         WANCHO,
5154 
5155         /**
5156          * Unicode script "Yezidi".
5157          * @since 15
5158          */
5159         YEZIDI,
5160 
5161         /**
5162          * Unicode script "Chorasmian".
5163          * @since 15
5164          */
5165         CHORASMIAN,
5166 
5167         /**
5168          * Unicode script "Dives Akuru".
5169          * @since 15
5170          */
5171         DIVES_AKURU,
5172 
5173         /**
5174          * Unicode script "Khitan Small Script".
5175          * @since 15
5176          */
5177         KHITAN_SMALL_SCRIPT,
5178 
5179         /**
5180          * Unicode script "Unknown".
5181          */
5182         UNKNOWN;
5183 
5184         private static final int[] scriptStarts = {
5185             0x0000,   // 0000..0040; COMMON
5186             0x0041,   // 0041..005A; LATIN
5187             0x005B,   // 005B..0060; COMMON
5188             0x0061,   // 0061..007A; LATIN
5189             0x007B,   // 007B..00A9; COMMON
5190             0x00AA,   // 00AA      ; LATIN
5191             0x00AB,   // 00AB..00B9; COMMON
5192             0x00BA,   // 00BA      ; LATIN
5193             0x00BB,   // 00BB..00BF; COMMON
5194             0x00C0,   // 00C0..00D6; LATIN
5195             0x00D7,   // 00D7      ; COMMON
5196             0x00D8,   // 00D8..00F6; LATIN
5197             0x00F7,   // 00F7      ; COMMON
5198             0x00F8,   // 00F8..02B8; LATIN
5199             0x02B9,   // 02B9..02DF; COMMON
5200             0x02E0,   // 02E0..02E4; LATIN
5201             0x02E5,   // 02E5..02E9; COMMON
5202             0x02EA,   // 02EA..02EB; BOPOMOFO
5203             0x02EC,   // 02EC..02FF; COMMON
5204             0x0300,   // 0300..036F; INHERITED
5205             0x0370,   // 0370..0373; GREEK
5206             0x0374,   // 0374      ; COMMON
5207             0x0375,   // 0375..0377; GREEK
5208             0x0378,   // 0378..0379; UNKNOWN
5209             0x037A,   // 037A..037D; GREEK
5210             0x037E,   // 037E      ; COMMON
5211             0x037F,   // 037F      ; GREEK
5212             0x0380,   // 0380..0383; UNKNOWN
5213             0x0384,   // 0384      ; GREEK
5214             0x0385,   // 0385      ; COMMON
5215             0x0386,   // 0386      ; GREEK
5216             0x0387,   // 0387      ; COMMON
5217             0x0388,   // 0388..038A; GREEK
5218             0x038B,   // 038B      ; UNKNOWN
5219             0x038C,   // 038C      ; GREEK
5220             0x038D,   // 038D      ; UNKNOWN
5221             0x038E,   // 038E..03A1; GREEK
5222             0x03A2,   // 03A2      ; UNKNOWN
5223             0x03A3,   // 03A3..03E1; GREEK
5224             0x03E2,   // 03E2..03EF; COPTIC
5225             0x03F0,   // 03F0..03FF; GREEK
5226             0x0400,   // 0400..0484; CYRILLIC
5227             0x0485,   // 0485..0486; INHERITED
5228             0x0487,   // 0487..052F; CYRILLIC
5229             0x0530,   // 0530      ; UNKNOWN
5230             0x0531,   // 0531..0556; ARMENIAN
5231             0x0557,   // 0557..0558; UNKNOWN
5232             0x0559,   // 0559..058A; ARMENIAN
5233             0x058B,   // 058B..058C; UNKNOWN
5234             0x058D,   // 058D..058F; ARMENIAN
5235             0x0590,   // 0590      ; UNKNOWN
5236             0x0591,   // 0591..05C7; HEBREW
5237             0x05C8,   // 05C8..05CF; UNKNOWN
5238             0x05D0,   // 05D0..05EA; HEBREW
5239             0x05EB,   // 05EB..05EE; UNKNOWN
5240             0x05EF,   // 05EF..05F4; HEBREW
5241             0x05F5,   // 05F5..05FF; UNKNOWN
5242             0x0600,   // 0600..0604; ARABIC
5243             0x0605,   // 0605      ; COMMON
5244             0x0606,   // 0606..060B; ARABIC
5245             0x060C,   // 060C      ; COMMON
5246             0x060D,   // 060D..061A; ARABIC
5247             0x061B,   // 061B      ; COMMON
5248             0x061C,   // 061C      ; ARABIC
5249             0x061D,   // 061D      ; UNKNOWN
5250             0x061E,   // 061E      ; ARABIC
5251             0x061F,   // 061F      ; COMMON
5252             0x0620,   // 0620..063F; ARABIC
5253             0x0640,   // 0640      ; COMMON
5254             0x0641,   // 0641..064A; ARABIC
5255             0x064B,   // 064B..0655; INHERITED
5256             0x0656,   // 0656..066F; ARABIC
5257             0x0670,   // 0670      ; INHERITED
5258             0x0671,   // 0671..06DC; ARABIC
5259             0x06DD,   // 06DD      ; COMMON
5260             0x06DE,   // 06DE..06FF; ARABIC
5261             0x0700,   // 0700..070D; SYRIAC
5262             0x070E,   // 070E      ; UNKNOWN
5263             0x070F,   // 070F..074A; SYRIAC
5264             0x074B,   // 074B..074C; UNKNOWN
5265             0x074D,   // 074D..074F; SYRIAC
5266             0x0750,   // 0750..077F; ARABIC
5267             0x0780,   // 0780..07B1; THAANA
5268             0x07B2,   // 07B2..07BF; UNKNOWN
5269             0x07C0,   // 07C0..07FA; NKO
5270             0x07FB,   // 07FB..07FC; UNKNOWN
5271             0x07FD,   // 07FD..07FF; NKO
5272             0x0800,   // 0800..082D; SAMARITAN
5273             0x082E,   // 082E..082F; UNKNOWN
5274             0x0830,   // 0830..083E; SAMARITAN
5275             0x083F,   // 083F      ; UNKNOWN
5276             0x0840,   // 0840..085B; MANDAIC
5277             0x085C,   // 085C..085D; UNKNOWN
5278             0x085E,   // 085E      ; MANDAIC
5279             0x085F,   // 085F      ; UNKNOWN
5280             0x0860,   // 0860..086A; SYRIAC
5281             0x086B,   // 086B..089F; UNKNOWN
5282             0x08A0,   // 08A0..08B4; ARABIC
5283             0x08B5,   // 08B5      ; UNKNOWN
5284             0x08B6,   // 08B6..08C7; ARABIC
5285             0x08C8,   // 08C8..08D2; UNKNOWN
5286             0x08D3,   // 08D3..08E1; ARABIC
5287             0x08E2,   // 08E2      ; COMMON
5288             0x08E3,   // 08E3..08FF; ARABIC
5289             0x0900,   // 0900..0950; DEVANAGARI
5290             0x0951,   // 0951..0954; INHERITED
5291             0x0955,   // 0955..0963; DEVANAGARI
5292             0x0964,   // 0964..0965; COMMON
5293             0x0966,   // 0966..097F; DEVANAGARI
5294             0x0980,   // 0980..0983; BENGALI
5295             0x0984,   // 0984      ; UNKNOWN
5296             0x0985,   // 0985..098C; BENGALI
5297             0x098D,   // 098D..098E; UNKNOWN
5298             0x098F,   // 098F..0990; BENGALI
5299             0x0991,   // 0991..0992; UNKNOWN
5300             0x0993,   // 0993..09A8; BENGALI
5301             0x09A9,   // 09A9      ; UNKNOWN
5302             0x09AA,   // 09AA..09B0; BENGALI
5303             0x09B1,   // 09B1      ; UNKNOWN
5304             0x09B2,   // 09B2      ; BENGALI
5305             0x09B3,   // 09B3..09B5; UNKNOWN
5306             0x09B6,   // 09B6..09B9; BENGALI
5307             0x09BA,   // 09BA..09BB; UNKNOWN
5308             0x09BC,   // 09BC..09C4; BENGALI
5309             0x09C5,   // 09C5..09C6; UNKNOWN
5310             0x09C7,   // 09C7..09C8; BENGALI
5311             0x09C9,   // 09C9..09CA; UNKNOWN
5312             0x09CB,   // 09CB..09CE; BENGALI
5313             0x09CF,   // 09CF..09D6; UNKNOWN
5314             0x09D7,   // 09D7      ; BENGALI
5315             0x09D8,   // 09D8..09DB; UNKNOWN
5316             0x09DC,   // 09DC..09DD; BENGALI
5317             0x09DE,   // 09DE      ; UNKNOWN
5318             0x09DF,   // 09DF..09E3; BENGALI
5319             0x09E4,   // 09E4..09E5; UNKNOWN
5320             0x09E6,   // 09E6..09FE; BENGALI
5321             0x09FF,   // 09FF..0A00; UNKNOWN
5322             0x0A01,   // 0A01..0A03; GURMUKHI
5323             0x0A04,   // 0A04      ; UNKNOWN
5324             0x0A05,   // 0A05..0A0A; GURMUKHI
5325             0x0A0B,   // 0A0B..0A0E; UNKNOWN
5326             0x0A0F,   // 0A0F..0A10; GURMUKHI
5327             0x0A11,   // 0A11..0A12; UNKNOWN
5328             0x0A13,   // 0A13..0A28; GURMUKHI
5329             0x0A29,   // 0A29      ; UNKNOWN
5330             0x0A2A,   // 0A2A..0A30; GURMUKHI
5331             0x0A31,   // 0A31      ; UNKNOWN
5332             0x0A32,   // 0A32..0A33; GURMUKHI
5333             0x0A34,   // 0A34      ; UNKNOWN
5334             0x0A35,   // 0A35..0A36; GURMUKHI
5335             0x0A37,   // 0A37      ; UNKNOWN
5336             0x0A38,   // 0A38..0A39; GURMUKHI
5337             0x0A3A,   // 0A3A..0A3B; UNKNOWN
5338             0x0A3C,   // 0A3C      ; GURMUKHI
5339             0x0A3D,   // 0A3D      ; UNKNOWN
5340             0x0A3E,   // 0A3E..0A42; GURMUKHI
5341             0x0A43,   // 0A43..0A46; UNKNOWN
5342             0x0A47,   // 0A47..0A48; GURMUKHI
5343             0x0A49,   // 0A49..0A4A; UNKNOWN
5344             0x0A4B,   // 0A4B..0A4D; GURMUKHI
5345             0x0A4E,   // 0A4E..0A50; UNKNOWN
5346             0x0A51,   // 0A51      ; GURMUKHI
5347             0x0A52,   // 0A52..0A58; UNKNOWN
5348             0x0A59,   // 0A59..0A5C; GURMUKHI
5349             0x0A5D,   // 0A5D      ; UNKNOWN
5350             0x0A5E,   // 0A5E      ; GURMUKHI
5351             0x0A5F,   // 0A5F..0A65; UNKNOWN
5352             0x0A66,   // 0A66..0A76; GURMUKHI
5353             0x0A77,   // 0A77..0A80; UNKNOWN
5354             0x0A81,   // 0A81..0A83; GUJARATI
5355             0x0A84,   // 0A84      ; UNKNOWN
5356             0x0A85,   // 0A85..0A8D; GUJARATI
5357             0x0A8E,   // 0A8E      ; UNKNOWN
5358             0x0A8F,   // 0A8F..0A91; GUJARATI
5359             0x0A92,   // 0A92      ; UNKNOWN
5360             0x0A93,   // 0A93..0AA8; GUJARATI
5361             0x0AA9,   // 0AA9      ; UNKNOWN
5362             0x0AAA,   // 0AAA..0AB0; GUJARATI
5363             0x0AB1,   // 0AB1      ; UNKNOWN
5364             0x0AB2,   // 0AB2..0AB3; GUJARATI
5365             0x0AB4,   // 0AB4      ; UNKNOWN
5366             0x0AB5,   // 0AB5..0AB9; GUJARATI
5367             0x0ABA,   // 0ABA..0ABB; UNKNOWN
5368             0x0ABC,   // 0ABC..0AC5; GUJARATI
5369             0x0AC6,   // 0AC6      ; UNKNOWN
5370             0x0AC7,   // 0AC7..0AC9; GUJARATI
5371             0x0ACA,   // 0ACA      ; UNKNOWN
5372             0x0ACB,   // 0ACB..0ACD; GUJARATI
5373             0x0ACE,   // 0ACE..0ACF; UNKNOWN
5374             0x0AD0,   // 0AD0      ; GUJARATI
5375             0x0AD1,   // 0AD1..0ADF; UNKNOWN
5376             0x0AE0,   // 0AE0..0AE3; GUJARATI
5377             0x0AE4,   // 0AE4..0AE5; UNKNOWN
5378             0x0AE6,   // 0AE6..0AF1; GUJARATI
5379             0x0AF2,   // 0AF2..0AF8; UNKNOWN
5380             0x0AF9,   // 0AF9..0AFF; GUJARATI
5381             0x0B00,   // 0B00      ; UNKNOWN
5382             0x0B01,   // 0B01..0B03; ORIYA
5383             0x0B04,   // 0B04      ; UNKNOWN
5384             0x0B05,   // 0B05..0B0C; ORIYA
5385             0x0B0D,   // 0B0D..0B0E; UNKNOWN
5386             0x0B0F,   // 0B0F..0B10; ORIYA
5387             0x0B11,   // 0B11..0B12; UNKNOWN
5388             0x0B13,   // 0B13..0B28; ORIYA
5389             0x0B29,   // 0B29      ; UNKNOWN
5390             0x0B2A,   // 0B2A..0B30; ORIYA
5391             0x0B31,   // 0B31      ; UNKNOWN
5392             0x0B32,   // 0B32..0B33; ORIYA
5393             0x0B34,   // 0B34      ; UNKNOWN
5394             0x0B35,   // 0B35..0B39; ORIYA
5395             0x0B3A,   // 0B3A..0B3B; UNKNOWN
5396             0x0B3C,   // 0B3C..0B44; ORIYA
5397             0x0B45,   // 0B45..0B46; UNKNOWN
5398             0x0B47,   // 0B47..0B48; ORIYA
5399             0x0B49,   // 0B49..0B4A; UNKNOWN
5400             0x0B4B,   // 0B4B..0B4D; ORIYA
5401             0x0B4E,   // 0B4E..0B54; UNKNOWN
5402             0x0B55,   // 0B55..0B57; ORIYA
5403             0x0B58,   // 0B58..0B5B; UNKNOWN
5404             0x0B5C,   // 0B5C..0B5D; ORIYA
5405             0x0B5E,   // 0B5E      ; UNKNOWN
5406             0x0B5F,   // 0B5F..0B63; ORIYA
5407             0x0B64,   // 0B64..0B65; UNKNOWN
5408             0x0B66,   // 0B66..0B77; ORIYA
5409             0x0B78,   // 0B78..0B81; UNKNOWN
5410             0x0B82,   // 0B82..0B83; TAMIL
5411             0x0B84,   // 0B84      ; UNKNOWN
5412             0x0B85,   // 0B85..0B8A; TAMIL
5413             0x0B8B,   // 0B8B..0B8D; UNKNOWN
5414             0x0B8E,   // 0B8E..0B90; TAMIL
5415             0x0B91,   // 0B91      ; UNKNOWN
5416             0x0B92,   // 0B92..0B95; TAMIL
5417             0x0B96,   // 0B96..0B98; UNKNOWN
5418             0x0B99,   // 0B99..0B9A; TAMIL
5419             0x0B9B,   // 0B9B      ; UNKNOWN
5420             0x0B9C,   // 0B9C      ; TAMIL
5421             0x0B9D,   // 0B9D      ; UNKNOWN
5422             0x0B9E,   // 0B9E..0B9F; TAMIL
5423             0x0BA0,   // 0BA0..0BA2; UNKNOWN
5424             0x0BA3,   // 0BA3..0BA4; TAMIL
5425             0x0BA5,   // 0BA5..0BA7; UNKNOWN
5426             0x0BA8,   // 0BA8..0BAA; TAMIL
5427             0x0BAB,   // 0BAB..0BAD; UNKNOWN
5428             0x0BAE,   // 0BAE..0BB9; TAMIL
5429             0x0BBA,   // 0BBA..0BBD; UNKNOWN
5430             0x0BBE,   // 0BBE..0BC2; TAMIL
5431             0x0BC3,   // 0BC3..0BC5; UNKNOWN
5432             0x0BC6,   // 0BC6..0BC8; TAMIL
5433             0x0BC9,   // 0BC9      ; UNKNOWN
5434             0x0BCA,   // 0BCA..0BCD; TAMIL
5435             0x0BCE,   // 0BCE..0BCF; UNKNOWN
5436             0x0BD0,   // 0BD0      ; TAMIL
5437             0x0BD1,   // 0BD1..0BD6; UNKNOWN
5438             0x0BD7,   // 0BD7      ; TAMIL
5439             0x0BD8,   // 0BD8..0BE5; UNKNOWN
5440             0x0BE6,   // 0BE6..0BFA; TAMIL
5441             0x0BFB,   // 0BFB..0BFF; UNKNOWN
5442             0x0C00,   // 0C00..0C0C; TELUGU
5443             0x0C0D,   // 0C0D      ; UNKNOWN
5444             0x0C0E,   // 0C0E..0C10; TELUGU
5445             0x0C11,   // 0C11      ; UNKNOWN
5446             0x0C12,   // 0C12..0C28; TELUGU
5447             0x0C29,   // 0C29      ; UNKNOWN
5448             0x0C2A,   // 0C2A..0C39; TELUGU
5449             0x0C3A,   // 0C3A..0C3C; UNKNOWN
5450             0x0C3D,   // 0C3D..0C44; TELUGU
5451             0x0C45,   // 0C45      ; UNKNOWN
5452             0x0C46,   // 0C46..0C48; TELUGU
5453             0x0C49,   // 0C49      ; UNKNOWN
5454             0x0C4A,   // 0C4A..0C4D; TELUGU
5455             0x0C4E,   // 0C4E..0C54; UNKNOWN
5456             0x0C55,   // 0C55..0C56; TELUGU
5457             0x0C57,   // 0C57      ; UNKNOWN
5458             0x0C58,   // 0C58..0C5A; TELUGU
5459             0x0C5B,   // 0C5B..0C5F; UNKNOWN
5460             0x0C60,   // 0C60..0C63; TELUGU
5461             0x0C64,   // 0C64..0C65; UNKNOWN
5462             0x0C66,   // 0C66..0C6F; TELUGU
5463             0x0C70,   // 0C70..0C76; UNKNOWN
5464             0x0C77,   // 0C77..0C7F; TELUGU
5465             0x0C80,   // 0C80..0C8C; KANNADA
5466             0x0C8D,   // 0C8D      ; UNKNOWN
5467             0x0C8E,   // 0C8E..0C90; KANNADA
5468             0x0C91,   // 0C91      ; UNKNOWN
5469             0x0C92,   // 0C92..0CA8; KANNADA
5470             0x0CA9,   // 0CA9      ; UNKNOWN
5471             0x0CAA,   // 0CAA..0CB3; KANNADA
5472             0x0CB4,   // 0CB4      ; UNKNOWN
5473             0x0CB5,   // 0CB5..0CB9; KANNADA
5474             0x0CBA,   // 0CBA..0CBB; UNKNOWN
5475             0x0CBC,   // 0CBC..0CC4; KANNADA
5476             0x0CC5,   // 0CC5      ; UNKNOWN
5477             0x0CC6,   // 0CC6..0CC8; KANNADA
5478             0x0CC9,   // 0CC9      ; UNKNOWN
5479             0x0CCA,   // 0CCA..0CCD; KANNADA
5480             0x0CCE,   // 0CCE..0CD4; UNKNOWN
5481             0x0CD5,   // 0CD5..0CD6; KANNADA
5482             0x0CD7,   // 0CD7..0CDD; UNKNOWN
5483             0x0CDE,   // 0CDE      ; KANNADA
5484             0x0CDF,   // 0CDF      ; UNKNOWN
5485             0x0CE0,   // 0CE0..0CE3; KANNADA
5486             0x0CE4,   // 0CE4..0CE5; UNKNOWN
5487             0x0CE6,   // 0CE6..0CEF; KANNADA
5488             0x0CF0,   // 0CF0      ; UNKNOWN
5489             0x0CF1,   // 0CF1..0CF2; KANNADA
5490             0x0CF3,   // 0CF3..0CFF; UNKNOWN
5491             0x0D00,   // 0D00..0D0C; MALAYALAM
5492             0x0D0D,   // 0D0D      ; UNKNOWN
5493             0x0D0E,   // 0D0E..0D10; MALAYALAM
5494             0x0D11,   // 0D11      ; UNKNOWN
5495             0x0D12,   // 0D12..0D44; MALAYALAM
5496             0x0D45,   // 0D45      ; UNKNOWN
5497             0x0D46,   // 0D46..0D48; MALAYALAM
5498             0x0D49,   // 0D49      ; UNKNOWN
5499             0x0D4A,   // 0D4A..0D4F; MALAYALAM
5500             0x0D50,   // 0D50..0D53; UNKNOWN
5501             0x0D54,   // 0D54..0D63; MALAYALAM
5502             0x0D64,   // 0D64..0D65; UNKNOWN
5503             0x0D66,   // 0D66..0D7F; MALAYALAM
5504             0x0D80,   // 0D80      ; UNKNOWN
5505             0x0D81,   // 0D81..0D83; SINHALA
5506             0x0D84,   // 0D84      ; UNKNOWN
5507             0x0D85,   // 0D85..0D96; SINHALA
5508             0x0D97,   // 0D97..0D99; UNKNOWN
5509             0x0D9A,   // 0D9A..0DB1; SINHALA
5510             0x0DB2,   // 0DB2      ; UNKNOWN
5511             0x0DB3,   // 0DB3..0DBB; SINHALA
5512             0x0DBC,   // 0DBC      ; UNKNOWN
5513             0x0DBD,   // 0DBD      ; SINHALA
5514             0x0DBE,   // 0DBE..0DBF; UNKNOWN
5515             0x0DC0,   // 0DC0..0DC6; SINHALA
5516             0x0DC7,   // 0DC7..0DC9; UNKNOWN
5517             0x0DCA,   // 0DCA      ; SINHALA
5518             0x0DCB,   // 0DCB..0DCE; UNKNOWN
5519             0x0DCF,   // 0DCF..0DD4; SINHALA
5520             0x0DD5,   // 0DD5      ; UNKNOWN
5521             0x0DD6,   // 0DD6      ; SINHALA
5522             0x0DD7,   // 0DD7      ; UNKNOWN
5523             0x0DD8,   // 0DD8..0DDF; SINHALA
5524             0x0DE0,   // 0DE0..0DE5; UNKNOWN
5525             0x0DE6,   // 0DE6..0DEF; SINHALA
5526             0x0DF0,   // 0DF0..0DF1; UNKNOWN
5527             0x0DF2,   // 0DF2..0DF4; SINHALA
5528             0x0DF5,   // 0DF5..0E00; UNKNOWN
5529             0x0E01,   // 0E01..0E3A; THAI
5530             0x0E3B,   // 0E3B..0E3E; UNKNOWN
5531             0x0E3F,   // 0E3F      ; COMMON
5532             0x0E40,   // 0E40..0E5B; THAI
5533             0x0E5C,   // 0E5C..0E80; UNKNOWN
5534             0x0E81,   // 0E81..0E82; LAO
5535             0x0E83,   // 0E83      ; UNKNOWN
5536             0x0E84,   // 0E84      ; LAO
5537             0x0E85,   // 0E85      ; UNKNOWN
5538             0x0E86,   // 0E86..0E8A; LAO
5539             0x0E8B,   // 0E8B      ; UNKNOWN
5540             0x0E8C,   // 0E8C..0EA3; LAO
5541             0x0EA4,   // 0EA4      ; UNKNOWN
5542             0x0EA5,   // 0EA5      ; LAO
5543             0x0EA6,   // 0EA6      ; UNKNOWN
5544             0x0EA7,   // 0EA7..0EBD; LAO
5545             0x0EBE,   // 0EBE..0EBF; UNKNOWN
5546             0x0EC0,   // 0EC0..0EC4; LAO
5547             0x0EC5,   // 0EC5      ; UNKNOWN
5548             0x0EC6,   // 0EC6      ; LAO
5549             0x0EC7,   // 0EC7      ; UNKNOWN
5550             0x0EC8,   // 0EC8..0ECD; LAO
5551             0x0ECE,   // 0ECE..0ECF; UNKNOWN
5552             0x0ED0,   // 0ED0..0ED9; LAO
5553             0x0EDA,   // 0EDA..0EDB; UNKNOWN
5554             0x0EDC,   // 0EDC..0EDF; LAO
5555             0x0EE0,   // 0EE0..0EFF; UNKNOWN
5556             0x0F00,   // 0F00..0F47; TIBETAN
5557             0x0F48,   // 0F48      ; UNKNOWN
5558             0x0F49,   // 0F49..0F6C; TIBETAN
5559             0x0F6D,   // 0F6D..0F70; UNKNOWN
5560             0x0F71,   // 0F71..0F97; TIBETAN
5561             0x0F98,   // 0F98      ; UNKNOWN
5562             0x0F99,   // 0F99..0FBC; TIBETAN
5563             0x0FBD,   // 0FBD      ; UNKNOWN
5564             0x0FBE,   // 0FBE..0FCC; TIBETAN
5565             0x0FCD,   // 0FCD      ; UNKNOWN
5566             0x0FCE,   // 0FCE..0FD4; TIBETAN
5567             0x0FD5,   // 0FD5..0FD8; COMMON
5568             0x0FD9,   // 0FD9..0FDA; TIBETAN
5569             0x0FDB,   // 0FDB..0FFF; UNKNOWN
5570             0x1000,   // 1000..109F; MYANMAR
5571             0x10A0,   // 10A0..10C5; GEORGIAN
5572             0x10C6,   // 10C6      ; UNKNOWN
5573             0x10C7,   // 10C7      ; GEORGIAN
5574             0x10C8,   // 10C8..10CC; UNKNOWN
5575             0x10CD,   // 10CD      ; GEORGIAN
5576             0x10CE,   // 10CE..10CF; UNKNOWN
5577             0x10D0,   // 10D0..10FA; GEORGIAN
5578             0x10FB,   // 10FB      ; COMMON
5579             0x10FC,   // 10FC..10FF; GEORGIAN
5580             0x1100,   // 1100..11FF; HANGUL
5581             0x1200,   // 1200..1248; ETHIOPIC
5582             0x1249,   // 1249      ; UNKNOWN
5583             0x124A,   // 124A..124D; ETHIOPIC
5584             0x124E,   // 124E..124F; UNKNOWN
5585             0x1250,   // 1250..1256; ETHIOPIC
5586             0x1257,   // 1257      ; UNKNOWN
5587             0x1258,   // 1258      ; ETHIOPIC
5588             0x1259,   // 1259      ; UNKNOWN
5589             0x125A,   // 125A..125D; ETHIOPIC
5590             0x125E,   // 125E..125F; UNKNOWN
5591             0x1260,   // 1260..1288; ETHIOPIC
5592             0x1289,   // 1289      ; UNKNOWN
5593             0x128A,   // 128A..128D; ETHIOPIC
5594             0x128E,   // 128E..128F; UNKNOWN
5595             0x1290,   // 1290..12B0; ETHIOPIC
5596             0x12B1,   // 12B1      ; UNKNOWN
5597             0x12B2,   // 12B2..12B5; ETHIOPIC
5598             0x12B6,   // 12B6..12B7; UNKNOWN
5599             0x12B8,   // 12B8..12BE; ETHIOPIC
5600             0x12BF,   // 12BF      ; UNKNOWN
5601             0x12C0,   // 12C0      ; ETHIOPIC
5602             0x12C1,   // 12C1      ; UNKNOWN
5603             0x12C2,   // 12C2..12C5; ETHIOPIC
5604             0x12C6,   // 12C6..12C7; UNKNOWN
5605             0x12C8,   // 12C8..12D6; ETHIOPIC
5606             0x12D7,   // 12D7      ; UNKNOWN
5607             0x12D8,   // 12D8..1310; ETHIOPIC
5608             0x1311,   // 1311      ; UNKNOWN
5609             0x1312,   // 1312..1315; ETHIOPIC
5610             0x1316,   // 1316..1317; UNKNOWN
5611             0x1318,   // 1318..135A; ETHIOPIC
5612             0x135B,   // 135B..135C; UNKNOWN
5613             0x135D,   // 135D..137C; ETHIOPIC
5614             0x137D,   // 137D..137F; UNKNOWN
5615             0x1380,   // 1380..1399; ETHIOPIC
5616             0x139A,   // 139A..139F; UNKNOWN
5617             0x13A0,   // 13A0..13F5; CHEROKEE
5618             0x13F6,   // 13F6..13F7; UNKNOWN
5619             0x13F8,   // 13F8..13FD; CHEROKEE
5620             0x13FE,   // 13FE..13FF; UNKNOWN
5621             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
5622             0x1680,   // 1680..169C; OGHAM
5623             0x169D,   // 169D..169F; UNKNOWN
5624             0x16A0,   // 16A0..16EA; RUNIC
5625             0x16EB,   // 16EB..16ED; COMMON
5626             0x16EE,   // 16EE..16F8; RUNIC
5627             0x16F9,   // 16F9..16FF; UNKNOWN
5628             0x1700,   // 1700..170C; TAGALOG
5629             0x170D,   // 170D      ; UNKNOWN
5630             0x170E,   // 170E..1714; TAGALOG
5631             0x1715,   // 1715..171F; UNKNOWN
5632             0x1720,   // 1720..1734; HANUNOO
5633             0x1735,   // 1735..1736; COMMON
5634             0x1737,   // 1737..173F; UNKNOWN
5635             0x1740,   // 1740..1753; BUHID
5636             0x1754,   // 1754..175F; UNKNOWN
5637             0x1760,   // 1760..176C; TAGBANWA
5638             0x176D,   // 176D      ; UNKNOWN
5639             0x176E,   // 176E..1770; TAGBANWA
5640             0x1771,   // 1771      ; UNKNOWN
5641             0x1772,   // 1772..1773; TAGBANWA
5642             0x1774,   // 1774..177F; UNKNOWN
5643             0x1780,   // 1780..17DD; KHMER
5644             0x17DE,   // 17DE..17DF; UNKNOWN
5645             0x17E0,   // 17E0..17E9; KHMER
5646             0x17EA,   // 17EA..17EF; UNKNOWN
5647             0x17F0,   // 17F0..17F9; KHMER
5648             0x17FA,   // 17FA..17FF; UNKNOWN
5649             0x1800,   // 1800..1801; MONGOLIAN
5650             0x1802,   // 1802..1803; COMMON
5651             0x1804,   // 1804      ; MONGOLIAN
5652             0x1805,   // 1805      ; COMMON
5653             0x1806,   // 1806..180E; MONGOLIAN
5654             0x180F,   // 180F      ; UNKNOWN
5655             0x1810,   // 1810..1819; MONGOLIAN
5656             0x181A,   // 181A..181F; UNKNOWN
5657             0x1820,   // 1820..1878; MONGOLIAN
5658             0x1879,   // 1879..187F; UNKNOWN
5659             0x1880,   // 1880..18AA; MONGOLIAN
5660             0x18AB,   // 18AB..18AF; UNKNOWN
5661             0x18B0,   // 18B0..18F5; CANADIAN_ABORIGINAL
5662             0x18F6,   // 18F6..18FF; UNKNOWN
5663             0x1900,   // 1900..191E; LIMBU
5664             0x191F,   // 191F      ; UNKNOWN
5665             0x1920,   // 1920..192B; LIMBU
5666             0x192C,   // 192C..192F; UNKNOWN
5667             0x1930,   // 1930..193B; LIMBU
5668             0x193C,   // 193C..193F; UNKNOWN
5669             0x1940,   // 1940      ; LIMBU
5670             0x1941,   // 1941..1943; UNKNOWN
5671             0x1944,   // 1944..194F; LIMBU
5672             0x1950,   // 1950..196D; TAI_LE
5673             0x196E,   // 196E..196F; UNKNOWN
5674             0x1970,   // 1970..1974; TAI_LE
5675             0x1975,   // 1975..197F; UNKNOWN
5676             0x1980,   // 1980..19AB; NEW_TAI_LUE
5677             0x19AC,   // 19AC..19AF; UNKNOWN
5678             0x19B0,   // 19B0..19C9; NEW_TAI_LUE
5679             0x19CA,   // 19CA..19CF; UNKNOWN
5680             0x19D0,   // 19D0..19DA; NEW_TAI_LUE
5681             0x19DB,   // 19DB..19DD; UNKNOWN
5682             0x19DE,   // 19DE..19DF; NEW_TAI_LUE
5683             0x19E0,   // 19E0..19FF; KHMER
5684             0x1A00,   // 1A00..1A1B; BUGINESE
5685             0x1A1C,   // 1A1C..1A1D; UNKNOWN
5686             0x1A1E,   // 1A1E..1A1F; BUGINESE
5687             0x1A20,   // 1A20..1A5E; TAI_THAM
5688             0x1A5F,   // 1A5F      ; UNKNOWN
5689             0x1A60,   // 1A60..1A7C; TAI_THAM
5690             0x1A7D,   // 1A7D..1A7E; UNKNOWN
5691             0x1A7F,   // 1A7F..1A89; TAI_THAM
5692             0x1A8A,   // 1A8A..1A8F; UNKNOWN
5693             0x1A90,   // 1A90..1A99; TAI_THAM
5694             0x1A9A,   // 1A9A..1A9F; UNKNOWN
5695             0x1AA0,   // 1AA0..1AAD; TAI_THAM
5696             0x1AAE,   // 1AAE..1AAF; UNKNOWN
5697             0x1AB0,   // 1AB0..1AC0; INHERITED
5698             0x1AC1,   // 1AC1..1AFF; UNKNOWN
5699             0x1B00,   // 1B00..1B4B; BALINESE
5700             0x1B4C,   // 1B4C..1B4F; UNKNOWN
5701             0x1B50,   // 1B50..1B7C; BALINESE
5702             0x1B7D,   // 1B7D..1B7F; UNKNOWN
5703             0x1B80,   // 1B80..1BBF; SUNDANESE
5704             0x1BC0,   // 1BC0..1BF3; BATAK
5705             0x1BF4,   // 1BF4..1BFB; UNKNOWN
5706             0x1BFC,   // 1BFC..1BFF; BATAK
5707             0x1C00,   // 1C00..1C37; LEPCHA
5708             0x1C38,   // 1C38..1C3A; UNKNOWN
5709             0x1C3B,   // 1C3B..1C49; LEPCHA
5710             0x1C4A,   // 1C4A..1C4C; UNKNOWN
5711             0x1C4D,   // 1C4D..1C4F; LEPCHA
5712             0x1C50,   // 1C50..1C7F; OL_CHIKI
5713             0x1C80,   // 1C80..1C88; CYRILLIC
5714             0x1C89,   // 1C89..1C8F; UNKNOWN
5715             0x1C90,   // 1C90..1CBA; GEORGIAN
5716             0x1CBB,   // 1CBB..1CBC; UNKNOWN
5717             0x1CBD,   // 1CBD..1CBF; GEORGIAN
5718             0x1CC0,   // 1CC0..1CC7; SUNDANESE
5719             0x1CC8,   // 1CC8..1CCF; UNKNOWN
5720             0x1CD0,   // 1CD0..1CD2; INHERITED
5721             0x1CD3,   // 1CD3      ; COMMON
5722             0x1CD4,   // 1CD4..1CE0; INHERITED
5723             0x1CE1,   // 1CE1      ; COMMON
5724             0x1CE2,   // 1CE2..1CE8; INHERITED
5725             0x1CE9,   // 1CE9..1CEC; COMMON
5726             0x1CED,   // 1CED      ; INHERITED
5727             0x1CEE,   // 1CEE..1CF3; COMMON
5728             0x1CF4,   // 1CF4      ; INHERITED
5729             0x1CF5,   // 1CF5..1CF7; COMMON
5730             0x1CF8,   // 1CF8..1CF9; INHERITED
5731             0x1CFA,   // 1CFA      ; COMMON
5732             0x1CFB,   // 1CFB..1CFF; UNKNOWN
5733             0x1D00,   // 1D00..1D25; LATIN
5734             0x1D26,   // 1D26..1D2A; GREEK
5735             0x1D2B,   // 1D2B      ; CYRILLIC
5736             0x1D2C,   // 1D2C..1D5C; LATIN
5737             0x1D5D,   // 1D5D..1D61; GREEK
5738             0x1D62,   // 1D62..1D65; LATIN
5739             0x1D66,   // 1D66..1D6A; GREEK
5740             0x1D6B,   // 1D6B..1D77; LATIN
5741             0x1D78,   // 1D78      ; CYRILLIC
5742             0x1D79,   // 1D79..1DBE; LATIN
5743             0x1DBF,   // 1DBF      ; GREEK
5744             0x1DC0,   // 1DC0..1DF9; INHERITED
5745             0x1DFA,   // 1DFA      ; UNKNOWN
5746             0x1DFB,   // 1DFB..1DFF; INHERITED
5747             0x1E00,   // 1E00..1EFF; LATIN
5748             0x1F00,   // 1F00..1F15; GREEK
5749             0x1F16,   // 1F16..1F17; UNKNOWN
5750             0x1F18,   // 1F18..1F1D; GREEK
5751             0x1F1E,   // 1F1E..1F1F; UNKNOWN
5752             0x1F20,   // 1F20..1F45; GREEK
5753             0x1F46,   // 1F46..1F47; UNKNOWN
5754             0x1F48,   // 1F48..1F4D; GREEK
5755             0x1F4E,   // 1F4E..1F4F; UNKNOWN
5756             0x1F50,   // 1F50..1F57; GREEK
5757             0x1F58,   // 1F58      ; UNKNOWN
5758             0x1F59,   // 1F59      ; GREEK
5759             0x1F5A,   // 1F5A      ; UNKNOWN
5760             0x1F5B,   // 1F5B      ; GREEK
5761             0x1F5C,   // 1F5C      ; UNKNOWN
5762             0x1F5D,   // 1F5D      ; GREEK
5763             0x1F5E,   // 1F5E      ; UNKNOWN
5764             0x1F5F,   // 1F5F..1F7D; GREEK
5765             0x1F7E,   // 1F7E..1F7F; UNKNOWN
5766             0x1F80,   // 1F80..1FB4; GREEK
5767             0x1FB5,   // 1FB5      ; UNKNOWN
5768             0x1FB6,   // 1FB6..1FC4; GREEK
5769             0x1FC5,   // 1FC5      ; UNKNOWN
5770             0x1FC6,   // 1FC6..1FD3; GREEK
5771             0x1FD4,   // 1FD4..1FD5; UNKNOWN
5772             0x1FD6,   // 1FD6..1FDB; GREEK
5773             0x1FDC,   // 1FDC      ; UNKNOWN
5774             0x1FDD,   // 1FDD..1FEF; GREEK
5775             0x1FF0,   // 1FF0..1FF1; UNKNOWN
5776             0x1FF2,   // 1FF2..1FF4; GREEK
5777             0x1FF5,   // 1FF5      ; UNKNOWN
5778             0x1FF6,   // 1FF6..1FFE; GREEK
5779             0x1FFF,   // 1FFF      ; UNKNOWN
5780             0x2000,   // 2000..200B; COMMON
5781             0x200C,   // 200C..200D; INHERITED
5782             0x200E,   // 200E..2064; COMMON
5783             0x2065,   // 2065      ; UNKNOWN
5784             0x2066,   // 2066..2070; COMMON
5785             0x2071,   // 2071      ; LATIN
5786             0x2072,   // 2072..2073; UNKNOWN
5787             0x2074,   // 2074..207E; COMMON
5788             0x207F,   // 207F      ; LATIN
5789             0x2080,   // 2080..208E; COMMON
5790             0x208F,   // 208F      ; UNKNOWN
5791             0x2090,   // 2090..209C; LATIN
5792             0x209D,   // 209D..209F; UNKNOWN
5793             0x20A0,   // 20A0..20BF; COMMON
5794             0x20C0,   // 20C0..20CF; UNKNOWN
5795             0x20D0,   // 20D0..20F0; INHERITED
5796             0x20F1,   // 20F1..20FF; UNKNOWN
5797             0x2100,   // 2100..2125; COMMON
5798             0x2126,   // 2126      ; GREEK
5799             0x2127,   // 2127..2129; COMMON
5800             0x212A,   // 212A..212B; LATIN
5801             0x212C,   // 212C..2131; COMMON
5802             0x2132,   // 2132      ; LATIN
5803             0x2133,   // 2133..214D; COMMON
5804             0x214E,   // 214E      ; LATIN
5805             0x214F,   // 214F..215F; COMMON
5806             0x2160,   // 2160..2188; LATIN
5807             0x2189,   // 2189..218B; COMMON
5808             0x218C,   // 218C..218F; UNKNOWN
5809             0x2190,   // 2190..2426; COMMON
5810             0x2427,   // 2427..243F; UNKNOWN
5811             0x2440,   // 2440..244A; COMMON
5812             0x244B,   // 244B..245F; UNKNOWN
5813             0x2460,   // 2460..27FF; COMMON
5814             0x2800,   // 2800..28FF; BRAILLE
5815             0x2900,   // 2900..2B73; COMMON
5816             0x2B74,   // 2B74..2B75; UNKNOWN
5817             0x2B76,   // 2B76..2B95; COMMON
5818             0x2B96,   // 2B96      ; UNKNOWN
5819             0x2B97,   // 2B97..2BFF; COMMON
5820             0x2C00,   // 2C00..2C2E; GLAGOLITIC
5821             0x2C2F,   // 2C2F      ; UNKNOWN
5822             0x2C30,   // 2C30..2C5E; GLAGOLITIC
5823             0x2C5F,   // 2C5F      ; UNKNOWN
5824             0x2C60,   // 2C60..2C7F; LATIN
5825             0x2C80,   // 2C80..2CF3; COPTIC
5826             0x2CF4,   // 2CF4..2CF8; UNKNOWN
5827             0x2CF9,   // 2CF9..2CFF; COPTIC
5828             0x2D00,   // 2D00..2D25; GEORGIAN
5829             0x2D26,   // 2D26      ; UNKNOWN
5830             0x2D27,   // 2D27      ; GEORGIAN
5831             0x2D28,   // 2D28..2D2C; UNKNOWN
5832             0x2D2D,   // 2D2D      ; GEORGIAN
5833             0x2D2E,   // 2D2E..2D2F; UNKNOWN
5834             0x2D30,   // 2D30..2D67; TIFINAGH
5835             0x2D68,   // 2D68..2D6E; UNKNOWN
5836             0x2D6F,   // 2D6F..2D70; TIFINAGH
5837             0x2D71,   // 2D71..2D7E; UNKNOWN
5838             0x2D7F,   // 2D7F      ; TIFINAGH
5839             0x2D80,   // 2D80..2D96; ETHIOPIC
5840             0x2D97,   // 2D97..2D9F; UNKNOWN
5841             0x2DA0,   // 2DA0..2DA6; ETHIOPIC
5842             0x2DA7,   // 2DA7      ; UNKNOWN
5843             0x2DA8,   // 2DA8..2DAE; ETHIOPIC
5844             0x2DAF,   // 2DAF      ; UNKNOWN
5845             0x2DB0,   // 2DB0..2DB6; ETHIOPIC
5846             0x2DB7,   // 2DB7      ; UNKNOWN
5847             0x2DB8,   // 2DB8..2DBE; ETHIOPIC
5848             0x2DBF,   // 2DBF      ; UNKNOWN
5849             0x2DC0,   // 2DC0..2DC6; ETHIOPIC
5850             0x2DC7,   // 2DC7      ; UNKNOWN
5851             0x2DC8,   // 2DC8..2DCE; ETHIOPIC
5852             0x2DCF,   // 2DCF      ; UNKNOWN
5853             0x2DD0,   // 2DD0..2DD6; ETHIOPIC
5854             0x2DD7,   // 2DD7      ; UNKNOWN
5855             0x2DD8,   // 2DD8..2DDE; ETHIOPIC
5856             0x2DDF,   // 2DDF      ; UNKNOWN
5857             0x2DE0,   // 2DE0..2DFF; CYRILLIC
5858             0x2E00,   // 2E00..2E52; COMMON
5859             0x2E53,   // 2E53..2E7F; UNKNOWN
5860             0x2E80,   // 2E80..2E99; HAN
5861             0x2E9A,   // 2E9A      ; UNKNOWN
5862             0x2E9B,   // 2E9B..2EF3; HAN
5863             0x2EF4,   // 2EF4..2EFF; UNKNOWN
5864             0x2F00,   // 2F00..2FD5; HAN
5865             0x2FD6,   // 2FD6..2FEF; UNKNOWN
5866             0x2FF0,   // 2FF0..2FFB; COMMON
5867             0x2FFC,   // 2FFC..2FFF; UNKNOWN
5868             0x3000,   // 3000..3004; COMMON
5869             0x3005,   // 3005      ; HAN
5870             0x3006,   // 3006      ; COMMON
5871             0x3007,   // 3007      ; HAN
5872             0x3008,   // 3008..3020; COMMON
5873             0x3021,   // 3021..3029; HAN
5874             0x302A,   // 302A..302D; INHERITED
5875             0x302E,   // 302E..302F; HANGUL
5876             0x3030,   // 3030..3037; COMMON
5877             0x3038,   // 3038..303B; HAN
5878             0x303C,   // 303C..303F; COMMON
5879             0x3040,   // 3040      ; UNKNOWN
5880             0x3041,   // 3041..3096; HIRAGANA
5881             0x3097,   // 3097..3098; UNKNOWN
5882             0x3099,   // 3099..309A; INHERITED
5883             0x309B,   // 309B..309C; COMMON
5884             0x309D,   // 309D..309F; HIRAGANA
5885             0x30A0,   // 30A0      ; COMMON
5886             0x30A1,   // 30A1..30FA; KATAKANA
5887             0x30FB,   // 30FB..30FC; COMMON
5888             0x30FD,   // 30FD..30FF; KATAKANA
5889             0x3100,   // 3100..3104; UNKNOWN
5890             0x3105,   // 3105..312F; BOPOMOFO
5891             0x3130,   // 3130      ; UNKNOWN
5892             0x3131,   // 3131..318E; HANGUL
5893             0x318F,   // 318F      ; UNKNOWN
5894             0x3190,   // 3190..319F; COMMON
5895             0x31A0,   // 31A0..31BF; BOPOMOFO
5896             0x31C0,   // 31C0..31E3; COMMON
5897             0x31E4,   // 31E4..31EF; UNKNOWN
5898             0x31F0,   // 31F0..31FF; KATAKANA
5899             0x3200,   // 3200..321E; HANGUL
5900             0x321F,   // 321F      ; UNKNOWN
5901             0x3220,   // 3220..325F; COMMON
5902             0x3260,   // 3260..327E; HANGUL
5903             0x327F,   // 327F..32CF; COMMON
5904             0x32D0,   // 32D0..32FE; KATAKANA
5905             0x32FF,   // 32FF      ; COMMON
5906             0x3300,   // 3300..3357; KATAKANA
5907             0x3358,   // 3358..33FF; COMMON
5908             0x3400,   // 3400..4DBF; HAN
5909             0x4DC0,   // 4DC0..4DFF; COMMON
5910             0x4E00,   // 4E00..9FFC; HAN
5911             0x9FFD,   // 9FFD..9FFF; UNKNOWN
5912             0xA000,   // A000..A48C; YI
5913             0xA48D,   // A48D..A48F; UNKNOWN
5914             0xA490,   // A490..A4C6; YI
5915             0xA4C7,   // A4C7..A4CF; UNKNOWN
5916             0xA4D0,   // A4D0..A4FF; LISU
5917             0xA500,   // A500..A62B; VAI
5918             0xA62C,   // A62C..A63F; UNKNOWN
5919             0xA640,   // A640..A69F; CYRILLIC
5920             0xA6A0,   // A6A0..A6F7; BAMUM
5921             0xA6F8,   // A6F8..A6FF; UNKNOWN
5922             0xA700,   // A700..A721; COMMON
5923             0xA722,   // A722..A787; LATIN
5924             0xA788,   // A788..A78A; COMMON
5925             0xA78B,   // A78B..A7BF; LATIN
5926             0xA7C0,   // A7C0..A7C1; UNKNOWN
5927             0xA7C2,   // A7C2..A7CA; LATIN
5928             0xA7CB,   // A7CB..A7F4; UNKNOWN
5929             0xA7F5,   // A7F5..A7FF; LATIN
5930             0xA800,   // A800..A82C; SYLOTI_NAGRI
5931             0xA82D,   // A82D..A82F; UNKNOWN
5932             0xA830,   // A830..A839; COMMON
5933             0xA83A,   // A83A..A83F; UNKNOWN
5934             0xA840,   // A840..A877; PHAGS_PA
5935             0xA878,   // A878..A87F; UNKNOWN
5936             0xA880,   // A880..A8C5; SAURASHTRA
5937             0xA8C6,   // A8C6..A8CD; UNKNOWN
5938             0xA8CE,   // A8CE..A8D9; SAURASHTRA
5939             0xA8DA,   // A8DA..A8DF; UNKNOWN
5940             0xA8E0,   // A8E0..A8FF; DEVANAGARI
5941             0xA900,   // A900..A92D; KAYAH_LI
5942             0xA92E,   // A92E      ; COMMON
5943             0xA92F,   // A92F      ; KAYAH_LI
5944             0xA930,   // A930..A953; REJANG
5945             0xA954,   // A954..A95E; UNKNOWN
5946             0xA95F,   // A95F      ; REJANG
5947             0xA960,   // A960..A97C; HANGUL
5948             0xA97D,   // A97D..A97F; UNKNOWN
5949             0xA980,   // A980..A9CD; JAVANESE
5950             0xA9CE,   // A9CE      ; UNKNOWN
5951             0xA9CF,   // A9CF      ; COMMON
5952             0xA9D0,   // A9D0..A9D9; JAVANESE
5953             0xA9DA,   // A9DA..A9DD; UNKNOWN
5954             0xA9DE,   // A9DE..A9DF; JAVANESE
5955             0xA9E0,   // A9E0..A9FE; MYANMAR
5956             0xA9FF,   // A9FF      ; UNKNOWN
5957             0xAA00,   // AA00..AA36; CHAM
5958             0xAA37,   // AA37..AA3F; UNKNOWN
5959             0xAA40,   // AA40..AA4D; CHAM
5960             0xAA4E,   // AA4E..AA4F; UNKNOWN
5961             0xAA50,   // AA50..AA59; CHAM
5962             0xAA5A,   // AA5A..AA5B; UNKNOWN
5963             0xAA5C,   // AA5C..AA5F; CHAM
5964             0xAA60,   // AA60..AA7F; MYANMAR
5965             0xAA80,   // AA80..AAC2; TAI_VIET
5966             0xAAC3,   // AAC3..AADA; UNKNOWN
5967             0xAADB,   // AADB..AADF; TAI_VIET
5968             0xAAE0,   // AAE0..AAF6; MEETEI_MAYEK
5969             0xAAF7,   // AAF7..AB00; UNKNOWN
5970             0xAB01,   // AB01..AB06; ETHIOPIC
5971             0xAB07,   // AB07..AB08; UNKNOWN
5972             0xAB09,   // AB09..AB0E; ETHIOPIC
5973             0xAB0F,   // AB0F..AB10; UNKNOWN
5974             0xAB11,   // AB11..AB16; ETHIOPIC
5975             0xAB17,   // AB17..AB1F; UNKNOWN
5976             0xAB20,   // AB20..AB26; ETHIOPIC
5977             0xAB27,   // AB27      ; UNKNOWN
5978             0xAB28,   // AB28..AB2E; ETHIOPIC
5979             0xAB2F,   // AB2F      ; UNKNOWN
5980             0xAB30,   // AB30..AB5A; LATIN
5981             0xAB5B,   // AB5B      ; COMMON
5982             0xAB5C,   // AB5C..AB64; LATIN
5983             0xAB65,   // AB65      ; GREEK
5984             0xAB66,   // AB66..AB69; LATIN
5985             0xAB6A,   // AB6A..AB6B; COMMON
5986             0xAB6C,   // AB6C..AB6F; UNKNOWN
5987             0xAB70,   // AB70..ABBF; CHEROKEE
5988             0xABC0,   // ABC0..ABED; MEETEI_MAYEK
5989             0xABEE,   // ABEE..ABEF; UNKNOWN
5990             0xABF0,   // ABF0..ABF9; MEETEI_MAYEK
5991             0xABFA,   // ABFA..ABFF; UNKNOWN
5992             0xAC00,   // AC00..D7A3; HANGUL
5993             0xD7A4,   // D7A4..D7AF; UNKNOWN
5994             0xD7B0,   // D7B0..D7C6; HANGUL
5995             0xD7C7,   // D7C7..D7CA; UNKNOWN
5996             0xD7CB,   // D7CB..D7FB; HANGUL
5997             0xD7FC,   // D7FC..F8FF; UNKNOWN
5998             0xF900,   // F900..FA6D; HAN
5999             0xFA6E,   // FA6E..FA6F; UNKNOWN
6000             0xFA70,   // FA70..FAD9; HAN
6001             0xFADA,   // FADA..FAFF; UNKNOWN
6002             0xFB00,   // FB00..FB06; LATIN
6003             0xFB07,   // FB07..FB12; UNKNOWN
6004             0xFB13,   // FB13..FB17; ARMENIAN
6005             0xFB18,   // FB18..FB1C; UNKNOWN
6006             0xFB1D,   // FB1D..FB36; HEBREW
6007             0xFB37,   // FB37      ; UNKNOWN
6008             0xFB38,   // FB38..FB3C; HEBREW
6009             0xFB3D,   // FB3D      ; UNKNOWN
6010             0xFB3E,   // FB3E      ; HEBREW
6011             0xFB3F,   // FB3F      ; UNKNOWN
6012             0xFB40,   // FB40..FB41; HEBREW
6013             0xFB42,   // FB42      ; UNKNOWN
6014             0xFB43,   // FB43..FB44; HEBREW
6015             0xFB45,   // FB45      ; UNKNOWN
6016             0xFB46,   // FB46..FB4F; HEBREW
6017             0xFB50,   // FB50..FBC1; ARABIC
6018             0xFBC2,   // FBC2..FBD2; UNKNOWN
6019             0xFBD3,   // FBD3..FD3D; ARABIC
6020             0xFD3E,   // FD3E..FD3F; COMMON
6021             0xFD40,   // FD40..FD4F; UNKNOWN
6022             0xFD50,   // FD50..FD8F; ARABIC
6023             0xFD90,   // FD90..FD91; UNKNOWN
6024             0xFD92,   // FD92..FDC7; ARABIC
6025             0xFDC8,   // FDC8..FDEF; UNKNOWN
6026             0xFDF0,   // FDF0..FDFD; ARABIC
6027             0xFDFE,   // FDFE..FDFF; UNKNOWN
6028             0xFE00,   // FE00..FE0F; INHERITED
6029             0xFE10,   // FE10..FE19; COMMON
6030             0xFE1A,   // FE1A..FE1F; UNKNOWN
6031             0xFE20,   // FE20..FE2D; INHERITED
6032             0xFE2E,   // FE2E..FE2F; CYRILLIC
6033             0xFE30,   // FE30..FE52; COMMON
6034             0xFE53,   // FE53      ; UNKNOWN
6035             0xFE54,   // FE54..FE66; COMMON
6036             0xFE67,   // FE67      ; UNKNOWN
6037             0xFE68,   // FE68..FE6B; COMMON
6038             0xFE6C,   // FE6C..FE6F; UNKNOWN
6039             0xFE70,   // FE70..FE74; ARABIC
6040             0xFE75,   // FE75      ; UNKNOWN
6041             0xFE76,   // FE76..FEFC; ARABIC
6042             0xFEFD,   // FEFD..FEFE; UNKNOWN
6043             0xFEFF,   // FEFF      ; COMMON
6044             0xFF00,   // FF00      ; UNKNOWN
6045             0xFF01,   // FF01..FF20; COMMON
6046             0xFF21,   // FF21..FF3A; LATIN
6047             0xFF3B,   // FF3B..FF40; COMMON
6048             0xFF41,   // FF41..FF5A; LATIN
6049             0xFF5B,   // FF5B..FF65; COMMON
6050             0xFF66,   // FF66..FF6F; KATAKANA
6051             0xFF70,   // FF70      ; COMMON
6052             0xFF71,   // FF71..FF9D; KATAKANA
6053             0xFF9E,   // FF9E..FF9F; COMMON
6054             0xFFA0,   // FFA0..FFBE; HANGUL
6055             0xFFBF,   // FFBF..FFC1; UNKNOWN
6056             0xFFC2,   // FFC2..FFC7; HANGUL
6057             0xFFC8,   // FFC8..FFC9; UNKNOWN
6058             0xFFCA,   // FFCA..FFCF; HANGUL
6059             0xFFD0,   // FFD0..FFD1; UNKNOWN
6060             0xFFD2,   // FFD2..FFD7; HANGUL
6061             0xFFD8,   // FFD8..FFD9; UNKNOWN
6062             0xFFDA,   // FFDA..FFDC; HANGUL
6063             0xFFDD,   // FFDD..FFDF; UNKNOWN
6064             0xFFE0,   // FFE0..FFE6; COMMON
6065             0xFFE7,   // FFE7      ; UNKNOWN
6066             0xFFE8,   // FFE8..FFEE; COMMON
6067             0xFFEF,   // FFEF..FFF8; UNKNOWN
6068             0xFFF9,   // FFF9..FFFD; COMMON
6069             0xFFFE,   // FFFE..FFFF; UNKNOWN
6070             0x10000,  // 10000..1000B; LINEAR_B
6071             0x1000C,  // 1000C       ; UNKNOWN
6072             0x1000D,  // 1000D..10026; LINEAR_B
6073             0x10027,  // 10027       ; UNKNOWN
6074             0x10028,  // 10028..1003A; LINEAR_B
6075             0x1003B,  // 1003B       ; UNKNOWN
6076             0x1003C,  // 1003C..1003D; LINEAR_B
6077             0x1003E,  // 1003E       ; UNKNOWN
6078             0x1003F,  // 1003F..1004D; LINEAR_B
6079             0x1004E,  // 1004E..1004F; UNKNOWN
6080             0x10050,  // 10050..1005D; LINEAR_B
6081             0x1005E,  // 1005E..1007F; UNKNOWN
6082             0x10080,  // 10080..100FA; LINEAR_B
6083             0x100FB,  // 100FB..100FF; UNKNOWN
6084             0x10100,  // 10100..10102; COMMON
6085             0x10103,  // 10103..10106; UNKNOWN
6086             0x10107,  // 10107..10133; COMMON
6087             0x10134,  // 10134..10136; UNKNOWN
6088             0x10137,  // 10137..1013F; COMMON
6089             0x10140,  // 10140..1018E; GREEK
6090             0x1018F,  // 1018F       ; UNKNOWN
6091             0x10190,  // 10190..1019C; COMMON
6092             0x1019D,  // 1019D..1019F; UNKNOWN
6093             0x101A0,  // 101A0       ; GREEK
6094             0x101A1,  // 101A1..101CF; UNKNOWN
6095             0x101D0,  // 101D0..101FC; COMMON
6096             0x101FD,  // 101FD       ; INHERITED
6097             0x101FE,  // 101FE..1027F; UNKNOWN
6098             0x10280,  // 10280..1029C; LYCIAN
6099             0x1029D,  // 1029D..1029F; UNKNOWN
6100             0x102A0,  // 102A0..102D0; CARIAN
6101             0x102D1,  // 102D1..102DF; UNKNOWN
6102             0x102E0,  // 102E0       ; INHERITED
6103             0x102E1,  // 102E1..102FB; COMMON
6104             0x102FC,  // 102FC..102FF; UNKNOWN
6105             0x10300,  // 10300..10323; OLD_ITALIC
6106             0x10324,  // 10324..1032C; UNKNOWN
6107             0x1032D,  // 1032D..1032F; OLD_ITALIC
6108             0x10330,  // 10330..1034A; GOTHIC
6109             0x1034B,  // 1034B..1034F; UNKNOWN
6110             0x10350,  // 10350..1037A; OLD_PERMIC
6111             0x1037B,  // 1037B..1037F; UNKNOWN
6112             0x10380,  // 10380..1039D; UGARITIC
6113             0x1039E,  // 1039E       ; UNKNOWN
6114             0x1039F,  // 1039F       ; UGARITIC
6115             0x103A0,  // 103A0..103C3; OLD_PERSIAN
6116             0x103C4,  // 103C4..103C7; UNKNOWN
6117             0x103C8,  // 103C8..103D5; OLD_PERSIAN
6118             0x103D6,  // 103D6..103FF; UNKNOWN
6119             0x10400,  // 10400..1044F; DESERET
6120             0x10450,  // 10450..1047F; SHAVIAN
6121             0x10480,  // 10480..1049D; OSMANYA
6122             0x1049E,  // 1049E..1049F; UNKNOWN
6123             0x104A0,  // 104A0..104A9; OSMANYA
6124             0x104AA,  // 104AA..104AF; UNKNOWN
6125             0x104B0,  // 104B0..104D3; OSAGE
6126             0x104D4,  // 104D4..104D7; UNKNOWN
6127             0x104D8,  // 104D8..104FB; OSAGE
6128             0x104FC,  // 104FC..104FF; UNKNOWN
6129             0x10500,  // 10500..10527; ELBASAN
6130             0x10528,  // 10528..1052F; UNKNOWN
6131             0x10530,  // 10530..10563; CAUCASIAN_ALBANIAN
6132             0x10564,  // 10564..1056E; UNKNOWN
6133             0x1056F,  // 1056F       ; CAUCASIAN_ALBANIAN
6134             0x10570,  // 10570..105FF; UNKNOWN
6135             0x10600,  // 10600..10736; LINEAR_A
6136             0x10737,  // 10737..1073F; UNKNOWN
6137             0x10740,  // 10740..10755; LINEAR_A
6138             0x10756,  // 10756..1075F; UNKNOWN
6139             0x10760,  // 10760..10767; LINEAR_A
6140             0x10768,  // 10768..107FF; UNKNOWN
6141             0x10800,  // 10800..10805; CYPRIOT
6142             0x10806,  // 10806..10807; UNKNOWN
6143             0x10808,  // 10808       ; CYPRIOT
6144             0x10809,  // 10809       ; UNKNOWN
6145             0x1080A,  // 1080A..10835; CYPRIOT
6146             0x10836,  // 10836       ; UNKNOWN
6147             0x10837,  // 10837..10838; CYPRIOT
6148             0x10839,  // 10839..1083B; UNKNOWN
6149             0x1083C,  // 1083C       ; CYPRIOT
6150             0x1083D,  // 1083D..1083E; UNKNOWN
6151             0x1083F,  // 1083F       ; CYPRIOT
6152             0x10840,  // 10840..10855; IMPERIAL_ARAMAIC
6153             0x10856,  // 10856       ; UNKNOWN
6154             0x10857,  // 10857..1085F; IMPERIAL_ARAMAIC
6155             0x10860,  // 10860..1087F; PALMYRENE
6156             0x10880,  // 10880..1089E; NABATAEAN
6157             0x1089F,  // 1089F..108A6; UNKNOWN
6158             0x108A7,  // 108A7..108AF; NABATAEAN
6159             0x108B0,  // 108B0..108DF; UNKNOWN
6160             0x108E0,  // 108E0..108F2; HATRAN
6161             0x108F3,  // 108F3       ; UNKNOWN
6162             0x108F4,  // 108F4..108F5; HATRAN
6163             0x108F6,  // 108F6..108FA; UNKNOWN
6164             0x108FB,  // 108FB..108FF; HATRAN
6165             0x10900,  // 10900..1091B; PHOENICIAN
6166             0x1091C,  // 1091C..1091E; UNKNOWN
6167             0x1091F,  // 1091F       ; PHOENICIAN
6168             0x10920,  // 10920..10939; LYDIAN
6169             0x1093A,  // 1093A..1093E; UNKNOWN
6170             0x1093F,  // 1093F       ; LYDIAN
6171             0x10940,  // 10940..1097F; UNKNOWN
6172             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
6173             0x109A0,  // 109A0..109B7; MEROITIC_CURSIVE
6174             0x109B8,  // 109B8..109BB; UNKNOWN
6175             0x109BC,  // 109BC..109CF; MEROITIC_CURSIVE
6176             0x109D0,  // 109D0..109D1; UNKNOWN
6177             0x109D2,  // 109D2..109FF; MEROITIC_CURSIVE
6178             0x10A00,  // 10A00..10A03; KHAROSHTHI
6179             0x10A04,  // 10A04       ; UNKNOWN
6180             0x10A05,  // 10A05..10A06; KHAROSHTHI
6181             0x10A07,  // 10A07..10A0B; UNKNOWN
6182             0x10A0C,  // 10A0C..10A13; KHAROSHTHI
6183             0x10A14,  // 10A14       ; UNKNOWN
6184             0x10A15,  // 10A15..10A17; KHAROSHTHI
6185             0x10A18,  // 10A18       ; UNKNOWN
6186             0x10A19,  // 10A19..10A35; KHAROSHTHI
6187             0x10A36,  // 10A36..10A37; UNKNOWN
6188             0x10A38,  // 10A38..10A3A; KHAROSHTHI
6189             0x10A3B,  // 10A3B..10A3E; UNKNOWN
6190             0x10A3F,  // 10A3F..10A48; KHAROSHTHI
6191             0x10A49,  // 10A49..10A4F; UNKNOWN
6192             0x10A50,  // 10A50..10A58; KHAROSHTHI
6193             0x10A59,  // 10A59..10A5F; UNKNOWN
6194             0x10A60,  // 10A60..10A7F; OLD_SOUTH_ARABIAN
6195             0x10A80,  // 10A80..10A9F; OLD_NORTH_ARABIAN
6196             0x10AA0,  // 10AA0..10ABF; UNKNOWN
6197             0x10AC0,  // 10AC0..10AE6; MANICHAEAN
6198             0x10AE7,  // 10AE7..10AEA; UNKNOWN
6199             0x10AEB,  // 10AEB..10AF6; MANICHAEAN
6200             0x10AF7,  // 10AF7..10AFF; UNKNOWN
6201             0x10B00,  // 10B00..10B35; AVESTAN
6202             0x10B36,  // 10B36..10B38; UNKNOWN
6203             0x10B39,  // 10B39..10B3F; AVESTAN
6204             0x10B40,  // 10B40..10B55; INSCRIPTIONAL_PARTHIAN
6205             0x10B56,  // 10B56..10B57; UNKNOWN
6206             0x10B58,  // 10B58..10B5F; INSCRIPTIONAL_PARTHIAN
6207             0x10B60,  // 10B60..10B72; INSCRIPTIONAL_PAHLAVI
6208             0x10B73,  // 10B73..10B77; UNKNOWN
6209             0x10B78,  // 10B78..10B7F; INSCRIPTIONAL_PAHLAVI
6210             0x10B80,  // 10B80..10B91; PSALTER_PAHLAVI
6211             0x10B92,  // 10B92..10B98; UNKNOWN
6212             0x10B99,  // 10B99..10B9C; PSALTER_PAHLAVI
6213             0x10B9D,  // 10B9D..10BA8; UNKNOWN
6214             0x10BA9,  // 10BA9..10BAF; PSALTER_PAHLAVI
6215             0x10BB0,  // 10BB0..10BFF; UNKNOWN
6216             0x10C00,  // 10C00..10C48; OLD_TURKIC
6217             0x10C49,  // 10C49..10C7F; UNKNOWN
6218             0x10C80,  // 10C80..10CB2; OLD_HUNGARIAN
6219             0x10CB3,  // 10CB3..10CBF; UNKNOWN
6220             0x10CC0,  // 10CC0..10CF2; OLD_HUNGARIAN
6221             0x10CF3,  // 10CF3..10CF9; UNKNOWN
6222             0x10CFA,  // 10CFA..10CFF; OLD_HUNGARIAN
6223             0x10D00,  // 10D00..10D27; HANIFI_ROHINGYA
6224             0x10D28,  // 10D28..10D2F; UNKNOWN
6225             0x10D30,  // 10D30..10D39; HANIFI_ROHINGYA
6226             0x10D3A,  // 10D3A..10E5F; UNKNOWN
6227             0x10E60,  // 10E60..10E7E; ARABIC
6228             0x10E7F,  // 10E7F       ; UNKNOWN
6229             0x10E80,  // 10E80..10EA9; YEZIDI
6230             0x10EAA,  // 10EAA       ; UNKNOWN
6231             0x10EAB,  // 10EAB..10EAD; YEZIDI
6232             0x10EAE,  // 10EAE..10EAF; UNKNOWN
6233             0x10EB0,  // 10EB0..10EB1; YEZIDI
6234             0x10EB2,  // 10EB2..10EFF; UNKNOWN
6235             0x10F00,  // 10F00..10F27; OLD_SOGDIAN
6236             0x10F28,  // 10F28..10F2F; UNKNOWN
6237             0x10F30,  // 10F30..10F59; SOGDIAN
6238             0x10F5A,  // 10F5A..10FAF; UNKNOWN
6239             0x10FB0,  // 10FB0..10FCB; CHORASMIAN
6240             0x10FCC,  // 10FCC..10FDF; UNKNOWN
6241             0x10FE0,  // 10FE0..10FF6; ELYMAIC
6242             0x10FF7,  // 10FF7..10FFF; UNKNOWN
6243             0x11000,  // 11000..1104D; BRAHMI
6244             0x1104E,  // 1104E..11051; UNKNOWN
6245             0x11052,  // 11052..1106F; BRAHMI
6246             0x11070,  // 11070..1107E; UNKNOWN
6247             0x1107F,  // 1107F       ; BRAHMI
6248             0x11080,  // 11080..110C1; KAITHI
6249             0x110C2,  // 110C2..110CC; UNKNOWN
6250             0x110CD,  // 110CD       ; KAITHI
6251             0x110CE,  // 110CE..110CF; UNKNOWN
6252             0x110D0,  // 110D0..110E8; SORA_SOMPENG
6253             0x110E9,  // 110E9..110EF; UNKNOWN
6254             0x110F0,  // 110F0..110F9; SORA_SOMPENG
6255             0x110FA,  // 110FA..110FF; UNKNOWN
6256             0x11100,  // 11100..11134; CHAKMA
6257             0x11135,  // 11135       ; UNKNOWN
6258             0x11136,  // 11136..11147; CHAKMA
6259             0x11148,  // 11148..1114F; UNKNOWN
6260             0x11150,  // 11150..11176; MAHAJANI
6261             0x11177,  // 11177..1117F; UNKNOWN
6262             0x11180,  // 11180..111DF; SHARADA
6263             0x111E0,  // 111E0       ; UNKNOWN
6264             0x111E1,  // 111E1..111F4; SINHALA
6265             0x111F5,  // 111F5..111FF; UNKNOWN
6266             0x11200,  // 11200..11211; KHOJKI
6267             0x11212,  // 11212       ; UNKNOWN
6268             0x11213,  // 11213..1123E; KHOJKI
6269             0x1123F,  // 1123F..1127F; UNKNOWN
6270             0x11280,  // 11280..11286; MULTANI
6271             0x11287,  // 11287       ; UNKNOWN
6272             0x11288,  // 11288       ; MULTANI
6273             0x11289,  // 11289       ; UNKNOWN
6274             0x1128A,  // 1128A..1128D; MULTANI
6275             0x1128E,  // 1128E       ; UNKNOWN
6276             0x1128F,  // 1128F..1129D; MULTANI
6277             0x1129E,  // 1129E       ; UNKNOWN
6278             0x1129F,  // 1129F..112A9; MULTANI
6279             0x112AA,  // 112AA..112AF; UNKNOWN
6280             0x112B0,  // 112B0..112EA; KHUDAWADI
6281             0x112EB,  // 112EB..112EF; UNKNOWN
6282             0x112F0,  // 112F0..112F9; KHUDAWADI
6283             0x112FA,  // 112FA..112FF; UNKNOWN
6284             0x11300,  // 11300..11303; GRANTHA
6285             0x11304,  // 11304       ; UNKNOWN
6286             0x11305,  // 11305..1130C; GRANTHA
6287             0x1130D,  // 1130D..1130E; UNKNOWN
6288             0x1130F,  // 1130F..11310; GRANTHA
6289             0x11311,  // 11311..11312; UNKNOWN
6290             0x11313,  // 11313..11328; GRANTHA
6291             0x11329,  // 11329       ; UNKNOWN
6292             0x1132A,  // 1132A..11330; GRANTHA
6293             0x11331,  // 11331       ; UNKNOWN
6294             0x11332,  // 11332..11333; GRANTHA
6295             0x11334,  // 11334       ; UNKNOWN
6296             0x11335,  // 11335..11339; GRANTHA
6297             0x1133A,  // 1133A       ; UNKNOWN
6298             0x1133B,  // 1133B       ; INHERITED
6299             0x1133C,  // 1133C..11344; GRANTHA
6300             0x11345,  // 11345..11346; UNKNOWN
6301             0x11347,  // 11347..11348; GRANTHA
6302             0x11349,  // 11349..1134A; UNKNOWN
6303             0x1134B,  // 1134B..1134D; GRANTHA
6304             0x1134E,  // 1134E..1134F; UNKNOWN
6305             0x11350,  // 11350       ; GRANTHA
6306             0x11351,  // 11351..11356; UNKNOWN
6307             0x11357,  // 11357       ; GRANTHA
6308             0x11358,  // 11358..1135C; UNKNOWN
6309             0x1135D,  // 1135D..11363; GRANTHA
6310             0x11364,  // 11364..11365; UNKNOWN
6311             0x11366,  // 11366..1136C; GRANTHA
6312             0x1136D,  // 1136D..1136F; UNKNOWN
6313             0x11370,  // 11370..11374; GRANTHA
6314             0x11375,  // 11375..113FF; UNKNOWN
6315             0x11400,  // 11400..1145B; NEWA
6316             0x1145C,  // 1145C       ; UNKNOWN
6317             0x1145D,  // 1145D..11461; NEWA
6318             0x11462,  // 11462..1147F; UNKNOWN
6319             0x11480,  // 11480..114C7; TIRHUTA
6320             0x114C8,  // 114C8..114CF; UNKNOWN
6321             0x114D0,  // 114D0..114D9; TIRHUTA
6322             0x114DA,  // 114DA..1157F; UNKNOWN
6323             0x11580,  // 11580..115B5; SIDDHAM
6324             0x115B6,  // 115B6..115B7; UNKNOWN
6325             0x115B8,  // 115B8..115DD; SIDDHAM
6326             0x115DE,  // 115DE..115FF; UNKNOWN
6327             0x11600,  // 11600..11644; MODI
6328             0x11645,  // 11645..1164F; UNKNOWN
6329             0x11650,  // 11650..11659; MODI
6330             0x1165A,  // 1165A..1165F; UNKNOWN
6331             0x11660,  // 11660..1166C; MONGOLIAN
6332             0x1166D,  // 1166D..1167F; UNKNOWN
6333             0x11680,  // 11680..116B8; TAKRI
6334             0x116B9,  // 116B9..116BF; UNKNOWN
6335             0x116C0,  // 116C0..116C9; TAKRI
6336             0x116CA,  // 116CA..116FF; UNKNOWN
6337             0x11700,  // 11700..1171A; AHOM
6338             0x1171B,  // 1171B..1171C; UNKNOWN
6339             0x1171D,  // 1171D..1172B; AHOM
6340             0x1172C,  // 1172C..1172F; UNKNOWN
6341             0x11730,  // 11730..1173F; AHOM
6342             0x11740,  // 11740..117FF; UNKNOWN
6343             0x11800,  // 11800..1183B; DOGRA
6344             0x1183C,  // 1183C..1189F; UNKNOWN
6345             0x118A0,  // 118A0..118F2; WARANG_CITI
6346             0x118F3,  // 118F3..118FE; UNKNOWN
6347             0x118FF,  // 118FF       ; WARANG_CITI
6348             0x11900,  // 11900..11906; DIVES_AKURU
6349             0x11907,  // 11907..11908; UNKNOWN
6350             0x11909,  // 11909       ; DIVES_AKURU
6351             0x1190A,  // 1190A..1190B; UNKNOWN
6352             0x1190C,  // 1190C..11913; DIVES_AKURU
6353             0x11914,  // 11914       ; UNKNOWN
6354             0x11915,  // 11915..11916; DIVES_AKURU
6355             0x11917,  // 11917       ; UNKNOWN
6356             0x11918,  // 11918..11935; DIVES_AKURU
6357             0x11936,  // 11936       ; UNKNOWN
6358             0x11937,  // 11937..11938; DIVES_AKURU
6359             0x11939,  // 11939..1193A; UNKNOWN
6360             0x1193B,  // 1193B..11946; DIVES_AKURU
6361             0x11947,  // 11947..1194F; UNKNOWN
6362             0x11950,  // 11950..11959; DIVES_AKURU
6363             0x1195A,  // 1195A..1199F; UNKNOWN
6364             0x119A0,  // 119A0..119A7; NANDINAGARI
6365             0x119A8,  // 119A8..119A9; UNKNOWN
6366             0x119AA,  // 119AA..119D7; NANDINAGARI
6367             0x119D8,  // 119D8..119D9; UNKNOWN
6368             0x119DA,  // 119DA..119E4; NANDINAGARI
6369             0x119E5,  // 119E5..119FF; UNKNOWN
6370             0x11A00,  // 11A00..11A47; ZANABAZAR_SQUARE
6371             0x11A48,  // 11A48..11A4F; UNKNOWN
6372             0x11A50,  // 11A50..11AA2; SOYOMBO
6373             0x11AA3,  // 11AA3..11ABF; UNKNOWN
6374             0x11AC0,  // 11AC0..11AF8; PAU_CIN_HAU
6375             0x11AF9,  // 11AF9..11BFF; UNKNOWN
6376             0x11C00,  // 11C00..11C08; BHAIKSUKI
6377             0x11C09,  // 11C09       ; UNKNOWN
6378             0x11C0A,  // 11C0A..11C36; BHAIKSUKI
6379             0x11C37,  // 11C37       ; UNKNOWN
6380             0x11C38,  // 11C38..11C45; BHAIKSUKI
6381             0x11C46,  // 11C46..11C4F; UNKNOWN
6382             0x11C50,  // 11C50..11C6C; BHAIKSUKI
6383             0x11C6D,  // 11C6D..11C6F; UNKNOWN
6384             0x11C70,  // 11C70..11C8F; MARCHEN
6385             0x11C90,  // 11C90..11C91; UNKNOWN
6386             0x11C92,  // 11C92..11CA7; MARCHEN
6387             0x11CA8,  // 11CA8       ; UNKNOWN
6388             0x11CA9,  // 11CA9..11CB6; MARCHEN
6389             0x11CB7,  // 11CB7..11CFF; UNKNOWN
6390             0x11D00,  // 11D00..11D06; MASARAM_GONDI
6391             0x11D07,  // 11D07       ; UNKNOWN
6392             0x11D08,  // 11D08..11D09; MASARAM_GONDI
6393             0x11D0A,  // 11D0A       ; UNKNOWN
6394             0x11D0B,  // 11D0B..11D36; MASARAM_GONDI
6395             0x11D37,  // 11D37..11D39; UNKNOWN
6396             0x11D3A,  // 11D3A       ; MASARAM_GONDI
6397             0x11D3B,  // 11D3B       ; UNKNOWN
6398             0x11D3C,  // 11D3C..11D3D; MASARAM_GONDI
6399             0x11D3E,  // 11D3E       ; UNKNOWN
6400             0x11D3F,  // 11D3F..11D47; MASARAM_GONDI
6401             0x11D48,  // 11D48..11D4F; UNKNOWN
6402             0x11D50,  // 11D50..11D59; MASARAM_GONDI
6403             0x11D5A,  // 11D5A..11D5F; UNKNOWN
6404             0x11D60,  // 11D60..11D65; GUNJALA_GONDI
6405             0x11D66,  // 11D66       ; UNKNOWN
6406             0x11D67,  // 11D67..11D68; GUNJALA_GONDI
6407             0x11D69,  // 11D69       ; UNKNOWN
6408             0x11D6A,  // 11D6A..11D8E; GUNJALA_GONDI
6409             0x11D8F,  // 11D8F       ; UNKNOWN
6410             0x11D90,  // 11D90..11D91; GUNJALA_GONDI
6411             0x11D92,  // 11D92       ; UNKNOWN
6412             0x11D93,  // 11D93..11D98; GUNJALA_GONDI
6413             0x11D99,  // 11D99..11D9F; UNKNOWN
6414             0x11DA0,  // 11DA0..11DA9; GUNJALA_GONDI
6415             0x11DAA,  // 11DAA..11EDF; UNKNOWN
6416             0x11EE0,  // 11EE0..11EF8; MAKASAR
6417             0x11EF9,  // 11EF9..11FAF; UNKNOWN
6418             0x11FB0,  // 11FB0       ; LISU
6419             0x11FB1,  // 11FB1..11FBF; UNKNOWN
6420             0x11FC0,  // 11FC0..11FF1; TAMIL
6421             0x11FF2,  // 11FF2..11FFE; UNKNOWN
6422             0x11FFF,  // 11FFF       ; TAMIL
6423             0x12000,  // 12000..12399; CUNEIFORM
6424             0x1239A,  // 1239A..123FF; UNKNOWN
6425             0x12400,  // 12400..1246E; CUNEIFORM
6426             0x1246F,  // 1246F       ; UNKNOWN
6427             0x12470,  // 12470..12474; CUNEIFORM
6428             0x12475,  // 12475..1247F; UNKNOWN
6429             0x12480,  // 12480..12543; CUNEIFORM
6430             0x12544,  // 12544..12FFF; UNKNOWN
6431             0x13000,  // 13000..1342E; EGYPTIAN_HIEROGLYPHS
6432             0x1342F,  // 1342F       ; UNKNOWN
6433             0x13430,  // 13430..13438; EGYPTIAN_HIEROGLYPHS
6434             0x13439,  // 13439..143FF; UNKNOWN
6435             0x14400,  // 14400..14646; ANATOLIAN_HIEROGLYPHS
6436             0x14647,  // 14647..167FF; UNKNOWN
6437             0x16800,  // 16800..16A38; BAMUM
6438             0x16A39,  // 16A39..16A3F; UNKNOWN
6439             0x16A40,  // 16A40..16A5E; MRO
6440             0x16A5F,  // 16A5F       ; UNKNOWN
6441             0x16A60,  // 16A60..16A69; MRO
6442             0x16A6A,  // 16A6A..16A6D; UNKNOWN
6443             0x16A6E,  // 16A6E..16A6F; MRO
6444             0x16A70,  // 16A70..16ACF; UNKNOWN
6445             0x16AD0,  // 16AD0..16AED; BASSA_VAH
6446             0x16AEE,  // 16AEE..16AEF; UNKNOWN
6447             0x16AF0,  // 16AF0..16AF5; BASSA_VAH
6448             0x16AF6,  // 16AF6..16AFF; UNKNOWN
6449             0x16B00,  // 16B00..16B45; PAHAWH_HMONG
6450             0x16B46,  // 16B46..16B4F; UNKNOWN
6451             0x16B50,  // 16B50..16B59; PAHAWH_HMONG
6452             0x16B5A,  // 16B5A       ; UNKNOWN
6453             0x16B5B,  // 16B5B..16B61; PAHAWH_HMONG
6454             0x16B62,  // 16B62       ; UNKNOWN
6455             0x16B63,  // 16B63..16B77; PAHAWH_HMONG
6456             0x16B78,  // 16B78..16B7C; UNKNOWN
6457             0x16B7D,  // 16B7D..16B8F; PAHAWH_HMONG
6458             0x16B90,  // 16B90..16E3F; UNKNOWN
6459             0x16E40,  // 16E40..16E9A; MEDEFAIDRIN
6460             0x16E9B,  // 16E9B..16EFF; UNKNOWN
6461             0x16F00,  // 16F00..16F4A; MIAO
6462             0x16F4B,  // 16F4B..16F4E; UNKNOWN
6463             0x16F4F,  // 16F4F..16F87; MIAO
6464             0x16F88,  // 16F88..16F8E; UNKNOWN
6465             0x16F8F,  // 16F8F..16F9F; MIAO
6466             0x16FA0,  // 16FA0..16FDF; UNKNOWN
6467             0x16FE0,  // 16FE0       ; TANGUT
6468             0x16FE1,  // 16FE1       ; NUSHU
6469             0x16FE2,  // 16FE2..16FE3; COMMON
6470             0x16FE4,  // 16FE4       ; KHITAN_SMALL_SCRIPT
6471             0x16FE5,  // 16FE5..16FEF; UNKNOWN
6472             0x16FF0,  // 16FF0..16FF1; HAN
6473             0x16FF2,  // 16FF2..16FFF; UNKNOWN
6474             0x17000,  // 17000..187F7; TANGUT
6475             0x187F8,  // 187F8..187FF; UNKNOWN
6476             0x18800,  // 18800..18AFF; TANGUT
6477             0x18B00,  // 18B00..18CD5; KHITAN_SMALL_SCRIPT
6478             0x18CD6,  // 18CD6..18CFF; UNKNOWN
6479             0x18D00,  // 18D00..18D08; TANGUT
6480             0x18D09,  // 18D09..1AFFF; UNKNOWN
6481             0x1B000,  // 1B000       ; KATAKANA
6482             0x1B001,  // 1B001..1B11E; HIRAGANA
6483             0x1B11F,  // 1B11F..1B14F; UNKNOWN
6484             0x1B150,  // 1B150..1B152; HIRAGANA
6485             0x1B153,  // 1B153..1B163; UNKNOWN
6486             0x1B164,  // 1B164..1B167; KATAKANA
6487             0x1B168,  // 1B168..1B16F; UNKNOWN
6488             0x1B170,  // 1B170..1B2FB; NUSHU
6489             0x1B2FC,  // 1B2FC..1BBFF; UNKNOWN
6490             0x1BC00,  // 1BC00..1BC6A; DUPLOYAN
6491             0x1BC6B,  // 1BC6B..1BC6F; UNKNOWN
6492             0x1BC70,  // 1BC70..1BC7C; DUPLOYAN
6493             0x1BC7D,  // 1BC7D..1BC7F; UNKNOWN
6494             0x1BC80,  // 1BC80..1BC88; DUPLOYAN
6495             0x1BC89,  // 1BC89..1BC8F; UNKNOWN
6496             0x1BC90,  // 1BC90..1BC99; DUPLOYAN
6497             0x1BC9A,  // 1BC9A..1BC9B; UNKNOWN
6498             0x1BC9C,  // 1BC9C..1BC9F; DUPLOYAN
6499             0x1BCA0,  // 1BCA0..1BCA3; COMMON
6500             0x1BCA4,  // 1BCA4..1CFFF; UNKNOWN
6501             0x1D000,  // 1D000..1D0F5; COMMON
6502             0x1D0F6,  // 1D0F6..1D0FF; UNKNOWN
6503             0x1D100,  // 1D100..1D126; COMMON
6504             0x1D127,  // 1D127..1D128; UNKNOWN
6505             0x1D129,  // 1D129..1D166; COMMON
6506             0x1D167,  // 1D167..1D169; INHERITED
6507             0x1D16A,  // 1D16A..1D17A; COMMON
6508             0x1D17B,  // 1D17B..1D182; INHERITED
6509             0x1D183,  // 1D183..1D184; COMMON
6510             0x1D185,  // 1D185..1D18B; INHERITED
6511             0x1D18C,  // 1D18C..1D1A9; COMMON
6512             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
6513             0x1D1AE,  // 1D1AE..1D1E8; COMMON
6514             0x1D1E9,  // 1D1E9..1D1FF; UNKNOWN
6515             0x1D200,  // 1D200..1D245; GREEK
6516             0x1D246,  // 1D246..1D2DF; UNKNOWN
6517             0x1D2E0,  // 1D2E0..1D2F3; COMMON
6518             0x1D2F4,  // 1D2F4..1D2FF; UNKNOWN
6519             0x1D300,  // 1D300..1D356; COMMON
6520             0x1D357,  // 1D357..1D35F; UNKNOWN
6521             0x1D360,  // 1D360..1D378; COMMON
6522             0x1D379,  // 1D379..1D3FF; UNKNOWN
6523             0x1D400,  // 1D400..1D454; COMMON
6524             0x1D455,  // 1D455       ; UNKNOWN
6525             0x1D456,  // 1D456..1D49C; COMMON
6526             0x1D49D,  // 1D49D       ; UNKNOWN
6527             0x1D49E,  // 1D49E..1D49F; COMMON
6528             0x1D4A0,  // 1D4A0..1D4A1; UNKNOWN
6529             0x1D4A2,  // 1D4A2       ; COMMON
6530             0x1D4A3,  // 1D4A3..1D4A4; UNKNOWN
6531             0x1D4A5,  // 1D4A5..1D4A6; COMMON
6532             0x1D4A7,  // 1D4A7..1D4A8; UNKNOWN
6533             0x1D4A9,  // 1D4A9..1D4AC; COMMON
6534             0x1D4AD,  // 1D4AD       ; UNKNOWN
6535             0x1D4AE,  // 1D4AE..1D4B9; COMMON
6536             0x1D4BA,  // 1D4BA       ; UNKNOWN
6537             0x1D4BB,  // 1D4BB       ; COMMON
6538             0x1D4BC,  // 1D4BC       ; UNKNOWN
6539             0x1D4BD,  // 1D4BD..1D4C3; COMMON
6540             0x1D4C4,  // 1D4C4       ; UNKNOWN
6541             0x1D4C5,  // 1D4C5..1D505; COMMON
6542             0x1D506,  // 1D506       ; UNKNOWN
6543             0x1D507,  // 1D507..1D50A; COMMON
6544             0x1D50B,  // 1D50B..1D50C; UNKNOWN
6545             0x1D50D,  // 1D50D..1D514; COMMON
6546             0x1D515,  // 1D515       ; UNKNOWN
6547             0x1D516,  // 1D516..1D51C; COMMON
6548             0x1D51D,  // 1D51D       ; UNKNOWN
6549             0x1D51E,  // 1D51E..1D539; COMMON
6550             0x1D53A,  // 1D53A       ; UNKNOWN
6551             0x1D53B,  // 1D53B..1D53E; COMMON
6552             0x1D53F,  // 1D53F       ; UNKNOWN
6553             0x1D540,  // 1D540..1D544; COMMON
6554             0x1D545,  // 1D545       ; UNKNOWN
6555             0x1D546,  // 1D546       ; COMMON
6556             0x1D547,  // 1D547..1D549; UNKNOWN
6557             0x1D54A,  // 1D54A..1D550; COMMON
6558             0x1D551,  // 1D551       ; UNKNOWN
6559             0x1D552,  // 1D552..1D6A5; COMMON
6560             0x1D6A6,  // 1D6A6..1D6A7; UNKNOWN
6561             0x1D6A8,  // 1D6A8..1D7CB; COMMON
6562             0x1D7CC,  // 1D7CC..1D7CD; UNKNOWN
6563             0x1D7CE,  // 1D7CE..1D7FF; COMMON
6564             0x1D800,  // 1D800..1DA8B; SIGNWRITING
6565             0x1DA8C,  // 1DA8C..1DA9A; UNKNOWN
6566             0x1DA9B,  // 1DA9B..1DA9F; SIGNWRITING
6567             0x1DAA0,  // 1DAA0       ; UNKNOWN
6568             0x1DAA1,  // 1DAA1..1DAAF; SIGNWRITING
6569             0x1DAB0,  // 1DAB0..1DFFF; UNKNOWN
6570             0x1E000,  // 1E000..1E006; GLAGOLITIC
6571             0x1E007,  // 1E007       ; UNKNOWN
6572             0x1E008,  // 1E008..1E018; GLAGOLITIC
6573             0x1E019,  // 1E019..1E01A; UNKNOWN
6574             0x1E01B,  // 1E01B..1E021; GLAGOLITIC
6575             0x1E022,  // 1E022       ; UNKNOWN
6576             0x1E023,  // 1E023..1E024; GLAGOLITIC
6577             0x1E025,  // 1E025       ; UNKNOWN
6578             0x1E026,  // 1E026..1E02A; GLAGOLITIC
6579             0x1E02B,  // 1E02B..1E0FF; UNKNOWN
6580             0x1E100,  // 1E100..1E12C; NYIAKENG_PUACHUE_HMONG
6581             0x1E12D,  // 1E12D..1E12F; UNKNOWN
6582             0x1E130,  // 1E130..1E13D; NYIAKENG_PUACHUE_HMONG
6583             0x1E13E,  // 1E13E..1E13F; UNKNOWN
6584             0x1E140,  // 1E140..1E149; NYIAKENG_PUACHUE_HMONG
6585             0x1E14A,  // 1E14A..1E14D; UNKNOWN
6586             0x1E14E,  // 1E14E..1E14F; NYIAKENG_PUACHUE_HMONG
6587             0x1E150,  // 1E150..1E2BF; UNKNOWN
6588             0x1E2C0,  // 1E2C0..1E2F9; WANCHO
6589             0x1E2FA,  // 1E2FA..1E2FE; UNKNOWN
6590             0x1E2FF,  // 1E2FF       ; WANCHO
6591             0x1E300,  // 1E300..1E7FF; UNKNOWN
6592             0x1E800,  // 1E800..1E8C4; MENDE_KIKAKUI
6593             0x1E8C5,  // 1E8C5..1E8C6; UNKNOWN
6594             0x1E8C7,  // 1E8C7..1E8D6; MENDE_KIKAKUI
6595             0x1E8D7,  // 1E8D7..1E8FF; UNKNOWN
6596             0x1E900,  // 1E900..1E94B; ADLAM
6597             0x1E94C,  // 1E94C..1E94F; UNKNOWN
6598             0x1E950,  // 1E950..1E959; ADLAM
6599             0x1E95A,  // 1E95A..1E95D; UNKNOWN
6600             0x1E95E,  // 1E95E..1E95F; ADLAM
6601             0x1E960,  // 1E960..1EC70; UNKNOWN
6602             0x1EC71,  // 1EC71..1ECB4; COMMON
6603             0x1ECB5,  // 1ECB5..1ED00; UNKNOWN
6604             0x1ED01,  // 1ED01..1ED3D; COMMON
6605             0x1ED3E,  // 1ED3E..1EDFF; UNKNOWN
6606             0x1EE00,  // 1EE00..1EE03; ARABIC
6607             0x1EE04,  // 1EE04       ; UNKNOWN
6608             0x1EE05,  // 1EE05..1EE1F; ARABIC
6609             0x1EE20,  // 1EE20       ; UNKNOWN
6610             0x1EE21,  // 1EE21..1EE22; ARABIC
6611             0x1EE23,  // 1EE23       ; UNKNOWN
6612             0x1EE24,  // 1EE24       ; ARABIC
6613             0x1EE25,  // 1EE25..1EE26; UNKNOWN
6614             0x1EE27,  // 1EE27       ; ARABIC
6615             0x1EE28,  // 1EE28       ; UNKNOWN
6616             0x1EE29,  // 1EE29..1EE32; ARABIC
6617             0x1EE33,  // 1EE33       ; UNKNOWN
6618             0x1EE34,  // 1EE34..1EE37; ARABIC
6619             0x1EE38,  // 1EE38       ; UNKNOWN
6620             0x1EE39,  // 1EE39       ; ARABIC
6621             0x1EE3A,  // 1EE3A       ; UNKNOWN
6622             0x1EE3B,  // 1EE3B       ; ARABIC
6623             0x1EE3C,  // 1EE3C..1EE41; UNKNOWN
6624             0x1EE42,  // 1EE42       ; ARABIC
6625             0x1EE43,  // 1EE43..1EE46; UNKNOWN
6626             0x1EE47,  // 1EE47       ; ARABIC
6627             0x1EE48,  // 1EE48       ; UNKNOWN
6628             0x1EE49,  // 1EE49       ; ARABIC
6629             0x1EE4A,  // 1EE4A       ; UNKNOWN
6630             0x1EE4B,  // 1EE4B       ; ARABIC
6631             0x1EE4C,  // 1EE4C       ; UNKNOWN
6632             0x1EE4D,  // 1EE4D..1EE4F; ARABIC
6633             0x1EE50,  // 1EE50       ; UNKNOWN
6634             0x1EE51,  // 1EE51..1EE52; ARABIC
6635             0x1EE53,  // 1EE53       ; UNKNOWN
6636             0x1EE54,  // 1EE54       ; ARABIC
6637             0x1EE55,  // 1EE55..1EE56; UNKNOWN
6638             0x1EE57,  // 1EE57       ; ARABIC
6639             0x1EE58,  // 1EE58       ; UNKNOWN
6640             0x1EE59,  // 1EE59       ; ARABIC
6641             0x1EE5A,  // 1EE5A       ; UNKNOWN
6642             0x1EE5B,  // 1EE5B       ; ARABIC
6643             0x1EE5C,  // 1EE5C       ; UNKNOWN
6644             0x1EE5D,  // 1EE5D       ; ARABIC
6645             0x1EE5E,  // 1EE5E       ; UNKNOWN
6646             0x1EE5F,  // 1EE5F       ; ARABIC
6647             0x1EE60,  // 1EE60       ; UNKNOWN
6648             0x1EE61,  // 1EE61..1EE62; ARABIC
6649             0x1EE63,  // 1EE63       ; UNKNOWN
6650             0x1EE64,  // 1EE64       ; ARABIC
6651             0x1EE65,  // 1EE65..1EE66; UNKNOWN
6652             0x1EE67,  // 1EE67..1EE6A; ARABIC
6653             0x1EE6B,  // 1EE6B       ; UNKNOWN
6654             0x1EE6C,  // 1EE6C..1EE72; ARABIC
6655             0x1EE73,  // 1EE73       ; UNKNOWN
6656             0x1EE74,  // 1EE74..1EE77; ARABIC
6657             0x1EE78,  // 1EE78       ; UNKNOWN
6658             0x1EE79,  // 1EE79..1EE7C; ARABIC
6659             0x1EE7D,  // 1EE7D       ; UNKNOWN
6660             0x1EE7E,  // 1EE7E       ; ARABIC
6661             0x1EE7F,  // 1EE7F       ; UNKNOWN
6662             0x1EE80,  // 1EE80..1EE89; ARABIC
6663             0x1EE8A,  // 1EE8A       ; UNKNOWN
6664             0x1EE8B,  // 1EE8B..1EE9B; ARABIC
6665             0x1EE9C,  // 1EE9C..1EEA0; UNKNOWN
6666             0x1EEA1,  // 1EEA1..1EEA3; ARABIC
6667             0x1EEA4,  // 1EEA4       ; UNKNOWN
6668             0x1EEA5,  // 1EEA5..1EEA9; ARABIC
6669             0x1EEAA,  // 1EEAA       ; UNKNOWN
6670             0x1EEAB,  // 1EEAB..1EEBB; ARABIC
6671             0x1EEBC,  // 1EEBC..1EEEF; UNKNOWN
6672             0x1EEF0,  // 1EEF0..1EEF1; ARABIC
6673             0x1EEF2,  // 1EEF2..1EFFF; UNKNOWN
6674             0x1F000,  // 1F000..1F02B; COMMON
6675             0x1F02C,  // 1F02C..1F02F; UNKNOWN
6676             0x1F030,  // 1F030..1F093; COMMON
6677             0x1F094,  // 1F094..1F09F; UNKNOWN
6678             0x1F0A0,  // 1F0A0..1F0AE; COMMON
6679             0x1F0AF,  // 1F0AF..1F0B0; UNKNOWN
6680             0x1F0B1,  // 1F0B1..1F0BF; COMMON
6681             0x1F0C0,  // 1F0C0       ; UNKNOWN
6682             0x1F0C1,  // 1F0C1..1F0CF; COMMON
6683             0x1F0D0,  // 1F0D0       ; UNKNOWN
6684             0x1F0D1,  // 1F0D1..1F0F5; COMMON
6685             0x1F0F6,  // 1F0F6..1F0FF; UNKNOWN
6686             0x1F100,  // 1F100..1F1AD; COMMON
6687             0x1F1AE,  // 1F1AE..1F1E5; UNKNOWN
6688             0x1F1E6,  // 1F1E6..1F1FF; COMMON
6689             0x1F200,  // 1F200       ; HIRAGANA
6690             0x1F201,  // 1F201..1F202; COMMON
6691             0x1F203,  // 1F203..1F20F; UNKNOWN
6692             0x1F210,  // 1F210..1F23B; COMMON
6693             0x1F23C,  // 1F23C..1F23F; UNKNOWN
6694             0x1F240,  // 1F240..1F248; COMMON
6695             0x1F249,  // 1F249..1F24F; UNKNOWN
6696             0x1F250,  // 1F250..1F251; COMMON
6697             0x1F252,  // 1F252..1F25F; UNKNOWN
6698             0x1F260,  // 1F260..1F265; COMMON
6699             0x1F266,  // 1F266..1F2FF; UNKNOWN
6700             0x1F300,  // 1F300..1F6D7; COMMON
6701             0x1F6D8,  // 1F6D8..1F6DF; UNKNOWN
6702             0x1F6E0,  // 1F6E0..1F6EC; COMMON
6703             0x1F6ED,  // 1F6ED..1F6EF; UNKNOWN
6704             0x1F6F0,  // 1F6F0..1F6FC; COMMON
6705             0x1F6FD,  // 1F6FD..1F6FF; UNKNOWN
6706             0x1F700,  // 1F700..1F773; COMMON
6707             0x1F774,  // 1F774..1F77F; UNKNOWN
6708             0x1F780,  // 1F780..1F7D8; COMMON
6709             0x1F7D9,  // 1F7D9..1F7DF; UNKNOWN
6710             0x1F7E0,  // 1F7E0..1F7EB; COMMON
6711             0x1F7EC,  // 1F7EC..1F7FF; UNKNOWN
6712             0x1F800,  // 1F800..1F80B; COMMON
6713             0x1F80C,  // 1F80C..1F80F; UNKNOWN
6714             0x1F810,  // 1F810..1F847; COMMON
6715             0x1F848,  // 1F848..1F84F; UNKNOWN
6716             0x1F850,  // 1F850..1F859; COMMON
6717             0x1F85A,  // 1F85A..1F85F; UNKNOWN
6718             0x1F860,  // 1F860..1F887; COMMON
6719             0x1F888,  // 1F888..1F88F; UNKNOWN
6720             0x1F890,  // 1F890..1F8AD; COMMON
6721             0x1F8AE,  // 1F8AE..1F8AF; UNKNOWN
6722             0x1F8B0,  // 1F8B0..1F8B1; COMMON
6723             0x1F8B2,  // 1F8B2..1F8FF; UNKNOWN
6724             0x1F900,  // 1F900..1F978; COMMON
6725             0x1F979,  // 1F979       ; UNKNOWN
6726             0x1F97A,  // 1F97A..1F9CB; COMMON
6727             0x1F9CC,  // 1F9CC       ; UNKNOWN
6728             0x1F9CD,  // 1F9CD..1FA53; COMMON
6729             0x1FA54,  // 1FA54..1FA5F; UNKNOWN
6730             0x1FA60,  // 1FA60..1FA6D; COMMON
6731             0x1FA6E,  // 1FA6E..1FA6F; UNKNOWN
6732             0x1FA70,  // 1FA70..1FA74; COMMON
6733             0x1FA75,  // 1FA75..1FA77; UNKNOWN
6734             0x1FA78,  // 1FA78..1FA7A; COMMON
6735             0x1FA7B,  // 1FA7B..1FA7F; UNKNOWN
6736             0x1FA80,  // 1FA80..1FA86; COMMON
6737             0x1FA87,  // 1FA87..1FA8F; UNKNOWN
6738             0x1FA90,  // 1FA90..1FAA8; COMMON
6739             0x1FAA9,  // 1FAA9..1FAAF; UNKNOWN
6740             0x1FAB0,  // 1FAB0..1FAB6; COMMON
6741             0x1FAB7,  // 1FAB7..1FABF; UNKNOWN
6742             0x1FAC0,  // 1FAC0..1FAC2; COMMON
6743             0x1FAC3,  // 1FAC3..1FACF; UNKNOWN
6744             0x1FAD0,  // 1FAD0..1FAD6; COMMON
6745             0x1FAD7,  // 1FAD7..1FAFF; UNKNOWN
6746             0x1FB00,  // 1FB00..1FB92; COMMON
6747             0x1FB93,  // 1FB93       ; UNKNOWN
6748             0x1FB94,  // 1FB94..1FBCA; COMMON
6749             0x1FBCB,  // 1FBCB..1FBEF; UNKNOWN
6750             0x1FBF0,  // 1FBF0..1FBF9; COMMON
6751             0x1FBFA,  // 1FBFA..1FFFF; UNKNOWN
6752             0x20000,  // 20000..2A6DD; HAN
6753             0x2A6DE,  // 2A6DE..2A6FF; UNKNOWN
6754             0x2A700,  // 2A700..2B734; HAN
6755             0x2B735,  // 2B735..2B73F; UNKNOWN
6756             0x2B740,  // 2B740..2B81D; HAN
6757             0x2B81E,  // 2B81E..2B81F; UNKNOWN
6758             0x2B820,  // 2B820..2CEA1; HAN
6759             0x2CEA2,  // 2CEA2..2CEAF; UNKNOWN
6760             0x2CEB0,  // 2CEB0..2EBE0; HAN
6761             0x2EBE1,  // 2EBE1..2F7FF; UNKNOWN
6762             0x2F800,  // 2F800..2FA1D; HAN
6763             0x2FA1E,  // 2FA1E..2FFFF; UNKNOWN
6764             0x30000,  // 30000..3134A; HAN
6765             0x3134B,  // 3134B..E0000; UNKNOWN
6766             0xE0001,  // E0001       ; COMMON
6767             0xE0002,  // E0002..E001F; UNKNOWN
6768             0xE0020,  // E0020..E007F; COMMON
6769             0xE0080,  // E0080..E00FF; UNKNOWN
6770             0xE0100,  // E0100..E01EF; INHERITED
6771             0xE01F0,  // E01F0..10FFFF; UNKNOWN
6772         };
6773 
6774         private static final UnicodeScript[] scripts = {
6775             COMMON,                   // 0000..0040
6776             LATIN,                    // 0041..005A
6777             COMMON,                   // 005B..0060
6778             LATIN,                    // 0061..007A
6779             COMMON,                   // 007B..00A9
6780             LATIN,                    // 00AA
6781             COMMON,                   // 00AB..00B9
6782             LATIN,                    // 00BA
6783             COMMON,                   // 00BB..00BF
6784             LATIN,                    // 00C0..00D6
6785             COMMON,                   // 00D7
6786             LATIN,                    // 00D8..00F6
6787             COMMON,                   // 00F7
6788             LATIN,                    // 00F8..02B8
6789             COMMON,                   // 02B9..02DF
6790             LATIN,                    // 02E0..02E4
6791             COMMON,                   // 02E5..02E9
6792             BOPOMOFO,                 // 02EA..02EB
6793             COMMON,                   // 02EC..02FF
6794             INHERITED,                // 0300..036F
6795             GREEK,                    // 0370..0373
6796             COMMON,                   // 0374
6797             GREEK,                    // 0375..0377
6798             UNKNOWN,                  // 0378..0379
6799             GREEK,                    // 037A..037D
6800             COMMON,                   // 037E
6801             GREEK,                    // 037F
6802             UNKNOWN,                  // 0380..0383
6803             GREEK,                    // 0384
6804             COMMON,                   // 0385
6805             GREEK,                    // 0386
6806             COMMON,                   // 0387
6807             GREEK,                    // 0388..038A
6808             UNKNOWN,                  // 038B
6809             GREEK,                    // 038C
6810             UNKNOWN,                  // 038D
6811             GREEK,                    // 038E..03A1
6812             UNKNOWN,                  // 03A2
6813             GREEK,                    // 03A3..03E1
6814             COPTIC,                   // 03E2..03EF
6815             GREEK,                    // 03F0..03FF
6816             CYRILLIC,                 // 0400..0484
6817             INHERITED,                // 0485..0486
6818             CYRILLIC,                 // 0487..052F
6819             UNKNOWN,                  // 0530
6820             ARMENIAN,                 // 0531..0556
6821             UNKNOWN,                  // 0557..0558
6822             ARMENIAN,                 // 0559..058A
6823             UNKNOWN,                  // 058B..058C
6824             ARMENIAN,                 // 058D..058F
6825             UNKNOWN,                  // 0590
6826             HEBREW,                   // 0591..05C7
6827             UNKNOWN,                  // 05C8..05CF
6828             HEBREW,                   // 05D0..05EA
6829             UNKNOWN,                  // 05EB..05EE
6830             HEBREW,                   // 05EF..05F4
6831             UNKNOWN,                  // 05F5..05FF
6832             ARABIC,                   // 0600..0604
6833             COMMON,                   // 0605
6834             ARABIC,                   // 0606..060B
6835             COMMON,                   // 060C
6836             ARABIC,                   // 060D..061A
6837             COMMON,                   // 061B
6838             ARABIC,                   // 061C
6839             UNKNOWN,                  // 061D
6840             ARABIC,                   // 061E
6841             COMMON,                   // 061F
6842             ARABIC,                   // 0620..063F
6843             COMMON,                   // 0640
6844             ARABIC,                   // 0641..064A
6845             INHERITED,                // 064B..0655
6846             ARABIC,                   // 0656..066F
6847             INHERITED,                // 0670
6848             ARABIC,                   // 0671..06DC
6849             COMMON,                   // 06DD
6850             ARABIC,                   // 06DE..06FF
6851             SYRIAC,                   // 0700..070D
6852             UNKNOWN,                  // 070E
6853             SYRIAC,                   // 070F..074A
6854             UNKNOWN,                  // 074B..074C
6855             SYRIAC,                   // 074D..074F
6856             ARABIC,                   // 0750..077F
6857             THAANA,                   // 0780..07B1
6858             UNKNOWN,                  // 07B2..07BF
6859             NKO,                      // 07C0..07FA
6860             UNKNOWN,                  // 07FB..07FC
6861             NKO,                      // 07FD..07FF
6862             SAMARITAN,                // 0800..082D
6863             UNKNOWN,                  // 082E..082F
6864             SAMARITAN,                // 0830..083E
6865             UNKNOWN,                  // 083F
6866             MANDAIC,                  // 0840..085B
6867             UNKNOWN,                  // 085C..085D
6868             MANDAIC,                  // 085E
6869             UNKNOWN,                  // 085F
6870             SYRIAC,                   // 0860..086A
6871             UNKNOWN,                  // 086B..089F
6872             ARABIC,                   // 08A0..08B4
6873             UNKNOWN,                  // 08B5
6874             ARABIC,                   // 08B6..08C7
6875             UNKNOWN,                  // 08C8..08D2
6876             ARABIC,                   // 08D3..08E1
6877             COMMON,                   // 08E2
6878             ARABIC,                   // 08E3..08FF
6879             DEVANAGARI,               // 0900..0950
6880             INHERITED,                // 0951..0954
6881             DEVANAGARI,               // 0955..0963
6882             COMMON,                   // 0964..0965
6883             DEVANAGARI,               // 0966..097F
6884             BENGALI,                  // 0980..0983
6885             UNKNOWN,                  // 0984
6886             BENGALI,                  // 0985..098C
6887             UNKNOWN,                  // 098D..098E
6888             BENGALI,                  // 098F..0990
6889             UNKNOWN,                  // 0991..0992
6890             BENGALI,                  // 0993..09A8
6891             UNKNOWN,                  // 09A9
6892             BENGALI,                  // 09AA..09B0
6893             UNKNOWN,                  // 09B1
6894             BENGALI,                  // 09B2
6895             UNKNOWN,                  // 09B3..09B5
6896             BENGALI,                  // 09B6..09B9
6897             UNKNOWN,                  // 09BA..09BB
6898             BENGALI,                  // 09BC..09C4
6899             UNKNOWN,                  // 09C5..09C6
6900             BENGALI,                  // 09C7..09C8
6901             UNKNOWN,                  // 09C9..09CA
6902             BENGALI,                  // 09CB..09CE
6903             UNKNOWN,                  // 09CF..09D6
6904             BENGALI,                  // 09D7
6905             UNKNOWN,                  // 09D8..09DB
6906             BENGALI,                  // 09DC..09DD
6907             UNKNOWN,                  // 09DE
6908             BENGALI,                  // 09DF..09E3
6909             UNKNOWN,                  // 09E4..09E5
6910             BENGALI,                  // 09E6..09FE
6911             UNKNOWN,                  // 09FF..0A00
6912             GURMUKHI,                 // 0A01..0A03
6913             UNKNOWN,                  // 0A04
6914             GURMUKHI,                 // 0A05..0A0A
6915             UNKNOWN,                  // 0A0B..0A0E
6916             GURMUKHI,                 // 0A0F..0A10
6917             UNKNOWN,                  // 0A11..0A12
6918             GURMUKHI,                 // 0A13..0A28
6919             UNKNOWN,                  // 0A29
6920             GURMUKHI,                 // 0A2A..0A30
6921             UNKNOWN,                  // 0A31
6922             GURMUKHI,                 // 0A32..0A33
6923             UNKNOWN,                  // 0A34
6924             GURMUKHI,                 // 0A35..0A36
6925             UNKNOWN,                  // 0A37
6926             GURMUKHI,                 // 0A38..0A39
6927             UNKNOWN,                  // 0A3A..0A3B
6928             GURMUKHI,                 // 0A3C
6929             UNKNOWN,                  // 0A3D
6930             GURMUKHI,                 // 0A3E..0A42
6931             UNKNOWN,                  // 0A43..0A46
6932             GURMUKHI,                 // 0A47..0A48
6933             UNKNOWN,                  // 0A49..0A4A
6934             GURMUKHI,                 // 0A4B..0A4D
6935             UNKNOWN,                  // 0A4E..0A50
6936             GURMUKHI,                 // 0A51
6937             UNKNOWN,                  // 0A52..0A58
6938             GURMUKHI,                 // 0A59..0A5C
6939             UNKNOWN,                  // 0A5D
6940             GURMUKHI,                 // 0A5E
6941             UNKNOWN,                  // 0A5F..0A65
6942             GURMUKHI,                 // 0A66..0A76
6943             UNKNOWN,                  // 0A77..0A80
6944             GUJARATI,                 // 0A81..0A83
6945             UNKNOWN,                  // 0A84
6946             GUJARATI,                 // 0A85..0A8D
6947             UNKNOWN,                  // 0A8E
6948             GUJARATI,                 // 0A8F..0A91
6949             UNKNOWN,                  // 0A92
6950             GUJARATI,                 // 0A93..0AA8
6951             UNKNOWN,                  // 0AA9
6952             GUJARATI,                 // 0AAA..0AB0
6953             UNKNOWN,                  // 0AB1
6954             GUJARATI,                 // 0AB2..0AB3
6955             UNKNOWN,                  // 0AB4
6956             GUJARATI,                 // 0AB5..0AB9
6957             UNKNOWN,                  // 0ABA..0ABB
6958             GUJARATI,                 // 0ABC..0AC5
6959             UNKNOWN,                  // 0AC6
6960             GUJARATI,                 // 0AC7..0AC9
6961             UNKNOWN,                  // 0ACA
6962             GUJARATI,                 // 0ACB..0ACD
6963             UNKNOWN,                  // 0ACE..0ACF
6964             GUJARATI,                 // 0AD0
6965             UNKNOWN,                  // 0AD1..0ADF
6966             GUJARATI,                 // 0AE0..0AE3
6967             UNKNOWN,                  // 0AE4..0AE5
6968             GUJARATI,                 // 0AE6..0AF1
6969             UNKNOWN,                  // 0AF2..0AF8
6970             GUJARATI,                 // 0AF9..0AFF
6971             UNKNOWN,                  // 0B00
6972             ORIYA,                    // 0B01..0B03
6973             UNKNOWN,                  // 0B04
6974             ORIYA,                    // 0B05..0B0C
6975             UNKNOWN,                  // 0B0D..0B0E
6976             ORIYA,                    // 0B0F..0B10
6977             UNKNOWN,                  // 0B11..0B12
6978             ORIYA,                    // 0B13..0B28
6979             UNKNOWN,                  // 0B29
6980             ORIYA,                    // 0B2A..0B30
6981             UNKNOWN,                  // 0B31
6982             ORIYA,                    // 0B32..0B33
6983             UNKNOWN,                  // 0B34
6984             ORIYA,                    // 0B35..0B39
6985             UNKNOWN,                  // 0B3A..0B3B
6986             ORIYA,                    // 0B3C..0B44
6987             UNKNOWN,                  // 0B45..0B46
6988             ORIYA,                    // 0B47..0B48
6989             UNKNOWN,                  // 0B49..0B4A
6990             ORIYA,                    // 0B4B..0B4D
6991             UNKNOWN,                  // 0B4E..0B54
6992             ORIYA,                    // 0B55..0B57
6993             UNKNOWN,                  // 0B58..0B5B
6994             ORIYA,                    // 0B5C..0B5D
6995             UNKNOWN,                  // 0B5E
6996             ORIYA,                    // 0B5F..0B63
6997             UNKNOWN,                  // 0B64..0B65
6998             ORIYA,                    // 0B66..0B77
6999             UNKNOWN,                  // 0B78..0B81
7000             TAMIL,                    // 0B82..0B83
7001             UNKNOWN,                  // 0B84
7002             TAMIL,                    // 0B85..0B8A
7003             UNKNOWN,                  // 0B8B..0B8D
7004             TAMIL,                    // 0B8E..0B90
7005             UNKNOWN,                  // 0B91
7006             TAMIL,                    // 0B92..0B95
7007             UNKNOWN,                  // 0B96..0B98
7008             TAMIL,                    // 0B99..0B9A
7009             UNKNOWN,                  // 0B9B
7010             TAMIL,                    // 0B9C
7011             UNKNOWN,                  // 0B9D
7012             TAMIL,                    // 0B9E..0B9F
7013             UNKNOWN,                  // 0BA0..0BA2
7014             TAMIL,                    // 0BA3..0BA4
7015             UNKNOWN,                  // 0BA5..0BA7
7016             TAMIL,                    // 0BA8..0BAA
7017             UNKNOWN,                  // 0BAB..0BAD
7018             TAMIL,                    // 0BAE..0BB9
7019             UNKNOWN,                  // 0BBA..0BBD
7020             TAMIL,                    // 0BBE..0BC2
7021             UNKNOWN,                  // 0BC3..0BC5
7022             TAMIL,                    // 0BC6..0BC8
7023             UNKNOWN,                  // 0BC9
7024             TAMIL,                    // 0BCA..0BCD
7025             UNKNOWN,                  // 0BCE..0BCF
7026             TAMIL,                    // 0BD0
7027             UNKNOWN,                  // 0BD1..0BD6
7028             TAMIL,                    // 0BD7
7029             UNKNOWN,                  // 0BD8..0BE5
7030             TAMIL,                    // 0BE6..0BFA
7031             UNKNOWN,                  // 0BFB..0BFF
7032             TELUGU,                   // 0C00..0C0C
7033             UNKNOWN,                  // 0C0D
7034             TELUGU,                   // 0C0E..0C10
7035             UNKNOWN,                  // 0C11
7036             TELUGU,                   // 0C12..0C28
7037             UNKNOWN,                  // 0C29
7038             TELUGU,                   // 0C2A..0C39
7039             UNKNOWN,                  // 0C3A..0C3C
7040             TELUGU,                   // 0C3D..0C44
7041             UNKNOWN,                  // 0C45
7042             TELUGU,                   // 0C46..0C48
7043             UNKNOWN,                  // 0C49
7044             TELUGU,                   // 0C4A..0C4D
7045             UNKNOWN,                  // 0C4E..0C54
7046             TELUGU,                   // 0C55..0C56
7047             UNKNOWN,                  // 0C57
7048             TELUGU,                   // 0C58..0C5A
7049             UNKNOWN,                  // 0C5B..0C5F
7050             TELUGU,                   // 0C60..0C63
7051             UNKNOWN,                  // 0C64..0C65
7052             TELUGU,                   // 0C66..0C6F
7053             UNKNOWN,                  // 0C70..0C76
7054             TELUGU,                   // 0C77..0C7F
7055             KANNADA,                  // 0C80..0C8C
7056             UNKNOWN,                  // 0C8D
7057             KANNADA,                  // 0C8E..0C90
7058             UNKNOWN,                  // 0C91
7059             KANNADA,                  // 0C92..0CA8
7060             UNKNOWN,                  // 0CA9
7061             KANNADA,                  // 0CAA..0CB3
7062             UNKNOWN,                  // 0CB4
7063             KANNADA,                  // 0CB5..0CB9
7064             UNKNOWN,                  // 0CBA..0CBB
7065             KANNADA,                  // 0CBC..0CC4
7066             UNKNOWN,                  // 0CC5
7067             KANNADA,                  // 0CC6..0CC8
7068             UNKNOWN,                  // 0CC9
7069             KANNADA,                  // 0CCA..0CCD
7070             UNKNOWN,                  // 0CCE..0CD4
7071             KANNADA,                  // 0CD5..0CD6
7072             UNKNOWN,                  // 0CD7..0CDD
7073             KANNADA,                  // 0CDE
7074             UNKNOWN,                  // 0CDF
7075             KANNADA,                  // 0CE0..0CE3
7076             UNKNOWN,                  // 0CE4..0CE5
7077             KANNADA,                  // 0CE6..0CEF
7078             UNKNOWN,                  // 0CF0
7079             KANNADA,                  // 0CF1..0CF2
7080             UNKNOWN,                  // 0CF3..0CFF
7081             MALAYALAM,                // 0D00..0D0C
7082             UNKNOWN,                  // 0D0D
7083             MALAYALAM,                // 0D0E..0D10
7084             UNKNOWN,                  // 0D11
7085             MALAYALAM,                // 0D12..0D44
7086             UNKNOWN,                  // 0D45
7087             MALAYALAM,                // 0D46..0D48
7088             UNKNOWN,                  // 0D49
7089             MALAYALAM,                // 0D4A..0D4F
7090             UNKNOWN,                  // 0D50..0D53
7091             MALAYALAM,                // 0D54..0D63
7092             UNKNOWN,                  // 0D64..0D65
7093             MALAYALAM,                // 0D66..0D7F
7094             UNKNOWN,                  // 0D80
7095             SINHALA,                  // 0D81..0D83
7096             UNKNOWN,                  // 0D84
7097             SINHALA,                  // 0D85..0D96
7098             UNKNOWN,                  // 0D97..0D99
7099             SINHALA,                  // 0D9A..0DB1
7100             UNKNOWN,                  // 0DB2
7101             SINHALA,                  // 0DB3..0DBB
7102             UNKNOWN,                  // 0DBC
7103             SINHALA,                  // 0DBD
7104             UNKNOWN,                  // 0DBE..0DBF
7105             SINHALA,                  // 0DC0..0DC6
7106             UNKNOWN,                  // 0DC7..0DC9
7107             SINHALA,                  // 0DCA
7108             UNKNOWN,                  // 0DCB..0DCE
7109             SINHALA,                  // 0DCF..0DD4
7110             UNKNOWN,                  // 0DD5
7111             SINHALA,                  // 0DD6
7112             UNKNOWN,                  // 0DD7
7113             SINHALA,                  // 0DD8..0DDF
7114             UNKNOWN,                  // 0DE0..0DE5
7115             SINHALA,                  // 0DE6..0DEF
7116             UNKNOWN,                  // 0DF0..0DF1
7117             SINHALA,                  // 0DF2..0DF4
7118             UNKNOWN,                  // 0DF5..0E00
7119             THAI,                     // 0E01..0E3A
7120             UNKNOWN,                  // 0E3B..0E3E
7121             COMMON,                   // 0E3F
7122             THAI,                     // 0E40..0E5B
7123             UNKNOWN,                  // 0E5C..0E80
7124             LAO,                      // 0E81..0E82
7125             UNKNOWN,                  // 0E83
7126             LAO,                      // 0E84
7127             UNKNOWN,                  // 0E85
7128             LAO,                      // 0E86..0E8A
7129             UNKNOWN,                  // 0E8B
7130             LAO,                      // 0E8C..0EA3
7131             UNKNOWN,                  // 0EA4
7132             LAO,                      // 0EA5
7133             UNKNOWN,                  // 0EA6
7134             LAO,                      // 0EA7..0EBD
7135             UNKNOWN,                  // 0EBE..0EBF
7136             LAO,                      // 0EC0..0EC4
7137             UNKNOWN,                  // 0EC5
7138             LAO,                      // 0EC6
7139             UNKNOWN,                  // 0EC7
7140             LAO,                      // 0EC8..0ECD
7141             UNKNOWN,                  // 0ECE..0ECF
7142             LAO,                      // 0ED0..0ED9
7143             UNKNOWN,                  // 0EDA..0EDB
7144             LAO,                      // 0EDC..0EDF
7145             UNKNOWN,                  // 0EE0..0EFF
7146             TIBETAN,                  // 0F00..0F47
7147             UNKNOWN,                  // 0F48
7148             TIBETAN,                  // 0F49..0F6C
7149             UNKNOWN,                  // 0F6D..0F70
7150             TIBETAN,                  // 0F71..0F97
7151             UNKNOWN,                  // 0F98
7152             TIBETAN,                  // 0F99..0FBC
7153             UNKNOWN,                  // 0FBD
7154             TIBETAN,                  // 0FBE..0FCC
7155             UNKNOWN,                  // 0FCD
7156             TIBETAN,                  // 0FCE..0FD4
7157             COMMON,                   // 0FD5..0FD8
7158             TIBETAN,                  // 0FD9..0FDA
7159             UNKNOWN,                  // 0FDB..0FFF
7160             MYANMAR,                  // 1000..109F
7161             GEORGIAN,                 // 10A0..10C5
7162             UNKNOWN,                  // 10C6
7163             GEORGIAN,                 // 10C7
7164             UNKNOWN,                  // 10C8..10CC
7165             GEORGIAN,                 // 10CD
7166             UNKNOWN,                  // 10CE..10CF
7167             GEORGIAN,                 // 10D0..10FA
7168             COMMON,                   // 10FB
7169             GEORGIAN,                 // 10FC..10FF
7170             HANGUL,                   // 1100..11FF
7171             ETHIOPIC,                 // 1200..1248
7172             UNKNOWN,                  // 1249
7173             ETHIOPIC,                 // 124A..124D
7174             UNKNOWN,                  // 124E..124F
7175             ETHIOPIC,                 // 1250..1256
7176             UNKNOWN,                  // 1257
7177             ETHIOPIC,                 // 1258
7178             UNKNOWN,                  // 1259
7179             ETHIOPIC,                 // 125A..125D
7180             UNKNOWN,                  // 125E..125F
7181             ETHIOPIC,                 // 1260..1288
7182             UNKNOWN,                  // 1289
7183             ETHIOPIC,                 // 128A..128D
7184             UNKNOWN,                  // 128E..128F
7185             ETHIOPIC,                 // 1290..12B0
7186             UNKNOWN,                  // 12B1
7187             ETHIOPIC,                 // 12B2..12B5
7188             UNKNOWN,                  // 12B6..12B7
7189             ETHIOPIC,                 // 12B8..12BE
7190             UNKNOWN,                  // 12BF
7191             ETHIOPIC,                 // 12C0
7192             UNKNOWN,                  // 12C1
7193             ETHIOPIC,                 // 12C2..12C5
7194             UNKNOWN,                  // 12C6..12C7
7195             ETHIOPIC,                 // 12C8..12D6
7196             UNKNOWN,                  // 12D7
7197             ETHIOPIC,                 // 12D8..1310
7198             UNKNOWN,                  // 1311
7199             ETHIOPIC,                 // 1312..1315
7200             UNKNOWN,                  // 1316..1317
7201             ETHIOPIC,                 // 1318..135A
7202             UNKNOWN,                  // 135B..135C
7203             ETHIOPIC,                 // 135D..137C
7204             UNKNOWN,                  // 137D..137F
7205             ETHIOPIC,                 // 1380..1399
7206             UNKNOWN,                  // 139A..139F
7207             CHEROKEE,                 // 13A0..13F5
7208             UNKNOWN,                  // 13F6..13F7
7209             CHEROKEE,                 // 13F8..13FD
7210             UNKNOWN,                  // 13FE..13FF
7211             CANADIAN_ABORIGINAL,      // 1400..167F
7212             OGHAM,                    // 1680..169C
7213             UNKNOWN,                  // 169D..169F
7214             RUNIC,                    // 16A0..16EA
7215             COMMON,                   // 16EB..16ED
7216             RUNIC,                    // 16EE..16F8
7217             UNKNOWN,                  // 16F9..16FF
7218             TAGALOG,                  // 1700..170C
7219             UNKNOWN,                  // 170D
7220             TAGALOG,                  // 170E..1714
7221             UNKNOWN,                  // 1715..171F
7222             HANUNOO,                  // 1720..1734
7223             COMMON,                   // 1735..1736
7224             UNKNOWN,                  // 1737..173F
7225             BUHID,                    // 1740..1753
7226             UNKNOWN,                  // 1754..175F
7227             TAGBANWA,                 // 1760..176C
7228             UNKNOWN,                  // 176D
7229             TAGBANWA,                 // 176E..1770
7230             UNKNOWN,                  // 1771
7231             TAGBANWA,                 // 1772..1773
7232             UNKNOWN,                  // 1774..177F
7233             KHMER,                    // 1780..17DD
7234             UNKNOWN,                  // 17DE..17DF
7235             KHMER,                    // 17E0..17E9
7236             UNKNOWN,                  // 17EA..17EF
7237             KHMER,                    // 17F0..17F9
7238             UNKNOWN,                  // 17FA..17FF
7239             MONGOLIAN,                // 1800..1801
7240             COMMON,                   // 1802..1803
7241             MONGOLIAN,                // 1804
7242             COMMON,                   // 1805
7243             MONGOLIAN,                // 1806..180E
7244             UNKNOWN,                  // 180F
7245             MONGOLIAN,                // 1810..1819
7246             UNKNOWN,                  // 181A..181F
7247             MONGOLIAN,                // 1820..1878
7248             UNKNOWN,                  // 1879..187F
7249             MONGOLIAN,                // 1880..18AA
7250             UNKNOWN,                  // 18AB..18AF
7251             CANADIAN_ABORIGINAL,      // 18B0..18F5
7252             UNKNOWN,                  // 18F6..18FF
7253             LIMBU,                    // 1900..191E
7254             UNKNOWN,                  // 191F
7255             LIMBU,                    // 1920..192B
7256             UNKNOWN,                  // 192C..192F
7257             LIMBU,                    // 1930..193B
7258             UNKNOWN,                  // 193C..193F
7259             LIMBU,                    // 1940
7260             UNKNOWN,                  // 1941..1943
7261             LIMBU,                    // 1944..194F
7262             TAI_LE,                   // 1950..196D
7263             UNKNOWN,                  // 196E..196F
7264             TAI_LE,                   // 1970..1974
7265             UNKNOWN,                  // 1975..197F
7266             NEW_TAI_LUE,              // 1980..19AB
7267             UNKNOWN,                  // 19AC..19AF
7268             NEW_TAI_LUE,              // 19B0..19C9
7269             UNKNOWN,                  // 19CA..19CF
7270             NEW_TAI_LUE,              // 19D0..19DA
7271             UNKNOWN,                  // 19DB..19DD
7272             NEW_TAI_LUE,              // 19DE..19DF
7273             KHMER,                    // 19E0..19FF
7274             BUGINESE,                 // 1A00..1A1B
7275             UNKNOWN,                  // 1A1C..1A1D
7276             BUGINESE,                 // 1A1E..1A1F
7277             TAI_THAM,                 // 1A20..1A5E
7278             UNKNOWN,                  // 1A5F
7279             TAI_THAM,                 // 1A60..1A7C
7280             UNKNOWN,                  // 1A7D..1A7E
7281             TAI_THAM,                 // 1A7F..1A89
7282             UNKNOWN,                  // 1A8A..1A8F
7283             TAI_THAM,                 // 1A90..1A99
7284             UNKNOWN,                  // 1A9A..1A9F
7285             TAI_THAM,                 // 1AA0..1AAD
7286             UNKNOWN,                  // 1AAE..1AAF
7287             INHERITED,                // 1AB0..1AC0
7288             UNKNOWN,                  // 1AC1..1AFF
7289             BALINESE,                 // 1B00..1B4B
7290             UNKNOWN,                  // 1B4C..1B4F
7291             BALINESE,                 // 1B50..1B7C
7292             UNKNOWN,                  // 1B7D..1B7F
7293             SUNDANESE,                // 1B80..1BBF
7294             BATAK,                    // 1BC0..1BF3
7295             UNKNOWN,                  // 1BF4..1BFB
7296             BATAK,                    // 1BFC..1BFF
7297             LEPCHA,                   // 1C00..1C37
7298             UNKNOWN,                  // 1C38..1C3A
7299             LEPCHA,                   // 1C3B..1C49
7300             UNKNOWN,                  // 1C4A..1C4C
7301             LEPCHA,                   // 1C4D..1C4F
7302             OL_CHIKI,                 // 1C50..1C7F
7303             CYRILLIC,                 // 1C80..1C88
7304             UNKNOWN,                  // 1C89..1C8F
7305             GEORGIAN,                 // 1C90..1CBA
7306             UNKNOWN,                  // 1CBB..1CBC
7307             GEORGIAN,                 // 1CBD..1CBF
7308             SUNDANESE,                // 1CC0..1CC7
7309             UNKNOWN,                  // 1CC8..1CCF
7310             INHERITED,                // 1CD0..1CD2
7311             COMMON,                   // 1CD3
7312             INHERITED,                // 1CD4..1CE0
7313             COMMON,                   // 1CE1
7314             INHERITED,                // 1CE2..1CE8
7315             COMMON,                   // 1CE9..1CEC
7316             INHERITED,                // 1CED
7317             COMMON,                   // 1CEE..1CF3
7318             INHERITED,                // 1CF4
7319             COMMON,                   // 1CF5..1CF7
7320             INHERITED,                // 1CF8..1CF9
7321             COMMON,                   // 1CFA
7322             UNKNOWN,                  // 1CFB..1CFF
7323             LATIN,                    // 1D00..1D25
7324             GREEK,                    // 1D26..1D2A
7325             CYRILLIC,                 // 1D2B
7326             LATIN,                    // 1D2C..1D5C
7327             GREEK,                    // 1D5D..1D61
7328             LATIN,                    // 1D62..1D65
7329             GREEK,                    // 1D66..1D6A
7330             LATIN,                    // 1D6B..1D77
7331             CYRILLIC,                 // 1D78
7332             LATIN,                    // 1D79..1DBE
7333             GREEK,                    // 1DBF
7334             INHERITED,                // 1DC0..1DF9
7335             UNKNOWN,                  // 1DFA
7336             INHERITED,                // 1DFB..1DFF
7337             LATIN,                    // 1E00..1EFF
7338             GREEK,                    // 1F00..1F15
7339             UNKNOWN,                  // 1F16..1F17
7340             GREEK,                    // 1F18..1F1D
7341             UNKNOWN,                  // 1F1E..1F1F
7342             GREEK,                    // 1F20..1F45
7343             UNKNOWN,                  // 1F46..1F47
7344             GREEK,                    // 1F48..1F4D
7345             UNKNOWN,                  // 1F4E..1F4F
7346             GREEK,                    // 1F50..1F57
7347             UNKNOWN,                  // 1F58
7348             GREEK,                    // 1F59
7349             UNKNOWN,                  // 1F5A
7350             GREEK,                    // 1F5B
7351             UNKNOWN,                  // 1F5C
7352             GREEK,                    // 1F5D
7353             UNKNOWN,                  // 1F5E
7354             GREEK,                    // 1F5F..1F7D
7355             UNKNOWN,                  // 1F7E..1F7F
7356             GREEK,                    // 1F80..1FB4
7357             UNKNOWN,                  // 1FB5
7358             GREEK,                    // 1FB6..1FC4
7359             UNKNOWN,                  // 1FC5
7360             GREEK,                    // 1FC6..1FD3
7361             UNKNOWN,                  // 1FD4..1FD5
7362             GREEK,                    // 1FD6..1FDB
7363             UNKNOWN,                  // 1FDC
7364             GREEK,                    // 1FDD..1FEF
7365             UNKNOWN,                  // 1FF0..1FF1
7366             GREEK,                    // 1FF2..1FF4
7367             UNKNOWN,                  // 1FF5
7368             GREEK,                    // 1FF6..1FFE
7369             UNKNOWN,                  // 1FFF
7370             COMMON,                   // 2000..200B
7371             INHERITED,                // 200C..200D
7372             COMMON,                   // 200E..2064
7373             UNKNOWN,                  // 2065
7374             COMMON,                   // 2066..2070
7375             LATIN,                    // 2071
7376             UNKNOWN,                  // 2072..2073
7377             COMMON,                   // 2074..207E
7378             LATIN,                    // 207F
7379             COMMON,                   // 2080..208E
7380             UNKNOWN,                  // 208F
7381             LATIN,                    // 2090..209C
7382             UNKNOWN,                  // 209D..209F
7383             COMMON,                   // 20A0..20BF
7384             UNKNOWN,                  // 20C0..20CF
7385             INHERITED,                // 20D0..20F0
7386             UNKNOWN,                  // 20F1..20FF
7387             COMMON,                   // 2100..2125
7388             GREEK,                    // 2126
7389             COMMON,                   // 2127..2129
7390             LATIN,                    // 212A..212B
7391             COMMON,                   // 212C..2131
7392             LATIN,                    // 2132
7393             COMMON,                   // 2133..214D
7394             LATIN,                    // 214E
7395             COMMON,                   // 214F..215F
7396             LATIN,                    // 2160..2188
7397             COMMON,                   // 2189..218B
7398             UNKNOWN,                  // 218C..218F
7399             COMMON,                   // 2190..2426
7400             UNKNOWN,                  // 2427..243F
7401             COMMON,                   // 2440..244A
7402             UNKNOWN,                  // 244B..245F
7403             COMMON,                   // 2460..27FF
7404             BRAILLE,                  // 2800..28FF
7405             COMMON,                   // 2900..2B73
7406             UNKNOWN,                  // 2B74..2B75
7407             COMMON,                   // 2B76..2B95
7408             UNKNOWN,                  // 2B96
7409             COMMON,                   // 2B97..2BFF
7410             GLAGOLITIC,               // 2C00..2C2E
7411             UNKNOWN,                  // 2C2F
7412             GLAGOLITIC,               // 2C30..2C5E
7413             UNKNOWN,                  // 2C5F
7414             LATIN,                    // 2C60..2C7F
7415             COPTIC,                   // 2C80..2CF3
7416             UNKNOWN,                  // 2CF4..2CF8
7417             COPTIC,                   // 2CF9..2CFF
7418             GEORGIAN,                 // 2D00..2D25
7419             UNKNOWN,                  // 2D26
7420             GEORGIAN,                 // 2D27
7421             UNKNOWN,                  // 2D28..2D2C
7422             GEORGIAN,                 // 2D2D
7423             UNKNOWN,                  // 2D2E..2D2F
7424             TIFINAGH,                 // 2D30..2D67
7425             UNKNOWN,                  // 2D68..2D6E
7426             TIFINAGH,                 // 2D6F..2D70
7427             UNKNOWN,                  // 2D71..2D7E
7428             TIFINAGH,                 // 2D7F
7429             ETHIOPIC,                 // 2D80..2D96
7430             UNKNOWN,                  // 2D97..2D9F
7431             ETHIOPIC,                 // 2DA0..2DA6
7432             UNKNOWN,                  // 2DA7
7433             ETHIOPIC,                 // 2DA8..2DAE
7434             UNKNOWN,                  // 2DAF
7435             ETHIOPIC,                 // 2DB0..2DB6
7436             UNKNOWN,                  // 2DB7
7437             ETHIOPIC,                 // 2DB8..2DBE
7438             UNKNOWN,                  // 2DBF
7439             ETHIOPIC,                 // 2DC0..2DC6
7440             UNKNOWN,                  // 2DC7
7441             ETHIOPIC,                 // 2DC8..2DCE
7442             UNKNOWN,                  // 2DCF
7443             ETHIOPIC,                 // 2DD0..2DD6
7444             UNKNOWN,                  // 2DD7
7445             ETHIOPIC,                 // 2DD8..2DDE
7446             UNKNOWN,                  // 2DDF
7447             CYRILLIC,                 // 2DE0..2DFF
7448             COMMON,                   // 2E00..2E52
7449             UNKNOWN,                  // 2E53..2E7F
7450             HAN,                      // 2E80..2E99
7451             UNKNOWN,                  // 2E9A
7452             HAN,                      // 2E9B..2EF3
7453             UNKNOWN,                  // 2EF4..2EFF
7454             HAN,                      // 2F00..2FD5
7455             UNKNOWN,                  // 2FD6..2FEF
7456             COMMON,                   // 2FF0..2FFB
7457             UNKNOWN,                  // 2FFC..2FFF
7458             COMMON,                   // 3000..3004
7459             HAN,                      // 3005
7460             COMMON,                   // 3006
7461             HAN,                      // 3007
7462             COMMON,                   // 3008..3020
7463             HAN,                      // 3021..3029
7464             INHERITED,                // 302A..302D
7465             HANGUL,                   // 302E..302F
7466             COMMON,                   // 3030..3037
7467             HAN,                      // 3038..303B
7468             COMMON,                   // 303C..303F
7469             UNKNOWN,                  // 3040
7470             HIRAGANA,                 // 3041..3096
7471             UNKNOWN,                  // 3097..3098
7472             INHERITED,                // 3099..309A
7473             COMMON,                   // 309B..309C
7474             HIRAGANA,                 // 309D..309F
7475             COMMON,                   // 30A0
7476             KATAKANA,                 // 30A1..30FA
7477             COMMON,                   // 30FB..30FC
7478             KATAKANA,                 // 30FD..30FF
7479             UNKNOWN,                  // 3100..3104
7480             BOPOMOFO,                 // 3105..312F
7481             UNKNOWN,                  // 3130
7482             HANGUL,                   // 3131..318E
7483             UNKNOWN,                  // 318F
7484             COMMON,                   // 3190..319F
7485             BOPOMOFO,                 // 31A0..31BF
7486             COMMON,                   // 31C0..31E3
7487             UNKNOWN,                  // 31E4..31EF
7488             KATAKANA,                 // 31F0..31FF
7489             HANGUL,                   // 3200..321E
7490             UNKNOWN,                  // 321F
7491             COMMON,                   // 3220..325F
7492             HANGUL,                   // 3260..327E
7493             COMMON,                   // 327F..32CF
7494             KATAKANA,                 // 32D0..32FE
7495             COMMON,                   // 32FF
7496             KATAKANA,                 // 3300..3357
7497             COMMON,                   // 3358..33FF
7498             HAN,                      // 3400..4DBF
7499             COMMON,                   // 4DC0..4DFF
7500             HAN,                      // 4E00..9FFC
7501             UNKNOWN,                  // 9FFD..9FFF
7502             YI,                       // A000..A48C
7503             UNKNOWN,                  // A48D..A48F
7504             YI,                       // A490..A4C6
7505             UNKNOWN,                  // A4C7..A4CF
7506             LISU,                     // A4D0..A4FF
7507             VAI,                      // A500..A62B
7508             UNKNOWN,                  // A62C..A63F
7509             CYRILLIC,                 // A640..A69F
7510             BAMUM,                    // A6A0..A6F7
7511             UNKNOWN,                  // A6F8..A6FF
7512             COMMON,                   // A700..A721
7513             LATIN,                    // A722..A787
7514             COMMON,                   // A788..A78A
7515             LATIN,                    // A78B..A7BF
7516             UNKNOWN,                  // A7C0..A7C1
7517             LATIN,                    // A7C2..A7CA
7518             UNKNOWN,                  // A7CB..A7F4
7519             LATIN,                    // A7F5..A7FF
7520             SYLOTI_NAGRI,             // A800..A82C
7521             UNKNOWN,                  // A82D..A82F
7522             COMMON,                   // A830..A839
7523             UNKNOWN,                  // A83A..A83F
7524             PHAGS_PA,                 // A840..A877
7525             UNKNOWN,                  // A878..A87F
7526             SAURASHTRA,               // A880..A8C5
7527             UNKNOWN,                  // A8C6..A8CD
7528             SAURASHTRA,               // A8CE..A8D9
7529             UNKNOWN,                  // A8DA..A8DF
7530             DEVANAGARI,               // A8E0..A8FF
7531             KAYAH_LI,                 // A900..A92D
7532             COMMON,                   // A92E
7533             KAYAH_LI,                 // A92F
7534             REJANG,                   // A930..A953
7535             UNKNOWN,                  // A954..A95E
7536             REJANG,                   // A95F
7537             HANGUL,                   // A960..A97C
7538             UNKNOWN,                  // A97D..A97F
7539             JAVANESE,                 // A980..A9CD
7540             UNKNOWN,                  // A9CE
7541             COMMON,                   // A9CF
7542             JAVANESE,                 // A9D0..A9D9
7543             UNKNOWN,                  // A9DA..A9DD
7544             JAVANESE,                 // A9DE..A9DF
7545             MYANMAR,                  // A9E0..A9FE
7546             UNKNOWN,                  // A9FF
7547             CHAM,                     // AA00..AA36
7548             UNKNOWN,                  // AA37..AA3F
7549             CHAM,                     // AA40..AA4D
7550             UNKNOWN,                  // AA4E..AA4F
7551             CHAM,                     // AA50..AA59
7552             UNKNOWN,                  // AA5A..AA5B
7553             CHAM,                     // AA5C..AA5F
7554             MYANMAR,                  // AA60..AA7F
7555             TAI_VIET,                 // AA80..AAC2
7556             UNKNOWN,                  // AAC3..AADA
7557             TAI_VIET,                 // AADB..AADF
7558             MEETEI_MAYEK,             // AAE0..AAF6
7559             UNKNOWN,                  // AAF7..AB00
7560             ETHIOPIC,                 // AB01..AB06
7561             UNKNOWN,                  // AB07..AB08
7562             ETHIOPIC,                 // AB09..AB0E
7563             UNKNOWN,                  // AB0F..AB10
7564             ETHIOPIC,                 // AB11..AB16
7565             UNKNOWN,                  // AB17..AB1F
7566             ETHIOPIC,                 // AB20..AB26
7567             UNKNOWN,                  // AB27
7568             ETHIOPIC,                 // AB28..AB2E
7569             UNKNOWN,                  // AB2F
7570             LATIN,                    // AB30..AB5A
7571             COMMON,                   // AB5B
7572             LATIN,                    // AB5C..AB64
7573             GREEK,                    // AB65
7574             LATIN,                    // AB66..AB69
7575             COMMON,                   // AB6A..AB6B
7576             UNKNOWN,                  // AB6C..AB6F
7577             CHEROKEE,                 // AB70..ABBF
7578             MEETEI_MAYEK,             // ABC0..ABED
7579             UNKNOWN,                  // ABEE..ABEF
7580             MEETEI_MAYEK,             // ABF0..ABF9
7581             UNKNOWN,                  // ABFA..ABFF
7582             HANGUL,                   // AC00..D7A3
7583             UNKNOWN,                  // D7A4..D7AF
7584             HANGUL,                   // D7B0..D7C6
7585             UNKNOWN,                  // D7C7..D7CA
7586             HANGUL,                   // D7CB..D7FB
7587             UNKNOWN,                  // D7FC..F8FF
7588             HAN,                      // F900..FA6D
7589             UNKNOWN,                  // FA6E..FA6F
7590             HAN,                      // FA70..FAD9
7591             UNKNOWN,                  // FADA..FAFF
7592             LATIN,                    // FB00..FB06
7593             UNKNOWN,                  // FB07..FB12
7594             ARMENIAN,                 // FB13..FB17
7595             UNKNOWN,                  // FB18..FB1C
7596             HEBREW,                   // FB1D..FB36
7597             UNKNOWN,                  // FB37
7598             HEBREW,                   // FB38..FB3C
7599             UNKNOWN,                  // FB3D
7600             HEBREW,                   // FB3E
7601             UNKNOWN,                  // FB3F
7602             HEBREW,                   // FB40..FB41
7603             UNKNOWN,                  // FB42
7604             HEBREW,                   // FB43..FB44
7605             UNKNOWN,                  // FB45
7606             HEBREW,                   // FB46..FB4F
7607             ARABIC,                   // FB50..FBC1
7608             UNKNOWN,                  // FBC2..FBD2
7609             ARABIC,                   // FBD3..FD3D
7610             COMMON,                   // FD3E..FD3F
7611             UNKNOWN,                  // FD40..FD4F
7612             ARABIC,                   // FD50..FD8F
7613             UNKNOWN,                  // FD90..FD91
7614             ARABIC,                   // FD92..FDC7
7615             UNKNOWN,                  // FDC8..FDEF
7616             ARABIC,                   // FDF0..FDFD
7617             UNKNOWN,                  // FDFE..FDFF
7618             INHERITED,                // FE00..FE0F
7619             COMMON,                   // FE10..FE19
7620             UNKNOWN,                  // FE1A..FE1F
7621             INHERITED,                // FE20..FE2D
7622             CYRILLIC,                 // FE2E..FE2F
7623             COMMON,                   // FE30..FE52
7624             UNKNOWN,                  // FE53
7625             COMMON,                   // FE54..FE66
7626             UNKNOWN,                  // FE67
7627             COMMON,                   // FE68..FE6B
7628             UNKNOWN,                  // FE6C..FE6F
7629             ARABIC,                   // FE70..FE74
7630             UNKNOWN,                  // FE75
7631             ARABIC,                   // FE76..FEFC
7632             UNKNOWN,                  // FEFD..FEFE
7633             COMMON,                   // FEFF
7634             UNKNOWN,                  // FF00
7635             COMMON,                   // FF01..FF20
7636             LATIN,                    // FF21..FF3A
7637             COMMON,                   // FF3B..FF40
7638             LATIN,                    // FF41..FF5A
7639             COMMON,                   // FF5B..FF65
7640             KATAKANA,                 // FF66..FF6F
7641             COMMON,                   // FF70
7642             KATAKANA,                 // FF71..FF9D
7643             COMMON,                   // FF9E..FF9F
7644             HANGUL,                   // FFA0..FFBE
7645             UNKNOWN,                  // FFBF..FFC1
7646             HANGUL,                   // FFC2..FFC7
7647             UNKNOWN,                  // FFC8..FFC9
7648             HANGUL,                   // FFCA..FFCF
7649             UNKNOWN,                  // FFD0..FFD1
7650             HANGUL,                   // FFD2..FFD7
7651             UNKNOWN,                  // FFD8..FFD9
7652             HANGUL,                   // FFDA..FFDC
7653             UNKNOWN,                  // FFDD..FFDF
7654             COMMON,                   // FFE0..FFE6
7655             UNKNOWN,                  // FFE7
7656             COMMON,                   // FFE8..FFEE
7657             UNKNOWN,                  // FFEF..FFF8
7658             COMMON,                   // FFF9..FFFD
7659             UNKNOWN,                  // FFFE..FFFF
7660             LINEAR_B,                 // 10000..1000B
7661             UNKNOWN,                  // 1000C
7662             LINEAR_B,                 // 1000D..10026
7663             UNKNOWN,                  // 10027
7664             LINEAR_B,                 // 10028..1003A
7665             UNKNOWN,                  // 1003B
7666             LINEAR_B,                 // 1003C..1003D
7667             UNKNOWN,                  // 1003E
7668             LINEAR_B,                 // 1003F..1004D
7669             UNKNOWN,                  // 1004E..1004F
7670             LINEAR_B,                 // 10050..1005D
7671             UNKNOWN,                  // 1005E..1007F
7672             LINEAR_B,                 // 10080..100FA
7673             UNKNOWN,                  // 100FB..100FF
7674             COMMON,                   // 10100..10102
7675             UNKNOWN,                  // 10103..10106
7676             COMMON,                   // 10107..10133
7677             UNKNOWN,                  // 10134..10136
7678             COMMON,                   // 10137..1013F
7679             GREEK,                    // 10140..1018E
7680             UNKNOWN,                  // 1018F
7681             COMMON,                   // 10190..1019C
7682             UNKNOWN,                  // 1019D..1019F
7683             GREEK,                    // 101A0
7684             UNKNOWN,                  // 101A1..101CF
7685             COMMON,                   // 101D0..101FC
7686             INHERITED,                // 101FD
7687             UNKNOWN,                  // 101FE..1027F
7688             LYCIAN,                   // 10280..1029C
7689             UNKNOWN,                  // 1029D..1029F
7690             CARIAN,                   // 102A0..102D0
7691             UNKNOWN,                  // 102D1..102DF
7692             INHERITED,                // 102E0
7693             COMMON,                   // 102E1..102FB
7694             UNKNOWN,                  // 102FC..102FF
7695             OLD_ITALIC,               // 10300..10323
7696             UNKNOWN,                  // 10324..1032C
7697             OLD_ITALIC,               // 1032D..1032F
7698             GOTHIC,                   // 10330..1034A
7699             UNKNOWN,                  // 1034B..1034F
7700             OLD_PERMIC,               // 10350..1037A
7701             UNKNOWN,                  // 1037B..1037F
7702             UGARITIC,                 // 10380..1039D
7703             UNKNOWN,                  // 1039E
7704             UGARITIC,                 // 1039F
7705             OLD_PERSIAN,              // 103A0..103C3
7706             UNKNOWN,                  // 103C4..103C7
7707             OLD_PERSIAN,              // 103C8..103D5
7708             UNKNOWN,                  // 103D6..103FF
7709             DESERET,                  // 10400..1044F
7710             SHAVIAN,                  // 10450..1047F
7711             OSMANYA,                  // 10480..1049D
7712             UNKNOWN,                  // 1049E..1049F
7713             OSMANYA,                  // 104A0..104A9
7714             UNKNOWN,                  // 104AA..104AF
7715             OSAGE,                    // 104B0..104D3
7716             UNKNOWN,                  // 104D4..104D7
7717             OSAGE,                    // 104D8..104FB
7718             UNKNOWN,                  // 104FC..104FF
7719             ELBASAN,                  // 10500..10527
7720             UNKNOWN,                  // 10528..1052F
7721             CAUCASIAN_ALBANIAN,       // 10530..10563
7722             UNKNOWN,                  // 10564..1056E
7723             CAUCASIAN_ALBANIAN,       // 1056F
7724             UNKNOWN,                  // 10570..105FF
7725             LINEAR_A,                 // 10600..10736
7726             UNKNOWN,                  // 10737..1073F
7727             LINEAR_A,                 // 10740..10755
7728             UNKNOWN,                  // 10756..1075F
7729             LINEAR_A,                 // 10760..10767
7730             UNKNOWN,                  // 10768..107FF
7731             CYPRIOT,                  // 10800..10805
7732             UNKNOWN,                  // 10806..10807
7733             CYPRIOT,                  // 10808
7734             UNKNOWN,                  // 10809
7735             CYPRIOT,                  // 1080A..10835
7736             UNKNOWN,                  // 10836
7737             CYPRIOT,                  // 10837..10838
7738             UNKNOWN,                  // 10839..1083B
7739             CYPRIOT,                  // 1083C
7740             UNKNOWN,                  // 1083D..1083E
7741             CYPRIOT,                  // 1083F
7742             IMPERIAL_ARAMAIC,         // 10840..10855
7743             UNKNOWN,                  // 10856
7744             IMPERIAL_ARAMAIC,         // 10857..1085F
7745             PALMYRENE,                // 10860..1087F
7746             NABATAEAN,                // 10880..1089E
7747             UNKNOWN,                  // 1089F..108A6
7748             NABATAEAN,                // 108A7..108AF
7749             UNKNOWN,                  // 108B0..108DF
7750             HATRAN,                   // 108E0..108F2
7751             UNKNOWN,                  // 108F3
7752             HATRAN,                   // 108F4..108F5
7753             UNKNOWN,                  // 108F6..108FA
7754             HATRAN,                   // 108FB..108FF
7755             PHOENICIAN,               // 10900..1091B
7756             UNKNOWN,                  // 1091C..1091E
7757             PHOENICIAN,               // 1091F
7758             LYDIAN,                   // 10920..10939
7759             UNKNOWN,                  // 1093A..1093E
7760             LYDIAN,                   // 1093F
7761             UNKNOWN,                  // 10940..1097F
7762             MEROITIC_HIEROGLYPHS,     // 10980..1099F
7763             MEROITIC_CURSIVE,         // 109A0..109B7
7764             UNKNOWN,                  // 109B8..109BB
7765             MEROITIC_CURSIVE,         // 109BC..109CF
7766             UNKNOWN,                  // 109D0..109D1
7767             MEROITIC_CURSIVE,         // 109D2..109FF
7768             KHAROSHTHI,               // 10A00..10A03
7769             UNKNOWN,                  // 10A04
7770             KHAROSHTHI,               // 10A05..10A06
7771             UNKNOWN,                  // 10A07..10A0B
7772             KHAROSHTHI,               // 10A0C..10A13
7773             UNKNOWN,                  // 10A14
7774             KHAROSHTHI,               // 10A15..10A17
7775             UNKNOWN,                  // 10A18
7776             KHAROSHTHI,               // 10A19..10A35
7777             UNKNOWN,                  // 10A36..10A37
7778             KHAROSHTHI,               // 10A38..10A3A
7779             UNKNOWN,                  // 10A3B..10A3E
7780             KHAROSHTHI,               // 10A3F..10A48
7781             UNKNOWN,                  // 10A49..10A4F
7782             KHAROSHTHI,               // 10A50..10A58
7783             UNKNOWN,                  // 10A59..10A5F
7784             OLD_SOUTH_ARABIAN,        // 10A60..10A7F
7785             OLD_NORTH_ARABIAN,        // 10A80..10A9F
7786             UNKNOWN,                  // 10AA0..10ABF
7787             MANICHAEAN,               // 10AC0..10AE6
7788             UNKNOWN,                  // 10AE7..10AEA
7789             MANICHAEAN,               // 10AEB..10AF6
7790             UNKNOWN,                  // 10AF7..10AFF
7791             AVESTAN,                  // 10B00..10B35
7792             UNKNOWN,                  // 10B36..10B38
7793             AVESTAN,                  // 10B39..10B3F
7794             INSCRIPTIONAL_PARTHIAN,   // 10B40..10B55
7795             UNKNOWN,                  // 10B56..10B57
7796             INSCRIPTIONAL_PARTHIAN,   // 10B58..10B5F
7797             INSCRIPTIONAL_PAHLAVI,    // 10B60..10B72
7798             UNKNOWN,                  // 10B73..10B77
7799             INSCRIPTIONAL_PAHLAVI,    // 10B78..10B7F
7800             PSALTER_PAHLAVI,          // 10B80..10B91
7801             UNKNOWN,                  // 10B92..10B98
7802             PSALTER_PAHLAVI,          // 10B99..10B9C
7803             UNKNOWN,                  // 10B9D..10BA8
7804             PSALTER_PAHLAVI,          // 10BA9..10BAF
7805             UNKNOWN,                  // 10BB0..10BFF
7806             OLD_TURKIC,               // 10C00..10C48
7807             UNKNOWN,                  // 10C49..10C7F
7808             OLD_HUNGARIAN,            // 10C80..10CB2
7809             UNKNOWN,                  // 10CB3..10CBF
7810             OLD_HUNGARIAN,            // 10CC0..10CF2
7811             UNKNOWN,                  // 10CF3..10CF9
7812             OLD_HUNGARIAN,            // 10CFA..10CFF
7813             HANIFI_ROHINGYA,          // 10D00..10D27
7814             UNKNOWN,                  // 10D28..10D2F
7815             HANIFI_ROHINGYA,          // 10D30..10D39
7816             UNKNOWN,                  // 10D3A..10E5F
7817             ARABIC,                   // 10E60..10E7E
7818             UNKNOWN,                  // 10E7F
7819             YEZIDI,                   // 10E80..10EA9
7820             UNKNOWN,                  // 10EAA
7821             YEZIDI,                   // 10EAB..10EAD
7822             UNKNOWN,                  // 10EAE..10EAF
7823             YEZIDI,                   // 10EB0..10EB1
7824             UNKNOWN,                  // 10EB2..10EFF
7825             OLD_SOGDIAN,              // 10F00..10F27
7826             UNKNOWN,                  // 10F28..10F2F
7827             SOGDIAN,                  // 10F30..10F59
7828             UNKNOWN,                  // 10F5A..10FAF
7829             CHORASMIAN,               // 10FB0..10FCB
7830             UNKNOWN,                  // 10FCC..10FDF
7831             ELYMAIC,                  // 10FE0..10FF6
7832             UNKNOWN,                  // 10FF7..10FFF
7833             BRAHMI,                   // 11000..1104D
7834             UNKNOWN,                  // 1104E..11051
7835             BRAHMI,                   // 11052..1106F
7836             UNKNOWN,                  // 11070..1107E
7837             BRAHMI,                   // 1107F
7838             KAITHI,                   // 11080..110C1
7839             UNKNOWN,                  // 110C2..110CC
7840             KAITHI,                   // 110CD
7841             UNKNOWN,                  // 110CE..110CF
7842             SORA_SOMPENG,             // 110D0..110E8
7843             UNKNOWN,                  // 110E9..110EF
7844             SORA_SOMPENG,             // 110F0..110F9
7845             UNKNOWN,                  // 110FA..110FF
7846             CHAKMA,                   // 11100..11134
7847             UNKNOWN,                  // 11135
7848             CHAKMA,                   // 11136..11147
7849             UNKNOWN,                  // 11148..1114F
7850             MAHAJANI,                 // 11150..11176
7851             UNKNOWN,                  // 11177..1117F
7852             SHARADA,                  // 11180..111DF
7853             UNKNOWN,                  // 111E0
7854             SINHALA,                  // 111E1..111F4
7855             UNKNOWN,                  // 111F5..111FF
7856             KHOJKI,                   // 11200..11211
7857             UNKNOWN,                  // 11212
7858             KHOJKI,                   // 11213..1123E
7859             UNKNOWN,                  // 1123F..1127F
7860             MULTANI,                  // 11280..11286
7861             UNKNOWN,                  // 11287
7862             MULTANI,                  // 11288
7863             UNKNOWN,                  // 11289
7864             MULTANI,                  // 1128A..1128D
7865             UNKNOWN,                  // 1128E
7866             MULTANI,                  // 1128F..1129D
7867             UNKNOWN,                  // 1129E
7868             MULTANI,                  // 1129F..112A9
7869             UNKNOWN,                  // 112AA..112AF
7870             KHUDAWADI,                // 112B0..112EA
7871             UNKNOWN,                  // 112EB..112EF
7872             KHUDAWADI,                // 112F0..112F9
7873             UNKNOWN,                  // 112FA..112FF
7874             GRANTHA,                  // 11300..11303
7875             UNKNOWN,                  // 11304
7876             GRANTHA,                  // 11305..1130C
7877             UNKNOWN,                  // 1130D..1130E
7878             GRANTHA,                  // 1130F..11310
7879             UNKNOWN,                  // 11311..11312
7880             GRANTHA,                  // 11313..11328
7881             UNKNOWN,                  // 11329
7882             GRANTHA,                  // 1132A..11330
7883             UNKNOWN,                  // 11331
7884             GRANTHA,                  // 11332..11333
7885             UNKNOWN,                  // 11334
7886             GRANTHA,                  // 11335..11339
7887             UNKNOWN,                  // 1133A
7888             INHERITED,                // 1133B
7889             GRANTHA,                  // 1133C..11344
7890             UNKNOWN,                  // 11345..11346
7891             GRANTHA,                  // 11347..11348
7892             UNKNOWN,                  // 11349..1134A
7893             GRANTHA,                  // 1134B..1134D
7894             UNKNOWN,                  // 1134E..1134F
7895             GRANTHA,                  // 11350
7896             UNKNOWN,                  // 11351..11356
7897             GRANTHA,                  // 11357
7898             UNKNOWN,                  // 11358..1135C
7899             GRANTHA,                  // 1135D..11363
7900             UNKNOWN,                  // 11364..11365
7901             GRANTHA,                  // 11366..1136C
7902             UNKNOWN,                  // 1136D..1136F
7903             GRANTHA,                  // 11370..11374
7904             UNKNOWN,                  // 11375..113FF
7905             NEWA,                     // 11400..1145B
7906             UNKNOWN,                  // 1145C
7907             NEWA,                     // 1145D..11461
7908             UNKNOWN,                  // 11462..1147F
7909             TIRHUTA,                  // 11480..114C7
7910             UNKNOWN,                  // 114C8..114CF
7911             TIRHUTA,                  // 114D0..114D9
7912             UNKNOWN,                  // 114DA..1157F
7913             SIDDHAM,                  // 11580..115B5
7914             UNKNOWN,                  // 115B6..115B7
7915             SIDDHAM,                  // 115B8..115DD
7916             UNKNOWN,                  // 115DE..115FF
7917             MODI,                     // 11600..11644
7918             UNKNOWN,                  // 11645..1164F
7919             MODI,                     // 11650..11659
7920             UNKNOWN,                  // 1165A..1165F
7921             MONGOLIAN,                // 11660..1166C
7922             UNKNOWN,                  // 1166D..1167F
7923             TAKRI,                    // 11680..116B8
7924             UNKNOWN,                  // 116B9..116BF
7925             TAKRI,                    // 116C0..116C9
7926             UNKNOWN,                  // 116CA..116FF
7927             AHOM,                     // 11700..1171A
7928             UNKNOWN,                  // 1171B..1171C
7929             AHOM,                     // 1171D..1172B
7930             UNKNOWN,                  // 1172C..1172F
7931             AHOM,                     // 11730..1173F
7932             UNKNOWN,                  // 11740..117FF
7933             DOGRA,                    // 11800..1183B
7934             UNKNOWN,                  // 1183C..1189F
7935             WARANG_CITI,              // 118A0..118F2
7936             UNKNOWN,                  // 118F3..118FE
7937             WARANG_CITI,              // 118FF
7938             DIVES_AKURU,              // 11900..11906
7939             UNKNOWN,                  // 11907..11908
7940             DIVES_AKURU,              // 11909
7941             UNKNOWN,                  // 1190A..1190B
7942             DIVES_AKURU,              // 1190C..11913
7943             UNKNOWN,                  // 11914
7944             DIVES_AKURU,              // 11915..11916
7945             UNKNOWN,                  // 11917
7946             DIVES_AKURU,              // 11918..11935
7947             UNKNOWN,                  // 11936
7948             DIVES_AKURU,              // 11937..11938
7949             UNKNOWN,                  // 11939..1193A
7950             DIVES_AKURU,              // 1193B..11946
7951             UNKNOWN,                  // 11947..1194F
7952             DIVES_AKURU,              // 11950..11959
7953             UNKNOWN,                  // 1195A..1199F
7954             NANDINAGARI,              // 119A0..119A7
7955             UNKNOWN,                  // 119A8..119A9
7956             NANDINAGARI,              // 119AA..119D7
7957             UNKNOWN,                  // 119D8..119D9
7958             NANDINAGARI,              // 119DA..119E4
7959             UNKNOWN,                  // 119E5..119FF
7960             ZANABAZAR_SQUARE,         // 11A00..11A47
7961             UNKNOWN,                  // 11A48..11A4F
7962             SOYOMBO,                  // 11A50..11AA2
7963             UNKNOWN,                  // 11AA3..11ABF
7964             PAU_CIN_HAU,              // 11AC0..11AF8
7965             UNKNOWN,                  // 11AF9..11BFF
7966             BHAIKSUKI,                // 11C00..11C08
7967             UNKNOWN,                  // 11C09
7968             BHAIKSUKI,                // 11C0A..11C36
7969             UNKNOWN,                  // 11C37
7970             BHAIKSUKI,                // 11C38..11C45
7971             UNKNOWN,                  // 11C46..11C4F
7972             BHAIKSUKI,                // 11C50..11C6C
7973             UNKNOWN,                  // 11C6D..11C6F
7974             MARCHEN,                  // 11C70..11C8F
7975             UNKNOWN,                  // 11C90..11C91
7976             MARCHEN,                  // 11C92..11CA7
7977             UNKNOWN,                  // 11CA8
7978             MARCHEN,                  // 11CA9..11CB6
7979             UNKNOWN,                  // 11CB7..11CFF
7980             MASARAM_GONDI,            // 11D00..11D06
7981             UNKNOWN,                  // 11D07
7982             MASARAM_GONDI,            // 11D08..11D09
7983             UNKNOWN,                  // 11D0A
7984             MASARAM_GONDI,            // 11D0B..11D36
7985             UNKNOWN,                  // 11D37..11D39
7986             MASARAM_GONDI,            // 11D3A
7987             UNKNOWN,                  // 11D3B
7988             MASARAM_GONDI,            // 11D3C..11D3D
7989             UNKNOWN,                  // 11D3E
7990             MASARAM_GONDI,            // 11D3F..11D47
7991             UNKNOWN,                  // 11D48..11D4F
7992             MASARAM_GONDI,            // 11D50..11D59
7993             UNKNOWN,                  // 11D5A..11D5F
7994             GUNJALA_GONDI,            // 11D60..11D65
7995             UNKNOWN,                  // 11D66
7996             GUNJALA_GONDI,            // 11D67..11D68
7997             UNKNOWN,                  // 11D69
7998             GUNJALA_GONDI,            // 11D6A..11D8E
7999             UNKNOWN,                  // 11D8F
8000             GUNJALA_GONDI,            // 11D90..11D91
8001             UNKNOWN,                  // 11D92
8002             GUNJALA_GONDI,            // 11D93..11D98
8003             UNKNOWN,                  // 11D99..11D9F
8004             GUNJALA_GONDI,            // 11DA0..11DA9
8005             UNKNOWN,                  // 11DAA..11EDF
8006             MAKASAR,                  // 11EE0..11EF8
8007             UNKNOWN,                  // 11EF9..11FAF
8008             LISU,                     // 11FB0
8009             UNKNOWN,                  // 11FB1..11FBF
8010             TAMIL,                    // 11FC0..11FF1
8011             UNKNOWN,                  // 11FF2..11FFE
8012             TAMIL,                    // 11FFF
8013             CUNEIFORM,                // 12000..12399
8014             UNKNOWN,                  // 1239A..123FF
8015             CUNEIFORM,                // 12400..1246E
8016             UNKNOWN,                  // 1246F
8017             CUNEIFORM,                // 12470..12474
8018             UNKNOWN,                  // 12475..1247F
8019             CUNEIFORM,                // 12480..12543
8020             UNKNOWN,                  // 12544..12FFF
8021             EGYPTIAN_HIEROGLYPHS,     // 13000..1342E
8022             UNKNOWN,                  // 1342F
8023             EGYPTIAN_HIEROGLYPHS,     // 13430..13438
8024             UNKNOWN,                  // 13439..143FF
8025             ANATOLIAN_HIEROGLYPHS,    // 14400..14646
8026             UNKNOWN,                  // 14647..167FF
8027             BAMUM,                    // 16800..16A38
8028             UNKNOWN,                  // 16A39..16A3F
8029             MRO,                      // 16A40..16A5E
8030             UNKNOWN,                  // 16A5F
8031             MRO,                      // 16A60..16A69
8032             UNKNOWN,                  // 16A6A..16A6D
8033             MRO,                      // 16A6E..16A6F
8034             UNKNOWN,                  // 16A70..16ACF
8035             BASSA_VAH,                // 16AD0..16AED
8036             UNKNOWN,                  // 16AEE..16AEF
8037             BASSA_VAH,                // 16AF0..16AF5
8038             UNKNOWN,                  // 16AF6..16AFF
8039             PAHAWH_HMONG,             // 16B00..16B45
8040             UNKNOWN,                  // 16B46..16B4F
8041             PAHAWH_HMONG,             // 16B50..16B59
8042             UNKNOWN,                  // 16B5A
8043             PAHAWH_HMONG,             // 16B5B..16B61
8044             UNKNOWN,                  // 16B62
8045             PAHAWH_HMONG,             // 16B63..16B77
8046             UNKNOWN,                  // 16B78..16B7C
8047             PAHAWH_HMONG,             // 16B7D..16B8F
8048             UNKNOWN,                  // 16B90..16E3F
8049             MEDEFAIDRIN,              // 16E40..16E9A
8050             UNKNOWN,                  // 16E9B..16EFF
8051             MIAO,                     // 16F00..16F4A
8052             UNKNOWN,                  // 16F4B..16F4E
8053             MIAO,                     // 16F4F..16F87
8054             UNKNOWN,                  // 16F88..16F8E
8055             MIAO,                     // 16F8F..16F9F
8056             UNKNOWN,                  // 16FA0..16FDF
8057             TANGUT,                   // 16FE0
8058             NUSHU,                    // 16FE1
8059             COMMON,                   // 16FE2..16FE3
8060             KHITAN_SMALL_SCRIPT,      // 16FE4
8061             UNKNOWN,                  // 16FE5..16FEF
8062             HAN,                      // 16FF0..16FF1
8063             UNKNOWN,                  // 16FF2..16FFF
8064             TANGUT,                   // 17000..187F7
8065             UNKNOWN,                  // 187F8..187FF
8066             TANGUT,                   // 18800..18AFF
8067             KHITAN_SMALL_SCRIPT,      // 18B00..18CD5
8068             UNKNOWN,                  // 18CD6..18CFF
8069             TANGUT,                   // 18D00..18D08
8070             UNKNOWN,                  // 18D09..1AFFF
8071             KATAKANA,                 // 1B000
8072             HIRAGANA,                 // 1B001..1B11E
8073             UNKNOWN,                  // 1B11F..1B14F
8074             HIRAGANA,                 // 1B150..1B152
8075             UNKNOWN,                  // 1B153..1B163
8076             KATAKANA,                 // 1B164..1B167
8077             UNKNOWN,                  // 1B168..1B16F
8078             NUSHU,                    // 1B170..1B2FB
8079             UNKNOWN,                  // 1B2FC..1BBFF
8080             DUPLOYAN,                 // 1BC00..1BC6A
8081             UNKNOWN,                  // 1BC6B..1BC6F
8082             DUPLOYAN,                 // 1BC70..1BC7C
8083             UNKNOWN,                  // 1BC7D..1BC7F
8084             DUPLOYAN,                 // 1BC80..1BC88
8085             UNKNOWN,                  // 1BC89..1BC8F
8086             DUPLOYAN,                 // 1BC90..1BC99
8087             UNKNOWN,                  // 1BC9A..1BC9B
8088             DUPLOYAN,                 // 1BC9C..1BC9F
8089             COMMON,                   // 1BCA0..1BCA3
8090             UNKNOWN,                  // 1BCA4..1CFFF
8091             COMMON,                   // 1D000..1D0F5
8092             UNKNOWN,                  // 1D0F6..1D0FF
8093             COMMON,                   // 1D100..1D126
8094             UNKNOWN,                  // 1D127..1D128
8095             COMMON,                   // 1D129..1D166
8096             INHERITED,                // 1D167..1D169
8097             COMMON,                   // 1D16A..1D17A
8098             INHERITED,                // 1D17B..1D182
8099             COMMON,                   // 1D183..1D184
8100             INHERITED,                // 1D185..1D18B
8101             COMMON,                   // 1D18C..1D1A9
8102             INHERITED,                // 1D1AA..1D1AD
8103             COMMON,                   // 1D1AE..1D1E8
8104             UNKNOWN,                  // 1D1E9..1D1FF
8105             GREEK,                    // 1D200..1D245
8106             UNKNOWN,                  // 1D246..1D2DF
8107             COMMON,                   // 1D2E0..1D2F3
8108             UNKNOWN,                  // 1D2F4..1D2FF
8109             COMMON,                   // 1D300..1D356
8110             UNKNOWN,                  // 1D357..1D35F
8111             COMMON,                   // 1D360..1D378
8112             UNKNOWN,                  // 1D379..1D3FF
8113             COMMON,                   // 1D400..1D454
8114             UNKNOWN,                  // 1D455
8115             COMMON,                   // 1D456..1D49C
8116             UNKNOWN,                  // 1D49D
8117             COMMON,                   // 1D49E..1D49F
8118             UNKNOWN,                  // 1D4A0..1D4A1
8119             COMMON,                   // 1D4A2
8120             UNKNOWN,                  // 1D4A3..1D4A4
8121             COMMON,                   // 1D4A5..1D4A6
8122             UNKNOWN,                  // 1D4A7..1D4A8
8123             COMMON,                   // 1D4A9..1D4AC
8124             UNKNOWN,                  // 1D4AD
8125             COMMON,                   // 1D4AE..1D4B9
8126             UNKNOWN,                  // 1D4BA
8127             COMMON,                   // 1D4BB
8128             UNKNOWN,                  // 1D4BC
8129             COMMON,                   // 1D4BD..1D4C3
8130             UNKNOWN,                  // 1D4C4
8131             COMMON,                   // 1D4C5..1D505
8132             UNKNOWN,                  // 1D506
8133             COMMON,                   // 1D507..1D50A
8134             UNKNOWN,                  // 1D50B..1D50C
8135             COMMON,                   // 1D50D..1D514
8136             UNKNOWN,                  // 1D515
8137             COMMON,                   // 1D516..1D51C
8138             UNKNOWN,                  // 1D51D
8139             COMMON,                   // 1D51E..1D539
8140             UNKNOWN,                  // 1D53A
8141             COMMON,                   // 1D53B..1D53E
8142             UNKNOWN,                  // 1D53F
8143             COMMON,                   // 1D540..1D544
8144             UNKNOWN,                  // 1D545
8145             COMMON,                   // 1D546
8146             UNKNOWN,                  // 1D547..1D549
8147             COMMON,                   // 1D54A..1D550
8148             UNKNOWN,                  // 1D551
8149             COMMON,                   // 1D552..1D6A5
8150             UNKNOWN,                  // 1D6A6..1D6A7
8151             COMMON,                   // 1D6A8..1D7CB
8152             UNKNOWN,                  // 1D7CC..1D7CD
8153             COMMON,                   // 1D7CE..1D7FF
8154             SIGNWRITING,              // 1D800..1DA8B
8155             UNKNOWN,                  // 1DA8C..1DA9A
8156             SIGNWRITING,              // 1DA9B..1DA9F
8157             UNKNOWN,                  // 1DAA0
8158             SIGNWRITING,              // 1DAA1..1DAAF
8159             UNKNOWN,                  // 1DAB0..1DFFF
8160             GLAGOLITIC,               // 1E000..1E006
8161             UNKNOWN,                  // 1E007
8162             GLAGOLITIC,               // 1E008..1E018
8163             UNKNOWN,                  // 1E019..1E01A
8164             GLAGOLITIC,               // 1E01B..1E021
8165             UNKNOWN,                  // 1E022
8166             GLAGOLITIC,               // 1E023..1E024
8167             UNKNOWN,                  // 1E025
8168             GLAGOLITIC,               // 1E026..1E02A
8169             UNKNOWN,                  // 1E02B..1E0FF
8170             NYIAKENG_PUACHUE_HMONG,   // 1E100..1E12C
8171             UNKNOWN,                  // 1E12D..1E12F
8172             NYIAKENG_PUACHUE_HMONG,   // 1E130..1E13D
8173             UNKNOWN,                  // 1E13E..1E13F
8174             NYIAKENG_PUACHUE_HMONG,   // 1E140..1E149
8175             UNKNOWN,                  // 1E14A..1E14D
8176             NYIAKENG_PUACHUE_HMONG,   // 1E14E..1E14F
8177             UNKNOWN,                  // 1E150..1E2BF
8178             WANCHO,                   // 1E2C0..1E2F9
8179             UNKNOWN,                  // 1E2FA..1E2FE
8180             WANCHO,                   // 1E2FF
8181             UNKNOWN,                  // 1E300..1E7FF
8182             MENDE_KIKAKUI,            // 1E800..1E8C4
8183             UNKNOWN,                  // 1E8C5..1E8C6
8184             MENDE_KIKAKUI,            // 1E8C7..1E8D6
8185             UNKNOWN,                  // 1E8D7..1E8FF
8186             ADLAM,                    // 1E900..1E94B
8187             UNKNOWN,                  // 1E94C..1E94F
8188             ADLAM,                    // 1E950..1E959
8189             UNKNOWN,                  // 1E95A..1E95D
8190             ADLAM,                    // 1E95E..1E95F
8191             UNKNOWN,                  // 1E960..1EC70
8192             COMMON,                   // 1EC71..1ECB4
8193             UNKNOWN,                  // 1ECB5..1ED00
8194             COMMON,                   // 1ED01..1ED3D
8195             UNKNOWN,                  // 1ED3E..1EDFF
8196             ARABIC,                   // 1EE00..1EE03
8197             UNKNOWN,                  // 1EE04
8198             ARABIC,                   // 1EE05..1EE1F
8199             UNKNOWN,                  // 1EE20
8200             ARABIC,                   // 1EE21..1EE22
8201             UNKNOWN,                  // 1EE23
8202             ARABIC,                   // 1EE24
8203             UNKNOWN,                  // 1EE25..1EE26
8204             ARABIC,                   // 1EE27
8205             UNKNOWN,                  // 1EE28
8206             ARABIC,                   // 1EE29..1EE32
8207             UNKNOWN,                  // 1EE33
8208             ARABIC,                   // 1EE34..1EE37
8209             UNKNOWN,                  // 1EE38
8210             ARABIC,                   // 1EE39
8211             UNKNOWN,                  // 1EE3A
8212             ARABIC,                   // 1EE3B
8213             UNKNOWN,                  // 1EE3C..1EE41
8214             ARABIC,                   // 1EE42
8215             UNKNOWN,                  // 1EE43..1EE46
8216             ARABIC,                   // 1EE47
8217             UNKNOWN,                  // 1EE48
8218             ARABIC,                   // 1EE49
8219             UNKNOWN,                  // 1EE4A
8220             ARABIC,                   // 1EE4B
8221             UNKNOWN,                  // 1EE4C
8222             ARABIC,                   // 1EE4D..1EE4F
8223             UNKNOWN,                  // 1EE50
8224             ARABIC,                   // 1EE51..1EE52
8225             UNKNOWN,                  // 1EE53
8226             ARABIC,                   // 1EE54
8227             UNKNOWN,                  // 1EE55..1EE56
8228             ARABIC,                   // 1EE57
8229             UNKNOWN,                  // 1EE58
8230             ARABIC,                   // 1EE59
8231             UNKNOWN,                  // 1EE5A
8232             ARABIC,                   // 1EE5B
8233             UNKNOWN,                  // 1EE5C
8234             ARABIC,                   // 1EE5D
8235             UNKNOWN,                  // 1EE5E
8236             ARABIC,                   // 1EE5F
8237             UNKNOWN,                  // 1EE60
8238             ARABIC,                   // 1EE61..1EE62
8239             UNKNOWN,                  // 1EE63
8240             ARABIC,                   // 1EE64
8241             UNKNOWN,                  // 1EE65..1EE66
8242             ARABIC,                   // 1EE67..1EE6A
8243             UNKNOWN,                  // 1EE6B
8244             ARABIC,                   // 1EE6C..1EE72
8245             UNKNOWN,                  // 1EE73
8246             ARABIC,                   // 1EE74..1EE77
8247             UNKNOWN,                  // 1EE78
8248             ARABIC,                   // 1EE79..1EE7C
8249             UNKNOWN,                  // 1EE7D
8250             ARABIC,                   // 1EE7E
8251             UNKNOWN,                  // 1EE7F
8252             ARABIC,                   // 1EE80..1EE89
8253             UNKNOWN,                  // 1EE8A
8254             ARABIC,                   // 1EE8B..1EE9B
8255             UNKNOWN,                  // 1EE9C..1EEA0
8256             ARABIC,                   // 1EEA1..1EEA3
8257             UNKNOWN,                  // 1EEA4
8258             ARABIC,                   // 1EEA5..1EEA9
8259             UNKNOWN,                  // 1EEAA
8260             ARABIC,                   // 1EEAB..1EEBB
8261             UNKNOWN,                  // 1EEBC..1EEEF
8262             ARABIC,                   // 1EEF0..1EEF1
8263             UNKNOWN,                  // 1EEF2..1EFFF
8264             COMMON,                   // 1F000..1F02B
8265             UNKNOWN,                  // 1F02C..1F02F
8266             COMMON,                   // 1F030..1F093
8267             UNKNOWN,                  // 1F094..1F09F
8268             COMMON,                   // 1F0A0..1F0AE
8269             UNKNOWN,                  // 1F0AF..1F0B0
8270             COMMON,                   // 1F0B1..1F0BF
8271             UNKNOWN,                  // 1F0C0
8272             COMMON,                   // 1F0C1..1F0CF
8273             UNKNOWN,                  // 1F0D0
8274             COMMON,                   // 1F0D1..1F0F5
8275             UNKNOWN,                  // 1F0F6..1F0FF
8276             COMMON,                   // 1F100..1F1AD
8277             UNKNOWN,                  // 1F1AE..1F1E5
8278             COMMON,                   // 1F1E6..1F1FF
8279             HIRAGANA,                 // 1F200
8280             COMMON,                   // 1F201..1F202
8281             UNKNOWN,                  // 1F203..1F20F
8282             COMMON,                   // 1F210..1F23B
8283             UNKNOWN,                  // 1F23C..1F23F
8284             COMMON,                   // 1F240..1F248
8285             UNKNOWN,                  // 1F249..1F24F
8286             COMMON,                   // 1F250..1F251
8287             UNKNOWN,                  // 1F252..1F25F
8288             COMMON,                   // 1F260..1F265
8289             UNKNOWN,                  // 1F266..1F2FF
8290             COMMON,                   // 1F300..1F6D7
8291             UNKNOWN,                  // 1F6D8..1F6DF
8292             COMMON,                   // 1F6E0..1F6EC
8293             UNKNOWN,                  // 1F6ED..1F6EF
8294             COMMON,                   // 1F6F0..1F6FC
8295             UNKNOWN,                  // 1F6FD..1F6FF
8296             COMMON,                   // 1F700..1F773
8297             UNKNOWN,                  // 1F774..1F77F
8298             COMMON,                   // 1F780..1F7D8
8299             UNKNOWN,                  // 1F7D9..1F7DF
8300             COMMON,                   // 1F7E0..1F7EB
8301             UNKNOWN,                  // 1F7EC..1F7FF
8302             COMMON,                   // 1F800..1F80B
8303             UNKNOWN,                  // 1F80C..1F80F
8304             COMMON,                   // 1F810..1F847
8305             UNKNOWN,                  // 1F848..1F84F
8306             COMMON,                   // 1F850..1F859
8307             UNKNOWN,                  // 1F85A..1F85F
8308             COMMON,                   // 1F860..1F887
8309             UNKNOWN,                  // 1F888..1F88F
8310             COMMON,                   // 1F890..1F8AD
8311             UNKNOWN,                  // 1F8AE..1F8AF
8312             COMMON,                   // 1F8B0..1F8B1
8313             UNKNOWN,                  // 1F8B2..1F8FF
8314             COMMON,                   // 1F900..1F978
8315             UNKNOWN,                  // 1F979
8316             COMMON,                   // 1F97A..1F9CB
8317             UNKNOWN,                  // 1F9CC
8318             COMMON,                   // 1F9CD..1FA53
8319             UNKNOWN,                  // 1FA54..1FA5F
8320             COMMON,                   // 1FA60..1FA6D
8321             UNKNOWN,                  // 1FA6E..1FA6F
8322             COMMON,                   // 1FA70..1FA74
8323             UNKNOWN,                  // 1FA75..1FA77
8324             COMMON,                   // 1FA78..1FA7A
8325             UNKNOWN,                  // 1FA7B..1FA7F
8326             COMMON,                   // 1FA80..1FA86
8327             UNKNOWN,                  // 1FA87..1FA8F
8328             COMMON,                   // 1FA90..1FAA8
8329             UNKNOWN,                  // 1FAA9..1FAAF
8330             COMMON,                   // 1FAB0..1FAB6
8331             UNKNOWN,                  // 1FAB7..1FABF
8332             COMMON,                   // 1FAC0..1FAC2
8333             UNKNOWN,                  // 1FAC3..1FACF
8334             COMMON,                   // 1FAD0..1FAD6
8335             UNKNOWN,                  // 1FAD7..1FAFF
8336             COMMON,                   // 1FB00..1FB92
8337             UNKNOWN,                  // 1FB93
8338             COMMON,                   // 1FB94..1FBCA
8339             UNKNOWN,                  // 1FBCB..1FBEF
8340             COMMON,                   // 1FBF0..1FBF9
8341             UNKNOWN,                  // 1FBFA..1FFFF
8342             HAN,                      // 20000..2A6DD
8343             UNKNOWN,                  // 2A6DE..2A6FF
8344             HAN,                      // 2A700..2B734
8345             UNKNOWN,                  // 2B735..2B73F
8346             HAN,                      // 2B740..2B81D
8347             UNKNOWN,                  // 2B81E..2B81F
8348             HAN,                      // 2B820..2CEA1
8349             UNKNOWN,                  // 2CEA2..2CEAF
8350             HAN,                      // 2CEB0..2EBE0
8351             UNKNOWN,                  // 2EBE1..2F7FF
8352             HAN,                      // 2F800..2FA1D
8353             UNKNOWN,                  // 2FA1E..2FFFF
8354             HAN,                      // 30000..3134A
8355             UNKNOWN,                  // 3134B..E0000
8356             COMMON,                   // E0001
8357             UNKNOWN,                  // E0002..E001F
8358             COMMON,                   // E0020..E007F
8359             UNKNOWN,                  // E0080..E00FF
8360             INHERITED,                // E0100..E01EF
8361             UNKNOWN,                  // E01F0..10FFFF
8362         };
8363 
8364         private static final HashMap<String, Character.UnicodeScript> aliases;
8365         static {
8366             aliases = new HashMap<>((int)(157 / 0.75f + 1.0f));
8367             aliases.put("ADLM", ADLAM);
8368             aliases.put("AGHB", CAUCASIAN_ALBANIAN);
8369             aliases.put("AHOM", AHOM);
8370             aliases.put("ARAB", ARABIC);
8371             aliases.put("ARMI", IMPERIAL_ARAMAIC);
8372             aliases.put("ARMN", ARMENIAN);
8373             aliases.put("AVST", AVESTAN);
8374             aliases.put("BALI", BALINESE);
8375             aliases.put("BAMU", BAMUM);
8376             aliases.put("BASS", BASSA_VAH);
8377             aliases.put("BATK", BATAK);
8378             aliases.put("BENG", BENGALI);
8379             aliases.put("BHKS", BHAIKSUKI);
8380             aliases.put("BOPO", BOPOMOFO);
8381             aliases.put("BRAH", BRAHMI);
8382             aliases.put("BRAI", BRAILLE);
8383             aliases.put("BUGI", BUGINESE);
8384             aliases.put("BUHD", BUHID);
8385             aliases.put("CAKM", CHAKMA);
8386             aliases.put("CANS", CANADIAN_ABORIGINAL);
8387             aliases.put("CARI", CARIAN);
8388             aliases.put("CHAM", CHAM);
8389             aliases.put("CHER", CHEROKEE);
8390             aliases.put("CHRS", CHORASMIAN);
8391             aliases.put("COPT", COPTIC);
8392             aliases.put("CPRT", CYPRIOT);
8393             aliases.put("CYRL", CYRILLIC);
8394             aliases.put("DEVA", DEVANAGARI);
8395             aliases.put("DIAK", DIVES_AKURU);
8396             aliases.put("DOGR", DOGRA);
8397             aliases.put("DSRT", DESERET);
8398             aliases.put("DUPL", DUPLOYAN);
8399             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
8400             aliases.put("ELBA", ELBASAN);
8401             aliases.put("ELYM", ELYMAIC);
8402             aliases.put("ETHI", ETHIOPIC);
8403             aliases.put("GEOR", GEORGIAN);
8404             aliases.put("GLAG", GLAGOLITIC);
8405             aliases.put("GONM", MASARAM_GONDI);
8406             aliases.put("GOTH", GOTHIC);
8407             aliases.put("GONG", GUNJALA_GONDI);
8408             aliases.put("GRAN", GRANTHA);
8409             aliases.put("GREK", GREEK);
8410             aliases.put("GUJR", GUJARATI);
8411             aliases.put("GURU", GURMUKHI);
8412             aliases.put("HANG", HANGUL);
8413             aliases.put("HANI", HAN);
8414             aliases.put("HANO", HANUNOO);
8415             aliases.put("HATR", HATRAN);
8416             aliases.put("HEBR", HEBREW);
8417             aliases.put("HIRA", HIRAGANA);
8418             aliases.put("HLUW", ANATOLIAN_HIEROGLYPHS);
8419             aliases.put("HMNG", PAHAWH_HMONG);
8420             aliases.put("HMNP", NYIAKENG_PUACHUE_HMONG);
8421             // it appears we don't have the KATAKANA_OR_HIRAGANA
8422             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
8423             aliases.put("HUNG", OLD_HUNGARIAN);
8424             aliases.put("ITAL", OLD_ITALIC);
8425             aliases.put("JAVA", JAVANESE);
8426             aliases.put("KALI", KAYAH_LI);
8427             aliases.put("KANA", KATAKANA);
8428             aliases.put("KHAR", KHAROSHTHI);
8429             aliases.put("KHMR", KHMER);
8430             aliases.put("KHOJ", KHOJKI);
8431             aliases.put("KITS", KHITAN_SMALL_SCRIPT);
8432             aliases.put("KNDA", KANNADA);
8433             aliases.put("KTHI", KAITHI);
8434             aliases.put("LANA", TAI_THAM);
8435             aliases.put("LAOO", LAO);
8436             aliases.put("LATN", LATIN);
8437             aliases.put("LEPC", LEPCHA);
8438             aliases.put("LIMB", LIMBU);
8439             aliases.put("LINA", LINEAR_A);
8440             aliases.put("LINB", LINEAR_B);
8441             aliases.put("LISU", LISU);
8442             aliases.put("LYCI", LYCIAN);
8443             aliases.put("LYDI", LYDIAN);
8444             aliases.put("MAHJ", MAHAJANI);
8445             aliases.put("MAKA", MAKASAR);
8446             aliases.put("MARC", MARCHEN);
8447             aliases.put("MAND", MANDAIC);
8448             aliases.put("MANI", MANICHAEAN);
8449             aliases.put("MEDF", MEDEFAIDRIN);
8450             aliases.put("MEND", MENDE_KIKAKUI);
8451             aliases.put("MERC", MEROITIC_CURSIVE);
8452             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
8453             aliases.put("MLYM", MALAYALAM);
8454             aliases.put("MODI", MODI);
8455             aliases.put("MONG", MONGOLIAN);
8456             aliases.put("MROO", MRO);
8457             aliases.put("MTEI", MEETEI_MAYEK);
8458             aliases.put("MULT", MULTANI);
8459             aliases.put("MYMR", MYANMAR);
8460             aliases.put("NAND", NANDINAGARI);
8461             aliases.put("NARB", OLD_NORTH_ARABIAN);
8462             aliases.put("NBAT", NABATAEAN);
8463             aliases.put("NEWA", NEWA);
8464             aliases.put("NKOO", NKO);
8465             aliases.put("NSHU", NUSHU);
8466             aliases.put("OGAM", OGHAM);
8467             aliases.put("OLCK", OL_CHIKI);
8468             aliases.put("ORKH", OLD_TURKIC);
8469             aliases.put("ORYA", ORIYA);
8470             aliases.put("OSGE", OSAGE);
8471             aliases.put("OSMA", OSMANYA);
8472             aliases.put("PALM", PALMYRENE);
8473             aliases.put("PAUC", PAU_CIN_HAU);
8474             aliases.put("PERM", OLD_PERMIC);
8475             aliases.put("PHAG", PHAGS_PA);
8476             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
8477             aliases.put("PHLP", PSALTER_PAHLAVI);
8478             aliases.put("PHNX", PHOENICIAN);
8479             aliases.put("PLRD", MIAO);
8480             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
8481             aliases.put("RJNG", REJANG);
8482             aliases.put("ROHG", HANIFI_ROHINGYA);
8483             aliases.put("RUNR", RUNIC);
8484             aliases.put("SAMR", SAMARITAN);
8485             aliases.put("SARB", OLD_SOUTH_ARABIAN);
8486             aliases.put("SAUR", SAURASHTRA);
8487             aliases.put("SGNW", SIGNWRITING);
8488             aliases.put("SHAW", SHAVIAN);
8489             aliases.put("SHRD", SHARADA);
8490             aliases.put("SIDD", SIDDHAM);
8491             aliases.put("SIND", KHUDAWADI);
8492             aliases.put("SINH", SINHALA);
8493             aliases.put("SOGD", SOGDIAN);
8494             aliases.put("SOGO", OLD_SOGDIAN);
8495             aliases.put("SORA", SORA_SOMPENG);
8496             aliases.put("SOYO", SOYOMBO);
8497             aliases.put("SUND", SUNDANESE);
8498             aliases.put("SYLO", SYLOTI_NAGRI);
8499             aliases.put("SYRC", SYRIAC);
8500             aliases.put("TAGB", TAGBANWA);
8501             aliases.put("TAKR", TAKRI);
8502             aliases.put("TALE", TAI_LE);
8503             aliases.put("TALU", NEW_TAI_LUE);
8504             aliases.put("TAML", TAMIL);
8505             aliases.put("TANG", TANGUT);
8506             aliases.put("TAVT", TAI_VIET);
8507             aliases.put("TELU", TELUGU);
8508             aliases.put("TFNG", TIFINAGH);
8509             aliases.put("TGLG", TAGALOG);
8510             aliases.put("THAA", THAANA);
8511             aliases.put("THAI", THAI);
8512             aliases.put("TIBT", TIBETAN);
8513             aliases.put("TIRH", TIRHUTA);
8514             aliases.put("UGAR", UGARITIC);
8515             aliases.put("VAII", VAI);
8516             aliases.put("WARA", WARANG_CITI);
8517             aliases.put("WCHO", WANCHO);
8518             aliases.put("XPEO", OLD_PERSIAN);
8519             aliases.put("XSUX", CUNEIFORM);
8520             aliases.put("YIII", YI);
8521             aliases.put("YEZI", YEZIDI);
8522             aliases.put("ZANB", ZANABAZAR_SQUARE);
8523             aliases.put("ZINH", INHERITED);
8524             aliases.put("ZYYY", COMMON);
8525             aliases.put("ZZZZ", UNKNOWN);
8526         }
8527 
8528         /**
8529          * Returns the enum constant representing the Unicode script of which
8530          * the given character (Unicode code point) is assigned to.
8531          *
8532          * @param   codePoint the character (Unicode code point) in question.
8533          * @return  The {@code UnicodeScript} constant representing the
8534          *          Unicode script of which this character is assigned to.
8535          *
8536          * @throws  IllegalArgumentException if the specified
8537          * {@code codePoint} is an invalid Unicode code point.
8538          * @see Character#isValidCodePoint(int)
8539          *
8540          */
of(int codePoint)8541         public static UnicodeScript of(int codePoint) {
8542             if (!isValidCodePoint(codePoint))
8543                 throw new IllegalArgumentException(
8544                     String.format("Not a valid Unicode code point: 0x%X", codePoint));
8545             int type = getType(codePoint);
8546             // leave SURROGATE and PRIVATE_USE for table lookup
8547             if (type == UNASSIGNED)
8548                 return UNKNOWN;
8549             int index = Arrays.binarySearch(scriptStarts, codePoint);
8550             if (index < 0)
8551                 index = -index - 2;
8552             return scripts[index];
8553         }
8554 
8555         /**
8556          * Returns the UnicodeScript constant with the given Unicode script
8557          * name or the script name alias. Script names and their aliases are
8558          * determined by The Unicode Standard. The files {@code Scripts<version>.txt}
8559          * and {@code PropertyValueAliases<version>.txt} define script names
8560          * and the script name aliases for a particular version of the
8561          * standard. The {@link Character} class specifies the version of
8562          * the standard that it supports.
8563          * <p>
8564          * Character case is ignored for all of the valid script names.
8565          * The en_US locale's case mapping rules are used to provide
8566          * case-insensitive string comparisons for script name validation.
8567          *
8568          * @param scriptName A {@code UnicodeScript} name.
8569          * @return The {@code UnicodeScript} constant identified
8570          *         by {@code scriptName}
8571          * @throws IllegalArgumentException if {@code scriptName} is an
8572          *         invalid name
8573          * @throws NullPointerException if {@code scriptName} is null
8574          */
forName(String scriptName)8575         public static final UnicodeScript forName(String scriptName) {
8576             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
8577                                  //.replace(' ', '_'));
8578             UnicodeScript sc = aliases.get(scriptName);
8579             if (sc != null)
8580                 return sc;
8581             return valueOf(scriptName);
8582         }
8583     }
8584 
8585     /**
8586      * The value of the {@code Character}.
8587      *
8588      * @serial
8589      */
8590     private final char value;
8591 
8592     /** use serialVersionUID from JDK 1.0.2 for interoperability */
8593     @java.io.Serial
8594     private static final long serialVersionUID = 3786198910865385080L;
8595 
8596     /**
8597      * Constructs a newly allocated {@code Character} object that
8598      * represents the specified {@code char} value.
8599      *
8600      * @param  value   the value to be represented by the
8601      *                  {@code Character} object.
8602      *
8603      * @deprecated
8604      * It is rarely appropriate to use this constructor. The static factory
8605      * {@link #valueOf(char)} is generally a better choice, as it is
8606      * likely to yield significantly better space and time performance.
8607      */
8608     // Android-changed: not yet forRemoval on Android.
8609     @Deprecated(since="9"/*, forRemoval = true*/)
Character(char value)8610     public Character(char value) {
8611         this.value = value;
8612     }
8613 
8614     private static class CharacterCache {
CharacterCache()8615         private CharacterCache(){}
8616 
8617         static final Character[] cache;
8618         static Character[] archivedCache;
8619 
8620         static {
8621             int size = 127 + 1;
8622 
8623             // Load and use the archived cache if it exists
8624             // Android-removed: CDS is not used on Android.
8625             // CDS.initializeFromArchive(CharacterCache.class);
8626             if (archivedCache == null || archivedCache.length != size) {
8627                 Character[] c = new Character[size];
8628                 for (int i = 0; i < size; i++) {
8629                     c[i] = new Character((char) i);
8630                 }
8631                 archivedCache = c;
8632             }
8633             cache = archivedCache;
8634         }
8635     }
8636 
8637     /**
8638      * Returns a {@code Character} instance representing the specified
8639      * {@code char} value.
8640      * If a new {@code Character} instance is not required, this method
8641      * should generally be used in preference to the constructor
8642      * {@link #Character(char)}, as this method is likely to yield
8643      * significantly better space and time performance by caching
8644      * frequently requested values.
8645      *
8646      * This method will always cache values in the range {@code
8647      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
8648      * cache other values outside of this range.
8649      *
8650      * @param  c a char value.
8651      * @return a {@code Character} instance representing {@code c}.
8652      * @since  1.5
8653      */
8654     @IntrinsicCandidate
valueOf(char c)8655     public static Character valueOf(char c) {
8656         if (c <= 127) { // must cache
8657             return CharacterCache.cache[(int)c];
8658         }
8659         return new Character(c);
8660     }
8661 
8662     /**
8663      * Returns the value of this {@code Character} object.
8664      * @return  the primitive {@code char} value represented by
8665      *          this object.
8666      */
8667     @IntrinsicCandidate
charValue()8668     public char charValue() {
8669         return value;
8670     }
8671 
8672     /**
8673      * Returns a hash code for this {@code Character}; equal to the result
8674      * of invoking {@code charValue()}.
8675      *
8676      * @return a hash code value for this {@code Character}
8677      */
8678     @Override
hashCode()8679     public int hashCode() {
8680         return Character.hashCode(value);
8681     }
8682 
8683     /**
8684      * Returns a hash code for a {@code char} value; compatible with
8685      * {@code Character.hashCode()}.
8686      *
8687      * @since 1.8
8688      *
8689      * @param value The {@code char} for which to return a hash code.
8690      * @return a hash code value for a {@code char} value.
8691      */
hashCode(char value)8692     public static int hashCode(char value) {
8693         return (int)value;
8694     }
8695 
8696     /**
8697      * Compares this object against the specified object.
8698      * The result is {@code true} if and only if the argument is not
8699      * {@code null} and is a {@code Character} object that
8700      * represents the same {@code char} value as this object.
8701      *
8702      * @param   obj   the object to compare with.
8703      * @return  {@code true} if the objects are the same;
8704      *          {@code false} otherwise.
8705      */
equals(Object obj)8706     public boolean equals(Object obj) {
8707         if (obj instanceof Character) {
8708             return value == ((Character)obj).charValue();
8709         }
8710         return false;
8711     }
8712 
8713     /**
8714      * Returns a {@code String} object representing this
8715      * {@code Character}'s value.  The result is a string of
8716      * length 1 whose sole component is the primitive
8717      * {@code char} value represented by this
8718      * {@code Character} object.
8719      *
8720      * @return  a string representation of this object.
8721      */
toString()8722     public String toString() {
8723         return String.valueOf(value);
8724     }
8725 
8726     // Android-removed: reference to Character.toString(int) in javadoc.
8727     /**
8728      * Returns a {@code String} object representing the
8729      * specified {@code char}.  The result is a string of length
8730      * 1 consisting solely of the specified {@code char}.
8731      *
8732      * @param c the {@code char} to be converted
8733      * @return the string representation of the specified {@code char}
8734      * @since 1.4
8735      */
toString(char c)8736     public static String toString(char c) {
8737         return String.valueOf(c);
8738     }
8739 
8740     // BEGIN Android-removed: expose after String.valueOfCodePoint() is imported.
8741     /**
8742      * Returns a {@code String} object representing the
8743      * specified character (Unicode code point).  The result is a string of
8744      * length 1 or 2, consisting solely of the specified {@code codePoint}.
8745      *
8746      * @param codePoint the {@code codePoint} to be converted
8747      * @return the string representation of the specified {@code codePoint}
8748      * @throws IllegalArgumentException if the specified
8749      *      {@code codePoint} is not a {@linkplain #isValidCodePoint
8750      *      valid Unicode code point}.
8751      * @since 11
8752      *
8753     public static String toString(int codePoint) {
8754         return String.valueOfCodePoint(codePoint);
8755     }
8756     */
8757     // END Android-removed: expose after String.valueOfCodePoint() is imported.
8758 
8759     /**
8760      * Determines whether the specified code point is a valid
8761      * <a href="http://www.unicode.org/glossary/#code_point">
8762      * Unicode code point value</a>.
8763      *
8764      * @param  codePoint the Unicode code point to be tested
8765      * @return {@code true} if the specified code point value is between
8766      *         {@link #MIN_CODE_POINT} and
8767      *         {@link #MAX_CODE_POINT} inclusive;
8768      *         {@code false} otherwise.
8769      * @since  1.5
8770      */
isValidCodePoint(int codePoint)8771     public static boolean isValidCodePoint(int codePoint) {
8772         // Optimized form of:
8773         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
8774         int plane = codePoint >>> 16;
8775         return plane < ((MAX_CODE_POINT + 1) >>> 16);
8776     }
8777 
8778     /**
8779      * Determines whether the specified character (Unicode code point)
8780      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
8781      * Such code points can be represented using a single {@code char}.
8782      *
8783      * @param  codePoint the character (Unicode code point) to be tested
8784      * @return {@code true} if the specified code point is between
8785      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
8786      *         {@code false} otherwise.
8787      * @since  1.7
8788      */
isBmpCodePoint(int codePoint)8789     public static boolean isBmpCodePoint(int codePoint) {
8790         return codePoint >>> 16 == 0;
8791         // Optimized form of:
8792         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
8793         // We consistently use logical shift (>>>) to facilitate
8794         // additional runtime optimizations.
8795     }
8796 
8797     /**
8798      * Determines whether the specified character (Unicode code point)
8799      * is in the <a href="#supplementary">supplementary character</a> range.
8800      *
8801      * @param  codePoint the character (Unicode code point) to be tested
8802      * @return {@code true} if the specified code point is between
8803      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
8804      *         {@link #MAX_CODE_POINT} inclusive;
8805      *         {@code false} otherwise.
8806      * @since  1.5
8807      */
isSupplementaryCodePoint(int codePoint)8808     public static boolean isSupplementaryCodePoint(int codePoint) {
8809         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
8810             && codePoint <  MAX_CODE_POINT + 1;
8811     }
8812 
8813     /**
8814      * Determines if the given {@code char} value is a
8815      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
8816      * Unicode high-surrogate code unit</a>
8817      * (also known as <i>leading-surrogate code unit</i>).
8818      *
8819      * <p>Such values do not represent characters by themselves,
8820      * but are used in the representation of
8821      * <a href="#supplementary">supplementary characters</a>
8822      * in the UTF-16 encoding.
8823      *
8824      * @param  ch the {@code char} value to be tested.
8825      * @return {@code true} if the {@code char} value is between
8826      *         {@link #MIN_HIGH_SURROGATE} and
8827      *         {@link #MAX_HIGH_SURROGATE} inclusive;
8828      *         {@code false} otherwise.
8829      * @see    Character#isLowSurrogate(char)
8830      * @see    Character.UnicodeBlock#of(int)
8831      * @since  1.5
8832      */
isHighSurrogate(char ch)8833     public static boolean isHighSurrogate(char ch) {
8834         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
8835         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
8836     }
8837 
8838     /**
8839      * Determines if the given {@code char} value is a
8840      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
8841      * Unicode low-surrogate code unit</a>
8842      * (also known as <i>trailing-surrogate code unit</i>).
8843      *
8844      * <p>Such values do not represent characters by themselves,
8845      * but are used in the representation of
8846      * <a href="#supplementary">supplementary characters</a>
8847      * in the UTF-16 encoding.
8848      *
8849      * @param  ch the {@code char} value to be tested.
8850      * @return {@code true} if the {@code char} value is between
8851      *         {@link #MIN_LOW_SURROGATE} and
8852      *         {@link #MAX_LOW_SURROGATE} inclusive;
8853      *         {@code false} otherwise.
8854      * @see    Character#isHighSurrogate(char)
8855      * @since  1.5
8856      */
isLowSurrogate(char ch)8857     public static boolean isLowSurrogate(char ch) {
8858         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
8859     }
8860 
8861     /**
8862      * Determines if the given {@code char} value is a Unicode
8863      * <i>surrogate code unit</i>.
8864      *
8865      * <p>Such values do not represent characters by themselves,
8866      * but are used in the representation of
8867      * <a href="#supplementary">supplementary characters</a>
8868      * in the UTF-16 encoding.
8869      *
8870      * <p>A char value is a surrogate code unit if and only if it is either
8871      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
8872      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
8873      *
8874      * @param  ch the {@code char} value to be tested.
8875      * @return {@code true} if the {@code char} value is between
8876      *         {@link #MIN_SURROGATE} and
8877      *         {@link #MAX_SURROGATE} inclusive;
8878      *         {@code false} otherwise.
8879      * @since  1.7
8880      */
isSurrogate(char ch)8881     public static boolean isSurrogate(char ch) {
8882         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
8883     }
8884 
8885     /**
8886      * Determines whether the specified pair of {@code char}
8887      * values is a valid
8888      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
8889      * Unicode surrogate pair</a>.
8890      *
8891      * <p>This method is equivalent to the expression:
8892      * <blockquote><pre>{@code
8893      * isHighSurrogate(high) && isLowSurrogate(low)
8894      * }</pre></blockquote>
8895      *
8896      * @param  high the high-surrogate code value to be tested
8897      * @param  low the low-surrogate code value to be tested
8898      * @return {@code true} if the specified high and
8899      * low-surrogate code values represent a valid surrogate pair;
8900      * {@code false} otherwise.
8901      * @since  1.5
8902      */
isSurrogatePair(char high, char low)8903     public static boolean isSurrogatePair(char high, char low) {
8904         return isHighSurrogate(high) && isLowSurrogate(low);
8905     }
8906 
8907     /**
8908      * Determines the number of {@code char} values needed to
8909      * represent the specified character (Unicode code point). If the
8910      * specified character is equal to or greater than 0x10000, then
8911      * the method returns 2. Otherwise, the method returns 1.
8912      *
8913      * <p>This method doesn't validate the specified character to be a
8914      * valid Unicode code point. The caller must validate the
8915      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
8916      * if necessary.
8917      *
8918      * @param   codePoint the character (Unicode code point) to be tested.
8919      * @return  2 if the character is equal to or greater than 0x10000; 1 otherwise.
8920      * @see     Character#isSupplementaryCodePoint(int)
8921      * @since   1.5
8922      */
charCount(int codePoint)8923     public static int charCount(int codePoint) {
8924         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
8925     }
8926 
8927     /**
8928      * Converts the specified surrogate pair to its supplementary code
8929      * point value. This method does not validate the specified
8930      * surrogate pair. The caller must validate it using {@link
8931      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
8932      *
8933      * @param  high the high-surrogate code unit
8934      * @param  low the low-surrogate code unit
8935      * @return the supplementary code point composed from the
8936      *         specified surrogate pair.
8937      * @since  1.5
8938      */
toCodePoint(char high, char low)8939     public static int toCodePoint(char high, char low) {
8940         // Optimized form of:
8941         // return ((high - MIN_HIGH_SURROGATE) << 10)
8942         //         + (low - MIN_LOW_SURROGATE)
8943         //         + MIN_SUPPLEMENTARY_CODE_POINT;
8944         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
8945                                        - (MIN_HIGH_SURROGATE << 10)
8946                                        - MIN_LOW_SURROGATE);
8947     }
8948 
8949     /**
8950      * Returns the code point at the given index of the
8951      * {@code CharSequence}. If the {@code char} value at
8952      * the given index in the {@code CharSequence} is in the
8953      * high-surrogate range, the following index is less than the
8954      * length of the {@code CharSequence}, and the
8955      * {@code char} value at the following index is in the
8956      * low-surrogate range, then the supplementary code point
8957      * corresponding to this surrogate pair is returned. Otherwise,
8958      * the {@code char} value at the given index is returned.
8959      *
8960      * @param seq a sequence of {@code char} values (Unicode code
8961      * units)
8962      * @param index the index to the {@code char} values (Unicode
8963      * code units) in {@code seq} to be converted
8964      * @return the Unicode code point at the given index
8965      * @throws NullPointerException if {@code seq} is null.
8966      * @throws IndexOutOfBoundsException if the value
8967      * {@code index} is negative or not less than
8968      * {@link CharSequence#length() seq.length()}.
8969      * @since  1.5
8970      */
codePointAt(CharSequence seq, int index)8971     public static int codePointAt(CharSequence seq, int index) {
8972         char c1 = seq.charAt(index);
8973         if (isHighSurrogate(c1) && ++index < seq.length()) {
8974             char c2 = seq.charAt(index);
8975             if (isLowSurrogate(c2)) {
8976                 return toCodePoint(c1, c2);
8977             }
8978         }
8979         return c1;
8980     }
8981 
8982     /**
8983      * Returns the code point at the given index of the
8984      * {@code char} array. If the {@code char} value at
8985      * the given index in the {@code char} array is in the
8986      * high-surrogate range, the following index is less than the
8987      * length of the {@code char} array, and the
8988      * {@code char} value at the following index is in the
8989      * low-surrogate range, then the supplementary code point
8990      * corresponding to this surrogate pair is returned. Otherwise,
8991      * the {@code char} value at the given index is returned.
8992      *
8993      * @param a the {@code char} array
8994      * @param index the index to the {@code char} values (Unicode
8995      * code units) in the {@code char} array to be converted
8996      * @return the Unicode code point at the given index
8997      * @throws NullPointerException if {@code a} is null.
8998      * @throws IndexOutOfBoundsException if the value
8999      * {@code index} is negative or not less than
9000      * the length of the {@code char} array.
9001      * @since  1.5
9002      */
codePointAt(char[] a, int index)9003     public static int codePointAt(char[] a, int index) {
9004         return codePointAtImpl(a, index, a.length);
9005     }
9006 
9007     /**
9008      * Returns the code point at the given index of the
9009      * {@code char} array, where only array elements with
9010      * {@code index} less than {@code limit} can be used. If
9011      * the {@code char} value at the given index in the
9012      * {@code char} array is in the high-surrogate range, the
9013      * following index is less than the {@code limit}, and the
9014      * {@code char} value at the following index is in the
9015      * low-surrogate range, then the supplementary code point
9016      * corresponding to this surrogate pair is returned. Otherwise,
9017      * the {@code char} value at the given index is returned.
9018      *
9019      * @param a the {@code char} array
9020      * @param index the index to the {@code char} values (Unicode
9021      * code units) in the {@code char} array to be converted
9022      * @param limit the index after the last array element that
9023      * can be used in the {@code char} array
9024      * @return the Unicode code point at the given index
9025      * @throws NullPointerException if {@code a} is null.
9026      * @throws IndexOutOfBoundsException if the {@code index}
9027      * argument is negative or not less than the {@code limit}
9028      * argument, or if the {@code limit} argument is negative or
9029      * greater than the length of the {@code char} array.
9030      * @since  1.5
9031      */
codePointAt(char[] a, int index, int limit)9032     public static int codePointAt(char[] a, int index, int limit) {
9033         if (index >= limit || limit < 0 || limit > a.length) {
9034             throw new IndexOutOfBoundsException();
9035         }
9036         return codePointAtImpl(a, index, limit);
9037     }
9038 
9039     // throws ArrayIndexOutOfBoundsException if index out of bounds
codePointAtImpl(char[] a, int index, int limit)9040     static int codePointAtImpl(char[] a, int index, int limit) {
9041         char c1 = a[index];
9042         if (isHighSurrogate(c1) && ++index < limit) {
9043             char c2 = a[index];
9044             if (isLowSurrogate(c2)) {
9045                 return toCodePoint(c1, c2);
9046             }
9047         }
9048         return c1;
9049     }
9050 
9051     /**
9052      * Returns the code point preceding the given index of the
9053      * {@code CharSequence}. If the {@code char} value at
9054      * {@code (index - 1)} in the {@code CharSequence} is in
9055      * the low-surrogate range, {@code (index - 2)} is not
9056      * negative, and the {@code char} value at {@code (index - 2)}
9057      * in the {@code CharSequence} is in the
9058      * high-surrogate range, then the supplementary code point
9059      * corresponding to this surrogate pair is returned. Otherwise,
9060      * the {@code char} value at {@code (index - 1)} is
9061      * returned.
9062      *
9063      * @param seq the {@code CharSequence} instance
9064      * @param index the index following the code point that should be returned
9065      * @return the Unicode code point value before the given index.
9066      * @throws NullPointerException if {@code seq} is null.
9067      * @throws IndexOutOfBoundsException if the {@code index}
9068      * argument is less than 1 or greater than {@link
9069      * CharSequence#length() seq.length()}.
9070      * @since  1.5
9071      */
codePointBefore(CharSequence seq, int index)9072     public static int codePointBefore(CharSequence seq, int index) {
9073         char c2 = seq.charAt(--index);
9074         if (isLowSurrogate(c2) && index > 0) {
9075             char c1 = seq.charAt(--index);
9076             if (isHighSurrogate(c1)) {
9077                 return toCodePoint(c1, c2);
9078             }
9079         }
9080         return c2;
9081     }
9082 
9083     /**
9084      * Returns the code point preceding the given index of the
9085      * {@code char} array. If the {@code char} value at
9086      * {@code (index - 1)} in the {@code char} array is in
9087      * the low-surrogate range, {@code (index - 2)} is not
9088      * negative, and the {@code char} value at {@code (index - 2)}
9089      * in the {@code char} array is in the
9090      * high-surrogate range, then the supplementary code point
9091      * corresponding to this surrogate pair is returned. Otherwise,
9092      * the {@code char} value at {@code (index - 1)} is
9093      * returned.
9094      *
9095      * @param a the {@code char} array
9096      * @param index the index following the code point that should be returned
9097      * @return the Unicode code point value before the given index.
9098      * @throws NullPointerException if {@code a} is null.
9099      * @throws IndexOutOfBoundsException if the {@code index}
9100      * argument is less than 1 or greater than the length of the
9101      * {@code char} array
9102      * @since  1.5
9103      */
codePointBefore(char[] a, int index)9104     public static int codePointBefore(char[] a, int index) {
9105         return codePointBeforeImpl(a, index, 0);
9106     }
9107 
9108     /**
9109      * Returns the code point preceding the given index of the
9110      * {@code char} array, where only array elements with
9111      * {@code index} greater than or equal to {@code start}
9112      * can be used. If the {@code char} value at {@code (index - 1)}
9113      * in the {@code char} array is in the
9114      * low-surrogate range, {@code (index - 2)} is not less than
9115      * {@code start}, and the {@code char} value at
9116      * {@code (index - 2)} in the {@code char} array is in
9117      * the high-surrogate range, then the supplementary code point
9118      * corresponding to this surrogate pair is returned. Otherwise,
9119      * the {@code char} value at {@code (index - 1)} is
9120      * returned.
9121      *
9122      * @param a the {@code char} array
9123      * @param index the index following the code point that should be returned
9124      * @param start the index of the first array element in the
9125      * {@code char} array
9126      * @return the Unicode code point value before the given index.
9127      * @throws NullPointerException if {@code a} is null.
9128      * @throws IndexOutOfBoundsException if the {@code index}
9129      * argument is not greater than the {@code start} argument or
9130      * is greater than the length of the {@code char} array, or
9131      * if the {@code start} argument is negative or not less than
9132      * the length of the {@code char} array.
9133      * @since  1.5
9134      */
codePointBefore(char[] a, int index, int start)9135     public static int codePointBefore(char[] a, int index, int start) {
9136         if (index <= start || start < 0 || start >= a.length) {
9137             throw new IndexOutOfBoundsException();
9138         }
9139         return codePointBeforeImpl(a, index, start);
9140     }
9141 
9142     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
codePointBeforeImpl(char[] a, int index, int start)9143     static int codePointBeforeImpl(char[] a, int index, int start) {
9144         char c2 = a[--index];
9145         if (isLowSurrogate(c2) && index > start) {
9146             char c1 = a[--index];
9147             if (isHighSurrogate(c1)) {
9148                 return toCodePoint(c1, c2);
9149             }
9150         }
9151         return c2;
9152     }
9153 
9154     /**
9155      * Returns the leading surrogate (a
9156      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
9157      * high surrogate code unit</a>) of the
9158      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9159      * surrogate pair</a>
9160      * representing the specified supplementary character (Unicode
9161      * code point) in the UTF-16 encoding.  If the specified character
9162      * is not a
9163      * <a href="Character.html#supplementary">supplementary character</a>,
9164      * an unspecified {@code char} is returned.
9165      *
9166      * <p>If
9167      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9168      * is {@code true}, then
9169      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
9170      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
9171      * are also always {@code true}.
9172      *
9173      * @param   codePoint a supplementary character (Unicode code point)
9174      * @return  the leading surrogate code unit used to represent the
9175      *          character in the UTF-16 encoding
9176      * @since   1.7
9177      */
highSurrogate(int codePoint)9178     public static char highSurrogate(int codePoint) {
9179         return (char) ((codePoint >>> 10)
9180             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
9181     }
9182 
9183     /**
9184      * Returns the trailing surrogate (a
9185      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
9186      * low surrogate code unit</a>) of the
9187      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
9188      * surrogate pair</a>
9189      * representing the specified supplementary character (Unicode
9190      * code point) in the UTF-16 encoding.  If the specified character
9191      * is not a
9192      * <a href="Character.html#supplementary">supplementary character</a>,
9193      * an unspecified {@code char} is returned.
9194      *
9195      * <p>If
9196      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
9197      * is {@code true}, then
9198      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
9199      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
9200      * are also always {@code true}.
9201      *
9202      * @param   codePoint a supplementary character (Unicode code point)
9203      * @return  the trailing surrogate code unit used to represent the
9204      *          character in the UTF-16 encoding
9205      * @since   1.7
9206      */
lowSurrogate(int codePoint)9207     public static char lowSurrogate(int codePoint) {
9208         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
9209     }
9210 
9211     /**
9212      * Converts the specified character (Unicode code point) to its
9213      * UTF-16 representation. If the specified code point is a BMP
9214      * (Basic Multilingual Plane or Plane 0) value, the same value is
9215      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
9216      * specified code point is a supplementary character, its
9217      * surrogate values are stored in {@code dst[dstIndex]}
9218      * (high-surrogate) and {@code dst[dstIndex+1]}
9219      * (low-surrogate), and 2 is returned.
9220      *
9221      * @param  codePoint the character (Unicode code point) to be converted.
9222      * @param  dst an array of {@code char} in which the
9223      * {@code codePoint}'s UTF-16 value is stored.
9224      * @param dstIndex the start index into the {@code dst}
9225      * array where the converted value is stored.
9226      * @return 1 if the code point is a BMP code point, 2 if the
9227      * code point is a supplementary code point.
9228      * @throws IllegalArgumentException if the specified
9229      * {@code codePoint} is not a valid Unicode code point.
9230      * @throws NullPointerException if the specified {@code dst} is null.
9231      * @throws IndexOutOfBoundsException if {@code dstIndex}
9232      * is negative or not less than {@code dst.length}, or if
9233      * {@code dst} at {@code dstIndex} doesn't have enough
9234      * array element(s) to store the resulting {@code char}
9235      * value(s). (If {@code dstIndex} is equal to
9236      * {@code dst.length-1} and the specified
9237      * {@code codePoint} is a supplementary character, the
9238      * high-surrogate value is not stored in
9239      * {@code dst[dstIndex]}.)
9240      * @since  1.5
9241      */
toChars(int codePoint, char[] dst, int dstIndex)9242     public static int toChars(int codePoint, char[] dst, int dstIndex) {
9243         if (isBmpCodePoint(codePoint)) {
9244             dst[dstIndex] = (char) codePoint;
9245             return 1;
9246         } else if (isValidCodePoint(codePoint)) {
9247             toSurrogates(codePoint, dst, dstIndex);
9248             return 2;
9249         } else {
9250             throw new IllegalArgumentException(
9251                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9252         }
9253     }
9254 
9255     /**
9256      * Converts the specified character (Unicode code point) to its
9257      * UTF-16 representation stored in a {@code char} array. If
9258      * the specified code point is a BMP (Basic Multilingual Plane or
9259      * Plane 0) value, the resulting {@code char} array has
9260      * the same value as {@code codePoint}. If the specified code
9261      * point is a supplementary code point, the resulting
9262      * {@code char} array has the corresponding surrogate pair.
9263      *
9264      * @param  codePoint a Unicode code point
9265      * @return a {@code char} array having
9266      *         {@code codePoint}'s UTF-16 representation.
9267      * @throws IllegalArgumentException if the specified
9268      * {@code codePoint} is not a valid Unicode code point.
9269      * @since  1.5
9270      */
toChars(int codePoint)9271     public static char[] toChars(int codePoint) {
9272         if (isBmpCodePoint(codePoint)) {
9273             return new char[] { (char) codePoint };
9274         } else if (isValidCodePoint(codePoint)) {
9275             char[] result = new char[2];
9276             toSurrogates(codePoint, result, 0);
9277             return result;
9278         } else {
9279             throw new IllegalArgumentException(
9280                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
9281         }
9282     }
9283 
toSurrogates(int codePoint, char[] dst, int index)9284     static void toSurrogates(int codePoint, char[] dst, int index) {
9285         // We write elements "backwards" to guarantee all-or-nothing
9286         dst[index+1] = lowSurrogate(codePoint);
9287         dst[index] = highSurrogate(codePoint);
9288     }
9289 
9290     /**
9291      * Returns the number of Unicode code points in the text range of
9292      * the specified char sequence. The text range begins at the
9293      * specified {@code beginIndex} and extends to the
9294      * {@code char} at index {@code endIndex - 1}. Thus the
9295      * length (in {@code char}s) of the text range is
9296      * {@code endIndex-beginIndex}. Unpaired surrogates within
9297      * the text range count as one code point each.
9298      *
9299      * @param seq the char sequence
9300      * @param beginIndex the index to the first {@code char} of
9301      * the text range.
9302      * @param endIndex the index after the last {@code char} of
9303      * the text range.
9304      * @return the number of Unicode code points in the specified text
9305      * range
9306      * @throws NullPointerException if {@code seq} is null.
9307      * @throws IndexOutOfBoundsException if the
9308      * {@code beginIndex} is negative, or {@code endIndex}
9309      * is larger than the length of the given sequence, or
9310      * {@code beginIndex} is larger than {@code endIndex}.
9311      * @since  1.5
9312      */
codePointCount(CharSequence seq, int beginIndex, int endIndex)9313     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
9314         int length = seq.length();
9315         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
9316             throw new IndexOutOfBoundsException();
9317         }
9318         int n = endIndex - beginIndex;
9319         for (int i = beginIndex; i < endIndex; ) {
9320             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
9321                 isLowSurrogate(seq.charAt(i))) {
9322                 n--;
9323                 i++;
9324             }
9325         }
9326         return n;
9327     }
9328 
9329     /**
9330      * Returns the number of Unicode code points in a subarray of the
9331      * {@code char} array argument. The {@code offset}
9332      * argument is the index of the first {@code char} of the
9333      * subarray and the {@code count} argument specifies the
9334      * length of the subarray in {@code char}s. Unpaired
9335      * surrogates within the subarray count as one code point each.
9336      *
9337      * @param a the {@code char} array
9338      * @param offset the index of the first {@code char} in the
9339      * given {@code char} array
9340      * @param count the length of the subarray in {@code char}s
9341      * @return the number of Unicode code points in the specified subarray
9342      * @throws NullPointerException if {@code a} is null.
9343      * @throws IndexOutOfBoundsException if {@code offset} or
9344      * {@code count} is negative, or if {@code offset +
9345      * count} is larger than the length of the given array.
9346      * @since  1.5
9347      */
codePointCount(char[] a, int offset, int count)9348     public static int codePointCount(char[] a, int offset, int count) {
9349         if (count > a.length - offset || offset < 0 || count < 0) {
9350             throw new IndexOutOfBoundsException();
9351         }
9352         return codePointCountImpl(a, offset, count);
9353     }
9354 
codePointCountImpl(char[] a, int offset, int count)9355     static int codePointCountImpl(char[] a, int offset, int count) {
9356         int endIndex = offset + count;
9357         int n = count;
9358         for (int i = offset; i < endIndex; ) {
9359             if (isHighSurrogate(a[i++]) && i < endIndex &&
9360                 isLowSurrogate(a[i])) {
9361                 n--;
9362                 i++;
9363             }
9364         }
9365         return n;
9366     }
9367 
9368     /**
9369      * Returns the index within the given char sequence that is offset
9370      * from the given {@code index} by {@code codePointOffset}
9371      * code points. Unpaired surrogates within the text range given by
9372      * {@code index} and {@code codePointOffset} count as
9373      * one code point each.
9374      *
9375      * @param seq the char sequence
9376      * @param index the index to be offset
9377      * @param codePointOffset the offset in code points
9378      * @return the index within the char sequence
9379      * @throws NullPointerException if {@code seq} is null.
9380      * @throws IndexOutOfBoundsException if {@code index}
9381      *   is negative or larger than the length of the char sequence,
9382      *   or if {@code codePointOffset} is positive and the
9383      *   subsequence starting with {@code index} has fewer than
9384      *   {@code codePointOffset} code points, or if
9385      *   {@code codePointOffset} is negative and the subsequence
9386      *   before {@code index} has fewer than the absolute value
9387      *   of {@code codePointOffset} code points.
9388      * @since 1.5
9389      */
offsetByCodePoints(CharSequence seq, int index, int codePointOffset)9390     public static int offsetByCodePoints(CharSequence seq, int index,
9391                                          int codePointOffset) {
9392         int length = seq.length();
9393         if (index < 0 || index > length) {
9394             throw new IndexOutOfBoundsException();
9395         }
9396 
9397         int x = index;
9398         if (codePointOffset >= 0) {
9399             int i;
9400             for (i = 0; x < length && i < codePointOffset; i++) {
9401                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
9402                     isLowSurrogate(seq.charAt(x))) {
9403                     x++;
9404                 }
9405             }
9406             if (i < codePointOffset) {
9407                 throw new IndexOutOfBoundsException();
9408             }
9409         } else {
9410             int i;
9411             for (i = codePointOffset; x > 0 && i < 0; i++) {
9412                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
9413                     isHighSurrogate(seq.charAt(x-1))) {
9414                     x--;
9415                 }
9416             }
9417             if (i < 0) {
9418                 throw new IndexOutOfBoundsException();
9419             }
9420         }
9421         return x;
9422     }
9423 
9424     /**
9425      * Returns the index within the given {@code char} subarray
9426      * that is offset from the given {@code index} by
9427      * {@code codePointOffset} code points. The
9428      * {@code start} and {@code count} arguments specify a
9429      * subarray of the {@code char} array. Unpaired surrogates
9430      * within the text range given by {@code index} and
9431      * {@code codePointOffset} count as one code point each.
9432      *
9433      * @param a the {@code char} array
9434      * @param start the index of the first {@code char} of the
9435      * subarray
9436      * @param count the length of the subarray in {@code char}s
9437      * @param index the index to be offset
9438      * @param codePointOffset the offset in code points
9439      * @return the index within the subarray
9440      * @throws NullPointerException if {@code a} is null.
9441      * @throws IndexOutOfBoundsException
9442      *   if {@code start} or {@code count} is negative,
9443      *   or if {@code start + count} is larger than the length of
9444      *   the given array,
9445      *   or if {@code index} is less than {@code start} or
9446      *   larger than {@code start + count},
9447      *   or if {@code codePointOffset} is positive and the text range
9448      *   starting with {@code index} and ending with {@code start + count - 1}
9449      *   has fewer than {@code codePointOffset} code
9450      *   points,
9451      *   or if {@code codePointOffset} is negative and the text range
9452      *   starting with {@code start} and ending with {@code index - 1}
9453      *   has fewer than the absolute value of
9454      *   {@code codePointOffset} code points.
9455      * @since 1.5
9456      */
offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset)9457     public static int offsetByCodePoints(char[] a, int start, int count,
9458                                          int index, int codePointOffset) {
9459         if (count > a.length-start || start < 0 || count < 0
9460             || index < start || index > start+count) {
9461             throw new IndexOutOfBoundsException();
9462         }
9463         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
9464     }
9465 
offsetByCodePointsImpl(char[]a, int start, int count, int index, int codePointOffset)9466     static int offsetByCodePointsImpl(char[]a, int start, int count,
9467                                       int index, int codePointOffset) {
9468         int x = index;
9469         if (codePointOffset >= 0) {
9470             int limit = start + count;
9471             int i;
9472             for (i = 0; x < limit && i < codePointOffset; i++) {
9473                 if (isHighSurrogate(a[x++]) && x < limit &&
9474                     isLowSurrogate(a[x])) {
9475                     x++;
9476                 }
9477             }
9478             if (i < codePointOffset) {
9479                 throw new IndexOutOfBoundsException();
9480             }
9481         } else {
9482             int i;
9483             for (i = codePointOffset; x > start && i < 0; i++) {
9484                 if (isLowSurrogate(a[--x]) && x > start &&
9485                     isHighSurrogate(a[x-1])) {
9486                     x--;
9487                 }
9488             }
9489             if (i < 0) {
9490                 throw new IndexOutOfBoundsException();
9491             }
9492         }
9493         return x;
9494     }
9495 
9496     /**
9497      * Determines if the specified character is a lowercase character.
9498      * <p>
9499      * A character is lowercase if its general category type, provided
9500      * by {@code Character.getType(ch)}, is
9501      * {@code LOWERCASE_LETTER}, or it has contributory property
9502      * Other_Lowercase as defined by the Unicode Standard.
9503      * <p>
9504      * The following are examples of lowercase characters:
9505      * <blockquote><pre>
9506      * a b c d e f g h i j k l m n o p q r s t u v w x y z
9507      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
9508      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
9509      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
9510      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
9511      * </pre></blockquote>
9512      * <p> Many other Unicode characters are lowercase too.
9513      *
9514      * <p><b>Note:</b> This method cannot handle <a
9515      * href="#supplementary"> supplementary characters</a>. To support
9516      * all Unicode characters, including supplementary characters, use
9517      * the {@link #isLowerCase(int)} method.
9518      *
9519      * @param   ch   the character to be tested.
9520      * @return  {@code true} if the character is lowercase;
9521      *          {@code false} otherwise.
9522      * @see     Character#isLowerCase(char)
9523      * @see     Character#isTitleCase(char)
9524      * @see     Character#toLowerCase(char)
9525      * @see     Character#getType(char)
9526      */
isLowerCase(char ch)9527     public static boolean isLowerCase(char ch) {
9528         return isLowerCase((int)ch);
9529     }
9530 
9531     /**
9532      * Determines if the specified character (Unicode code point) is a
9533      * lowercase character.
9534      * <p>
9535      * A character is lowercase if its general category type, provided
9536      * by {@link Character#getType getType(codePoint)}, is
9537      * {@code LOWERCASE_LETTER}, or it has contributory property
9538      * Other_Lowercase as defined by the Unicode Standard.
9539      * <p>
9540      * The following are examples of lowercase characters:
9541      * <blockquote><pre>
9542      * a b c d e f g h i j k l m n o p q r s t u v w x y z
9543      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
9544      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
9545      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
9546      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
9547      * </pre></blockquote>
9548      * <p> Many other Unicode characters are lowercase too.
9549      *
9550      * @param   codePoint the character (Unicode code point) to be tested.
9551      * @return  {@code true} if the character is lowercase;
9552      *          {@code false} otherwise.
9553      * @see     Character#isLowerCase(int)
9554      * @see     Character#isTitleCase(int)
9555      * @see     Character#toLowerCase(int)
9556      * @see     Character#getType(int)
9557      * @since   1.5
9558      */
9559     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9560     /*
9561     public static boolean isLowerCase(int codePoint) {
9562         return CharacterData.of(codePoint).isLowerCase(codePoint);
9563     }
9564     */
isLowerCase(int codePoint)9565     public static boolean isLowerCase(int codePoint) {
9566         return isLowerCaseImpl(codePoint);
9567     }
9568 
// Native backend of isLowerCase(int). Per the enclosing Android-changed
// marker, this family of methods is reimplemented on top of ICU4C.
@FastNative
static native boolean isLowerCaseImpl(int codePoint);
9571     // END Android-changed: Reimplement methods natively on top of ICU4C.
9572 
9573     /**
9574      * Determines if the specified character is an uppercase character.
9575      * <p>
9576      * A character is uppercase if its general category type, provided by
9577      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
9578      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9579      * <p>
9580      * The following are examples of uppercase characters:
9581      * <blockquote><pre>
9582      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9583      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
9584      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
9585      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
9586      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
9587      * </pre></blockquote>
9588      * <p> Many other Unicode characters are uppercase too.
9589      *
9590      * <p><b>Note:</b> This method cannot handle <a
9591      * href="#supplementary"> supplementary characters</a>. To support
9592      * all Unicode characters, including supplementary characters, use
9593      * the {@link #isUpperCase(int)} method.
9594      *
9595      * @param   ch   the character to be tested.
9596      * @return  {@code true} if the character is uppercase;
9597      *          {@code false} otherwise.
9598      * @see     Character#isLowerCase(char)
9599      * @see     Character#isTitleCase(char)
9600      * @see     Character#toUpperCase(char)
9601      * @see     Character#getType(char)
9602      * @since   1.0
9603      */
isUpperCase(char ch)9604     public static boolean isUpperCase(char ch) {
9605         return isUpperCase((int)ch);
9606     }
9607 
9608     /**
9609      * Determines if the specified character (Unicode code point) is an uppercase character.
9610      * <p>
9611      * A character is uppercase if its general category type, provided by
9612      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
9613      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
9614      * <p>
9615      * The following are examples of uppercase characters:
9616      * <blockquote><pre>
9617      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
9618      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
9619      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
9620      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
9621      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
9622      * </pre></blockquote>
9623      * <p> Many other Unicode characters are uppercase too.
9624      *
9625      * @param   codePoint the character (Unicode code point) to be tested.
9626      * @return  {@code true} if the character is uppercase;
9627      *          {@code false} otherwise.
9628      * @see     Character#isLowerCase(int)
9629      * @see     Character#isTitleCase(int)
9630      * @see     Character#toUpperCase(int)
9631      * @see     Character#getType(int)
9632      * @since   1.5
9633      */
9634     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9635     /*
9636     public static boolean isUpperCase(int codePoint) {
9637         return CharacterData.of(codePoint).isUpperCase(codePoint);
9638     }
9639     */
isUpperCase(int codePoint)9640     public static boolean isUpperCase(int codePoint) {
9641         return isUpperCaseImpl(codePoint);
9642     }
9643 
// Native backend of isUpperCase(int). Per the enclosing Android-changed
// marker, this family of methods is reimplemented on top of ICU4C.
@FastNative
static native boolean isUpperCaseImpl(int codePoint);
9646     // END Android-changed: Reimplement methods natively on top of ICU4C.
9647 
9648     /**
9649      * Determines if the specified character is a titlecase character.
9650      * <p>
9651      * A character is a titlecase character if its general
9652      * category type, provided by {@code Character.getType(ch)},
9653      * is {@code TITLECASE_LETTER}.
9654      * <p>
9655      * Some characters look like pairs of Latin letters. For example, there
9656      * is an uppercase letter that looks like "LJ" and has a corresponding
9657      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
9658      * is the appropriate form to use when rendering a word in lowercase
9659      * with initial capitals, as for a book title.
9660      * <p>
9661      * These are some of the Unicode characters for which this method returns
9662      * {@code true}:
9663      * <ul>
9664      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
9665      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
9666      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
9667      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
9668      * </ul>
9669      * <p> Many other Unicode characters are titlecase too.
9670      *
9671      * <p><b>Note:</b> This method cannot handle <a
9672      * href="#supplementary"> supplementary characters</a>. To support
9673      * all Unicode characters, including supplementary characters, use
9674      * the {@link #isTitleCase(int)} method.
9675      *
9676      * @param   ch   the character to be tested.
9677      * @return  {@code true} if the character is titlecase;
9678      *          {@code false} otherwise.
9679      * @see     Character#isLowerCase(char)
9680      * @see     Character#isUpperCase(char)
9681      * @see     Character#toTitleCase(char)
9682      * @see     Character#getType(char)
9683      * @since   1.0.2
9684      */
isTitleCase(char ch)9685     public static boolean isTitleCase(char ch) {
9686         return isTitleCase((int)ch);
9687     }
9688 
9689     /**
9690      * Determines if the specified character (Unicode code point) is a titlecase character.
9691      * <p>
9692      * A character is a titlecase character if its general
9693      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9694      * is {@code TITLECASE_LETTER}.
9695      * <p>
9696      * Some characters look like pairs of Latin letters. For example, there
9697      * is an uppercase letter that looks like "LJ" and has a corresponding
9698      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
9699      * is the appropriate form to use when rendering a word in lowercase
9700      * with initial capitals, as for a book title.
9701      * <p>
9702      * These are some of the Unicode characters for which this method returns
9703      * {@code true}:
9704      * <ul>
9705      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
9706      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
9707      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
9708      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
9709      * </ul>
9710      * <p> Many other Unicode characters are titlecase too.
9711      *
9712      * @param   codePoint the character (Unicode code point) to be tested.
9713      * @return  {@code true} if the character is titlecase;
9714      *          {@code false} otherwise.
9715      * @see     Character#isLowerCase(int)
9716      * @see     Character#isUpperCase(int)
9717      * @see     Character#toTitleCase(int)
9718      * @see     Character#getType(int)
9719      * @since   1.5
9720      */
9721     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9722     /*
9723     public static boolean isTitleCase(int codePoint) {
9724         return getType(codePoint) == Character.TITLECASE_LETTER;
9725     }
9726     */
isTitleCase(int codePoint)9727     public static boolean isTitleCase(int codePoint) {
9728         return isTitleCaseImpl(codePoint);
9729     }
9730 
// Native backend of isTitleCase(int). Per the enclosing Android-changed
// marker, this family of methods is reimplemented on top of ICU4C.
@FastNative
static native boolean isTitleCaseImpl(int codePoint);
9733     // END Android-changed: Reimplement methods natively on top of ICU4C.
9734 
9735     /**
9736      * Determines if the specified character is a digit.
9737      * <p>
9738      * A character is a digit if its general category type, provided
9739      * by {@code Character.getType(ch)}, is
9740      * {@code DECIMAL_DIGIT_NUMBER}.
9741      * <p>
9742      * Some Unicode character ranges that contain digits:
9743      * <ul>
9744      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9745      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9746      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9747      *     Arabic-Indic digits
9748      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9749      *     Extended Arabic-Indic digits
9750      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9751      *     Devanagari digits
9752      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9753      *     Fullwidth digits
9754      * </ul>
9755      *
9756      * Many other character ranges contain digits as well.
9757      *
9758      * <p><b>Note:</b> This method cannot handle <a
9759      * href="#supplementary"> supplementary characters</a>. To support
9760      * all Unicode characters, including supplementary characters, use
9761      * the {@link #isDigit(int)} method.
9762      *
9763      * @param   ch   the character to be tested.
9764      * @return  {@code true} if the character is a digit;
9765      *          {@code false} otherwise.
9766      * @see     Character#digit(char, int)
9767      * @see     Character#forDigit(int, int)
9768      * @see     Character#getType(char)
9769      */
isDigit(char ch)9770     public static boolean isDigit(char ch) {
9771         return isDigit((int)ch);
9772     }
9773 
9774     /**
9775      * Determines if the specified character (Unicode code point) is a digit.
9776      * <p>
9777      * A character is a digit if its general category type, provided
9778      * by {@link Character#getType(int) getType(codePoint)}, is
9779      * {@code DECIMAL_DIGIT_NUMBER}.
9780      * <p>
9781      * Some Unicode character ranges that contain digits:
9782      * <ul>
9783      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
9784      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
9785      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
9786      *     Arabic-Indic digits
9787      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
9788      *     Extended Arabic-Indic digits
9789      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
9790      *     Devanagari digits
9791      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
9792      *     Fullwidth digits
9793      * </ul>
9794      *
9795      * Many other character ranges contain digits as well.
9796      *
9797      * @param   codePoint the character (Unicode code point) to be tested.
9798      * @return  {@code true} if the character is a digit;
9799      *          {@code false} otherwise.
9800      * @see     Character#forDigit(int, int)
9801      * @see     Character#getType(int)
9802      * @since   1.5
9803      */
9804     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9805     /*
9806     public static boolean isDigit(int codePoint) {
9807         return CharacterData.of(codePoint).isDigit(codePoint);
9808     }
9809     */
isDigit(int codePoint)9810     public static boolean isDigit(int codePoint) {
9811         return isDigitImpl(codePoint);
9812     }
9813 
// Native backend of isDigit(int). Per the enclosing Android-changed
// marker, this family of methods is reimplemented on top of ICU4C.
@FastNative
static native boolean isDigitImpl(int codePoint);
9816     // END Android-changed: Reimplement methods natively on top of ICU4C.
9817 
9818     /**
9819      * Determines if a character is defined in Unicode.
9820      * <p>
9821      * A character is defined if at least one of the following is true:
9822      * <ul>
9823      * <li>It has an entry in the UnicodeData file.
9824      * <li>It has a value in a range defined by the UnicodeData file.
9825      * </ul>
9826      *
9827      * <p><b>Note:</b> This method cannot handle <a
9828      * href="#supplementary"> supplementary characters</a>. To support
9829      * all Unicode characters, including supplementary characters, use
9830      * the {@link #isDefined(int)} method.
9831      *
9832      * @param   ch   the character to be tested
9833      * @return  {@code true} if the character has a defined meaning
9834      *          in Unicode; {@code false} otherwise.
9835      * @see     Character#isDigit(char)
9836      * @see     Character#isLetter(char)
9837      * @see     Character#isLetterOrDigit(char)
9838      * @see     Character#isLowerCase(char)
9839      * @see     Character#isTitleCase(char)
9840      * @see     Character#isUpperCase(char)
9841      * @since   1.0.2
9842      */
isDefined(char ch)9843     public static boolean isDefined(char ch) {
9844         return isDefined((int)ch);
9845     }
9846 
9847     /**
9848      * Determines if a character (Unicode code point) is defined in Unicode.
9849      * <p>
9850      * A character is defined if at least one of the following is true:
9851      * <ul>
9852      * <li>It has an entry in the UnicodeData file.
9853      * <li>It has a value in a range defined by the UnicodeData file.
9854      * </ul>
9855      *
9856      * @param   codePoint the character (Unicode code point) to be tested.
9857      * @return  {@code true} if the character has a defined meaning
9858      *          in Unicode; {@code false} otherwise.
9859      * @see     Character#isDigit(int)
9860      * @see     Character#isLetter(int)
9861      * @see     Character#isLetterOrDigit(int)
9862      * @see     Character#isLowerCase(int)
9863      * @see     Character#isTitleCase(int)
9864      * @see     Character#isUpperCase(int)
9865      * @since   1.5
9866      */
9867     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9868     /*
9869     public static boolean isDefined(int codePoint) {
9870         return getType(codePoint) != Character.UNASSIGNED;
9871     }
9872     */
isDefined(int codePoint)9873     public static boolean isDefined(int codePoint) {
9874         return isDefinedImpl(codePoint);
9875     }
9876 
// Native backend of isDefined(int). Per the enclosing Android-changed
// marker, this family of methods is reimplemented on top of ICU4C.
@FastNative
static native boolean isDefinedImpl(int codePoint);
9879     // END Android-changed: Reimplement methods natively on top of ICU4C.
9880 
9881     /**
9882      * Determines if the specified character is a letter.
9883      * <p>
9884      * A character is considered to be a letter if its general
9885      * category type, provided by {@code Character.getType(ch)},
9886      * is any of the following:
9887      * <ul>
9888      * <li> {@code UPPERCASE_LETTER}
9889      * <li> {@code LOWERCASE_LETTER}
9890      * <li> {@code TITLECASE_LETTER}
9891      * <li> {@code MODIFIER_LETTER}
9892      * <li> {@code OTHER_LETTER}
9893      * </ul>
9894      *
9895      * Not all letters have case. Many characters are
9896      * letters but are neither uppercase nor lowercase nor titlecase.
9897      *
9898      * <p><b>Note:</b> This method cannot handle <a
9899      * href="#supplementary"> supplementary characters</a>. To support
9900      * all Unicode characters, including supplementary characters, use
9901      * the {@link #isLetter(int)} method.
9902      *
9903      * @param   ch   the character to be tested.
9904      * @return  {@code true} if the character is a letter;
9905      *          {@code false} otherwise.
9906      * @see     Character#isDigit(char)
9907      * @see     Character#isJavaIdentifierStart(char)
9908      * @see     Character#isJavaLetter(char)
9909      * @see     Character#isJavaLetterOrDigit(char)
9910      * @see     Character#isLetterOrDigit(char)
9911      * @see     Character#isLowerCase(char)
9912      * @see     Character#isTitleCase(char)
9913      * @see     Character#isUnicodeIdentifierStart(char)
9914      * @see     Character#isUpperCase(char)
9915      */
isLetter(char ch)9916     public static boolean isLetter(char ch) {
9917         return isLetter((int)ch);
9918     }
9919 
9920     /**
9921      * Determines if the specified character (Unicode code point) is a letter.
9922      * <p>
9923      * A character is considered to be a letter if its general
9924      * category type, provided by {@link Character#getType(int) getType(codePoint)},
9925      * is any of the following:
9926      * <ul>
9927      * <li> {@code UPPERCASE_LETTER}
9928      * <li> {@code LOWERCASE_LETTER}
9929      * <li> {@code TITLECASE_LETTER}
9930      * <li> {@code MODIFIER_LETTER}
9931      * <li> {@code OTHER_LETTER}
9932      * </ul>
9933      *
9934      * Not all letters have case. Many characters are
9935      * letters but are neither uppercase nor lowercase nor titlecase.
9936      *
9937      * @param   codePoint the character (Unicode code point) to be tested.
9938      * @return  {@code true} if the character is a letter;
9939      *          {@code false} otherwise.
9940      * @see     Character#isDigit(int)
9941      * @see     Character#isJavaIdentifierStart(int)
9942      * @see     Character#isLetterOrDigit(int)
9943      * @see     Character#isLowerCase(int)
9944      * @see     Character#isTitleCase(int)
9945      * @see     Character#isUnicodeIdentifierStart(int)
9946      * @see     Character#isUpperCase(int)
9947      * @since   1.5
9948      */
9949     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
9950     /*
9951     public static boolean isLetter(int codePoint) {
9952         return ((((1 << Character.UPPERCASE_LETTER) |
9953             (1 << Character.LOWERCASE_LETTER) |
9954             (1 << Character.TITLECASE_LETTER) |
9955             (1 << Character.MODIFIER_LETTER) |
9956             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
9957             != 0;
9958     }
9959     */
isLetter(int codePoint)9960     public static boolean isLetter(int codePoint) {
9961         return isLetterImpl(codePoint);
9962     }
9963 
// Native backend of isLetter(int). Per the enclosing Android-changed
// marker, this family of methods is reimplemented on top of ICU4C.
@FastNative
static native boolean isLetterImpl(int codePoint);
9966     // END Android-changed: Reimplement methods natively on top of ICU4C.
9967 
9968     /**
9969      * Determines if the specified character is a letter or digit.
9970      * <p>
9971      * A character is considered to be a letter or digit if either
9972      * {@code Character.isLetter(char ch)} or
9973      * {@code Character.isDigit(char ch)} returns
9974      * {@code true} for the character.
9975      *
9976      * <p><b>Note:</b> This method cannot handle <a
9977      * href="#supplementary"> supplementary characters</a>. To support
9978      * all Unicode characters, including supplementary characters, use
9979      * the {@link #isLetterOrDigit(int)} method.
9980      *
9981      * @param   ch   the character to be tested.
9982      * @return  {@code true} if the character is a letter or digit;
9983      *          {@code false} otherwise.
9984      * @see     Character#isDigit(char)
9985      * @see     Character#isJavaIdentifierPart(char)
9986      * @see     Character#isJavaLetter(char)
9987      * @see     Character#isJavaLetterOrDigit(char)
9988      * @see     Character#isLetter(char)
9989      * @see     Character#isUnicodeIdentifierPart(char)
9990      * @since   1.0.2
9991      */
isLetterOrDigit(char ch)9992     public static boolean isLetterOrDigit(char ch) {
9993         return isLetterOrDigit((int)ch);
9994     }
9995 
9996     /**
9997      * Determines if the specified character (Unicode code point) is a letter or digit.
9998      * <p>
9999      * A character is considered to be a letter or digit if either
10000      * {@link #isLetter(int) isLetter(codePoint)} or
10001      * {@link #isDigit(int) isDigit(codePoint)} returns
10002      * {@code true} for the character.
10003      *
10004      * @param   codePoint the character (Unicode code point) to be tested.
10005      * @return  {@code true} if the character is a letter or digit;
10006      *          {@code false} otherwise.
10007      * @see     Character#isDigit(int)
10008      * @see     Character#isJavaIdentifierPart(int)
10009      * @see     Character#isLetter(int)
10010      * @see     Character#isUnicodeIdentifierPart(int)
10011      * @since   1.5
10012      */
10013     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10014     /*
10015     public static boolean isLetterOrDigit(int codePoint) {
10016         return ((((1 << Character.UPPERCASE_LETTER) |
10017             (1 << Character.LOWERCASE_LETTER) |
10018             (1 << Character.TITLECASE_LETTER) |
10019             (1 << Character.MODIFIER_LETTER) |
10020             (1 << Character.OTHER_LETTER) |
10021             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
10022             != 0;
10023     }
10024     */
isLetterOrDigit(int codePoint)10025     public static boolean isLetterOrDigit(int codePoint) {
10026         return isLetterOrDigitImpl(codePoint);
10027     }
10028 
// Native backend of isLetterOrDigit(int). Per the enclosing Android-changed
// marker, this family of methods is reimplemented on top of ICU4C.
@FastNative
static native boolean isLetterOrDigitImpl(int codePoint);
10031     // END Android-changed: Reimplement methods natively on top of ICU4C.
10032 
10033     /**
10034      * Determines if the specified character is permissible as the first
10035      * character in a Java identifier.
10036      * <p>
10037      * A character may start a Java identifier if and only if
10038      * one of the following conditions is true:
10039      * <ul>
10040      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10041      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10042      * <li> {@code ch} is a currency symbol (such as {@code '$'})
10043      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10044      * </ul>
10045      *
10046      * @param   ch the character to be tested.
10047      * @return  {@code true} if the character may start a Java
10048      *          identifier; {@code false} otherwise.
10049      * @see     Character#isJavaLetterOrDigit(char)
10050      * @see     Character#isJavaIdentifierStart(char)
10051      * @see     Character#isJavaIdentifierPart(char)
10052      * @see     Character#isLetter(char)
10053      * @see     Character#isLetterOrDigit(char)
10054      * @see     Character#isUnicodeIdentifierStart(char)
10055      * @since   1.0.2
10056      * @deprecated Replaced by isJavaIdentifierStart(char).
10057      */
10058     @Deprecated(since="1.1")
isJavaLetter(char ch)10059     public static boolean isJavaLetter(char ch) {
10060         return isJavaIdentifierStart(ch);
10061     }
10062 
10063     /**
10064      * Determines if the specified character may be part of a Java
10065      * identifier as other than the first character.
10066      * <p>
10067      * A character may be part of a Java identifier if and only if one
10068      * of the following conditions is true:
10069      * <ul>
10070      * <li>  it is a letter
10071      * <li>  it is a currency symbol (such as {@code '$'})
10072      * <li>  it is a connecting punctuation character (such as {@code '_'})
10073      * <li>  it is a digit
10074      * <li>  it is a numeric letter (such as a Roman numeral character)
10075      * <li>  it is a combining mark
10076      * <li>  it is a non-spacing mark
10077      * <li> {@code isIdentifierIgnorable} returns
10078      * {@code true} for the character.
10079      * </ul>
10080      *
10081      * @param   ch the character to be tested.
10082      * @return  {@code true} if the character may be part of a
10083      *          Java identifier; {@code false} otherwise.
10084      * @see     Character#isJavaLetter(char)
10085      * @see     Character#isJavaIdentifierStart(char)
10086      * @see     Character#isJavaIdentifierPart(char)
10087      * @see     Character#isLetter(char)
10088      * @see     Character#isLetterOrDigit(char)
10089      * @see     Character#isUnicodeIdentifierPart(char)
10090      * @see     Character#isIdentifierIgnorable(char)
10091      * @since   1.0.2
10092      * @deprecated Replaced by isJavaIdentifierPart(char).
10093      */
10094     @Deprecated(since="1.1")
isJavaLetterOrDigit(char ch)10095     public static boolean isJavaLetterOrDigit(char ch) {
10096         return isJavaIdentifierPart(ch);
10097     }
10098 
10099     /**
10100      * Determines if the specified character (Unicode code point) is alphabetic.
10101      * <p>
10102      * A character is considered to be alphabetic if its general category type,
10103      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
10104      * the following:
10105      * <ul>
10106      * <li> {@code UPPERCASE_LETTER}
10107      * <li> {@code LOWERCASE_LETTER}
10108      * <li> {@code TITLECASE_LETTER}
10109      * <li> {@code MODIFIER_LETTER}
10110      * <li> {@code OTHER_LETTER}
10111      * <li> {@code LETTER_NUMBER}
10112      * </ul>
10113      * or it has contributory property Other_Alphabetic as defined by the
10114      * Unicode Standard.
10115      *
10116      * @param   codePoint the character (Unicode code point) to be tested.
10117      * @return  {@code true} if the character is a Unicode alphabet
10118      *          character, {@code false} otherwise.
10119      * @since   1.7
10120      */
10121     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10122     /*
10123     public static boolean isAlphabetic(int codePoint) {
10124         return (((((1 << Character.UPPERCASE_LETTER) |
10125             (1 << Character.LOWERCASE_LETTER) |
10126             (1 << Character.TITLECASE_LETTER) |
10127             (1 << Character.MODIFIER_LETTER) |
10128             (1 << Character.OTHER_LETTER) |
10129             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
10130             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
10131     }
10132     */
isAlphabetic(int codePoint)10133     public static boolean isAlphabetic(int codePoint) {
10134         return isAlphabeticImpl(codePoint);
10135     }
10136 
10137     @FastNative
isAlphabeticImpl(int codePoint)10138     static native boolean isAlphabeticImpl(int codePoint);
10139     // END Android-changed: Reimplement methods natively on top of ICU4C.
10140 
10141     /**
10142      * Determines if the specified character (Unicode code point) is a CJKV
10143      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
10144      * the Unicode Standard.
10145      *
10146      * @param   codePoint the character (Unicode code point) to be tested.
10147      * @return  {@code true} if the character is a Unicode ideograph
10148      *          character, {@code false} otherwise.
10149      * @since   1.7
10150      */
10151     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10152     /*
10153     public static boolean isIdeographic(int codePoint) {
10154         return CharacterData.of(codePoint).isIdeographic(codePoint);
10155     }
10156     */
isIdeographic(int codePoint)10157     public static boolean isIdeographic(int codePoint) {
10158         return isIdeographicImpl(codePoint);
10159     }
10160     @FastNative
isIdeographicImpl(int codePoint)10161     static native boolean isIdeographicImpl(int codePoint);
10162     // END Android-changed: Reimplement methods natively on top of ICU4C.
10163 
10164     // Android-changed: Removed @see tag (target does not exist on Android):
10165     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10166     /**
10167      * Determines if the specified character is
10168      * permissible as the first character in a Java identifier.
10169      * <p>
10170      * A character may start a Java identifier if and only if
10171      * one of the following conditions is true:
10172      * <ul>
10173      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10174      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
10175      * <li> {@code ch} is a currency symbol (such as {@code '$'})
10176      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
10177      * </ul>
10178      *
10179      * <p><b>Note:</b> This method cannot handle <a
10180      * href="#supplementary"> supplementary characters</a>. To support
10181      * all Unicode characters, including supplementary characters, use
10182      * the {@link #isJavaIdentifierStart(int)} method.
10183      *
10184      * @param   ch the character to be tested.
10185      * @return  {@code true} if the character may start a Java identifier;
10186      *          {@code false} otherwise.
10187      * @see     Character#isJavaIdentifierPart(char)
10188      * @see     Character#isLetter(char)
10189      * @see     Character#isUnicodeIdentifierStart(char)
10190      * @since   1.1
10191      */
isJavaIdentifierStart(char ch)10192     public static boolean isJavaIdentifierStart(char ch) {
10193         return isJavaIdentifierStart((int)ch);
10194     }
10195 
10196     // Android-changed: Removed @see tag (target does not exist on Android):
10197     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10198     /**
10199      * Determines if the character (Unicode code point) is
10200      * permissible as the first character in a Java identifier.
10201      * <p>
10202      * A character may start a Java identifier if and only if
10203      * one of the following conditions is true:
10204      * <ul>
10205      * <li> {@link #isLetter(int) isLetter(codePoint)}
10206      *      returns {@code true}
10207      * <li> {@link #getType(int) getType(codePoint)}
10208      *      returns {@code LETTER_NUMBER}
10209      * <li> the referenced character is a currency symbol (such as {@code '$'})
10210      * <li> the referenced character is a connecting punctuation character
10211      *      (such as {@code '_'}).
10212      * </ul>
10213      *
10214      * @param   codePoint the character (Unicode code point) to be tested.
10215      * @return  {@code true} if the character may start a Java identifier;
10216      *          {@code false} otherwise.
10217      * @see     Character#isJavaIdentifierPart(int)
10218      * @see     Character#isLetter(int)
10219      * @see     Character#isUnicodeIdentifierStart(int)
10220      * @since   1.5
10221      */
10222     // BEGIN Android-changed: Use ICU.
10223     /*
10224     public static boolean isJavaIdentifierStart(int codePoint) {
10225         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
10226     }
10227     */
isJavaIdentifierStart(int codePoint)10228     public static boolean isJavaIdentifierStart(int codePoint) {
10229         // Use precomputed bitmasks to optimize the ASCII range.
10230         if (codePoint < 64) {
10231             return (codePoint == '$'); // There's only one character in this range.
10232         } else if (codePoint < 128) {
10233             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
10234         }
10235         return ((1 << getType(codePoint))
10236                 & ((1 << UPPERCASE_LETTER)
10237                    | (1 << LOWERCASE_LETTER)
10238                    | (1  << TITLECASE_LETTER)
10239                    | (1  << MODIFIER_LETTER)
10240                    | (1  << OTHER_LETTER)
10241                    | (1  << CURRENCY_SYMBOL)
10242                    | (1  << CONNECTOR_PUNCTUATION)
10243                    | (1  << LETTER_NUMBER))) != 0;
10244     }
10245     // END Android-changed: Use ICU.
10246 
10247     // Android-changed: Removed @see tag (target does not exist on Android):
10248     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10249     /**
10250      * Determines if the specified character may be part of a Java
10251      * identifier as other than the first character.
10252      * <p>
10253      * A character may be part of a Java identifier if any of the following
10254      * conditions are true:
10255      * <ul>
10256      * <li>  it is a letter
10257      * <li>  it is a currency symbol (such as {@code '$'})
10258      * <li>  it is a connecting punctuation character (such as {@code '_'})
10259      * <li>  it is a digit
10260      * <li>  it is a numeric letter (such as a Roman numeral character)
10261      * <li>  it is a combining mark
10262      * <li>  it is a non-spacing mark
10263      * <li> {@code isIdentifierIgnorable} returns
10264      * {@code true} for the character
10265      * </ul>
10266      *
10267      * <p><b>Note:</b> This method cannot handle <a
10268      * href="#supplementary"> supplementary characters</a>. To support
10269      * all Unicode characters, including supplementary characters, use
10270      * the {@link #isJavaIdentifierPart(int)} method.
10271      *
10272      * @param   ch      the character to be tested.
10273      * @return {@code true} if the character may be part of a
10274      *          Java identifier; {@code false} otherwise.
10275      * @see     Character#isIdentifierIgnorable(char)
10276      * @see     Character#isJavaIdentifierStart(char)
10277      * @see     Character#isLetterOrDigit(char)
10278      * @see     Character#isUnicodeIdentifierPart(char)
10279      * @since   1.1
10280      */
isJavaIdentifierPart(char ch)10281     public static boolean isJavaIdentifierPart(char ch) {
10282         return isJavaIdentifierPart((int)ch);
10283     }
10284 
10285     // Android-changed: Removed @see tag (target does not exist on Android):
10286     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
10287     /**
10288      * Determines if the character (Unicode code point) may be part of a Java
10289      * identifier as other than the first character.
10290      * <p>
10291      * A character may be part of a Java identifier if any of the following
10292      * conditions are true:
10293      * <ul>
10294      * <li>  it is a letter
10295      * <li>  it is a currency symbol (such as {@code '$'})
10296      * <li>  it is a connecting punctuation character (such as {@code '_'})
10297      * <li>  it is a digit
10298      * <li>  it is a numeric letter (such as a Roman numeral character)
10299      * <li>  it is a combining mark
10300      * <li>  it is a non-spacing mark
10301      * <li> {@link #isIdentifierIgnorable(int)
10302      * isIdentifierIgnorable(codePoint)} returns {@code true} for
10303      * the code point
10304      * </ul>
10305      *
10306      * @param   codePoint the character (Unicode code point) to be tested.
10307      * @return {@code true} if the character may be part of a
10308      *          Java identifier; {@code false} otherwise.
10309      * @see     Character#isIdentifierIgnorable(int)
10310      * @see     Character#isJavaIdentifierStart(int)
10311      * @see     Character#isLetterOrDigit(int)
10312      * @see     Character#isUnicodeIdentifierPart(int)
10313      * @since   1.5
10314      */
10315     // BEGIN Android-changed: Use ICU.
10316     /*
10317     public static boolean isJavaIdentifierPart(int codePoint) {
10318         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
10319     }
10320     */
isJavaIdentifierPart(int codePoint)10321     public static boolean isJavaIdentifierPart(int codePoint) {
10322         // Use precomputed bitmasks to optimize the ASCII range.
10323         if (codePoint < 64) {
10324             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
10325         } else if (codePoint < 128) {
10326             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
10327         }
10328         return ((1 << getType(codePoint))
10329                 & ((1 << UPPERCASE_LETTER)
10330                    | (1 << LOWERCASE_LETTER)
10331                    | (1 << TITLECASE_LETTER)
10332                    | (1 << MODIFIER_LETTER)
10333                    | (1 << OTHER_LETTER)
10334                    | (1 << CURRENCY_SYMBOL)
10335                    | (1 << CONNECTOR_PUNCTUATION)
10336                    | (1 << DECIMAL_DIGIT_NUMBER)
10337                    | (1 << LETTER_NUMBER)
10338                    | (1 << FORMAT)
10339                    | (1 << COMBINING_SPACING_MARK)
10340                    | (1 << NON_SPACING_MARK))) != 0
10341                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
10342                 || (codePoint >= 0x7f && codePoint <= 0x9f);
10343     }
10344     // END Android-changed: Use ICU.
10345 
10346     /**
10347      * Determines if the specified character is permissible as the
10348      * first character in a Unicode identifier.
10349      * <p>
10350      * A character may start a Unicode identifier if and only if
10351      * one of the following conditions is true:
10352      * <ul>
10353      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
10354      * <li> {@link #getType(char) getType(ch)} returns
10355      *      {@code LETTER_NUMBER}.
10356      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10357      *      {@code Other_ID_Start}</a> character.
10358      * </ul>
10359      * <p>
10360      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10361      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10362      * with the following profile of UAX31:
10363      * <pre>
10364      * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
10365      * </pre>
10366      * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
10367      * compatibility.
10368      *
10369      * <p><b>Note:</b> This method cannot handle <a
10370      * href="#supplementary"> supplementary characters</a>. To support
10371      * all Unicode characters, including supplementary characters, use
10372      * the {@link #isUnicodeIdentifierStart(int)} method.
10373      *
10374      * @param   ch      the character to be tested.
10375      * @return  {@code true} if the character may start a Unicode
10376      *          identifier; {@code false} otherwise.
10377      * @see     Character#isJavaIdentifierStart(char)
10378      * @see     Character#isLetter(char)
10379      * @see     Character#isUnicodeIdentifierPart(char)
10380      * @since   1.1
10381      */
isUnicodeIdentifierStart(char ch)10382     public static boolean isUnicodeIdentifierStart(char ch) {
10383         return isUnicodeIdentifierStart((int)ch);
10384     }
10385 
10386     /**
10387      * Determines if the specified character (Unicode code point) is permissible as the
10388      * first character in a Unicode identifier.
10389      * <p>
10390      * A character may start a Unicode identifier if and only if
10391      * one of the following conditions is true:
10392      * <ul>
10393      * <li> {@link #isLetter(int) isLetter(codePoint)}
10394      *      returns {@code true}
10395      * <li> {@link #getType(int) getType(codePoint)}
10396      *      returns {@code LETTER_NUMBER}.
10397      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10398      *      {@code Other_ID_Start}</a> character.
10399      * </ul>
10400      * <p>
10401      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10402      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10403      * with the following profile of UAX31:
10404      * <pre>
10405      * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
10406      * </pre>
10407      * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
10408      * compatibility.
10409      *
10410      * @param   codePoint the character (Unicode code point) to be tested.
10411      * @return  {@code true} if the character may start a Unicode
10412      *          identifier; {@code false} otherwise.
10413      * @see     Character#isJavaIdentifierStart(int)
10414      * @see     Character#isLetter(int)
10415      * @see     Character#isUnicodeIdentifierPart(int)
10416      * @since   1.5
10417      */
10418     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10419     /*
10420     public static boolean isUnicodeIdentifierStart(int codePoint) {
10421         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
10422     }
10423     */
isUnicodeIdentifierStart(int codePoint)10424     public static boolean isUnicodeIdentifierStart(int codePoint) {
10425         return isUnicodeIdentifierStartImpl(codePoint);
10426     }
10427 
10428     @FastNative
isUnicodeIdentifierStartImpl(int codePoint)10429     static native boolean isUnicodeIdentifierStartImpl(int codePoint);
10430     // END Android-changed: Reimplement methods natively on top of ICU4C.
10431 
10432     /**
10433      * Determines if the specified character may be part of a Unicode
10434      * identifier as other than the first character.
10435      * <p>
10436      * A character may be part of a Unicode identifier if and only if
10437      * one of the following statements is true:
10438      * <ul>
10439      * <li>  it is a letter
10440      * <li>  it is a connecting punctuation character (such as {@code '_'})
10441      * <li>  it is a digit
10442      * <li>  it is a numeric letter (such as a Roman numeral character)
10443      * <li>  it is a combining mark
10444      * <li>  it is a non-spacing mark
10445      * <li> {@code isIdentifierIgnorable} returns
10446      * {@code true} for this character.
10447      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10448      *      {@code Other_ID_Start}</a> character.
10449      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10450      *      {@code Other_ID_Continue}</a> character.
10451      * </ul>
10452      * <p>
10453      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10454      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10455      * with the following profile of UAX31:
10456      * <pre>
10457      * Continue := Start + ID_Continue + ignorable
10458      * Medial := empty
10459      * ignorable := isIdentifierIgnorable(char) returns true for the character
10460      * </pre>
10461      * {@code ignorable} is added to {@code Continue} for backward
10462      * compatibility.
10463      *
10464      * <p><b>Note:</b> This method cannot handle <a
10465      * href="#supplementary"> supplementary characters</a>. To support
10466      * all Unicode characters, including supplementary characters, use
10467      * the {@link #isUnicodeIdentifierPart(int)} method.
10468      *
10469      * @param   ch      the character to be tested.
10470      * @return  {@code true} if the character may be part of a
10471      *          Unicode identifier; {@code false} otherwise.
10472      * @see     Character#isIdentifierIgnorable(char)
10473      * @see     Character#isJavaIdentifierPart(char)
10474      * @see     Character#isLetterOrDigit(char)
10475      * @see     Character#isUnicodeIdentifierStart(char)
10476      * @since   1.1
10477      */
isUnicodeIdentifierPart(char ch)10478     public static boolean isUnicodeIdentifierPart(char ch) {
10479         return isUnicodeIdentifierPart((int)ch);
10480     }
10481 
10482     /**
10483      * Determines if the specified character (Unicode code point) may be part of a Unicode
10484      * identifier as other than the first character.
10485      * <p>
10486      * A character may be part of a Unicode identifier if and only if
10487      * one of the following statements is true:
10488      * <ul>
10489      * <li>  it is a letter
10490      * <li>  it is a connecting punctuation character (such as {@code '_'})
10491      * <li>  it is a digit
10492      * <li>  it is a numeric letter (such as a Roman numeral character)
10493      * <li>  it is a combining mark
10494      * <li>  it is a non-spacing mark
10495      * <li> {@code isIdentifierIgnorable} returns
10496      * {@code true} for this character.
10497      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
10498      *      {@code Other_ID_Start}</a> character.
10499      * <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
10500      *      {@code Other_ID_Continue}</a> character.
10501      * </ul>
10502      * <p>
10503      * This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
10504      * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
10505      * with the following profile of UAX31:
10506      * <pre>
10507      * Continue := Start + ID_Continue + ignorable
10508      * Medial := empty
10509      * ignorable := isIdentifierIgnorable(int) returns true for the character
10510      * </pre>
10511      * {@code ignorable} is added to {@code Continue} for backward
10512      * compatibility.
10513      *
10514      * @param   codePoint the character (Unicode code point) to be tested.
10515      * @return  {@code true} if the character may be part of a
10516      *          Unicode identifier; {@code false} otherwise.
10517      * @see     Character#isIdentifierIgnorable(int)
10518      * @see     Character#isJavaIdentifierPart(int)
10519      * @see     Character#isLetterOrDigit(int)
10520      * @see     Character#isUnicodeIdentifierStart(int)
10521      * @since   1.5
10522      */
10523     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10524     /*
10525     public static boolean isUnicodeIdentifierPart(int codePoint) {
10526         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
10527     }
10528     */
isUnicodeIdentifierPart(int codePoint)10529     public static boolean isUnicodeIdentifierPart(int codePoint) {
10530         return isUnicodeIdentifierPartImpl(codePoint);
10531     }
10532 
10533     @FastNative
isUnicodeIdentifierPartImpl(int codePoint)10534     static native boolean isUnicodeIdentifierPartImpl(int codePoint);
10535     // END Android-changed: Reimplement methods natively on top of ICU4C.
10536 
10537     /**
10538      * Determines if the specified character should be regarded as
10539      * an ignorable character in a Java identifier or a Unicode identifier.
10540      * <p>
10541      * The following Unicode characters are ignorable in a Java identifier
10542      * or a Unicode identifier:
10543      * <ul>
10544      * <li>ISO control characters that are not whitespace
10545      * <ul>
10546      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
10547      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
10548      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
10549      * </ul>
10550      *
10551      * <li>all characters that have the {@code FORMAT} general
10552      * category value
10553      * </ul>
10554      *
10555      * <p><b>Note:</b> This method cannot handle <a
10556      * href="#supplementary"> supplementary characters</a>. To support
10557      * all Unicode characters, including supplementary characters, use
10558      * the {@link #isIdentifierIgnorable(int)} method.
10559      *
10560      * @param   ch      the character to be tested.
10561      * @return  {@code true} if the character is an ignorable control
10562      *          character that may be part of a Java or Unicode identifier;
10563      *           {@code false} otherwise.
10564      * @see     Character#isJavaIdentifierPart(char)
10565      * @see     Character#isUnicodeIdentifierPart(char)
10566      * @since   1.1
10567      */
isIdentifierIgnorable(char ch)10568     public static boolean isIdentifierIgnorable(char ch) {
10569         return isIdentifierIgnorable((int)ch);
10570     }
10571 
10572     /**
10573      * Determines if the specified character (Unicode code point) should be regarded as
10574      * an ignorable character in a Java identifier or a Unicode identifier.
10575      * <p>
10576      * The following Unicode characters are ignorable in a Java identifier
10577      * or a Unicode identifier:
10578      * <ul>
10579      * <li>ISO control characters that are not whitespace
10580      * <ul>
10581      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
10582      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
10583      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
10584      * </ul>
10585      *
10586      * <li>all characters that have the {@code FORMAT} general
10587      * category value
10588      * </ul>
10589      *
10590      * @param   codePoint the character (Unicode code point) to be tested.
10591      * @return  {@code true} if the character is an ignorable control
10592      *          character that may be part of a Java or Unicode identifier;
10593      *          {@code false} otherwise.
10594      * @see     Character#isJavaIdentifierPart(int)
10595      * @see     Character#isUnicodeIdentifierPart(int)
10596      * @since   1.5
10597      */
10598     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10599     /*
10600     public static boolean isIdentifierIgnorable(int codePoint) {
10601         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
10602     }
10603     */
isIdentifierIgnorable(int codePoint)10604     public static boolean isIdentifierIgnorable(int codePoint) {
10605         return isIdentifierIgnorableImpl(codePoint);
10606     }
10607 
10608     @FastNative
isIdentifierIgnorableImpl(int codePoint)10609     static native boolean isIdentifierIgnorableImpl(int codePoint);
10610     // END Android-changed: Reimplement methods natively on top of ICU4C.
10611 
10612     /**
10613      * Converts the character argument to lowercase using case
10614      * mapping information from the UnicodeData file.
10615      * <p>
10616      * Note that
10617      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
10618      * does not always return {@code true} for some ranges of
10619      * characters, particularly those that are symbols or ideographs.
10620      *
10621      * <p>In general, {@link String#toLowerCase()} should be used to map
10622      * characters to lowercase. {@code String} case mapping methods
10623      * have several benefits over {@code Character} case mapping methods.
10624      * {@code String} case mapping methods can perform locale-sensitive
10625      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10626      * the {@code Character} case mapping methods cannot.
10627      *
10628      * <p><b>Note:</b> This method cannot handle <a
10629      * href="#supplementary"> supplementary characters</a>. To support
10630      * all Unicode characters, including supplementary characters, use
10631      * the {@link #toLowerCase(int)} method.
10632      *
10633      * @param   ch   the character to be converted.
10634      * @return  the lowercase equivalent of the character, if any;
10635      *          otherwise, the character itself.
10636      * @see     Character#isLowerCase(char)
10637      * @see     String#toLowerCase()
10638      */
toLowerCase(char ch)10639     public static char toLowerCase(char ch) {
10640         return (char)toLowerCase((int)ch);
10641     }
10642 
10643     /**
10644      * Converts the character (Unicode code point) argument to
10645      * lowercase using case mapping information from the UnicodeData
10646      * file.
10647      *
10648      * <p> Note that
10649      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
10650      * does not always return {@code true} for some ranges of
10651      * characters, particularly those that are symbols or ideographs.
10652      *
10653      * <p>In general, {@link String#toLowerCase()} should be used to map
10654      * characters to lowercase. {@code String} case mapping methods
10655      * have several benefits over {@code Character} case mapping methods.
10656      * {@code String} case mapping methods can perform locale-sensitive
10657      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10658      * the {@code Character} case mapping methods cannot.
10659      *
10660      * @param   codePoint   the character (Unicode code point) to be converted.
10661      * @return  the lowercase equivalent of the character (Unicode code
10662      *          point), if any; otherwise, the character itself.
10663      * @see     Character#isLowerCase(int)
10664      * @see     String#toLowerCase()
10665      *
10666      * @since   1.5
10667      */
10668     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10669     /*
10670     public static int toLowerCase(int codePoint) {
10671         return CharacterData.of(codePoint).toLowerCase(codePoint);
10672     }
10673     */
toLowerCase(int codePoint)10674     public static int toLowerCase(int codePoint) {
10675         if (codePoint >= 'A' && codePoint <= 'Z') {
10676             return codePoint + ('a' - 'A');
10677         }
10678 
10679         // All ASCII codepoints except the ones above remain unchanged.
10680         if (codePoint < 0x80) {
10681             return codePoint;
10682         }
10683 
10684         return toLowerCaseImpl(codePoint);
10685     }
10686 
10687     @FastNative
toLowerCaseImpl(int codePoint)10688     static native int toLowerCaseImpl(int codePoint);
10689     // END Android-changed: Reimplement methods natively on top of ICU4C.
10690 
10691     /**
10692      * Converts the character argument to uppercase using case mapping
10693      * information from the UnicodeData file.
10694      * <p>
10695      * Note that
10696      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
10697      * does not always return {@code true} for some ranges of
10698      * characters, particularly those that are symbols or ideographs.
10699      *
10700      * <p>In general, {@link String#toUpperCase()} should be used to map
10701      * characters to uppercase. {@code String} case mapping methods
10702      * have several benefits over {@code Character} case mapping methods.
10703      * {@code String} case mapping methods can perform locale-sensitive
10704      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10705      * the {@code Character} case mapping methods cannot.
10706      *
10707      * <p><b>Note:</b> This method cannot handle <a
10708      * href="#supplementary"> supplementary characters</a>. To support
10709      * all Unicode characters, including supplementary characters, use
10710      * the {@link #toUpperCase(int)} method.
10711      *
10712      * @param   ch   the character to be converted.
10713      * @return  the uppercase equivalent of the character, if any;
10714      *          otherwise, the character itself.
10715      * @see     Character#isUpperCase(char)
10716      * @see     String#toUpperCase()
10717      */
toUpperCase(char ch)10718     public static char toUpperCase(char ch) {
10719         return (char)toUpperCase((int)ch);
10720     }
10721 
10722     /**
10723      * Converts the character (Unicode code point) argument to
10724      * uppercase using case mapping information from the UnicodeData
10725      * file.
10726      *
10727      * <p>Note that
10728      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
10729      * does not always return {@code true} for some ranges of
10730      * characters, particularly those that are symbols or ideographs.
10731      *
10732      * <p>In general, {@link String#toUpperCase()} should be used to map
10733      * characters to uppercase. {@code String} case mapping methods
10734      * have several benefits over {@code Character} case mapping methods.
10735      * {@code String} case mapping methods can perform locale-sensitive
10736      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
10737      * the {@code Character} case mapping methods cannot.
10738      *
10739      * @param   codePoint   the character (Unicode code point) to be converted.
10740      * @return  the uppercase equivalent of the character, if any;
10741      *          otherwise, the character itself.
10742      * @see     Character#isUpperCase(int)
10743      * @see     String#toUpperCase()
10744      *
10745      * @since   1.5
10746      */
10747     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10748     /*
10749     public static int toUpperCase(int codePoint) {
10750         return CharacterData.of(codePoint).toUpperCase(codePoint);
10751     }
10752     */
toUpperCase(int codePoint)10753     public static int toUpperCase(int codePoint) {
10754         if (codePoint >= 'a' && codePoint <= 'z') {
10755             return codePoint - ('a' - 'A');
10756         }
10757 
10758         // All ASCII codepoints except the ones above remain unchanged.
10759         if (codePoint < 0x80) {
10760             return codePoint;
10761         }
10762 
10763         return toUpperCaseImpl(codePoint);
10764     }
10765 
10766     @FastNative
toUpperCaseImpl(int codePoint)10767     static native int toUpperCaseImpl(int codePoint);
10768     // END Android-changed: Reimplement methods natively on top of ICU4C.
10769 
10770     /**
10771      * Converts the character argument to titlecase using case mapping
10772      * information from the UnicodeData file. If a character has no
10773      * explicit titlecase mapping and is not itself a titlecase char
10774      * according to UnicodeData, then the uppercase mapping is
10775      * returned as an equivalent titlecase mapping. If the
10776      * {@code char} argument is already a titlecase
10777      * {@code char}, the same {@code char} value will be
10778      * returned.
10779      * <p>
10780      * Note that
10781      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
10782      * does not always return {@code true} for some ranges of
10783      * characters.
10784      *
10785      * <p><b>Note:</b> This method cannot handle <a
10786      * href="#supplementary"> supplementary characters</a>. To support
10787      * all Unicode characters, including supplementary characters, use
10788      * the {@link #toTitleCase(int)} method.
10789      *
10790      * @param   ch   the character to be converted.
10791      * @return  the titlecase equivalent of the character, if any;
10792      *          otherwise, the character itself.
10793      * @see     Character#isTitleCase(char)
10794      * @see     Character#toLowerCase(char)
10795      * @see     Character#toUpperCase(char)
10796      * @since   1.0.2
10797      */
toTitleCase(char ch)10798     public static char toTitleCase(char ch) {
10799         return (char)toTitleCase((int)ch);
10800     }
10801 
10802     /**
10803      * Converts the character (Unicode code point) argument to titlecase using case mapping
10804      * information from the UnicodeData file. If a character has no
10805      * explicit titlecase mapping and is not itself a titlecase char
10806      * according to UnicodeData, then the uppercase mapping is
10807      * returned as an equivalent titlecase mapping. If the
10808      * character argument is already a titlecase
10809      * character, the same character value will be
10810      * returned.
10811      *
10812      * <p>Note that
10813      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
10814      * does not always return {@code true} for some ranges of
10815      * characters.
10816      *
10817      * @param   codePoint   the character (Unicode code point) to be converted.
10818      * @return  the titlecase equivalent of the character, if any;
10819      *          otherwise, the character itself.
10820      * @see     Character#isTitleCase(int)
10821      * @see     Character#toLowerCase(int)
10822      * @see     Character#toUpperCase(int)
10823      * @since   1.5
10824      */
10825     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10826     /*
10827     public static int toTitleCase(int codePoint) {
10828         return CharacterData.of(codePoint).toTitleCase(codePoint);
10829     }
10830     */
toTitleCase(int codePoint)10831     public static int toTitleCase(int codePoint) {
10832         return toTitleCaseImpl(codePoint);
10833     }
10834 
10835     @FastNative
toTitleCaseImpl(int codePoint)10836     static native int toTitleCaseImpl(int codePoint);
10837     // END Android-changed: Reimplement methods natively on top of ICU4C.
10838 
10839     /**
10840      * Returns the numeric value of the character {@code ch} in the
10841      * specified radix.
10842      * <p>
10843      * If the radix is not in the range {@code MIN_RADIX} &le;
10844      * {@code radix} &le; {@code MAX_RADIX} or if the
10845      * value of {@code ch} is not a valid digit in the specified
10846      * radix, {@code -1} is returned. A character is a valid digit
10847      * if at least one of the following is true:
10848      * <ul>
10849      * <li>The method {@code isDigit} is {@code true} of the character
10850      *     and the Unicode decimal digit value of the character (or its
10851      *     single-character decomposition) is less than the specified radix.
10852      *     In this case the decimal digit value is returned.
10853      * <li>The character is one of the uppercase Latin letters
10854      *     {@code 'A'} through {@code 'Z'} and its code is less than
10855      *     {@code radix + 'A' - 10}.
10856      *     In this case, {@code ch - 'A' + 10}
10857      *     is returned.
10858      * <li>The character is one of the lowercase Latin letters
10859      *     {@code 'a'} through {@code 'z'} and its code is less than
10860      *     {@code radix + 'a' - 10}.
10861      *     In this case, {@code ch - 'a' + 10}
10862      *     is returned.
10863      * <li>The character is one of the fullwidth uppercase Latin letters A
10864      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
10865      *     and its code is less than
10866      *     {@code radix + '\u005CuFF21' - 10}.
10867      *     In this case, {@code ch - '\u005CuFF21' + 10}
10868      *     is returned.
10869      * <li>The character is one of the fullwidth lowercase Latin letters a
10870      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
10871      *     and its code is less than
10872      *     {@code radix + '\u005CuFF41' - 10}.
10873      *     In this case, {@code ch - '\u005CuFF41' + 10}
10874      *     is returned.
10875      * </ul>
10876      *
10877      * <p><b>Note:</b> This method cannot handle <a
10878      * href="#supplementary"> supplementary characters</a>. To support
10879      * all Unicode characters, including supplementary characters, use
10880      * the {@link #digit(int, int)} method.
10881      *
10882      * @param   ch      the character to be converted.
10883      * @param   radix   the radix.
10884      * @return  the numeric value represented by the character in the
10885      *          specified radix.
10886      * @see     Character#forDigit(int, int)
10887      * @see     Character#isDigit(char)
10888      */
digit(char ch, int radix)10889     public static int digit(char ch, int radix) {
10890         return digit((int)ch, radix);
10891     }
10892 
10893     /**
10894      * Returns the numeric value of the specified character (Unicode
10895      * code point) in the specified radix.
10896      *
10897      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
10898      * {@code radix} &le; {@code MAX_RADIX} or if the
10899      * character is not a valid digit in the specified
10900      * radix, {@code -1} is returned. A character is a valid digit
10901      * if at least one of the following is true:
10902      * <ul>
10903      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
10904      *     and the Unicode decimal digit value of the character (or its
10905      *     single-character decomposition) is less than the specified radix.
10906      *     In this case the decimal digit value is returned.
10907      * <li>The character is one of the uppercase Latin letters
10908      *     {@code 'A'} through {@code 'Z'} and its code is less than
10909      *     {@code radix + 'A' - 10}.
10910      *     In this case, {@code codePoint - 'A' + 10}
10911      *     is returned.
10912      * <li>The character is one of the lowercase Latin letters
10913      *     {@code 'a'} through {@code 'z'} and its code is less than
10914      *     {@code radix + 'a' - 10}.
10915      *     In this case, {@code codePoint - 'a' + 10}
10916      *     is returned.
10917      * <li>The character is one of the fullwidth uppercase Latin letters A
10918      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
10919      *     and its code is less than
10920      *     {@code radix + '\u005CuFF21' - 10}.
10921      *     In this case,
10922      *     {@code codePoint - '\u005CuFF21' + 10}
10923      *     is returned.
10924      * <li>The character is one of the fullwidth lowercase Latin letters a
10925      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
10926      *     and its code is less than
10927      *     {@code radix + '\u005CuFF41' - 10}.
10928      *     In this case,
10929      *     {@code codePoint - '\u005CuFF41' + 10}
10930      *     is returned.
10931      * </ul>
10932      *
10933      * @param   codePoint the character (Unicode code point) to be converted.
10934      * @param   radix   the radix.
10935      * @return  the numeric value represented by the character in the
10936      *          specified radix.
10937      * @see     Character#forDigit(int, int)
10938      * @see     Character#isDigit(int)
10939      * @since   1.5
10940      */
10941     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
10942     /*
10943     public static int digit(int codePoint, int radix) {
10944         return CharacterData.of(codePoint).digit(codePoint, radix);
10945     }
10946     */
digit(int codePoint, int radix)10947     public static int digit(int codePoint, int radix) {
10948         if (radix < MIN_RADIX || radix > MAX_RADIX) {
10949             return -1;
10950         }
10951         if (codePoint < 128) {
10952             // Optimized for ASCII
10953             int result = -1;
10954             if ('0' <= codePoint && codePoint <= '9') {
10955                 result = codePoint - '0';
10956             } else if ('a' <= codePoint && codePoint <= 'z') {
10957                 result = 10 + (codePoint - 'a');
10958             } else if ('A' <= codePoint && codePoint <= 'Z') {
10959                 result = 10 + (codePoint - 'A');
10960             }
10961             return result < radix ? result : -1;
10962         }
10963         return digitImpl(codePoint, radix);
10964     }
10965 
10966     @FastNative
digitImpl(int codePoint, int radix)10967     native static int digitImpl(int codePoint, int radix);
10968     // END Android-changed: Reimplement methods natively on top of ICU4C.
10969 
10970     /**
10971      * Returns the {@code int} value that the specified Unicode
10972      * character represents. For example, the character
10973      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
10974      * an int with a value of 50.
10975      * <p>
10976      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
10977      * {@code '\u005Cu005A'}), lowercase
10978      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
10979      * full width variant ({@code '\u005CuFF21'} through
10980      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
10981      * {@code '\u005CuFF5A'}) forms have numeric values from 10
10982      * through 35. This is independent of the Unicode specification,
10983      * which does not assign numeric values to these {@code char}
10984      * values.
10985      * <p>
10986      * If the character does not have a numeric value, then -1 is returned.
10987      * If the character has a numeric value that cannot be represented as a
10988      * nonnegative integer (for example, a fractional value), then -2
10989      * is returned.
10990      *
10991      * <p><b>Note:</b> This method cannot handle <a
10992      * href="#supplementary"> supplementary characters</a>. To support
10993      * all Unicode characters, including supplementary characters, use
10994      * the {@link #getNumericValue(int)} method.
10995      *
10996      * @param   ch      the character to be converted.
10997      * @return  the numeric value of the character, as a nonnegative {@code int}
10998      *          value; -2 if the character has a numeric value but the value
10999      *          can not be represented as a nonnegative {@code int} value;
11000      *          -1 if the character has no numeric value.
11001      * @see     Character#forDigit(int, int)
11002      * @see     Character#isDigit(char)
11003      * @since   1.1
11004      */
getNumericValue(char ch)11005     public static int getNumericValue(char ch) {
11006         return getNumericValue((int)ch);
11007     }
11008 
11009     /**
11010      * Returns the {@code int} value that the specified
11011      * character (Unicode code point) represents. For example, the character
11012      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
11013      * an {@code int} with a value of 50.
11014      * <p>
11015      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
11016      * {@code '\u005Cu005A'}), lowercase
11017      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
11018      * full width variant ({@code '\u005CuFF21'} through
11019      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
11020      * {@code '\u005CuFF5A'}) forms have numeric values from 10
11021      * through 35. This is independent of the Unicode specification,
11022      * which does not assign numeric values to these {@code char}
11023      * values.
11024      * <p>
11025      * If the character does not have a numeric value, then -1 is returned.
11026      * If the character has a numeric value that cannot be represented as a
11027      * nonnegative integer (for example, a fractional value), then -2
11028      * is returned.
11029      *
11030      * @param   codePoint the character (Unicode code point) to be converted.
11031      * @return  the numeric value of the character, as a nonnegative {@code int}
11032      *          value; -2 if the character has a numeric value but the value
11033      *          can not be represented as a nonnegative {@code int} value;
11034      *          -1 if the character has no numeric value.
11035      * @see     Character#forDigit(int, int)
11036      * @see     Character#isDigit(int)
11037      * @since   1.5
11038      */
11039     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11040     /*
11041     public static int getNumericValue(int codePoint) {
11042         return CharacterData.of(codePoint).getNumericValue(codePoint);
11043     }
11044     */
getNumericValue(int codePoint)11045     public static int getNumericValue(int codePoint) {
11046         // This is both an optimization and papers over differences between Java and ICU.
11047         if (codePoint < 128) {
11048             if (codePoint >= '0' && codePoint <= '9') {
11049                 return codePoint - '0';
11050             }
11051             if (codePoint >= 'a' && codePoint <= 'z') {
11052                 return codePoint - ('a' - 10);
11053             }
11054             if (codePoint >= 'A' && codePoint <= 'Z') {
11055                 return codePoint - ('A' - 10);
11056             }
11057             return -1;
11058         }
11059         // Full-width uppercase A-Z.
11060         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
11061             return codePoint - 0xff17;
11062         }
11063         // Full-width lowercase a-z.
11064         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
11065             return codePoint - 0xff37;
11066         }
11067         return getNumericValueImpl(codePoint);
11068     }
11069 
11070     @FastNative
getNumericValueImpl(int codePoint)11071     native static int getNumericValueImpl(int codePoint);
11072     // END Android-changed: Reimplement methods natively on top of ICU4C.
11073 
11074     /**
11075      * Determines if the specified character is ISO-LATIN-1 white space.
11076      * This method returns {@code true} for the following five
11077      * characters only:
11078      * <table class="striped">
11079      * <caption style="display:none">truechars</caption>
11080      * <thead>
11081      * <tr><th scope="col">Character
11082      *     <th scope="col">Code
11083      *     <th scope="col">Name
11084      * </thead>
11085      * <tbody>
11086      * <tr><th scope="row">{@code '\t'}</th>            <td>{@code U+0009}</td>
11087      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
11088      * <tr><th scope="row">{@code '\n'}</th>            <td>{@code U+000A}</td>
11089      *     <td>{@code NEW LINE}</td></tr>
11090      * <tr><th scope="row">{@code '\f'}</th>            <td>{@code U+000C}</td>
11091      *     <td>{@code FORM FEED}</td></tr>
11092      * <tr><th scope="row">{@code '\r'}</th>            <td>{@code U+000D}</td>
11093      *     <td>{@code CARRIAGE RETURN}</td></tr>
11094      * <tr><th scope="row">{@code ' '}</th>  <td>{@code U+0020}</td>
11095      *     <td>{@code SPACE}</td></tr>
11096      * </tbody>
11097      * </table>
11098      *
11099      * @param      ch   the character to be tested.
11100      * @return     {@code true} if the character is ISO-LATIN-1 white
11101      *             space; {@code false} otherwise.
11102      * @see        Character#isSpaceChar(char)
11103      * @see        Character#isWhitespace(char)
11104      * @deprecated Replaced by {@link #isWhitespace(char)}.
11105      */
11106     @Deprecated(since="1.1")
isSpace(char ch)11107     public static boolean isSpace(char ch) {
11108         return (ch <= 0x0020) &&
11109             (((((1L << 0x0009) |
11110             (1L << 0x000A) |
11111             (1L << 0x000C) |
11112             (1L << 0x000D) |
11113             (1L << 0x0020)) >> ch) & 1L) != 0);
11114     }
11115 
11116 
11117     /**
11118      * Determines if the specified character is a Unicode space character.
11119      * A character is considered to be a space character if and only if
11120      * it is specified to be a space character by the Unicode Standard. This
11121      * method returns true if the character's general category type is any of
11122      * the following:
11123      * <ul>
11124      * <li> {@code SPACE_SEPARATOR}
11125      * <li> {@code LINE_SEPARATOR}
11126      * <li> {@code PARAGRAPH_SEPARATOR}
11127      * </ul>
11128      *
11129      * <p><b>Note:</b> This method cannot handle <a
11130      * href="#supplementary"> supplementary characters</a>. To support
11131      * all Unicode characters, including supplementary characters, use
11132      * the {@link #isSpaceChar(int)} method.
11133      *
11134      * @param   ch      the character to be tested.
11135      * @return  {@code true} if the character is a space character;
11136      *          {@code false} otherwise.
11137      * @see     Character#isWhitespace(char)
11138      * @since   1.1
11139      */
isSpaceChar(char ch)11140     public static boolean isSpaceChar(char ch) {
11141         return isSpaceChar((int)ch);
11142     }
11143 
11144     /**
11145      * Determines if the specified character (Unicode code point) is a
11146      * Unicode space character.  A character is considered to be a
11147      * space character if and only if it is specified to be a space
11148      * character by the Unicode Standard. This method returns true if
11149      * the character's general category type is any of the following:
11150      *
11151      * <ul>
11152      * <li> {@link #SPACE_SEPARATOR}
11153      * <li> {@link #LINE_SEPARATOR}
11154      * <li> {@link #PARAGRAPH_SEPARATOR}
11155      * </ul>
11156      *
11157      * @param   codePoint the character (Unicode code point) to be tested.
11158      * @return  {@code true} if the character is a space character;
11159      *          {@code false} otherwise.
11160      * @see     Character#isWhitespace(int)
11161      * @since   1.5
11162      */
11163     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11164     /*
11165     public static boolean isSpaceChar(int codePoint) {
11166         return ((((1 << Character.SPACE_SEPARATOR) |
11167                   (1 << Character.LINE_SEPARATOR) |
11168                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
11169             != 0;
11170     }
11171     */
isSpaceChar(int codePoint)11172     public static boolean isSpaceChar(int codePoint) {
11173         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
11174         // SPACE or NO-BREAK SPACE?
11175         if (codePoint == 0x20 || codePoint == 0xa0) {
11176             return true;
11177         }
11178         if (codePoint < 0x1000) {
11179             return false;
11180         }
11181         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
11182         if (codePoint == 0x1680 || codePoint == 0x180e) {
11183             return true;
11184         }
11185         if (codePoint < 0x2000) {
11186             return false;
11187         }
11188         if (codePoint <= 0xffff) {
11189             // Other whitespace from General Punctuation...
11190             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
11191                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
11192         }
11193         // Let icu4c worry about non-BMP code points.
11194         return isSpaceCharImpl(codePoint);
11195     }
11196 
11197     @FastNative
isSpaceCharImpl(int codePoint)11198     static native boolean isSpaceCharImpl(int codePoint);
11199     // END Android-changed: Reimplement methods natively on top of ICU4C.
11200 
11201     /**
11202      * Determines if the specified character is white space according to Java.
11203      * A character is a Java whitespace character if and only if it satisfies
11204      * one of the following criteria:
11205      * <ul>
11206      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
11207      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
11208      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
11209      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11210      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11211      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11212      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11213      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11214      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11215      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11216      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11217      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11218      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11219      * </ul>
11220      *
11221      * <p><b>Note:</b> This method cannot handle <a
11222      * href="#supplementary"> supplementary characters</a>. To support
11223      * all Unicode characters, including supplementary characters, use
11224      * the {@link #isWhitespace(int)} method.
11225      *
11226      * @param   ch the character to be tested.
11227      * @return  {@code true} if the character is a Java whitespace
11228      *          character; {@code false} otherwise.
11229      * @see     Character#isSpaceChar(char)
11230      * @since   1.1
11231      */
isWhitespace(char ch)11232     public static boolean isWhitespace(char ch) {
11233         return isWhitespace((int)ch);
11234     }
11235 
11236     /**
11237      * Determines if the specified character (Unicode code point) is
11238      * white space according to Java.  A character is a Java
11239      * whitespace character if and only if it satisfies one of the
11240      * following criteria:
11241      * <ul>
11242      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
11243      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
11244      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
11245      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
11246      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
11247      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
11248      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
11249      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
11250      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
11251      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
11252      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
11253      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
11254      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
11255      * </ul>
11256      *
11257      * @param   codePoint the character (Unicode code point) to be tested.
11258      * @return  {@code true} if the character is a Java whitespace
11259      *          character; {@code false} otherwise.
11260      * @see     Character#isSpaceChar(int)
11261      * @since   1.5
11262      */
11263     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11264     /*
11265     public static boolean isWhitespace(int codePoint) {
11266         return CharacterData.of(codePoint).isWhitespace(codePoint);
11267     }
11268     */
isWhitespace(int codePoint)11269     public static boolean isWhitespace(int codePoint) {
11270         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
11271         // Any ASCII whitespace character?
11272         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
11273             return true;
11274         }
11275         if (codePoint < 0x1000) {
11276             return false;
11277         }
11278         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
11279         if (codePoint == 0x1680 || codePoint == 0x180e) {
11280             return true;
11281         }
11282         if (codePoint < 0x2000) {
11283             return false;
11284         }
11285         // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
11286         if (codePoint == 0x2007 || codePoint == 0x202f) {
11287             return false;
11288         }
11289         if (codePoint <= 0xffff) {
11290             // Other whitespace from General Punctuation...
11291             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
11292                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
11293         }
11294         // Let icu4c worry about non-BMP code points.
11295         return isWhitespaceImpl(codePoint);
11296     }
11297 
11298     @FastNative
isWhitespaceImpl(int codePoint)11299     native static boolean isWhitespaceImpl(int codePoint);
11300     // END Android-changed: Reimplement methods natively on top of ICU4C.
11301 
11302     /**
11303      * Determines if the specified character is an ISO control
11304      * character.  A character is considered to be an ISO control
11305      * character if its code is in the range {@code '\u005Cu0000'}
11306      * through {@code '\u005Cu001F'} or in the range
11307      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11308      *
11309      * <p><b>Note:</b> This method cannot handle <a
11310      * href="#supplementary"> supplementary characters</a>. To support
11311      * all Unicode characters, including supplementary characters, use
11312      * the {@link #isISOControl(int)} method.
11313      *
11314      * @param   ch      the character to be tested.
11315      * @return  {@code true} if the character is an ISO control character;
11316      *          {@code false} otherwise.
11317      *
11318      * @see     Character#isSpaceChar(char)
11319      * @see     Character#isWhitespace(char)
11320      * @since   1.1
11321      */
isISOControl(char ch)11322     public static boolean isISOControl(char ch) {
11323         return isISOControl((int)ch);
11324     }
11325 
11326     /**
11327      * Determines if the referenced character (Unicode code point) is an ISO control
11328      * character.  A character is considered to be an ISO control
11329      * character if its code is in the range {@code '\u005Cu0000'}
11330      * through {@code '\u005Cu001F'} or in the range
11331      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
11332      *
11333      * @param   codePoint the character (Unicode code point) to be tested.
11334      * @return  {@code true} if the character is an ISO control character;
11335      *          {@code false} otherwise.
11336      * @see     Character#isSpaceChar(int)
11337      * @see     Character#isWhitespace(int)
11338      * @since   1.5
11339      */
isISOControl(int codePoint)11340     public static boolean isISOControl(int codePoint) {
11341         // Optimized form of:
11342         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
11343         //     (codePoint >= 0x7F && codePoint <= 0x9F);
11344         return codePoint <= 0x9F &&
11345             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
11346     }
11347 
11348     /**
11349      * Returns a value indicating a character's general category.
11350      *
11351      * <p><b>Note:</b> This method cannot handle <a
11352      * href="#supplementary"> supplementary characters</a>. To support
11353      * all Unicode characters, including supplementary characters, use
11354      * the {@link #getType(int)} method.
11355      *
11356      * @param   ch      the character to be tested.
11357      * @return  a value of type {@code int} representing the
11358      *          character's general category.
11359      * @see     Character#COMBINING_SPACING_MARK
11360      * @see     Character#CONNECTOR_PUNCTUATION
11361      * @see     Character#CONTROL
11362      * @see     Character#CURRENCY_SYMBOL
11363      * @see     Character#DASH_PUNCTUATION
11364      * @see     Character#DECIMAL_DIGIT_NUMBER
11365      * @see     Character#ENCLOSING_MARK
11366      * @see     Character#END_PUNCTUATION
11367      * @see     Character#FINAL_QUOTE_PUNCTUATION
11368      * @see     Character#FORMAT
11369      * @see     Character#INITIAL_QUOTE_PUNCTUATION
11370      * @see     Character#LETTER_NUMBER
11371      * @see     Character#LINE_SEPARATOR
11372      * @see     Character#LOWERCASE_LETTER
11373      * @see     Character#MATH_SYMBOL
11374      * @see     Character#MODIFIER_LETTER
11375      * @see     Character#MODIFIER_SYMBOL
11376      * @see     Character#NON_SPACING_MARK
11377      * @see     Character#OTHER_LETTER
11378      * @see     Character#OTHER_NUMBER
11379      * @see     Character#OTHER_PUNCTUATION
11380      * @see     Character#OTHER_SYMBOL
11381      * @see     Character#PARAGRAPH_SEPARATOR
11382      * @see     Character#PRIVATE_USE
11383      * @see     Character#SPACE_SEPARATOR
11384      * @see     Character#START_PUNCTUATION
11385      * @see     Character#SURROGATE
11386      * @see     Character#TITLECASE_LETTER
11387      * @see     Character#UNASSIGNED
11388      * @see     Character#UPPERCASE_LETTER
11389      * @since   1.1
11390      */
getType(char ch)11391     public static int getType(char ch) {
11392         return getType((int)ch);
11393     }
11394 
11395     /**
11396      * Returns a value indicating a character's general category.
11397      *
11398      * @param   codePoint the character (Unicode code point) to be tested.
11399      * @return  a value of type {@code int} representing the
11400      *          character's general category.
11401      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
11402      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
11403      * @see     Character#CONTROL CONTROL
11404      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
11405      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
11406      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
11407      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
11408      * @see     Character#END_PUNCTUATION END_PUNCTUATION
11409      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
11410      * @see     Character#FORMAT FORMAT
11411      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
11412      * @see     Character#LETTER_NUMBER LETTER_NUMBER
11413      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
11414      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
11415      * @see     Character#MATH_SYMBOL MATH_SYMBOL
11416      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
11417      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
11418      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
11419      * @see     Character#OTHER_LETTER OTHER_LETTER
11420      * @see     Character#OTHER_NUMBER OTHER_NUMBER
11421      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
11422      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
11423      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
11424      * @see     Character#PRIVATE_USE PRIVATE_USE
11425      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
11426      * @see     Character#START_PUNCTUATION START_PUNCTUATION
11427      * @see     Character#SURROGATE SURROGATE
11428      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
11429      * @see     Character#UNASSIGNED UNASSIGNED
11430      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
11431      * @since   1.5
11432      */
11433     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11434     /*
11435     public static int getType(int codePoint) {
11436         return CharacterData.of(codePoint).getType(codePoint);
11437     }
11438     */
getType(int codePoint)11439     public static int getType(int codePoint) {
11440         int type = getTypeImpl(codePoint);
11441         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
11442         if (type <= Character.FORMAT) {
11443             return type;
11444         }
11445         return (type + 1);
11446     }
11447 
11448     @FastNative
getTypeImpl(int codePoint)11449     static native int getTypeImpl(int codePoint);
11450     // END Android-changed: Reimplement methods natively on top of ICU4C.
11451 
11452     /**
11453      * Determines the character representation for a specific digit in
11454      * the specified radix. If the value of {@code radix} is not a
11455      * valid radix, or the value of {@code digit} is not a valid
11456      * digit in the specified radix, the null character
11457      * ({@code '\u005Cu0000'}) is returned.
11458      * <p>
11459      * The {@code radix} argument is valid if it is greater than or
11460      * equal to {@code MIN_RADIX} and less than or equal to
11461      * {@code MAX_RADIX}. The {@code digit} argument is valid if
11462      * {@code 0 <= digit < radix}.
11463      * <p>
11464      * If the digit is less than 10, then
11465      * {@code '0' + digit} is returned. Otherwise, the value
11466      * {@code 'a' + digit - 10} is returned.
11467      *
11468      * @param   digit   the number to convert to a character.
11469      * @param   radix   the radix.
11470      * @return  the {@code char} representation of the specified digit
11471      *          in the specified radix.
11472      * @see     Character#MIN_RADIX
11473      * @see     Character#MAX_RADIX
11474      * @see     Character#digit(char, int)
11475      */
forDigit(int digit, int radix)11476     public static char forDigit(int digit, int radix) {
11477         if ((digit >= radix) || (digit < 0)) {
11478             return '\0';
11479         }
11480         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
11481             return '\0';
11482         }
11483         if (digit < 10) {
11484             return (char)('0' + digit);
11485         }
11486         return (char)('a' - 10 + digit);
11487     }
11488 
11489     /**
11490      * Returns the Unicode directionality property for the given
11491      * character.  Character directionality is used to calculate the
11492      * visual ordering of text. The directionality value of undefined
11493      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
11494      *
11495      * <p><b>Note:</b> This method cannot handle <a
11496      * href="#supplementary"> supplementary characters</a>. To support
11497      * all Unicode characters, including supplementary characters, use
11498      * the {@link #getDirectionality(int)} method.
11499      *
11500      * @param  ch {@code char} for which the directionality property
11501      *            is requested.
11502      * @return the directionality property of the {@code char} value.
11503      *
11504      * @see Character#DIRECTIONALITY_UNDEFINED
11505      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
11506      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
11507      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
11508      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
11509      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
11510      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
11511      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
11512      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
11513      * @see Character#DIRECTIONALITY_NONSPACING_MARK
11514      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
11515      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
11516      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
11517      * @see Character#DIRECTIONALITY_WHITESPACE
11518      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
11519      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
11520      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
11521      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
11522      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
11523      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
11524      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
11525      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
11526      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE
11527      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
11528      * @since 1.4
11529      */
getDirectionality(char ch)11530     public static byte getDirectionality(char ch) {
11531         return getDirectionality((int)ch);
11532     }
11533 
11534     /**
11535      * Returns the Unicode directionality property for the given
11536      * character (Unicode code point).  Character directionality is
11537      * used to calculate the visual ordering of text. The
11538      * directionality value of undefined character is {@link
11539      * #DIRECTIONALITY_UNDEFINED}.
11540      *
11541      * @param   codePoint the character (Unicode code point) for which
11542      *          the directionality property is requested.
11543      * @return the directionality property of the character.
11544      *
11545      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
11546      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
11547      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
11548      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
11549      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
11550      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
11551      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
11552      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
11553      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
11554      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
11555      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
11556      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
11557      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
11558      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
11559      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
11560      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
11561      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
11562      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
11563      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
11564      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
11565      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE DIRECTIONALITY_LEFT_TO_RIGHT_ISOLATE
11566      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE DIRECTIONALITY_RIGHT_TO_LEFT_ISOLATE
11567      * @see Character#DIRECTIONALITY_FIRST_STRONG_ISOLATE DIRECTIONALITY_FIRST_STRONG_ISOLATE
11568      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE DIRECTIONALITY_POP_DIRECTIONAL_ISOLATE
11569      * @since    1.5
11570      */
11571     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11572     /*
11573     public static byte getDirectionality(int codePoint) {
11574         return CharacterData.of(codePoint).getDirectionality(codePoint);
11575     }
11576     */
getDirectionality(int codePoint)11577     public static byte getDirectionality(int codePoint) {
11578         if (getType(codePoint) == Character.UNASSIGNED) {
11579             return Character.DIRECTIONALITY_UNDEFINED;
11580         }
11581 
11582         byte directionality = getDirectionalityImpl(codePoint);
11583         if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
11584             return DIRECTIONALITY[directionality];
11585         }
11586         return Character.DIRECTIONALITY_UNDEFINED;
11587     }
11588 
11589     @FastNative
getDirectionalityImpl(int codePoint)11590     native static byte getDirectionalityImpl(int codePoint);
11591     // END Android-changed: Reimplement methods natively on top of ICU4C.
11592 
11593     /**
11594      * Determines whether the character is mirrored according to the
11595      * Unicode specification.  Mirrored characters should have their
11596      * glyphs horizontally mirrored when displayed in text that is
11597      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
11598      * PARENTHESIS is semantically defined to be an <i>opening
11599      * parenthesis</i>.  This will appear as a "(" in text that is
11600      * left-to-right but as a ")" in text that is right-to-left.
11601      *
11602      * <p><b>Note:</b> This method cannot handle <a
11603      * href="#supplementary"> supplementary characters</a>. To support
11604      * all Unicode characters, including supplementary characters, use
11605      * the {@link #isMirrored(int)} method.
11606      *
11607      * @param  ch {@code char} for which the mirrored property is requested
11608      * @return {@code true} if the char is mirrored, {@code false}
11609      *         if the {@code char} is not mirrored or is not defined.
11610      * @since 1.4
11611      */
isMirrored(char ch)11612     public static boolean isMirrored(char ch) {
11613         return isMirrored((int)ch);
11614     }
11615 
11616     /**
11617      * Determines whether the specified character (Unicode code point)
11618      * is mirrored according to the Unicode specification.  Mirrored
11619      * characters should have their glyphs horizontally mirrored when
11620      * displayed in text that is right-to-left.  For example,
11621      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
11622      * defined to be an <i>opening parenthesis</i>.  This will appear
11623      * as a "(" in text that is left-to-right but as a ")" in text
11624      * that is right-to-left.
11625      *
11626      * @param   codePoint the character (Unicode code point) to be tested.
11627      * @return  {@code true} if the character is mirrored, {@code false}
11628      *          if the character is not mirrored or is not defined.
11629      * @since   1.5
11630      */
11631     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
11632     /*
11633     public static boolean isMirrored(int codePoint) {
11634         return CharacterData.of(codePoint).isMirrored(codePoint);
11635     }
11636     */
isMirrored(int codePoint)11637     public static boolean isMirrored(int codePoint) {
11638         return isMirroredImpl(codePoint);
11639     }
11640 
11641     @FastNative
isMirroredImpl(int codePoint)11642     native static boolean isMirroredImpl(int codePoint);
11643     // END Android-changed: Reimplement methods natively on top of ICU4C.
11644 
11645     /**
11646      * Compares two {@code Character} objects numerically.
11647      *
11648      * @param   anotherCharacter   the {@code Character} to be compared.
11649      * @return  the value {@code 0} if the argument {@code Character}
11650      *          is equal to this {@code Character}; a value less than
11651      *          {@code 0} if this {@code Character} is numerically less
11652      *          than the {@code Character} argument; and a value greater than
11653      *          {@code 0} if this {@code Character} is numerically greater
11654      *          than the {@code Character} argument (unsigned comparison).
11655      *          Note that this is strictly a numerical comparison; it is not
11656      *          locale-dependent.
11657      * @since   1.2
11658      */
compareTo(Character anotherCharacter)11659     public int compareTo(Character anotherCharacter) {
11660         return compare(this.value, anotherCharacter.value);
11661     }
11662 
11663     /**
11664      * Compares two {@code char} values numerically.
11665      * The value returned is identical to what would be returned by:
11666      * <pre>
11667      *    Character.valueOf(x).compareTo(Character.valueOf(y))
11668      * </pre>
11669      *
11670      * @param  x the first {@code char} to compare
11671      * @param  y the second {@code char} to compare
11672      * @return the value {@code 0} if {@code x == y};
11673      *         a value less than {@code 0} if {@code x < y}; and
11674      *         a value greater than {@code 0} if {@code x > y}
11675      * @since 1.7
11676      */
compare(char x, char y)11677     public static int compare(char x, char y) {
11678         return x - y;
11679     }
11680 
11681     // BEGIN Android-removed: Use ICU.
11682     /**
11683      * Converts the character (Unicode code point) argument to uppercase using
11684      * information from the UnicodeData file.
11685      *
11686      * @param   codePoint   the character (Unicode code point) to be converted.
11687      * @return  either the uppercase equivalent of the character, if
11688      *          any, or an error flag ({@code Character.ERROR})
11689      *          that indicates that a 1:M {@code char} mapping exists.
11690      * @see     Character#isLowerCase(char)
11691      * @see     Character#isUpperCase(char)
11692      * @see     Character#toLowerCase(char)
11693      * @see     Character#toTitleCase(char)
11694      * @since 1.4
11695      *
11696     static int toUpperCaseEx(int codePoint) {
11697         assert isValidCodePoint(codePoint);
11698         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
11699     }
11700 
11701     /**
11702      * Converts the character (Unicode code point) argument to uppercase using case
11703      * mapping information from the SpecialCasing file in the Unicode
11704      * specification. If a character has no explicit uppercase
11705      * mapping, then the {@code char} itself is returned in the
11706      * {@code char[]}.
11707      *
11708      * @param   codePoint   the character (Unicode code point) to be converted.
11709      * @return a {@code char[]} with the uppercased character.
11710      * @since 1.4
11711      *
11712     static char[] toUpperCaseCharArray(int codePoint) {
11713         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
11714         assert isBmpCodePoint(codePoint);
11715         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
11716     }
11717     */
11718     // END Android-removed: Use ICU.
11719 
11720     /**
11721      * The number of bits used to represent a {@code char} value in unsigned
11722      * binary form, constant {@code 16}.
11723      *
11724      * @since 1.5
11725      */
11726     public static final int SIZE = 16;
11727 
11728     /**
11729      * The number of bytes used to represent a {@code char} value in unsigned
11730      * binary form.
11731      *
11732      * @since 1.8
11733      */
11734     public static final int BYTES = SIZE / Byte.SIZE;
11735 
11736     /**
11737      * Returns the value obtained by reversing the order of the bytes in the
11738      * specified {@code char} value.
11739      *
11740      * @param ch The {@code char} of which to reverse the byte order.
11741      * @return the value obtained by reversing (or, equivalently, swapping)
11742      *     the bytes in the specified {@code char} value.
11743      * @since 1.5
11744      */
11745     @IntrinsicCandidate
reverseBytes(char ch)11746     public static char reverseBytes(char ch) {
11747         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
11748     }
11749 
11750     /**
11751      * Returns the Unicode name of the specified character
11752      * {@code codePoint}, or null if the code point is
11753      * {@link #UNASSIGNED unassigned}.
11754      * <p>
11755      * Note: if the specified character is not assigned a name by
11756      * the <i>UnicodeData</i> file (part of the Unicode Character
11757      * Database maintained by the Unicode Consortium), the returned
11758      * name is the same as the result of expression:
11759      *
11760      * <blockquote>{@code
11761      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
11762      *     + " "
11763      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11764      *
11765      * }</blockquote>
11766      *
11767      * @param  codePoint the character (Unicode code point)
11768      *
11769      * @return the Unicode name of the specified character, or null if
11770      *         the code point is unassigned.
11771      *
11772      * @throws IllegalArgumentException if the specified
11773      *            {@code codePoint} is not a valid Unicode
11774      *            code point.
11775      *
11776      * @since 1.7
11777      */
getName(int codePoint)11778     public static String getName(int codePoint) {
11779         if (!isValidCodePoint(codePoint)) {
11780             throw new IllegalArgumentException(
11781                 String.format("Not a valid Unicode code point: 0x%X", codePoint));
11782         }
11783         // Android-changed: Use ICU.
11784         // String name = CharacterName.get(codePoint);
11785         String name = getNameImpl(codePoint);
11786         if (name != null)
11787             return name;
11788         if (getType(codePoint) == UNASSIGNED)
11789             return null;
11790         UnicodeBlock block = UnicodeBlock.of(codePoint);
11791         if (block != null)
11792             return block.toString().replace('_', ' ') + " "
11793                    + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11794         // should never come here
11795         return Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11796     }
11797 
11798     // BEGIN Android-removed: expose after CharacterName.getCodePoint() is imported.
11799     /**
11800      * Returns the code point value of the Unicode character specified by
11801      * the given Unicode character name.
11802      * <p>
11803      * Note: if a character is not assigned a name by the <i>UnicodeData</i>
11804      * file (part of the Unicode Character Database maintained by the Unicode
11805      * Consortium), its name is defined as the result of expression:
11806      *
11807      * <blockquote>{@code
11808      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
11809      *     + " "
11810      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ROOT);
11811      *
11812      * }</blockquote>
11813      * <p>
11814      * The {@code name} matching is case insensitive, with any leading and
11815      * trailing whitespace character removed.
11816      *
11817      * @param  name the Unicode character name
11818      *
11819      * @return the code point value of the character specified by its name.
11820      *
11821      * @throws IllegalArgumentException if the specified {@code name}
11822      *         is not a valid Unicode character name.
11823      * @throws NullPointerException if {@code name} is {@code null}
11824      *
11825      * @since 9
11826      *
11827     public static int codePointOf(String name) {
11828         name = name.trim().toUpperCase(Locale.ROOT);
11829         int cp = CharacterName.getInstance().getCodePoint(name);
11830         if (cp != -1)
11831             return cp;
11832         try {
11833             int off = name.lastIndexOf(' ');
11834             if (off != -1) {
11835                 cp = Integer.parseInt(name, off + 1, name.length(), 16);
11836                 if (isValidCodePoint(cp) && name.equals(getName(cp)))
11837                     return cp;
11838             }
11839         } catch (Exception x) {}
11840         throw new IllegalArgumentException("Unrecognized character name :" + name);
11841     }
11842     */
11843     // END Android-removed: expose after CharacterName.getCodePoint() is imported.
11844 
11845     // Android-added: Use ICU.
11846     // Implement getNameImpl() natively.
getNameImpl(int codePoint)11847     private static native String getNameImpl(int codePoint);
11848 }
11849