• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4  *
5  * This code is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 only, as
7  * published by the Free Software Foundation.  Oracle designates this
8  * particular file as subject to the "Classpath" exception as provided
9  * by Oracle in the LICENSE file that accompanied this code.
10  *
11  * This code is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14  * version 2 for more details (a copy is included in the LICENSE file that
15  * accompanied this code).
16  *
17  * You should have received a copy of the GNU General Public License version
18  * 2 along with this work; if not, write to the Free Software Foundation,
19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20  *
21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22  * or visit www.oracle.com if you need additional information or have any
23  * questions.
24  */
25 
26 package java.lang;
27 
28 import dalvik.annotation.optimization.FastNative;
29 import java.util.Arrays;
30 import java.util.HashMap;
31 import java.util.Locale;
32 import java.util.Map;
33 
34 // Android-changed: Remove reference to a specific unicode standard version
35 /**
36  * The {@code Character} class wraps a value of the primitive
37  * type {@code char} in an object. An object of type
38  * {@code Character} contains a single field whose type is
39  * {@code char}.
40  * <p>
41  * In addition, this class provides several methods for determining
42  * a character's category (lowercase letter, digit, etc.) and for converting
43  * characters from uppercase to lowercase and vice versa.
44  * <p>
45  * Character information is based on the Unicode Standard.
46  * <p>
47  * The methods and data of class {@code Character} are defined by
48  * the information in the <i>UnicodeData</i> file that is part of the
49  * Unicode Character Database maintained by the Unicode
50  * Consortium. This file specifies various properties including name
51  * and general category for every defined Unicode code point or
52  * character range.
53  * <p>
54  * The file and its description are available from the Unicode Consortium at:
55  * <ul>
56  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
57  * </ul>
58  *
59  * <h3><a name="unicode">Unicode Character Representations</a></h3>
60  *
61  * <p>The {@code char} data type (and therefore the value that a
62  * {@code Character} object encapsulates) is based on the
63  * original Unicode specification, which defined characters as
64  * fixed-width 16-bit entities. The Unicode Standard has since been
65  * changed to allow for characters whose representation requires more
66  * than 16 bits.  The range of legal <em>code point</em>s is now
67  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
68  * (Refer to the <a
69  * href="http://www.unicode.org/reports/tr27/#notation"><i>
70  * definition</i></a> of the U+<i>n</i> notation in the Unicode
71  * Standard.)
72  *
73  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
74  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
75  * <a name="supplementary">Characters</a> whose code points are greater
76  * than U+FFFF are called <em>supplementary character</em>s.  The Java
77  * platform uses the UTF-16 representation in {@code char} arrays and
78  * in the {@code String} and {@code StringBuffer} classes. In
79  * this representation, supplementary characters are represented as a pair
80  * of {@code char} values, the first from the <em>high-surrogates</em>
81  * range, (&#92;uD800-&#92;uDBFF), the second from the
82  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
83  *
84  * <p>A {@code char} value, therefore, represents Basic
85  * Multilingual Plane (BMP) code points, including the surrogate
86  * code points, or code units of the UTF-16 encoding. An
87  * {@code int} value represents all Unicode code points,
88  * including supplementary code points. The lower (least significant)
89  * 21 bits of {@code int} are used to represent Unicode code
90  * points and the upper (most significant) 11 bits must be zero.
91  * Unless otherwise specified, the behavior with respect to
92  * supplementary characters and surrogate {@code char} values is
93  * as follows:
94  *
95  * <ul>
96  * <li>The methods that only accept a {@code char} value cannot support
97  * supplementary characters. They treat {@code char} values from the
98  * surrogate ranges as undefined characters. For example,
99  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
100  * this specific value if followed by any low-surrogate value in a string
101  * would represent a letter.
102  *
103  * <li>The methods that accept an {@code int} value support all
104  * Unicode characters, including supplementary characters. For
105  * example, {@code Character.isLetter(0x2F81A)} returns
106  * {@code true} because the code point value represents a letter
107  * (a CJK ideograph).
108  * </ul>
109  *
110  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
111  * used for character values in the range between U+0000 and U+10FFFF,
112  * and <em>Unicode code unit</em> is used for 16-bit
113  * {@code char} values that are code units of the <em>UTF-16</em>
114  * encoding. For more information on Unicode terminology, refer to the
115  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
116  *
117  * @author  Lee Boynton
118  * @author  Guy Steele
119  * @author  Akira Tanaka
120  * @author  Martin Buchholz
121  * @author  Ulf Zibis
122  * @since   1.0
123  */
124 public final
125 class Character implements java.io.Serializable, Comparable<Character> {
126     /**
127      * The minimum radix available for conversion to and from strings.
128      * The constant value of this field is the smallest value permitted
129      * for the radix argument in radix-conversion methods such as the
130      * {@code digit} method, the {@code forDigit} method, and the
131      * {@code toString} method of class {@code Integer}.
132      *
133      * @see     Character#digit(char, int)
134      * @see     Character#forDigit(int, int)
135      * @see     Integer#toString(int, int)
136      * @see     Integer#valueOf(String)
137      */
138     public static final int MIN_RADIX = 2;
139 
140     /**
141      * The maximum radix available for conversion to and from strings.
142      * The constant value of this field is the largest value permitted
143      * for the radix argument in radix-conversion methods such as the
144      * {@code digit} method, the {@code forDigit} method, and the
145      * {@code toString} method of class {@code Integer}.
146      *
147      * @see     Character#digit(char, int)
148      * @see     Character#forDigit(int, int)
149      * @see     Integer#toString(int, int)
150      * @see     Integer#valueOf(String)
151      */
152     public static final int MAX_RADIX = 36;
153 
154     /**
155      * The constant value of this field is the smallest value of type
156      * {@code char}, {@code '\u005Cu0000'}.
157      *
158      * @since   1.0.2
159      */
160     public static final char MIN_VALUE = '\u0000';
161 
162     /**
163      * The constant value of this field is the largest value of type
164      * {@code char}, {@code '\u005CuFFFF'}.
165      *
166      * @since   1.0.2
167      */
168     public static final char MAX_VALUE = '\uFFFF';
169 
170     /**
171      * The {@code Class} instance representing the primitive type
172      * {@code char}.
173      *
174      * @since   1.1
175      */
176     @SuppressWarnings("unchecked")
177     public static final Class<Character> TYPE = (Class<Character>) Class.getPrimitiveClass("char");
178 
179     /*
180      * Normative general types
181      */
182 
183     /*
184      * General character types
185      */
186 
187     /**
188      * General category "Cn" in the Unicode specification.
189      * @since   1.1
190      */
191     public static final byte UNASSIGNED = 0;
192 
193     /**
194      * General category "Lu" in the Unicode specification.
195      * @since   1.1
196      */
197     public static final byte UPPERCASE_LETTER = 1;
198 
199     /**
200      * General category "Ll" in the Unicode specification.
201      * @since   1.1
202      */
203     public static final byte LOWERCASE_LETTER = 2;
204 
205     /**
206      * General category "Lt" in the Unicode specification.
207      * @since   1.1
208      */
209     public static final byte TITLECASE_LETTER = 3;
210 
211     /**
212      * General category "Lm" in the Unicode specification.
213      * @since   1.1
214      */
215     public static final byte MODIFIER_LETTER = 4;
216 
217     /**
218      * General category "Lo" in the Unicode specification.
219      * @since   1.1
220      */
221     public static final byte OTHER_LETTER = 5;
222 
223     /**
224      * General category "Mn" in the Unicode specification.
225      * @since   1.1
226      */
227     public static final byte NON_SPACING_MARK = 6;
228 
229     /**
230      * General category "Me" in the Unicode specification.
231      * @since   1.1
232      */
233     public static final byte ENCLOSING_MARK = 7;
234 
235     /**
236      * General category "Mc" in the Unicode specification.
237      * @since   1.1
238      */
239     public static final byte COMBINING_SPACING_MARK = 8;
240 
241     /**
242      * General category "Nd" in the Unicode specification.
243      * @since   1.1
244      */
245     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
246 
247     /**
248      * General category "Nl" in the Unicode specification.
249      * @since   1.1
250      */
251     public static final byte LETTER_NUMBER = 10;
252 
253     /**
254      * General category "No" in the Unicode specification.
255      * @since   1.1
256      */
257     public static final byte OTHER_NUMBER = 11;
258 
259     /**
260      * General category "Zs" in the Unicode specification.
261      * @since   1.1
262      */
263     public static final byte SPACE_SEPARATOR = 12;
264 
265     /**
266      * General category "Zl" in the Unicode specification.
267      * @since   1.1
268      */
269     public static final byte LINE_SEPARATOR = 13;
270 
271     /**
272      * General category "Zp" in the Unicode specification.
273      * @since   1.1
274      */
275     public static final byte PARAGRAPH_SEPARATOR = 14;
276 
277     /**
278      * General category "Cc" in the Unicode specification.
279      * @since   1.1
280      */
281     public static final byte CONTROL = 15;
282 
283     /**
284      * General category "Cf" in the Unicode specification.
285      * @since   1.1
286      */
287     public static final byte FORMAT = 16;
288 
289     /**
290      * General category "Co" in the Unicode specification.
291      * @since   1.1
292      */
293     public static final byte PRIVATE_USE = 18;
294 
295     /**
296      * General category "Cs" in the Unicode specification.
297      * @since   1.1
298      */
299     public static final byte SURROGATE = 19;
300 
301     /**
302      * General category "Pd" in the Unicode specification.
303      * @since   1.1
304      */
305     public static final byte DASH_PUNCTUATION = 20;
306 
307     /**
308      * General category "Ps" in the Unicode specification.
309      * @since   1.1
310      */
311     public static final byte START_PUNCTUATION = 21;
312 
313     /**
314      * General category "Pe" in the Unicode specification.
315      * @since   1.1
316      */
317     public static final byte END_PUNCTUATION = 22;
318 
319     /**
320      * General category "Pc" in the Unicode specification.
321      * @since   1.1
322      */
323     public static final byte CONNECTOR_PUNCTUATION = 23;
324 
325     /**
326      * General category "Po" in the Unicode specification.
327      * @since   1.1
328      */
329     public static final byte OTHER_PUNCTUATION = 24;
330 
331     /**
332      * General category "Sm" in the Unicode specification.
333      * @since   1.1
334      */
335     public static final byte MATH_SYMBOL = 25;
336 
337     /**
338      * General category "Sc" in the Unicode specification.
339      * @since   1.1
340      */
341     public static final byte CURRENCY_SYMBOL = 26;
342 
343     /**
344      * General category "Sk" in the Unicode specification.
345      * @since   1.1
346      */
347     public static final byte MODIFIER_SYMBOL = 27;
348 
349     /**
350      * General category "So" in the Unicode specification.
351      * @since   1.1
352      */
353     public static final byte OTHER_SYMBOL = 28;
354 
355     /**
356      * General category "Pi" in the Unicode specification.
357      * @since   1.4
358      */
359     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
360 
361     /**
362      * General category "Pf" in the Unicode specification.
363      * @since   1.4
364      */
365     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
366 
367     /**
368      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
369      */
370     static final int ERROR = 0xFFFFFFFF;
371 
372 
373     /**
374      * Undefined bidirectional character type. Undefined {@code char}
375      * values have undefined directionality in the Unicode specification.
376      * @since 1.4
377      */
378     public static final byte DIRECTIONALITY_UNDEFINED = -1;
379 
380     /**
381      * Strong bidirectional character type "L" in the Unicode specification.
382      * @since 1.4
383      */
384     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
385 
386     /**
387      * Strong bidirectional character type "R" in the Unicode specification.
388      * @since 1.4
389      */
390     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
391 
392     /**
393      * Strong bidirectional character type "AL" in the Unicode specification.
394      * @since 1.4
395      */
396     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
397 
398     /**
399      * Weak bidirectional character type "EN" in the Unicode specification.
400      * @since 1.4
401      */
402     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
403 
404     /**
405      * Weak bidirectional character type "ES" in the Unicode specification.
406      * @since 1.4
407      */
408     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
409 
410     /**
411      * Weak bidirectional character type "ET" in the Unicode specification.
412      * @since 1.4
413      */
414     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
415 
416     /**
417      * Weak bidirectional character type "AN" in the Unicode specification.
418      * @since 1.4
419      */
420     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
421 
422     /**
423      * Weak bidirectional character type "CS" in the Unicode specification.
424      * @since 1.4
425      */
426     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
427 
428     /**
429      * Weak bidirectional character type "NSM" in the Unicode specification.
430      * @since 1.4
431      */
432     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
433 
434     /**
435      * Weak bidirectional character type "BN" in the Unicode specification.
436      * @since 1.4
437      */
438     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
439 
440     /**
441      * Neutral bidirectional character type "B" in the Unicode specification.
442      * @since 1.4
443      */
444     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
445 
446     /**
447      * Neutral bidirectional character type "S" in the Unicode specification.
448      * @since 1.4
449      */
450     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
451 
452     /**
453      * Neutral bidirectional character type "WS" in the Unicode specification.
454      * @since 1.4
455      */
456     public static final byte DIRECTIONALITY_WHITESPACE = 12;
457 
458     /**
459      * Neutral bidirectional character type "ON" in the Unicode specification.
460      * @since 1.4
461      */
462     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
463 
464     /**
465      * Strong bidirectional character type "LRE" in the Unicode specification.
466      * @since 1.4
467      */
468     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
469 
470     /**
471      * Strong bidirectional character type "LRO" in the Unicode specification.
472      * @since 1.4
473      */
474     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
475 
476     /**
477      * Strong bidirectional character type "RLE" in the Unicode specification.
478      * @since 1.4
479      */
480     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
481 
482     /**
483      * Strong bidirectional character type "RLO" in the Unicode specification.
484      * @since 1.4
485      */
486     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
487 
488     /**
489      * Weak bidirectional character type "PDF" in the Unicode specification.
490      * @since 1.4
491      */
492     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
493 
494     /**
495      * The minimum value of a
496      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
497      * Unicode high-surrogate code unit</a>
498      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
499      * A high-surrogate is also known as a <i>leading-surrogate</i>.
500      *
501      * @since 1.5
502      */
503     public static final char MIN_HIGH_SURROGATE = '\uD800';
504 
505     /**
506      * The maximum value of a
507      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
508      * Unicode high-surrogate code unit</a>
509      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
510      * A high-surrogate is also known as a <i>leading-surrogate</i>.
511      *
512      * @since 1.5
513      */
514     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
515 
516     /**
517      * The minimum value of a
518      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
519      * Unicode low-surrogate code unit</a>
520      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
521      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
522      *
523      * @since 1.5
524      */
525     public static final char MIN_LOW_SURROGATE  = '\uDC00';
526 
527     /**
528      * The maximum value of a
529      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
530      * Unicode low-surrogate code unit</a>
531      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
532      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
533      *
534      * @since 1.5
535      */
536     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
537 
538     /**
539      * The minimum value of a Unicode surrogate code unit in the
540      * UTF-16 encoding, constant {@code '\u005CuD800'}.
541      *
542      * @since 1.5
543      */
544     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
545 
546     /**
547      * The maximum value of a Unicode surrogate code unit in the
548      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
549      *
550      * @since 1.5
551      */
552     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
553 
554     /**
555      * The minimum value of a
556      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
557      * Unicode supplementary code point</a>, constant {@code U+10000}.
558      *
559      * @since 1.5
560      */
561     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
562 
563     /**
564      * The minimum value of a
565      * <a href="http://www.unicode.org/glossary/#code_point">
566      * Unicode code point</a>, constant {@code U+0000}.
567      *
568      * @since 1.5
569      */
570     public static final int MIN_CODE_POINT = 0x000000;
571 
572     /**
573      * The maximum value of a
574      * <a href="http://www.unicode.org/glossary/#code_point">
575      * Unicode code point</a>, constant {@code U+10FFFF}.
576      *
577      * @since 1.5
578      */
579     public static final int MAX_CODE_POINT = 0X10FFFF;
580 
581     // BEGIN Android-added: Use ICU.
582     // The indices in byte[] DIRECTIONALITY are based on icu4c's u_charDirection(),
583     // accessed via getDirectionalityImpl(), implemented in Character.cpp.
584     private static final byte[] DIRECTIONALITY = new byte[] {
585             DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
586             DIRECTIONALITY_EUROPEAN_NUMBER,
587             DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
588             DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
589             DIRECTIONALITY_ARABIC_NUMBER,
590             DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
591             DIRECTIONALITY_PARAGRAPH_SEPARATOR,
592             DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
593             DIRECTIONALITY_OTHER_NEUTRALS,
594             DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
595             DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
596             DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
597             DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
598             DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
599             DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
600             DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
601     // END Android-added: Use ICU.
602 
603     /**
604      * Instances of this class represent particular subsets of the Unicode
605      * character set.  The only family of subsets defined in the
606      * {@code Character} class is {@link Character.UnicodeBlock}.
607      * Other portions of the Java API may define other subsets for their
608      * own purposes.
609      *
610      * @since 1.2
611      */
612     public static class Subset  {
613 
614         private String name;
615 
616         /**
617          * Constructs a new {@code Subset} instance.
618          *
619          * @param  name  The name of this subset
620          * @exception NullPointerException if name is {@code null}
621          */
Subset(String name)622         protected Subset(String name) {
623             if (name == null) {
624                 throw new NullPointerException("name");
625             }
626             this.name = name;
627         }
628 
629         /**
630          * Compares two {@code Subset} objects for equality.
631          * This method returns {@code true} if and only if
632          * {@code this} and the argument refer to the same
633          * object; since this method is {@code final}, this
634          * guarantee holds for all subclasses.
635          */
equals(Object obj)636         public final boolean equals(Object obj) {
637             return (this == obj);
638         }
639 
640         /**
641          * Returns the standard hash code as defined by the
642          * {@link Object#hashCode} method.  This method
643          * is {@code final} in order to ensure that the
644          * {@code equals} and {@code hashCode} methods will
645          * be consistent in all subclasses.
646          */
hashCode()647         public final int hashCode() {
648             return super.hashCode();
649         }
650 
651         /**
652          * Returns the name of this subset.
653          */
toString()654         public final String toString() {
655             return name;
656         }
657     }
658 
659     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
660     // for the latest specification of Unicode Blocks.
661 
662     /**
663      * A family of character subsets representing the character blocks in the
664      * Unicode specification. Character blocks generally define characters
665      * used for a specific script or purpose. A character is contained by
666      * at most one Unicode block.
667      *
668      * @since 1.2
669      */
670     public static final class UnicodeBlock extends Subset {
671 
672         private static Map<String, UnicodeBlock> map = new HashMap<>(256);
673 
/**
 * Creates a UnicodeBlock with the given identifier name.
 * This name must be the same as the block identifier.
 * The new block is stored in {@code map} under {@code idName}.
 *
 * @param idName the identifier; also passed to the superclass constructor as the name
 */
private UnicodeBlock(String idName) {
    super(idName);
    UnicodeBlock.map.put(idName, this);
}
682 
683         // BEGIN Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
684         // Add a (String, boolean) constructor for use by SURROGATES_AREA.
/**
 * Creates a UnicodeBlock whose entry in {@code map} is optional.
 *
 * @param idName the identifier; also passed to the superclass constructor as the name
 * @param isMap  when {@code true} the block is stored in {@code map} under
 *               {@code idName}; when {@code false} nothing is stored
 */
private UnicodeBlock(String idName, boolean isMap) {
    super(idName);
    if (!isMap) {
        // Caller asked for an unmapped block: leave map untouched.
        return;
    }
    map.put(idName, this);
}
691         // END Android-added: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
692 
/**
 * Creates a UnicodeBlock with the given identifier name and
 * a single alias name. The block is stored in {@code map} under both keys.
 *
 * @param idName the identifier, handled by the single-argument constructor
 * @param alias  an additional key under which this block is stored
 */
private UnicodeBlock(String idName, String alias) {
    this(idName);
    UnicodeBlock.map.put(alias, this);
}
701 
/**
 * Creates a UnicodeBlock with the given identifier name and
 * any number of alias names. The block is stored in {@code map}
 * under the identifier and under every alias, in argument order.
 *
 * @param idName  the identifier, handled by the single-argument constructor
 * @param aliases additional keys under which this block is stored
 */
private UnicodeBlock(String idName, String... aliases) {
    this(idName);
    for (int i = 0; i < aliases.length; i++) {
        map.put(aliases[i], this);
    }
}
711 
712         /**
713          * Constant for the "Basic Latin" Unicode character block.
714          * @since 1.2
715          */
716         public static final UnicodeBlock  BASIC_LATIN =
717             new UnicodeBlock("BASIC_LATIN",
718                              "BASIC LATIN",
719                              "BASICLATIN");
720 
721         /**
722          * Constant for the "Latin-1 Supplement" Unicode character block.
723          * @since 1.2
724          */
725         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
726             new UnicodeBlock("LATIN_1_SUPPLEMENT",
727                              "LATIN-1 SUPPLEMENT",
728                              "LATIN-1SUPPLEMENT");
729 
730         /**
731          * Constant for the "Latin Extended-A" Unicode character block.
732          * @since 1.2
733          */
734         public static final UnicodeBlock LATIN_EXTENDED_A =
735             new UnicodeBlock("LATIN_EXTENDED_A",
736                              "LATIN EXTENDED-A",
737                              "LATINEXTENDED-A");
738 
739         /**
740          * Constant for the "Latin Extended-B" Unicode character block.
741          * @since 1.2
742          */
743         public static final UnicodeBlock LATIN_EXTENDED_B =
744             new UnicodeBlock("LATIN_EXTENDED_B",
745                              "LATIN EXTENDED-B",
746                              "LATINEXTENDED-B");
747 
748         /**
749          * Constant for the "IPA Extensions" Unicode character block.
750          * @since 1.2
751          */
752         public static final UnicodeBlock IPA_EXTENSIONS =
753             new UnicodeBlock("IPA_EXTENSIONS",
754                              "IPA EXTENSIONS",
755                              "IPAEXTENSIONS");
756 
757         /**
758          * Constant for the "Spacing Modifier Letters" Unicode character block.
759          * @since 1.2
760          */
761         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
762             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
763                              "SPACING MODIFIER LETTERS",
764                              "SPACINGMODIFIERLETTERS");
765 
766         /**
767          * Constant for the "Combining Diacritical Marks" Unicode character block.
768          * @since 1.2
769          */
770         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
771             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
772                              "COMBINING DIACRITICAL MARKS",
773                              "COMBININGDIACRITICALMARKS");
774 
775         /**
776          * Constant for the "Greek and Coptic" Unicode character block.
777          * <p>
778          * This block was previously known as the "Greek" block.
779          *
780          * @since 1.2
781          */
782         public static final UnicodeBlock GREEK =
783             new UnicodeBlock("GREEK",
784                              "GREEK AND COPTIC",
785                              "GREEKANDCOPTIC");
786 
787         /**
788          * Constant for the "Cyrillic" Unicode character block.
789          * @since 1.2
790          */
791         public static final UnicodeBlock CYRILLIC =
792             new UnicodeBlock("CYRILLIC");
793 
794         /**
795          * Constant for the "Armenian" Unicode character block.
796          * @since 1.2
797          */
798         public static final UnicodeBlock ARMENIAN =
799             new UnicodeBlock("ARMENIAN");
800 
801         /**
802          * Constant for the "Hebrew" Unicode character block.
803          * @since 1.2
804          */
805         public static final UnicodeBlock HEBREW =
806             new UnicodeBlock("HEBREW");
807 
808         /**
809          * Constant for the "Arabic" Unicode character block.
810          * @since 1.2
811          */
812         public static final UnicodeBlock ARABIC =
813             new UnicodeBlock("ARABIC");
814 
815         /**
816          * Constant for the "Devanagari" Unicode character block.
817          * @since 1.2
818          */
819         public static final UnicodeBlock DEVANAGARI =
820             new UnicodeBlock("DEVANAGARI");
821 
822         /**
823          * Constant for the "Bengali" Unicode character block.
824          * @since 1.2
825          */
826         public static final UnicodeBlock BENGALI =
827             new UnicodeBlock("BENGALI");
828 
829         /**
830          * Constant for the "Gurmukhi" Unicode character block.
831          * @since 1.2
832          */
833         public static final UnicodeBlock GURMUKHI =
834             new UnicodeBlock("GURMUKHI");
835 
836         /**
837          * Constant for the "Gujarati" Unicode character block.
838          * @since 1.2
839          */
840         public static final UnicodeBlock GUJARATI =
841             new UnicodeBlock("GUJARATI");
842 
843         /**
844          * Constant for the "Oriya" Unicode character block.
845          * @since 1.2
846          */
847         public static final UnicodeBlock ORIYA =
848             new UnicodeBlock("ORIYA");
849 
850         /**
851          * Constant for the "Tamil" Unicode character block.
852          * @since 1.2
853          */
854         public static final UnicodeBlock TAMIL =
855             new UnicodeBlock("TAMIL");
856 
857         /**
858          * Constant for the "Telugu" Unicode character block.
859          * @since 1.2
860          */
861         public static final UnicodeBlock TELUGU =
862             new UnicodeBlock("TELUGU");
863 
864         /**
865          * Constant for the "Kannada" Unicode character block.
866          * @since 1.2
867          */
868         public static final UnicodeBlock KANNADA =
869             new UnicodeBlock("KANNADA");
870 
871         /**
872          * Constant for the "Malayalam" Unicode character block.
873          * @since 1.2
874          */
875         public static final UnicodeBlock MALAYALAM =
876             new UnicodeBlock("MALAYALAM");
877 
878         /**
879          * Constant for the "Thai" Unicode character block.
880          * @since 1.2
881          */
882         public static final UnicodeBlock THAI =
883             new UnicodeBlock("THAI");
884 
885         /**
886          * Constant for the "Lao" Unicode character block.
887          * @since 1.2
888          */
889         public static final UnicodeBlock LAO =
890             new UnicodeBlock("LAO");
891 
892         /**
893          * Constant for the "Tibetan" Unicode character block.
894          * @since 1.2
895          */
896         public static final UnicodeBlock TIBETAN =
897             new UnicodeBlock("TIBETAN");
898 
899         /**
900          * Constant for the "Georgian" Unicode character block.
901          * @since 1.2
902          */
903         public static final UnicodeBlock GEORGIAN =
904             new UnicodeBlock("GEORGIAN");
905 
906         /**
907          * Constant for the "Hangul Jamo" Unicode character block.
908          * @since 1.2
909          */
910         public static final UnicodeBlock HANGUL_JAMO =
911             new UnicodeBlock("HANGUL_JAMO",
912                              "HANGUL JAMO",
913                              "HANGULJAMO");
914 
915         /**
916          * Constant for the "Latin Extended Additional" Unicode character block.
917          * @since 1.2
918          */
919         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
920             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
921                              "LATIN EXTENDED ADDITIONAL",
922                              "LATINEXTENDEDADDITIONAL");
923 
924         /**
925          * Constant for the "Greek Extended" Unicode character block.
926          * @since 1.2
927          */
928         public static final UnicodeBlock GREEK_EXTENDED =
929             new UnicodeBlock("GREEK_EXTENDED",
930                              "GREEK EXTENDED",
931                              "GREEKEXTENDED");
932 
933         /**
934          * Constant for the "General Punctuation" Unicode character block.
935          * @since 1.2
936          */
937         public static final UnicodeBlock GENERAL_PUNCTUATION =
938             new UnicodeBlock("GENERAL_PUNCTUATION",
939                              "GENERAL PUNCTUATION",
940                              "GENERALPUNCTUATION");
941 
942         /**
943          * Constant for the "Superscripts and Subscripts" Unicode character
944          * block.
945          * @since 1.2
946          */
947         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
948             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
949                              "SUPERSCRIPTS AND SUBSCRIPTS",
950                              "SUPERSCRIPTSANDSUBSCRIPTS");
951 
952         /**
953          * Constant for the "Currency Symbols" Unicode character block.
954          * @since 1.2
955          */
956         public static final UnicodeBlock CURRENCY_SYMBOLS =
957             new UnicodeBlock("CURRENCY_SYMBOLS",
958                              "CURRENCY SYMBOLS",
959                              "CURRENCYSYMBOLS");
960 
961         /**
962          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
963          * character block.
964          * <p>
965          * This block was previously known as "Combining Marks for Symbols".
966          * @since 1.2
967          */
968         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
969             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
970                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
971                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
972                              "COMBINING MARKS FOR SYMBOLS",
973                              "COMBININGMARKSFORSYMBOLS");
974 
975         /**
976          * Constant for the "Letterlike Symbols" Unicode character block.
977          * @since 1.2
978          */
979         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
980             new UnicodeBlock("LETTERLIKE_SYMBOLS",
981                              "LETTERLIKE SYMBOLS",
982                              "LETTERLIKESYMBOLS");
983 
984         /**
985          * Constant for the "Number Forms" Unicode character block.
986          * @since 1.2
987          */
988         public static final UnicodeBlock NUMBER_FORMS =
989             new UnicodeBlock("NUMBER_FORMS",
990                              "NUMBER FORMS",
991                              "NUMBERFORMS");
992 
993         /**
994          * Constant for the "Arrows" Unicode character block.
995          * @since 1.2
996          */
997         public static final UnicodeBlock ARROWS =
998             new UnicodeBlock("ARROWS");
999 
1000         /**
1001          * Constant for the "Mathematical Operators" Unicode character block.
1002          * @since 1.2
1003          */
1004         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1005             new UnicodeBlock("MATHEMATICAL_OPERATORS",
1006                              "MATHEMATICAL OPERATORS",
1007                              "MATHEMATICALOPERATORS");
1008 
1009         /**
1010          * Constant for the "Miscellaneous Technical" Unicode character block.
1011          * @since 1.2
1012          */
1013         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1014             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1015                              "MISCELLANEOUS TECHNICAL",
1016                              "MISCELLANEOUSTECHNICAL");
1017 
1018         /**
1019          * Constant for the "Control Pictures" Unicode character block.
1020          * @since 1.2
1021          */
1022         public static final UnicodeBlock CONTROL_PICTURES =
1023             new UnicodeBlock("CONTROL_PICTURES",
1024                              "CONTROL PICTURES",
1025                              "CONTROLPICTURES");
1026 
1027         /**
1028          * Constant for the "Optical Character Recognition" Unicode character block.
1029          * @since 1.2
1030          */
1031         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1032             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1033                              "OPTICAL CHARACTER RECOGNITION",
1034                              "OPTICALCHARACTERRECOGNITION");
1035 
1036         /**
1037          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1038          * @since 1.2
1039          */
1040         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1041             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1042                              "ENCLOSED ALPHANUMERICS",
1043                              "ENCLOSEDALPHANUMERICS");
1044 
1045         /**
1046          * Constant for the "Box Drawing" Unicode character block.
1047          * @since 1.2
1048          */
1049         public static final UnicodeBlock BOX_DRAWING =
1050             new UnicodeBlock("BOX_DRAWING",
1051                              "BOX DRAWING",
1052                              "BOXDRAWING");
1053 
1054         /**
1055          * Constant for the "Block Elements" Unicode character block.
1056          * @since 1.2
1057          */
1058         public static final UnicodeBlock BLOCK_ELEMENTS =
1059             new UnicodeBlock("BLOCK_ELEMENTS",
1060                              "BLOCK ELEMENTS",
1061                              "BLOCKELEMENTS");
1062 
1063         /**
1064          * Constant for the "Geometric Shapes" Unicode character block.
1065          * @since 1.2
1066          */
1067         public static final UnicodeBlock GEOMETRIC_SHAPES =
1068             new UnicodeBlock("GEOMETRIC_SHAPES",
1069                              "GEOMETRIC SHAPES",
1070                              "GEOMETRICSHAPES");
1071 
1072         /**
1073          * Constant for the "Miscellaneous Symbols" Unicode character block.
1074          * @since 1.2
1075          */
1076         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1077             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1078                              "MISCELLANEOUS SYMBOLS",
1079                              "MISCELLANEOUSSYMBOLS");
1080 
1081         /**
1082          * Constant for the "Dingbats" Unicode character block.
1083          * @since 1.2
1084          */
1085         public static final UnicodeBlock DINGBATS =
1086             new UnicodeBlock("DINGBATS");
1087 
1088         /**
1089          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1090          * @since 1.2
1091          */
1092         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1093             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1094                              "CJK SYMBOLS AND PUNCTUATION",
1095                              "CJKSYMBOLSANDPUNCTUATION");
1096 
1097         /**
1098          * Constant for the "Hiragana" Unicode character block.
1099          * @since 1.2
1100          */
1101         public static final UnicodeBlock HIRAGANA =
1102             new UnicodeBlock("HIRAGANA");
1103 
1104         /**
1105          * Constant for the "Katakana" Unicode character block.
1106          * @since 1.2
1107          */
1108         public static final UnicodeBlock KATAKANA =
1109             new UnicodeBlock("KATAKANA");
1110 
1111         /**
1112          * Constant for the "Bopomofo" Unicode character block.
1113          * @since 1.2
1114          */
1115         public static final UnicodeBlock BOPOMOFO =
1116             new UnicodeBlock("BOPOMOFO");
1117 
1118         /**
1119          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1120          * @since 1.2
1121          */
1122         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1123             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1124                              "HANGUL COMPATIBILITY JAMO",
1125                              "HANGULCOMPATIBILITYJAMO");
1126 
1127         /**
1128          * Constant for the "Kanbun" Unicode character block.
1129          * @since 1.2
1130          */
1131         public static final UnicodeBlock KANBUN =
1132             new UnicodeBlock("KANBUN");
1133 
1134         /**
1135          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1136          * @since 1.2
1137          */
1138         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1139             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1140                              "ENCLOSED CJK LETTERS AND MONTHS",
1141                              "ENCLOSEDCJKLETTERSANDMONTHS");
1142 
1143         /**
1144          * Constant for the "CJK Compatibility" Unicode character block.
1145          * @since 1.2
1146          */
1147         public static final UnicodeBlock CJK_COMPATIBILITY =
1148             new UnicodeBlock("CJK_COMPATIBILITY",
1149                              "CJK COMPATIBILITY",
1150                              "CJKCOMPATIBILITY");
1151 
1152         /**
1153          * Constant for the "CJK Unified Ideographs" Unicode character block.
1154          * @since 1.2
1155          */
1156         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1157             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1158                              "CJK UNIFIED IDEOGRAPHS",
1159                              "CJKUNIFIEDIDEOGRAPHS");
1160 
1161         /**
1162          * Constant for the "Hangul Syllables" Unicode character block.
1163          * @since 1.2
1164          */
1165         public static final UnicodeBlock HANGUL_SYLLABLES =
1166             new UnicodeBlock("HANGUL_SYLLABLES",
1167                              "HANGUL SYLLABLES",
1168                              "HANGULSYLLABLES");
1169 
1170         /**
1171          * Constant for the "Private Use Area" Unicode character block.
1172          * @since 1.2
1173          */
1174         public static final UnicodeBlock PRIVATE_USE_AREA =
1175             new UnicodeBlock("PRIVATE_USE_AREA",
1176                              "PRIVATE USE AREA",
1177                              "PRIVATEUSEAREA");
1178 
1179         /**
1180          * Constant for the "CJK Compatibility Ideographs" Unicode character
1181          * block.
1182          * @since 1.2
1183          */
1184         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1185             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1186                              "CJK COMPATIBILITY IDEOGRAPHS",
1187                              "CJKCOMPATIBILITYIDEOGRAPHS");
1188 
1189         /**
1190          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1191          * @since 1.2
1192          */
1193         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1194             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1195                              "ALPHABETIC PRESENTATION FORMS",
1196                              "ALPHABETICPRESENTATIONFORMS");
1197 
1198         /**
1199          * Constant for the "Arabic Presentation Forms-A" Unicode character
1200          * block.
1201          * @since 1.2
1202          */
1203         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1204             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1205                              "ARABIC PRESENTATION FORMS-A",
1206                              "ARABICPRESENTATIONFORMS-A");
1207 
1208         /**
1209          * Constant for the "Combining Half Marks" Unicode character block.
1210          * @since 1.2
1211          */
1212         public static final UnicodeBlock COMBINING_HALF_MARKS =
1213             new UnicodeBlock("COMBINING_HALF_MARKS",
1214                              "COMBINING HALF MARKS",
1215                              "COMBININGHALFMARKS");
1216 
1217         /**
1218          * Constant for the "CJK Compatibility Forms" Unicode character block.
1219          * @since 1.2
1220          */
1221         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1222             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1223                              "CJK COMPATIBILITY FORMS",
1224                              "CJKCOMPATIBILITYFORMS");
1225 
1226         /**
1227          * Constant for the "Small Form Variants" Unicode character block.
1228          * @since 1.2
1229          */
1230         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1231             new UnicodeBlock("SMALL_FORM_VARIANTS",
1232                              "SMALL FORM VARIANTS",
1233                              "SMALLFORMVARIANTS");
1234 
1235         /**
1236          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1237          * @since 1.2
1238          */
1239         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1240             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1241                              "ARABIC PRESENTATION FORMS-B",
1242                              "ARABICPRESENTATIONFORMS-B");
1243 
1244         /**
1245          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1246          * block.
1247          * @since 1.2
1248          */
1249         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1250             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1251                              "HALFWIDTH AND FULLWIDTH FORMS",
1252                              "HALFWIDTHANDFULLWIDTHFORMS");
1253 
1254         /**
1255          * Constant for the "Specials" Unicode character block.
1256          * @since 1.2
1257          */
1258         public static final UnicodeBlock SPECIALS =
1259             new UnicodeBlock("SPECIALS");
1260 
1261         /**
1262          * Constant for the legacy "Surrogates Area" Unicode character block.
1263          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1264          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1265          *             {@link #LOW_SURROGATES}. These new constants match
1266          *             the block definitions of the Unicode Standard. {@link #of(char)}
1267          *             and {@link #of(int)} return them, not SURROGATES_AREA.
1268          */
1269         @Deprecated
1270         public static final UnicodeBlock SURROGATES_AREA =
1271             // Android-changed: ICU consistency: Don't map deprecated SURROGATES_AREA. b/26140229
1272             // new UnicodeBlock("SURROGATES_AREA");
1273             new UnicodeBlock("SURROGATES_AREA", false);
1274 
1275         /**
1276          * Constant for the "Syriac" Unicode character block.
1277          * @since 1.4
1278          */
1279         public static final UnicodeBlock SYRIAC =
1280             new UnicodeBlock("SYRIAC");
1281 
1282         /**
1283          * Constant for the "Thaana" Unicode character block.
1284          * @since 1.4
1285          */
1286         public static final UnicodeBlock THAANA =
1287             new UnicodeBlock("THAANA");
1288 
1289         /**
1290          * Constant for the "Sinhala" Unicode character block.
1291          * @since 1.4
1292          */
1293         public static final UnicodeBlock SINHALA =
1294             new UnicodeBlock("SINHALA");
1295 
1296         /**
1297          * Constant for the "Myanmar" Unicode character block.
1298          * @since 1.4
1299          */
1300         public static final UnicodeBlock MYANMAR =
1301             new UnicodeBlock("MYANMAR");
1302 
1303         /**
1304          * Constant for the "Ethiopic" Unicode character block.
1305          * @since 1.4
1306          */
1307         public static final UnicodeBlock ETHIOPIC =
1308             new UnicodeBlock("ETHIOPIC");
1309 
1310         /**
1311          * Constant for the "Cherokee" Unicode character block.
1312          * @since 1.4
1313          */
1314         public static final UnicodeBlock CHEROKEE =
1315             new UnicodeBlock("CHEROKEE");
1316 
1317         /**
1318          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1319          * @since 1.4
1320          */
1321         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1322             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1323                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1324                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1325 
1326         /**
1327          * Constant for the "Ogham" Unicode character block.
1328          * @since 1.4
1329          */
1330         public static final UnicodeBlock OGHAM =
1331             new UnicodeBlock("OGHAM");
1332 
1333         /**
1334          * Constant for the "Runic" Unicode character block.
1335          * @since 1.4
1336          */
1337         public static final UnicodeBlock RUNIC =
1338             new UnicodeBlock("RUNIC");
1339 
1340         /**
1341          * Constant for the "Khmer" Unicode character block.
1342          * @since 1.4
1343          */
1344         public static final UnicodeBlock KHMER =
1345             new UnicodeBlock("KHMER");
1346 
1347         /**
1348          * Constant for the "Mongolian" Unicode character block.
1349          * @since 1.4
1350          */
1351         public static final UnicodeBlock MONGOLIAN =
1352             new UnicodeBlock("MONGOLIAN");
1353 
1354         /**
1355          * Constant for the "Braille Patterns" Unicode character block.
1356          * @since 1.4
1357          */
1358         public static final UnicodeBlock BRAILLE_PATTERNS =
1359             new UnicodeBlock("BRAILLE_PATTERNS",
1360                              "BRAILLE PATTERNS",
1361                              "BRAILLEPATTERNS");
1362 
1363         /**
1364          * Constant for the "CJK Radicals Supplement" Unicode character block.
1365          * @since 1.4
1366          */
1367         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1368             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1369                              "CJK RADICALS SUPPLEMENT",
1370                              "CJKRADICALSSUPPLEMENT");
1371 
1372         /**
1373          * Constant for the "Kangxi Radicals" Unicode character block.
1374          * @since 1.4
1375          */
1376         public static final UnicodeBlock KANGXI_RADICALS =
1377             new UnicodeBlock("KANGXI_RADICALS",
1378                              "KANGXI RADICALS",
1379                              "KANGXIRADICALS");
1380 
1381         /**
1382          * Constant for the "Ideographic Description Characters" Unicode character block.
1383          * @since 1.4
1384          */
1385         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1386             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1387                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1388                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1389 
1390         /**
1391          * Constant for the "Bopomofo Extended" Unicode character block.
1392          * @since 1.4
1393          */
1394         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1395             new UnicodeBlock("BOPOMOFO_EXTENDED",
1396                              "BOPOMOFO EXTENDED",
1397                              "BOPOMOFOEXTENDED");
1398 
1399         /**
1400          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1401          * @since 1.4
1402          */
1403         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1404             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1405                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1406                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1407 
1408         /**
1409          * Constant for the "Yi Syllables" Unicode character block.
1410          * @since 1.4
1411          */
1412         public static final UnicodeBlock YI_SYLLABLES =
1413             new UnicodeBlock("YI_SYLLABLES",
1414                              "YI SYLLABLES",
1415                              "YISYLLABLES");
1416 
1417         /**
1418          * Constant for the "Yi Radicals" Unicode character block.
1419          * @since 1.4
1420          */
1421         public static final UnicodeBlock YI_RADICALS =
1422             new UnicodeBlock("YI_RADICALS",
1423                              "YI RADICALS",
1424                              "YIRADICALS");
1425 
1426         /**
1427          * Constant for the "Cyrillic Supplementary" Unicode character block.
              * <p>
              * This block is also registered under the alias "Cyrillic Supplement".
1428          * @since 1.5
1429          */
1430         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1431             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1432                              "CYRILLIC SUPPLEMENTARY",
1433                              "CYRILLICSUPPLEMENTARY",
1434                              "CYRILLIC SUPPLEMENT",
1435                              "CYRILLICSUPPLEMENT");
1436 
1437         /**
1438          * Constant for the "Tagalog" Unicode character block.
1439          * @since 1.5
1440          */
1441         public static final UnicodeBlock TAGALOG =
1442             new UnicodeBlock("TAGALOG");
1443 
1444         /**
1445          * Constant for the "Hanunoo" Unicode character block.
1446          * @since 1.5
1447          */
1448         public static final UnicodeBlock HANUNOO =
1449             new UnicodeBlock("HANUNOO");
1450 
1451         /**
1452          * Constant for the "Buhid" Unicode character block.
1453          * @since 1.5
1454          */
1455         public static final UnicodeBlock BUHID =
1456             new UnicodeBlock("BUHID");
1457 
1458         /**
1459          * Constant for the "Tagbanwa" Unicode character block.
1460          * @since 1.5
1461          */
1462         public static final UnicodeBlock TAGBANWA =
1463             new UnicodeBlock("TAGBANWA");
1464 
1465         /**
1466          * Constant for the "Limbu" Unicode character block.
1467          * @since 1.5
1468          */
1469         public static final UnicodeBlock LIMBU =
1470             new UnicodeBlock("LIMBU");
1471 
1472         /**
1473          * Constant for the "Tai Le" Unicode character block.
1474          * @since 1.5
1475          */
1476         public static final UnicodeBlock TAI_LE =
1477             new UnicodeBlock("TAI_LE",
1478                              "TAI LE",
1479                              "TAILE");
1480 
1481         /**
1482          * Constant for the "Khmer Symbols" Unicode character block.
1483          * @since 1.5
1484          */
1485         public static final UnicodeBlock KHMER_SYMBOLS =
1486             new UnicodeBlock("KHMER_SYMBOLS",
1487                              "KHMER SYMBOLS",
1488                              "KHMERSYMBOLS");
1489 
1490         /**
1491          * Constant for the "Phonetic Extensions" Unicode character block.
1492          * @since 1.5
1493          */
1494         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1495             new UnicodeBlock("PHONETIC_EXTENSIONS",
1496                              "PHONETIC EXTENSIONS",
1497                              "PHONETICEXTENSIONS");
1498 
1499         /**
1500          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1501          * @since 1.5
1502          */
1503         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1504             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1505                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1506                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1507 
1508         /**
1509          * Constant for the "Supplemental Arrows-A" Unicode character block.
1510          * @since 1.5
1511          */
1512         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1513             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1514                              "SUPPLEMENTAL ARROWS-A",
1515                              "SUPPLEMENTALARROWS-A");
1516 
1517         /**
1518          * Constant for the "Supplemental Arrows-B" Unicode character block.
1519          * @since 1.5
1520          */
1521         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1522             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1523                              "SUPPLEMENTAL ARROWS-B",
1524                              "SUPPLEMENTALARROWS-B");
1525 
1526         /**
1527          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1528          * character block.
1529          * @since 1.5
1530          */
1531         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1532             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1533                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1534                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1535 
1536         /**
1537          * Constant for the "Supplemental Mathematical Operators" Unicode
1538          * character block.
1539          * @since 1.5
1540          */
1541         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1542             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1543                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1544                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1545 
1546         /**
1547          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1548          * block.
1549          * @since 1.5
1550          */
1551         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1552             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1553                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1554                              "MISCELLANEOUSSYMBOLSANDARROWS");
1555 
1556         /**
1557          * Constant for the "Katakana Phonetic Extensions" Unicode character
1558          * block.
1559          * @since 1.5
1560          */
1561         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1562             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1563                              "KATAKANA PHONETIC EXTENSIONS",
1564                              "KATAKANAPHONETICEXTENSIONS");
1565 
1566         /**
1567          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1568          * @since 1.5
1569          */
1570         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1571             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1572                              "YIJING HEXAGRAM SYMBOLS",
1573                              "YIJINGHEXAGRAMSYMBOLS");
1574 
1575         /**
1576          * Constant for the "Variation Selectors" Unicode character block.
1577          * @since 1.5
1578          */
1579         public static final UnicodeBlock VARIATION_SELECTORS =
1580             new UnicodeBlock("VARIATION_SELECTORS",
1581                              "VARIATION SELECTORS",
1582                              "VARIATIONSELECTORS");
1583 
1584         /**
1585          * Constant for the "Linear B Syllabary" Unicode character block.
1586          * @since 1.5
1587          */
1588         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1589             new UnicodeBlock("LINEAR_B_SYLLABARY",
1590                              "LINEAR B SYLLABARY",
1591                              "LINEARBSYLLABARY");
1592 
1593         /**
1594          * Constant for the "Linear B Ideograms" Unicode character block.
1595          * @since 1.5
1596          */
1597         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1598             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1599                              "LINEAR B IDEOGRAMS",
1600                              "LINEARBIDEOGRAMS");
1601 
1602         /**
1603          * Constant for the "Aegean Numbers" Unicode character block.
1604          * @since 1.5
1605          */
1606         public static final UnicodeBlock AEGEAN_NUMBERS =
1607             new UnicodeBlock("AEGEAN_NUMBERS",
1608                              "AEGEAN NUMBERS",
1609                              "AEGEANNUMBERS");
1610 
1611         /**
1612          * Constant for the "Old Italic" Unicode character block.
1613          * @since 1.5
1614          */
1615         public static final UnicodeBlock OLD_ITALIC =
1616             new UnicodeBlock("OLD_ITALIC",
1617                              "OLD ITALIC",
1618                              "OLDITALIC");
1619 
1620         /**
1621          * Constant for the "Gothic" Unicode character block.
1622          * @since 1.5
1623          */
1624         public static final UnicodeBlock GOTHIC =
1625             new UnicodeBlock("GOTHIC");
1626 
1627         /**
1628          * Constant for the "Ugaritic" Unicode character block.
1629          * @since 1.5
1630          */
1631         public static final UnicodeBlock UGARITIC =
1632             new UnicodeBlock("UGARITIC");
1633 
1634         /**
1635          * Constant for the "Deseret" Unicode character block.
1636          * @since 1.5
1637          */
1638         public static final UnicodeBlock DESERET =
1639             new UnicodeBlock("DESERET");
1640 
1641         /**
1642          * Constant for the "Shavian" Unicode character block.
1643          * @since 1.5
1644          */
1645         public static final UnicodeBlock SHAVIAN =
1646             new UnicodeBlock("SHAVIAN");  // one-word block name: only spelling passed
1647 
1648         /**
1649          * Constant for the "Osmanya" Unicode character block.
1650          * @since 1.5
1651          */
1652         public static final UnicodeBlock OSMANYA =
1653             new UnicodeBlock("OSMANYA");  // one-word block name: only spelling passed
1654 
1655         /**
1656          * Constant for the "Cypriot Syllabary" Unicode character block.
1657          * @since 1.5
1658          */
1659         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1660             new UnicodeBlock("CYPRIOT_SYLLABARY",  // same spelling as the field name
1661                              "CYPRIOT SYLLABARY",  // block name in upper case
1662                              "CYPRIOTSYLLABARY");  // block name with spaces removed
1663 
1664         /**
1665          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1666          * @since 1.5
1667          */
1668         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1669             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",  // same spelling as the field name
1670                              "BYZANTINE MUSICAL SYMBOLS",  // block name in upper case
1671                              "BYZANTINEMUSICALSYMBOLS");  // block name with spaces removed
1672 
1673         /**
1674          * Constant for the "Musical Symbols" Unicode character block.
1675          * @since 1.5
1676          */
1677         public static final UnicodeBlock MUSICAL_SYMBOLS =
1678             new UnicodeBlock("MUSICAL_SYMBOLS",  // same spelling as the field name
1679                              "MUSICAL SYMBOLS",  // block name in upper case
1680                              "MUSICALSYMBOLS");  // block name with spaces removed
1681 
1682         /**
1683          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1684          * @since 1.5
1685          */
1686         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1687             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",  // same spelling as the field name
1688                              "TAI XUAN JING SYMBOLS",  // block name in upper case
1689                              "TAIXUANJINGSYMBOLS");  // block name with spaces removed
1690 
1691         /**
1692          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1693          * character block.
1694          * @since 1.5
1695          */
1696         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1697             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",  // same spelling as the field name
1698                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",  // block name in upper case
1699                              "MATHEMATICALALPHANUMERICSYMBOLS");  // block name with spaces removed
1700 
1701         /**
1702          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1703          * character block.
1704          * @since 1.5
1705          */
1706         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1707             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",  // same spelling as the field name
1708                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",  // block name in upper case
1709                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");  // block name with spaces removed
1710 
1711         /**
1712          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1713          * @since 1.5
1714          */
1715         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1716             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",  // same spelling as the field name
1717                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",  // block name in upper case
1718                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");  // block name with spaces removed
1719 
1720         /**
1721          * Constant for the "Tags" Unicode character block.
1722          * @since 1.5
1723          */
1724         public static final UnicodeBlock TAGS =
1725             new UnicodeBlock("TAGS");  // one-word block name: only spelling passed
1726 
1727         /**
1728          * Constant for the "Variation Selectors Supplement" Unicode character
1729          * block.
1730          * @since 1.5
1731          */
1732         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1733             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",  // same spelling as the field name
1734                              "VARIATION SELECTORS SUPPLEMENT",  // block name in upper case
1735                              "VARIATIONSELECTORSSUPPLEMENT");  // block name with spaces removed
1736 
1737         /**
1738          * Constant for the "Supplementary Private Use Area-A" Unicode character
1739          * block.
1740          * @since 1.5
1741          */
1742         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1743             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",  // same spelling as the field name
1744                              "SUPPLEMENTARY PRIVATE USE AREA-A",  // block name in upper case, hyphen kept
1745                              "SUPPLEMENTARYPRIVATEUSEAREA-A");  // spaces removed, hyphen kept
1746 
1747         /**
1748          * Constant for the "Supplementary Private Use Area-B" Unicode character
1749          * block.
1750          * @since 1.5
1751          */
1752         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1753             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",  // same spelling as the field name
1754                              "SUPPLEMENTARY PRIVATE USE AREA-B",  // block name in upper case, hyphen kept
1755                              "SUPPLEMENTARYPRIVATEUSEAREA-B");  // spaces removed, hyphen kept
1756 
1757         /**
1758          * Constant for the "High Surrogates" Unicode character block.
1759          * This block represents code point values in the high surrogate
1760          * range: U+D800 through U+DB7F.
1761          *
1762          * @since 1.5
1763          */
1764         public static final UnicodeBlock HIGH_SURROGATES =
1765             new UnicodeBlock("HIGH_SURROGATES",  // same spelling as the field name
1766                              "HIGH SURROGATES",  // block name in upper case
1767                              "HIGHSURROGATES");  // block name with spaces removed
1768 
1769         /**
1770          * Constant for the "High Private Use Surrogates" Unicode character
1771          * block.
1772          * This block represents code point values in the private use high
1773          * surrogate range: U+DB80 through U+DBFF.
1774          *
1775          * @since 1.5
1776          */
1777         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1778             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",  // same spelling as the field name
1779                              "HIGH PRIVATE USE SURROGATES",  // block name in upper case
1780                              "HIGHPRIVATEUSESURROGATES");  // block name with spaces removed
1781 
1782         /**
1783          * Constant for the "Low Surrogates" Unicode character block.
1784          * This block represents code point values in the low surrogate
1785          * range: U+DC00 through U+DFFF.
1786          *
1787          * @since 1.5
1788          */
1789         public static final UnicodeBlock LOW_SURROGATES =
1790             new UnicodeBlock("LOW_SURROGATES",  // same spelling as the field name
1791                              "LOW SURROGATES",  // block name in upper case
1792                              "LOWSURROGATES");  // block name with spaces removed
1793 
1794         /**
1795          * Constant for the "Arabic Supplement" Unicode character block.
1796          * @since 1.7
1797          */
1798         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1799             new UnicodeBlock("ARABIC_SUPPLEMENT",  // same spelling as the field name
1800                              "ARABIC SUPPLEMENT",  // block name in upper case
1801                              "ARABICSUPPLEMENT");  // block name with spaces removed
1802 
1803         /**
1804          * Constant for the "NKo" Unicode character block.
1805          * @since 1.7
1806          */
1807         public static final UnicodeBlock NKO =
1808             new UnicodeBlock("NKO");  // one-word block name: only spelling passed
1809 
1810         /**
1811          * Constant for the "Samaritan" Unicode character block.
1812          * @since 1.7
1813          */
1814         public static final UnicodeBlock SAMARITAN =
1815             new UnicodeBlock("SAMARITAN");  // one-word block name: only spelling passed
1816 
1817         /**
1818          * Constant for the "Mandaic" Unicode character block.
1819          * @since 1.7
1820          */
1821         public static final UnicodeBlock MANDAIC =
1822             new UnicodeBlock("MANDAIC");  // one-word block name: only spelling passed
1823 
1824         /**
1825          * Constant for the "Ethiopic Supplement" Unicode character block.
1826          * @since 1.7
1827          */
1828         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1829             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",  // same spelling as the field name
1830                              "ETHIOPIC SUPPLEMENT",  // block name in upper case
1831                              "ETHIOPICSUPPLEMENT");  // block name with spaces removed
1832 
1833         /**
1834          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1835          * Unicode character block.
1836          * @since 1.7
1837          */
1838         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1839             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",  // same spelling as the field name
1840                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",  // block name in upper case
1841                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");  // block name with spaces removed
1842 
1843         /**
1844          * Constant for the "New Tai Lue" Unicode character block.
1845          * @since 1.7
1846          */
1847         public static final UnicodeBlock NEW_TAI_LUE =
1848             new UnicodeBlock("NEW_TAI_LUE",  // same spelling as the field name
1849                              "NEW TAI LUE",  // block name in upper case
1850                              "NEWTAILUE");  // block name with spaces removed
1851 
1852         /**
1853          * Constant for the "Buginese" Unicode character block.
1854          * @since 1.7
1855          */
1856         public static final UnicodeBlock BUGINESE =
1857             new UnicodeBlock("BUGINESE");  // one-word block name: only spelling passed
1858 
1859         /**
1860          * Constant for the "Tai Tham" Unicode character block.
1861          * @since 1.7
1862          */
1863         public static final UnicodeBlock TAI_THAM =
1864             new UnicodeBlock("TAI_THAM",  // same spelling as the field name
1865                              "TAI THAM",  // block name in upper case
1866                              "TAITHAM");  // block name with spaces removed
1867 
1868         /**
1869          * Constant for the "Balinese" Unicode character block.
1870          * @since 1.7
1871          */
1872         public static final UnicodeBlock BALINESE =
1873             new UnicodeBlock("BALINESE");  // one-word block name: only spelling passed
1874 
1875         /**
1876          * Constant for the "Sundanese" Unicode character block.
1877          * @since 1.7
1878          */
1879         public static final UnicodeBlock SUNDANESE =
1880             new UnicodeBlock("SUNDANESE");  // one-word block name: only spelling passed
1881 
1882         /**
1883          * Constant for the "Batak" Unicode character block.
1884          * @since 1.7
1885          */
1886         public static final UnicodeBlock BATAK =
1887             new UnicodeBlock("BATAK");  // one-word block name: only spelling passed
1888 
1889         /**
1890          * Constant for the "Lepcha" Unicode character block.
1891          * @since 1.7
1892          */
1893         public static final UnicodeBlock LEPCHA =
1894             new UnicodeBlock("LEPCHA");  // one-word block name: only spelling passed
1895 
1896         /**
1897          * Constant for the "Ol Chiki" Unicode character block.
1898          * @since 1.7
1899          */
1900         public static final UnicodeBlock OL_CHIKI =
1901             new UnicodeBlock("OL_CHIKI",  // same spelling as the field name
1902                              "OL CHIKI",  // block name in upper case
1903                              "OLCHIKI");  // block name with spaces removed
1904 
1905         /**
1906          * Constant for the "Vedic Extensions" Unicode character block.
1907          * @since 1.7
1908          */
1909         public static final UnicodeBlock VEDIC_EXTENSIONS =
1910             new UnicodeBlock("VEDIC_EXTENSIONS",  // same spelling as the field name
1911                              "VEDIC EXTENSIONS",  // block name in upper case
1912                              "VEDICEXTENSIONS");  // block name with spaces removed
1913 
1914         /**
1915          * Constant for the "Phonetic Extensions Supplement" Unicode character
1916          * block.
1917          * @since 1.7
1918          */
1919         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1920             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",  // same spelling as the field name
1921                              "PHONETIC EXTENSIONS SUPPLEMENT",  // block name in upper case
1922                              "PHONETICEXTENSIONSSUPPLEMENT");  // block name with spaces removed
1923 
1924         /**
1925          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1926          * character block.
1927          * @since 1.7
1928          */
1929         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1930             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",  // same spelling as the field name
1931                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",  // block name in upper case
1932                              "COMBININGDIACRITICALMARKSSUPPLEMENT");  // block name with spaces removed
1933 
1934         /**
1935          * Constant for the "Glagolitic" Unicode character block.
1936          * @since 1.7
1937          */
1938         public static final UnicodeBlock GLAGOLITIC =
1939             new UnicodeBlock("GLAGOLITIC");  // one-word block name: only spelling passed
1940 
1941         /**
1942          * Constant for the "Latin Extended-C" Unicode character block.
1943          * @since 1.7
1944          */
1945         public static final UnicodeBlock LATIN_EXTENDED_C =
1946             new UnicodeBlock("LATIN_EXTENDED_C",  // same spelling as the field name
1947                              "LATIN EXTENDED-C",  // block name in upper case, hyphen kept
1948                              "LATINEXTENDED-C");  // spaces removed, hyphen kept
1949 
1950         /**
1951          * Constant for the "Coptic" Unicode character block.
1952          * @since 1.7
1953          */
1954         public static final UnicodeBlock COPTIC =
1955             new UnicodeBlock("COPTIC");  // one-word block name: only spelling passed
1956 
1957         /**
1958          * Constant for the "Georgian Supplement" Unicode character block.
1959          * @since 1.7
1960          */
1961         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1962             new UnicodeBlock("GEORGIAN_SUPPLEMENT",  // same spelling as the field name
1963                              "GEORGIAN SUPPLEMENT",  // block name in upper case
1964                              "GEORGIANSUPPLEMENT");  // block name with spaces removed
1965 
1966         /**
1967          * Constant for the "Tifinagh" Unicode character block.
1968          * @since 1.7
1969          */
1970         public static final UnicodeBlock TIFINAGH =
1971             new UnicodeBlock("TIFINAGH");  // one-word block name: only spelling passed
1972 
1973         /**
1974          * Constant for the "Ethiopic Extended" Unicode character block.
1975          * @since 1.7
1976          */
1977         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1978             new UnicodeBlock("ETHIOPIC_EXTENDED",  // same spelling as the field name
1979                              "ETHIOPIC EXTENDED",  // block name in upper case
1980                              "ETHIOPICEXTENDED");  // block name with spaces removed
1981 
1982         /**
1983          * Constant for the "Cyrillic Extended-A" Unicode character block.
1984          * @since 1.7
1985          */
1986         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1987             new UnicodeBlock("CYRILLIC_EXTENDED_A",  // same spelling as the field name
1988                              "CYRILLIC EXTENDED-A",  // block name in upper case, hyphen kept
1989                              "CYRILLICEXTENDED-A");  // spaces removed, hyphen kept
1990 
1991         /**
1992          * Constant for the "Supplemental Punctuation" Unicode character block.
1993          * @since 1.7
1994          */
1995         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1996             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",  // same spelling as the field name
1997                              "SUPPLEMENTAL PUNCTUATION",  // block name in upper case
1998                              "SUPPLEMENTALPUNCTUATION");  // block name with spaces removed
1999 
2000         /**
2001          * Constant for the "CJK Strokes" Unicode character block.
2002          * @since 1.7
2003          */
2004         public static final UnicodeBlock CJK_STROKES =
2005             new UnicodeBlock("CJK_STROKES",  // same spelling as the field name
2006                              "CJK STROKES",  // block name in upper case
2007                              "CJKSTROKES");  // block name with spaces removed
2008 
2009         /**
2010          * Constant for the "Lisu" Unicode character block.
2011          * @since 1.7
2012          */
2013         public static final UnicodeBlock LISU =
2014             new UnicodeBlock("LISU");  // one-word block name: only spelling passed
2015 
2016         /**
2017          * Constant for the "Vai" Unicode character block.
2018          * @since 1.7
2019          */
2020         public static final UnicodeBlock VAI =
2021             new UnicodeBlock("VAI");  // one-word block name: only spelling passed
2022 
2023         /**
2024          * Constant for the "Cyrillic Extended-B" Unicode character block.
2025          * @since 1.7
2026          */
2027         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2028             new UnicodeBlock("CYRILLIC_EXTENDED_B",  // same spelling as the field name
2029                              "CYRILLIC EXTENDED-B",  // block name in upper case, hyphen kept
2030                              "CYRILLICEXTENDED-B");  // spaces removed, hyphen kept
2031 
2032         /**
2033          * Constant for the "Bamum" Unicode character block.
2034          * @since 1.7
2035          */
2036         public static final UnicodeBlock BAMUM =
2037             new UnicodeBlock("BAMUM");  // one-word block name: only spelling passed
2038 
2039         /**
2040          * Constant for the "Modifier Tone Letters" Unicode character block.
2041          * @since 1.7
2042          */
2043         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2044             new UnicodeBlock("MODIFIER_TONE_LETTERS",  // same spelling as the field name
2045                              "MODIFIER TONE LETTERS",  // block name in upper case
2046                              "MODIFIERTONELETTERS");  // block name with spaces removed
2047 
2048         /**
2049          * Constant for the "Latin Extended-D" Unicode character block.
2050          * @since 1.7
2051          */
2052         public static final UnicodeBlock LATIN_EXTENDED_D =
2053             new UnicodeBlock("LATIN_EXTENDED_D",  // same spelling as the field name
2054                              "LATIN EXTENDED-D",  // block name in upper case, hyphen kept
2055                              "LATINEXTENDED-D");  // spaces removed, hyphen kept
2056 
2057         /**
2058          * Constant for the "Syloti Nagri" Unicode character block.
2059          * @since 1.7
2060          */
2061         public static final UnicodeBlock SYLOTI_NAGRI =
2062             new UnicodeBlock("SYLOTI_NAGRI",  // same spelling as the field name
2063                              "SYLOTI NAGRI",  // block name in upper case
2064                              "SYLOTINAGRI");  // block name with spaces removed
2065 
2066         /**
2067          * Constant for the "Common Indic Number Forms" Unicode character block.
2068          * @since 1.7
2069          */
2070         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2071             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",  // same spelling as the field name
2072                              "COMMON INDIC NUMBER FORMS",  // block name in upper case
2073                              "COMMONINDICNUMBERFORMS");  // block name with spaces removed
2074 
2075         /**
2076          * Constant for the "Phags-pa" Unicode character block.
2077          * @since 1.7
2078          */
2079         public static final UnicodeBlock PHAGS_PA =
2080             new UnicodeBlock("PHAGS_PA",  // same spelling as the field name
2081                              "PHAGS-PA");  // block name in upper case, hyphen kept; it contains no spaces
2082 
2083         /**
2084          * Constant for the "Saurashtra" Unicode character block.
2085          * @since 1.7
2086          */
2087         public static final UnicodeBlock SAURASHTRA =
2088             new UnicodeBlock("SAURASHTRA");  // one-word block name: only spelling passed
2089 
2090         /**
2091          * Constant for the "Devanagari Extended" Unicode character block.
2092          * @since 1.7
2093          */
2094         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2095             new UnicodeBlock("DEVANAGARI_EXTENDED",  // same spelling as the field name
2096                              "DEVANAGARI EXTENDED",  // block name in upper case
2097                              "DEVANAGARIEXTENDED");  // block name with spaces removed
2098 
2099         /**
2100          * Constant for the "Kayah Li" Unicode character block.
2101          * @since 1.7
2102          */
2103         public static final UnicodeBlock KAYAH_LI =
2104             new UnicodeBlock("KAYAH_LI",  // same spelling as the field name
2105                              "KAYAH LI",  // block name in upper case
2106                              "KAYAHLI");  // block name with spaces removed
2107 
2108         /**
2109          * Constant for the "Rejang" Unicode character block.
2110          * @since 1.7
2111          */
2112         public static final UnicodeBlock REJANG =
2113             new UnicodeBlock("REJANG");  // one-word block name: only spelling passed
2114 
2115         /**
2116          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2117          * @since 1.7
2118          */
2119         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2120             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",  // same spelling as the field name
2121                              "HANGUL JAMO EXTENDED-A",  // block name in upper case, hyphen kept
2122                              "HANGULJAMOEXTENDED-A");  // spaces removed, hyphen kept
2123 
2124         /**
2125          * Constant for the "Javanese" Unicode character block.
2126          * @since 1.7
2127          */
2128         public static final UnicodeBlock JAVANESE =
2129             new UnicodeBlock("JAVANESE");  // one-word block name: only spelling passed
2130 
2131         /**
2132          * Constant for the "Cham" Unicode character block.
2133          * @since 1.7
2134          */
2135         public static final UnicodeBlock CHAM =
2136             new UnicodeBlock("CHAM");  // one-word block name: only spelling passed
2137 
2138         /**
2139          * Constant for the "Myanmar Extended-A" Unicode character block.
2140          * @since 1.7
2141          */
2142         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2143             new UnicodeBlock("MYANMAR_EXTENDED_A",  // same spelling as the field name
2144                              "MYANMAR EXTENDED-A",  // block name in upper case, hyphen kept
2145                              "MYANMAREXTENDED-A");  // spaces removed, hyphen kept
2146 
2147         /**
2148          * Constant for the "Tai Viet" Unicode character block.
2149          * @since 1.7
2150          */
2151         public static final UnicodeBlock TAI_VIET =
2152             new UnicodeBlock("TAI_VIET",  // same spelling as the field name
2153                              "TAI VIET",  // block name in upper case
2154                              "TAIVIET");  // block name with spaces removed
2155 
2156         /**
2157          * Constant for the "Ethiopic Extended-A" Unicode character block.
2158          * @since 1.7
2159          */
2160         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2161             new UnicodeBlock("ETHIOPIC_EXTENDED_A",  // same spelling as the field name
2162                              "ETHIOPIC EXTENDED-A",  // block name in upper case, hyphen kept
2163                              "ETHIOPICEXTENDED-A");  // spaces removed, hyphen kept
2164 
2165         /**
2166          * Constant for the "Meetei Mayek" Unicode character block.
2167          * @since 1.7
2168          */
2169         public static final UnicodeBlock MEETEI_MAYEK =
2170             new UnicodeBlock("MEETEI_MAYEK",  // same spelling as the field name
2171                              "MEETEI MAYEK",  // block name in upper case
2172                              "MEETEIMAYEK");  // block name with spaces removed
2173 
2174         /**
2175          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2176          * @since 1.7
2177          */
2178         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2179             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",  // same spelling as the field name
2180                              "HANGUL JAMO EXTENDED-B",  // block name in upper case, hyphen kept
2181                              "HANGULJAMOEXTENDED-B");  // spaces removed, hyphen kept
2182 
2183         /**
2184          * Constant for the "Vertical Forms" Unicode character block.
2185          * @since 1.7
2186          */
2187         public static final UnicodeBlock VERTICAL_FORMS =
2188             new UnicodeBlock("VERTICAL_FORMS",  // same spelling as the field name
2189                              "VERTICAL FORMS",  // block name in upper case
2190                              "VERTICALFORMS");  // block name with spaces removed
2191 
2192         /**
2193          * Constant for the "Ancient Greek Numbers" Unicode character block.
2194          * @since 1.7
2195          */
2196         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2197             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",  // same spelling as the field name
2198                              "ANCIENT GREEK NUMBERS",  // block name in upper case
2199                              "ANCIENTGREEKNUMBERS");  // block name with spaces removed
2200 
2201         /**
2202          * Constant for the "Ancient Symbols" Unicode character block.
2203          * @since 1.7
2204          */
2205         public static final UnicodeBlock ANCIENT_SYMBOLS =
2206             new UnicodeBlock("ANCIENT_SYMBOLS",  // same spelling as the field name
2207                              "ANCIENT SYMBOLS",  // block name in upper case
2208                              "ANCIENTSYMBOLS");  // block name with spaces removed
2209 
2210         /**
2211          * Constant for the "Phaistos Disc" Unicode character block.
2212          * @since 1.7
2213          */
2214         public static final UnicodeBlock PHAISTOS_DISC =
2215             new UnicodeBlock("PHAISTOS_DISC",  // same spelling as the field name
2216                              "PHAISTOS DISC",  // block name in upper case
2217                              "PHAISTOSDISC");  // block name with spaces removed
2218 
2219         /**
2220          * Constant for the "Lycian" Unicode character block.
2221          * @since 1.7
2222          */
2223         public static final UnicodeBlock LYCIAN =
2224             new UnicodeBlock("LYCIAN");  // one-word block name: only spelling passed
2225 
2226         /**
2227          * Constant for the "Carian" Unicode character block.
2228          * @since 1.7
2229          */
2230         public static final UnicodeBlock CARIAN =
2231             new UnicodeBlock("CARIAN");  // one-word block name: only spelling passed
2232 
2233         /**
2234          * Constant for the "Old Persian" Unicode character block.
2235          * @since 1.7
2236          */
2237         public static final UnicodeBlock OLD_PERSIAN =
2238             new UnicodeBlock("OLD_PERSIAN",  // same spelling as the field name
2239                              "OLD PERSIAN",  // block name in upper case
2240                              "OLDPERSIAN");  // block name with spaces removed
2241 
2242         /**
2243          * Constant for the "Imperial Aramaic" Unicode character block.
2244          * @since 1.7
2245          */
2246         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2247             new UnicodeBlock("IMPERIAL_ARAMAIC",  // same spelling as the field name
2248                              "IMPERIAL ARAMAIC",  // block name in upper case
2249                              "IMPERIALARAMAIC");  // block name with spaces removed
2250 
2251         /**
2252          * Constant for the "Phoenician" Unicode character block.
2253          * @since 1.7
2254          */
2255         public static final UnicodeBlock PHOENICIAN =
2256             new UnicodeBlock("PHOENICIAN");  // one-word block name: only spelling passed
2257 
2258         /**
2259          * Constant for the "Lydian" Unicode character block.
2260          * @since 1.7
2261          */
2262         public static final UnicodeBlock LYDIAN =
2263             new UnicodeBlock("LYDIAN");  // one-word block name: only spelling passed
2264 
2265         /**
2266          * Constant for the "Kharoshthi" Unicode character block.
2267          * @since 1.7
2268          */
2269         public static final UnicodeBlock KHAROSHTHI =
2270             new UnicodeBlock("KHAROSHTHI");  // one-word block name: only spelling passed
2271 
2272         /**
2273          * Constant for the "Old South Arabian" Unicode character block.
2274          * @since 1.7
2275          */
2276         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2277             new UnicodeBlock("OLD_SOUTH_ARABIAN",  // same spelling as the field name
2278                              "OLD SOUTH ARABIAN",  // block name in upper case
2279                              "OLDSOUTHARABIAN");  // block name with spaces removed
2280 
2281         /**
2282          * Constant for the "Avestan" Unicode character block.
2283          * @since 1.7
2284          */
2285         public static final UnicodeBlock AVESTAN =
2286             new UnicodeBlock("AVESTAN");  // one-word block name: only spelling passed
2287 
2288         /**
2289          * Constant for the "Inscriptional Parthian" Unicode character block.
2290          * @since 1.7
2291          */
2292         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2293             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",  // same spelling as the field name
2294                              "INSCRIPTIONAL PARTHIAN",  // block name in upper case
2295                              "INSCRIPTIONALPARTHIAN");  // block name with spaces removed
2296 
2297         /**
2298          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2299          * @since 1.7
2300          */
2301         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2302             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",  // same spelling as the field name
2303                              "INSCRIPTIONAL PAHLAVI",  // block name in upper case
2304                              "INSCRIPTIONALPAHLAVI");  // block name with spaces removed
2305 
2306         /**
2307          * Constant for the "Old Turkic" Unicode character block.
2308          * @since 1.7
2309          */
2310         public static final UnicodeBlock OLD_TURKIC =
2311             new UnicodeBlock("OLD_TURKIC",  // same spelling as the field name
2312                              "OLD TURKIC",  // block name in upper case
2313                              "OLDTURKIC");  // block name with spaces removed
2314 
2315         /**
2316          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2317          * @since 1.7
2318          */
2319         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2320             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",  // same spelling as the field name
2321                              "RUMI NUMERAL SYMBOLS",  // block name in upper case
2322                              "RUMINUMERALSYMBOLS");  // block name with spaces removed
2323 
2324         /**
2325          * Constant for the "Brahmi" Unicode character block.
2326          * @since 1.7
2327          */
2328         public static final UnicodeBlock BRAHMI =
2329             new UnicodeBlock("BRAHMI");  // one-word block name: only spelling passed
2330 
2331         /**
2332          * Constant for the "Kaithi" Unicode character block.
2333          * @since 1.7
2334          */
2335         public static final UnicodeBlock KAITHI =
2336             new UnicodeBlock("KAITHI");  // one-word block name: only spelling passed
2337 
2338         /**
2339          * Constant for the "Cuneiform" Unicode character block.
2340          * @since 1.7
2341          */
2342         public static final UnicodeBlock CUNEIFORM =
2343             new UnicodeBlock("CUNEIFORM");  // one-word block name: only spelling passed
2344 
2345         /**
2346          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2347          * character block.
2348          * @since 1.7
2349          */
2350         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2351             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",  // same spelling as the field name
2352                              "CUNEIFORM NUMBERS AND PUNCTUATION",  // block name in upper case
2353                              "CUNEIFORMNUMBERSANDPUNCTUATION");  // block name with spaces removed
2354 
2355         /**
2356          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2357          * @since 1.7
2358          */
2359         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2360             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",  // same spelling as the field name
2361                              "EGYPTIAN HIEROGLYPHS",  // block name in upper case
2362                              "EGYPTIANHIEROGLYPHS");  // block name with spaces removed
2363 
2364         /**
2365          * Constant for the "Bamum Supplement" Unicode character block.
2366          * @since 1.7
2367          */
2368         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2369             new UnicodeBlock("BAMUM_SUPPLEMENT",  // same spelling as the field name
2370                              "BAMUM SUPPLEMENT",  // block name in upper case
2371                              "BAMUMSUPPLEMENT");  // block name with spaces removed
2372 
2373         /**
2374          * Constant for the "Kana Supplement" Unicode character block.
2375          * @since 1.7
2376          */
2377         public static final UnicodeBlock KANA_SUPPLEMENT =
2378             new UnicodeBlock("KANA_SUPPLEMENT",  // same spelling as the field name
2379                              "KANA SUPPLEMENT",  // block name in upper case
2380                              "KANASUPPLEMENT");  // block name with spaces removed
2381 
2382         /**
2383          * Constant for the "Ancient Greek Musical Notation" Unicode character
2384          * block.
2385          * @since 1.7
2386          */
2387         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2388             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",  // same spelling as the field name
2389                              "ANCIENT GREEK MUSICAL NOTATION",  // block name in upper case
2390                              "ANCIENTGREEKMUSICALNOTATION");  // block name with spaces removed
2391 
2392         /**
2393          * Constant for the "Counting Rod Numerals" Unicode character block.
2394          * @since 1.7
2395          */
2396         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2397             new UnicodeBlock("COUNTING_ROD_NUMERALS",  // same spelling as the field name
2398                              "COUNTING ROD NUMERALS",  // block name in upper case
2399                              "COUNTINGRODNUMERALS");  // block name with spaces removed
2400 
2401         /**
2402          * Constant for the "Mahjong Tiles" Unicode character block.
2403          * @since 1.7
2404          */
2405         public static final UnicodeBlock MAHJONG_TILES =
2406             new UnicodeBlock("MAHJONG_TILES",  // same spelling as the field name
2407                              "MAHJONG TILES",  // block name in upper case
2408                              "MAHJONGTILES");  // block name with spaces removed
2409 
2410         /**
2411          * Constant for the "Domino Tiles" Unicode character block.
2412          * @since 1.7
2413          */
2414         public static final UnicodeBlock DOMINO_TILES =
2415             new UnicodeBlock("DOMINO_TILES",  // same spelling as the field name
2416                              "DOMINO TILES",  // block name in upper case
2417                              "DOMINOTILES");  // block name with spaces removed
2418 
2419         /**
2420          * Constant for the "Playing Cards" Unicode character block.
2421          * @since 1.7
2422          */
2423         public static final UnicodeBlock PLAYING_CARDS =
2424             new UnicodeBlock("PLAYING_CARDS",  // same spelling as the field name
2425                              "PLAYING CARDS",  // block name in upper case
2426                              "PLAYINGCARDS");  // block name with spaces removed
2427 
2428         /**
2429          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2430          * block.
2431          * @since 1.7
2432          */
2433         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2434             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",  // same spelling as the field name
2435                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",  // block name in upper case
2436                              "ENCLOSEDALPHANUMERICSUPPLEMENT");  // block name with spaces removed
2437 
2438         /**
2439          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2440          * block.
2441          * @since 1.7
2442          */
2443         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2444             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2445                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",
2446                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");
2447 
2448         /**
2449          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2450          * character block.
2451          * @since 1.7
2452          */
2453         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2454             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2455                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2456                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2457 
2458         /**
2459          * Constant for the "Emoticons" Unicode character block.
2460          * @since 1.7
2461          */
2462         public static final UnicodeBlock EMOTICONS =
2463             new UnicodeBlock("EMOTICONS");
2464 
2465         /**
2466          * Constant for the "Transport And Map Symbols" Unicode character block.
2467          * @since 1.7
2468          */
2469         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2470             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2471                              "TRANSPORT AND MAP SYMBOLS",
2472                              "TRANSPORTANDMAPSYMBOLS");
2473 
2474         /**
2475          * Constant for the "Alchemical Symbols" Unicode character block.
2476          * @since 1.7
2477          */
2478         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2479             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2480                              "ALCHEMICAL SYMBOLS",
2481                              "ALCHEMICALSYMBOLS");
2482 
2483         /**
2484          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2485          * character block.
2486          * @since 1.7
2487          */
2488         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2489             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2490                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2491                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2492 
2493         /**
2494          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2495          * character block.
2496          * @since 1.7
2497          */
2498         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2499             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2500                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2501                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2502 
2503         /**
2504          * Constant for the "Arabic Extended-A" Unicode character block.
2505          * @since 1.8
2506          */
2507         public static final UnicodeBlock ARABIC_EXTENDED_A =
2508             new UnicodeBlock("ARABIC_EXTENDED_A",
2509                              "ARABIC EXTENDED-A",
2510                              "ARABICEXTENDED-A");
2511 
2512         /**
2513          * Constant for the "Sundanese Supplement" Unicode character block.
2514          * @since 1.8
2515          */
2516         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2517             new UnicodeBlock("SUNDANESE_SUPPLEMENT",
2518                              "SUNDANESE SUPPLEMENT",
2519                              "SUNDANESESUPPLEMENT");
2520 
2521         /**
2522          * Constant for the "Meetei Mayek Extensions" Unicode character block.
2523          * @since 1.8
2524          */
2525         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2526             new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS",
2527                              "MEETEI MAYEK EXTENSIONS",
2528                              "MEETEIMAYEKEXTENSIONS");
2529 
2530         /**
2531          * Constant for the "Meroitic Hieroglyphs" Unicode character block.
2532          * @since 1.8
2533          */
2534         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2535             new UnicodeBlock("MEROITIC_HIEROGLYPHS",
2536                              "MEROITIC HIEROGLYPHS",
2537                              "MEROITICHIEROGLYPHS");
2538 
2539         /**
2540          * Constant for the "Meroitic Cursive" Unicode character block.
2541          * @since 1.8
2542          */
2543         public static final UnicodeBlock MEROITIC_CURSIVE =
2544             new UnicodeBlock("MEROITIC_CURSIVE",
2545                              "MEROITIC CURSIVE",
2546                              "MEROITICCURSIVE");
2547 
2548         /**
2549          * Constant for the "Sora Sompeng" Unicode character block.
2550          * @since 1.8
2551          */
2552         public static final UnicodeBlock SORA_SOMPENG =
2553             new UnicodeBlock("SORA_SOMPENG",
2554                              "SORA SOMPENG",
2555                              "SORASOMPENG");
2556 
2557         /**
2558          * Constant for the "Chakma" Unicode character block.
2559          * @since 1.8
2560          */
2561         public static final UnicodeBlock CHAKMA =
2562             new UnicodeBlock("CHAKMA");
2563 
2564         /**
2565          * Constant for the "Sharada" Unicode character block.
2566          * @since 1.8
2567          */
2568         public static final UnicodeBlock SHARADA =
2569             new UnicodeBlock("SHARADA");
2570 
2571         /**
2572          * Constant for the "Takri" Unicode character block.
2573          * @since 1.8
2574          */
2575         public static final UnicodeBlock TAKRI =
2576             new UnicodeBlock("TAKRI");
2577 
2578         /**
2579          * Constant for the "Miao" Unicode character block.
2580          * @since 1.8
2581          */
2582         public static final UnicodeBlock MIAO =
2583             new UnicodeBlock("MIAO");
2584 
2585         /**
2586          * Constant for the "Arabic Mathematical Alphabetic Symbols" Unicode
2587          * character block.
2588          * @since 1.8
2589          */
2590         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2591             new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS",
2592                              "ARABIC MATHEMATICAL ALPHABETIC SYMBOLS",
2593                              "ARABICMATHEMATICALALPHABETICSYMBOLS");
2594 
2595         private static final int blockStarts[] = {
2596             0x0000,   // 0000..007F; Basic Latin
2597             0x0080,   // 0080..00FF; Latin-1 Supplement
2598             0x0100,   // 0100..017F; Latin Extended-A
2599             0x0180,   // 0180..024F; Latin Extended-B
2600             0x0250,   // 0250..02AF; IPA Extensions
2601             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2602             0x0300,   // 0300..036F; Combining Diacritical Marks
2603             0x0370,   // 0370..03FF; Greek and Coptic
2604             0x0400,   // 0400..04FF; Cyrillic
2605             0x0500,   // 0500..052F; Cyrillic Supplement
2606             0x0530,   // 0530..058F; Armenian
2607             0x0590,   // 0590..05FF; Hebrew
2608             0x0600,   // 0600..06FF; Arabic
2609             0x0700,   // 0700..074F; Syriac
2610             0x0750,   // 0750..077F; Arabic Supplement
2611             0x0780,   // 0780..07BF; Thaana
2612             0x07C0,   // 07C0..07FF; NKo
2613             0x0800,   // 0800..083F; Samaritan
2614             0x0840,   // 0840..085F; Mandaic
2615             0x0860,   //             unassigned
2616             0x08A0,   // 08A0..08FF; Arabic Extended-A
2617             0x0900,   // 0900..097F; Devanagari
2618             0x0980,   // 0980..09FF; Bengali
2619             0x0A00,   // 0A00..0A7F; Gurmukhi
2620             0x0A80,   // 0A80..0AFF; Gujarati
2621             0x0B00,   // 0B00..0B7F; Oriya
2622             0x0B80,   // 0B80..0BFF; Tamil
2623             0x0C00,   // 0C00..0C7F; Telugu
2624             0x0C80,   // 0C80..0CFF; Kannada
2625             0x0D00,   // 0D00..0D7F; Malayalam
2626             0x0D80,   // 0D80..0DFF; Sinhala
2627             0x0E00,   // 0E00..0E7F; Thai
2628             0x0E80,   // 0E80..0EFF; Lao
2629             0x0F00,   // 0F00..0FFF; Tibetan
2630             0x1000,   // 1000..109F; Myanmar
2631             0x10A0,   // 10A0..10FF; Georgian
2632             0x1100,   // 1100..11FF; Hangul Jamo
2633             0x1200,   // 1200..137F; Ethiopic
2634             0x1380,   // 1380..139F; Ethiopic Supplement
2635             0x13A0,   // 13A0..13FF; Cherokee
2636             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2637             0x1680,   // 1680..169F; Ogham
2638             0x16A0,   // 16A0..16FF; Runic
2639             0x1700,   // 1700..171F; Tagalog
2640             0x1720,   // 1720..173F; Hanunoo
2641             0x1740,   // 1740..175F; Buhid
2642             0x1760,   // 1760..177F; Tagbanwa
2643             0x1780,   // 1780..17FF; Khmer
2644             0x1800,   // 1800..18AF; Mongolian
2645             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2646             0x1900,   // 1900..194F; Limbu
2647             0x1950,   // 1950..197F; Tai Le
2648             0x1980,   // 1980..19DF; New Tai Lue
2649             0x19E0,   // 19E0..19FF; Khmer Symbols
2650             0x1A00,   // 1A00..1A1F; Buginese
2651             0x1A20,   // 1A20..1AAF; Tai Tham
2652             0x1AB0,   //             unassigned
2653             0x1B00,   // 1B00..1B7F; Balinese
2654             0x1B80,   // 1B80..1BBF; Sundanese
2655             0x1BC0,   // 1BC0..1BFF; Batak
2656             0x1C00,   // 1C00..1C4F; Lepcha
2657             0x1C50,   // 1C50..1C7F; Ol Chiki
2658             0x1C80,   //             unassigned
2659             0x1CC0,   // 1CC0..1CCF; Sundanese Supplement
2660             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2661             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2662             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2663             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2664             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2665             0x1F00,   // 1F00..1FFF; Greek Extended
2666             0x2000,   // 2000..206F; General Punctuation
2667             0x2070,   // 2070..209F; Superscripts and Subscripts
2668             0x20A0,   // 20A0..20CF; Currency Symbols
2669             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2670             0x2100,   // 2100..214F; Letterlike Symbols
2671             0x2150,   // 2150..218F; Number Forms
2672             0x2190,   // 2190..21FF; Arrows
2673             0x2200,   // 2200..22FF; Mathematical Operators
2674             0x2300,   // 2300..23FF; Miscellaneous Technical
2675             0x2400,   // 2400..243F; Control Pictures
2676             0x2440,   // 2440..245F; Optical Character Recognition
2677             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2678             0x2500,   // 2500..257F; Box Drawing
2679             0x2580,   // 2580..259F; Block Elements
2680             0x25A0,   // 25A0..25FF; Geometric Shapes
2681             0x2600,   // 2600..26FF; Miscellaneous Symbols
2682             0x2700,   // 2700..27BF; Dingbats
2683             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2684             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2685             0x2800,   // 2800..28FF; Braille Patterns
2686             0x2900,   // 2900..297F; Supplemental Arrows-B
2687             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2688             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2689             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2690             0x2C00,   // 2C00..2C5F; Glagolitic
2691             0x2C60,   // 2C60..2C7F; Latin Extended-C
2692             0x2C80,   // 2C80..2CFF; Coptic
2693             0x2D00,   // 2D00..2D2F; Georgian Supplement
2694             0x2D30,   // 2D30..2D7F; Tifinagh
2695             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2696             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2697             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2698             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2699             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2700             0x2FE0,   //             unassigned
2701             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2702             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2703             0x3040,   // 3040..309F; Hiragana
2704             0x30A0,   // 30A0..30FF; Katakana
2705             0x3100,   // 3100..312F; Bopomofo
2706             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2707             0x3190,   // 3190..319F; Kanbun
2708             0x31A0,   // 31A0..31BF; Bopomofo Extended
2709             0x31C0,   // 31C0..31EF; CJK Strokes
2710             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2711             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2712             0x3300,   // 3300..33FF; CJK Compatibility
2713             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2714             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2715             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2716             0xA000,   // A000..A48F; Yi Syllables
2717             0xA490,   // A490..A4CF; Yi Radicals
2718             0xA4D0,   // A4D0..A4FF; Lisu
2719             0xA500,   // A500..A63F; Vai
2720             0xA640,   // A640..A69F; Cyrillic Extended-B
2721             0xA6A0,   // A6A0..A6FF; Bamum
2722             0xA700,   // A700..A71F; Modifier Tone Letters
2723             0xA720,   // A720..A7FF; Latin Extended-D
2724             0xA800,   // A800..A82F; Syloti Nagri
2725             0xA830,   // A830..A83F; Common Indic Number Forms
2726             0xA840,   // A840..A87F; Phags-pa
2727             0xA880,   // A880..A8DF; Saurashtra
2728             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2729             0xA900,   // A900..A92F; Kayah Li
2730             0xA930,   // A930..A95F; Rejang
2731             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2732             0xA980,   // A980..A9DF; Javanese
2733             0xA9E0,   //             unassigned
2734             0xAA00,   // AA00..AA5F; Cham
2735             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2736             0xAA80,   // AA80..AADF; Tai Viet
2737             0xAAE0,   // AAE0..AAFF; Meetei Mayek Extensions
2738             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2739             0xAB30,   //             unassigned
2740             0xABC0,   // ABC0..ABFF; Meetei Mayek
2741             0xAC00,   // AC00..D7AF; Hangul Syllables
2742             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2743             0xD800,   // D800..DB7F; High Surrogates
2744             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2745             0xDC00,   // DC00..DFFF; Low Surrogates
2746             0xE000,   // E000..F8FF; Private Use Area
2747             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2748             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2749             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2750             0xFE00,   // FE00..FE0F; Variation Selectors
2751             0xFE10,   // FE10..FE1F; Vertical Forms
2752             0xFE20,   // FE20..FE2F; Combining Half Marks
2753             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2754             0xFE50,   // FE50..FE6F; Small Form Variants
2755             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2756             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2757             0xFFF0,   // FFF0..FFFF; Specials
2758             0x10000,  // 10000..1007F; Linear B Syllabary
2759             0x10080,  // 10080..100FF; Linear B Ideograms
2760             0x10100,  // 10100..1013F; Aegean Numbers
2761             0x10140,  // 10140..1018F; Ancient Greek Numbers
2762             0x10190,  // 10190..101CF; Ancient Symbols
2763             0x101D0,  // 101D0..101FF; Phaistos Disc
2764             0x10200,  //               unassigned
2765             0x10280,  // 10280..1029F; Lycian
2766             0x102A0,  // 102A0..102DF; Carian
2767             0x102E0,  //               unassigned
2768             0x10300,  // 10300..1032F; Old Italic
2769             0x10330,  // 10330..1034F; Gothic
2770             0x10350,  //               unassigned
2771             0x10380,  // 10380..1039F; Ugaritic
2772             0x103A0,  // 103A0..103DF; Old Persian
2773             0x103E0,  //               unassigned
2774             0x10400,  // 10400..1044F; Deseret
2775             0x10450,  // 10450..1047F; Shavian
2776             0x10480,  // 10480..104AF; Osmanya
2777             0x104B0,  //               unassigned
2778             0x10800,  // 10800..1083F; Cypriot Syllabary
2779             0x10840,  // 10840..1085F; Imperial Aramaic
2780             0x10860,  //               unassigned
2781             0x10900,  // 10900..1091F; Phoenician
2782             0x10920,  // 10920..1093F; Lydian
2783             0x10940,  //               unassigned
2784             0x10980,  // 10980..1099F; Meroitic Hieroglyphs
2785             0x109A0,  // 109A0..109FF; Meroitic Cursive
2786             0x10A00,  // 10A00..10A5F; Kharoshthi
2787             0x10A60,  // 10A60..10A7F; Old South Arabian
2788             0x10A80,  //               unassigned
2789             0x10B00,  // 10B00..10B3F; Avestan
2790             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2791             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2792             0x10B80,  //               unassigned
2793             0x10C00,  // 10C00..10C4F; Old Turkic
2794             0x10C50,  //               unassigned
2795             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2796             0x10E80,  //               unassigned
2797             0x11000,  // 11000..1107F; Brahmi
2798             0x11080,  // 11080..110CF; Kaithi
2799             0x110D0,  // 110D0..110FF; Sora Sompeng
2800             0x11100,  // 11100..1114F; Chakma
2801             0x11150,  //               unassigned
2802             0x11180,  // 11180..111DF; Sharada
2803             0x111E0,  //               unassigned
2804             0x11680,  // 11680..116CF; Takri
2805             0x116D0,  //               unassigned
2806             0x12000,  // 12000..123FF; Cuneiform
2807             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2808             0x12480,  //               unassigned
2809             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2810             0x13430,  //               unassigned
2811             0x16800,  // 16800..16A3F; Bamum Supplement
2812             0x16A40,  //               unassigned
2813             0x16F00,  // 16F00..16F9F; Miao
2814             0x16FA0,  //               unassigned
2815             0x1B000,  // 1B000..1B0FF; Kana Supplement
2816             0x1B100,  //               unassigned
2817             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2818             0x1D100,  // 1D100..1D1FF; Musical Symbols
2819             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2820             0x1D250,  //               unassigned
2821             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2822             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2823             0x1D380,  //               unassigned
2824             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2825             0x1D800,  //               unassigned
2826             0x1EE00,  // 1EE00..1EEFF; Arabic Mathematical Alphabetic Symbols
2827             0x1EF00,  //               unassigned
2828             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2829             0x1F030,  // 1F030..1F09F; Domino Tiles
2830             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2831             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2832             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2833             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2834             0x1F600,  // 1F600..1F64F; Emoticons
2835             0x1F650,  //               unassigned
2836             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2837             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2838             0x1F780,  //               unassigned
2839             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2840             0x2A6E0,  //               unassigned
2841             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2842             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2843             0x2B820,  //               unassigned
2844             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2845             0x2FA20,  //               unassigned
2846             0xE0000,  // E0000..E007F; Tags
2847             0xE0080,  //               unassigned
2848             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2849             0xE01F0,  //               unassigned
2850             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2851             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2852         };
2853 
2854         private static final UnicodeBlock[] blocks = {
2855             BASIC_LATIN,
2856             LATIN_1_SUPPLEMENT,
2857             LATIN_EXTENDED_A,
2858             LATIN_EXTENDED_B,
2859             IPA_EXTENSIONS,
2860             SPACING_MODIFIER_LETTERS,
2861             COMBINING_DIACRITICAL_MARKS,
2862             GREEK,
2863             CYRILLIC,
2864             CYRILLIC_SUPPLEMENTARY,
2865             ARMENIAN,
2866             HEBREW,
2867             ARABIC,
2868             SYRIAC,
2869             ARABIC_SUPPLEMENT,
2870             THAANA,
2871             NKO,
2872             SAMARITAN,
2873             MANDAIC,
2874             null,
2875             ARABIC_EXTENDED_A,
2876             DEVANAGARI,
2877             BENGALI,
2878             GURMUKHI,
2879             GUJARATI,
2880             ORIYA,
2881             TAMIL,
2882             TELUGU,
2883             KANNADA,
2884             MALAYALAM,
2885             SINHALA,
2886             THAI,
2887             LAO,
2888             TIBETAN,
2889             MYANMAR,
2890             GEORGIAN,
2891             HANGUL_JAMO,
2892             ETHIOPIC,
2893             ETHIOPIC_SUPPLEMENT,
2894             CHEROKEE,
2895             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2896             OGHAM,
2897             RUNIC,
2898             TAGALOG,
2899             HANUNOO,
2900             BUHID,
2901             TAGBANWA,
2902             KHMER,
2903             MONGOLIAN,
2904             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2905             LIMBU,
2906             TAI_LE,
2907             NEW_TAI_LUE,
2908             KHMER_SYMBOLS,
2909             BUGINESE,
2910             TAI_THAM,
2911             null,
2912             BALINESE,
2913             SUNDANESE,
2914             BATAK,
2915             LEPCHA,
2916             OL_CHIKI,
2917             null,
2918             SUNDANESE_SUPPLEMENT,
2919             VEDIC_EXTENSIONS,
2920             PHONETIC_EXTENSIONS,
2921             PHONETIC_EXTENSIONS_SUPPLEMENT,
2922             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2923             LATIN_EXTENDED_ADDITIONAL,
2924             GREEK_EXTENDED,
2925             GENERAL_PUNCTUATION,
2926             SUPERSCRIPTS_AND_SUBSCRIPTS,
2927             CURRENCY_SYMBOLS,
2928             COMBINING_MARKS_FOR_SYMBOLS,
2929             LETTERLIKE_SYMBOLS,
2930             NUMBER_FORMS,
2931             ARROWS,
2932             MATHEMATICAL_OPERATORS,
2933             MISCELLANEOUS_TECHNICAL,
2934             CONTROL_PICTURES,
2935             OPTICAL_CHARACTER_RECOGNITION,
2936             ENCLOSED_ALPHANUMERICS,
2937             BOX_DRAWING,
2938             BLOCK_ELEMENTS,
2939             GEOMETRIC_SHAPES,
2940             MISCELLANEOUS_SYMBOLS,
2941             DINGBATS,
2942             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2943             SUPPLEMENTAL_ARROWS_A,
2944             BRAILLE_PATTERNS,
2945             SUPPLEMENTAL_ARROWS_B,
2946             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2947             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2948             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2949             GLAGOLITIC,
2950             LATIN_EXTENDED_C,
2951             COPTIC,
2952             GEORGIAN_SUPPLEMENT,
2953             TIFINAGH,
2954             ETHIOPIC_EXTENDED,
2955             CYRILLIC_EXTENDED_A,
2956             SUPPLEMENTAL_PUNCTUATION,
2957             CJK_RADICALS_SUPPLEMENT,
2958             KANGXI_RADICALS,
2959             null,
2960             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2961             CJK_SYMBOLS_AND_PUNCTUATION,
2962             HIRAGANA,
2963             KATAKANA,
2964             BOPOMOFO,
2965             HANGUL_COMPATIBILITY_JAMO,
2966             KANBUN,
2967             BOPOMOFO_EXTENDED,
2968             CJK_STROKES,
2969             KATAKANA_PHONETIC_EXTENSIONS,
2970             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2971             CJK_COMPATIBILITY,
2972             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2973             YIJING_HEXAGRAM_SYMBOLS,
2974             CJK_UNIFIED_IDEOGRAPHS,
2975             YI_SYLLABLES,
2976             YI_RADICALS,
2977             LISU,
2978             VAI,
2979             CYRILLIC_EXTENDED_B,
2980             BAMUM,
2981             MODIFIER_TONE_LETTERS,
2982             LATIN_EXTENDED_D,
2983             SYLOTI_NAGRI,
2984             COMMON_INDIC_NUMBER_FORMS,
2985             PHAGS_PA,
2986             SAURASHTRA,
2987             DEVANAGARI_EXTENDED,
2988             KAYAH_LI,
2989             REJANG,
2990             HANGUL_JAMO_EXTENDED_A,
2991             JAVANESE,
2992             null,
2993             CHAM,
2994             MYANMAR_EXTENDED_A,
2995             TAI_VIET,
2996             MEETEI_MAYEK_EXTENSIONS,
2997             ETHIOPIC_EXTENDED_A,
2998             null,
2999             MEETEI_MAYEK,
3000             HANGUL_SYLLABLES,
3001             HANGUL_JAMO_EXTENDED_B,
3002             HIGH_SURROGATES,
3003             HIGH_PRIVATE_USE_SURROGATES,
3004             LOW_SURROGATES,
3005             PRIVATE_USE_AREA,
3006             CJK_COMPATIBILITY_IDEOGRAPHS,
3007             ALPHABETIC_PRESENTATION_FORMS,
3008             ARABIC_PRESENTATION_FORMS_A,
3009             VARIATION_SELECTORS,
3010             VERTICAL_FORMS,
3011             COMBINING_HALF_MARKS,
3012             CJK_COMPATIBILITY_FORMS,
3013             SMALL_FORM_VARIANTS,
3014             ARABIC_PRESENTATION_FORMS_B,
3015             HALFWIDTH_AND_FULLWIDTH_FORMS,
3016             SPECIALS,
3017             LINEAR_B_SYLLABARY,
3018             LINEAR_B_IDEOGRAMS,
3019             AEGEAN_NUMBERS,
3020             ANCIENT_GREEK_NUMBERS,
3021             ANCIENT_SYMBOLS,
3022             PHAISTOS_DISC,
3023             null,
3024             LYCIAN,
3025             CARIAN,
3026             null,
3027             OLD_ITALIC,
3028             GOTHIC,
3029             null,
3030             UGARITIC,
3031             OLD_PERSIAN,
3032             null,
3033             DESERET,
3034             SHAVIAN,
3035             OSMANYA,
3036             null,
3037             CYPRIOT_SYLLABARY,
3038             IMPERIAL_ARAMAIC,
3039             null,
3040             PHOENICIAN,
3041             LYDIAN,
3042             null,
3043             MEROITIC_HIEROGLYPHS,
3044             MEROITIC_CURSIVE,
3045             KHAROSHTHI,
3046             OLD_SOUTH_ARABIAN,
3047             null,
3048             AVESTAN,
3049             INSCRIPTIONAL_PARTHIAN,
3050             INSCRIPTIONAL_PAHLAVI,
3051             null,
3052             OLD_TURKIC,
3053             null,
3054             RUMI_NUMERAL_SYMBOLS,
3055             null,
3056             BRAHMI,
3057             KAITHI,
3058             SORA_SOMPENG,
3059             CHAKMA,
3060             null,
3061             SHARADA,
3062             null,
3063             TAKRI,
3064             null,
3065             CUNEIFORM,
3066             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
3067             null,
3068             EGYPTIAN_HIEROGLYPHS,
3069             null,
3070             BAMUM_SUPPLEMENT,
3071             null,
3072             MIAO,
3073             null,
3074             KANA_SUPPLEMENT,
3075             null,
3076             BYZANTINE_MUSICAL_SYMBOLS,
3077             MUSICAL_SYMBOLS,
3078             ANCIENT_GREEK_MUSICAL_NOTATION,
3079             null,
3080             TAI_XUAN_JING_SYMBOLS,
3081             COUNTING_ROD_NUMERALS,
3082             null,
3083             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
3084             null,
3085             ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS,
3086             null,
3087             MAHJONG_TILES,
3088             DOMINO_TILES,
3089             PLAYING_CARDS,
3090             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
3091             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
3092             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
3093             EMOTICONS,
3094             null,
3095             TRANSPORT_AND_MAP_SYMBOLS,
3096             ALCHEMICAL_SYMBOLS,
3097             null,
3098             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
3099             null,
3100             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
3101             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
3102             null,
3103             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
3104             null,
3105             TAGS,
3106             null,
3107             VARIATION_SELECTORS_SUPPLEMENT,
3108             null,
3109             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
3110             SUPPLEMENTARY_PRIVATE_USE_AREA_B
3111         };
3112 
3113 
3114         /**
3115          * Returns the object representing the Unicode block containing the
3116          * given character, or {@code null} if the character is not a
3117          * member of a defined block.
3118          *
3119          * <p><b>Note:</b> This method cannot handle
3120          * <a href="Character.html#supplementary"> supplementary
3121          * characters</a>.  To support all Unicode characters, including
3122          * supplementary characters, use the {@link #of(int)} method.
3123          *
3124          * @param   c  The character in question
3125          * @return  The {@code UnicodeBlock} instance representing the
3126          *          Unicode block of which this character is a member, or
3127          *          {@code null} if the character is not a member of any
3128          *          Unicode block
3129          */
of(char c)3130         public static UnicodeBlock of(char c) {
3131             return of((int)c);
3132         }
3133 
3134         /**
3135          * Returns the object representing the Unicode block
3136          * containing the given character (Unicode code point), or
3137          * {@code null} if the character is not a member of a
3138          * defined block.
3139          *
3140          * @param   codePoint the character (Unicode code point) in question.
3141          * @return  The {@code UnicodeBlock} instance representing the
3142          *          Unicode block of which this character is a member, or
3143          *          {@code null} if the character is not a member of any
3144          *          Unicode block
3145          * @exception IllegalArgumentException if the specified
3146          * {@code codePoint} is an invalid Unicode code point.
3147          * @see Character#isValidCodePoint(int)
3148          * @since   1.5
3149          */
of(int codePoint)3150         public static UnicodeBlock of(int codePoint) {
3151             if (!isValidCodePoint(codePoint)) {
3152                 throw new IllegalArgumentException();
3153             }
3154 
3155             int top, bottom, current;
3156             bottom = 0;
3157             top = blockStarts.length;
3158             current = top/2;
3159 
3160             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3161             while (top - bottom > 1) {
3162                 if (codePoint >= blockStarts[current]) {
3163                     bottom = current;
3164                 } else {
3165                     top = current;
3166                 }
3167                 current = (top + bottom) / 2;
3168             }
3169             return blocks[current];
3170         }
3171 
3172         /**
3173          * Returns the UnicodeBlock with the given name. Block
3174          * names are determined by The Unicode Standard. The file
3175          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3176          * version of the standard. The {@link Character} class specifies
3177          * the version of the standard that it supports.
3178          * <p>
3179          * This method accepts block names in the following forms:
3180          * <ol>
3181          * <li> Canonical block names as defined by the Unicode Standard.
3182          * For example, the standard defines a "Basic Latin" block. Therefore, this
3183          * method accepts "Basic Latin" as a valid block name. The documentation of
3184          * each UnicodeBlock provides the canonical name.
3185          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3186          * is a valid block name for the "Basic Latin" block.
3187          * <li>The text representation of each constant UnicodeBlock identifier.
3188          * For example, this method will return the {@link #BASIC_LATIN} block if
3189          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3190          * hyphens in the canonical name with underscores.
3191          * </ol>
3192          * Finally, character case is ignored for all of the valid block name forms.
3193          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3194          * The en_US locale's case mapping rules are used to provide case-insensitive
3195          * string comparisons for block name validation.
3196          * <p>
3197          * If the Unicode Standard changes block names, both the previous and
3198          * current names will be accepted.
3199          *
3200          * @param blockName A {@code UnicodeBlock} name.
3201          * @return The {@code UnicodeBlock} instance identified
3202          *         by {@code blockName}
3203          * @throws IllegalArgumentException if {@code blockName} is an
3204          *         invalid name
3205          * @throws NullPointerException if {@code blockName} is null
3206          * @since 1.5
3207          */
forName(String blockName)3208         public static final UnicodeBlock forName(String blockName) {
3209             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3210             if (block == null) {
3211                 throw new IllegalArgumentException();
3212             }
3213             return block;
3214         }
3215     }
3216 
3217 
3218     /**
3219      * A family of character subsets representing the character scripts
3220      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3221      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3222      * character is assigned to a single Unicode script, either a specific
3223      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3224      * one of the following three special values,
3225      * {@link Character.UnicodeScript#INHERITED Inherited},
3226      * {@link Character.UnicodeScript#COMMON Common} or
3227      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3228      *
3229      * @since 1.7
3230      */
3231     public static enum UnicodeScript {
3232         /**
3233          * Unicode script "Common".
3234          */
3235         COMMON,
3236 
3237         /**
3238          * Unicode script "Latin".
3239          */
3240         LATIN,
3241 
3242         /**
3243          * Unicode script "Greek".
3244          */
3245         GREEK,
3246 
3247         /**
3248          * Unicode script "Cyrillic".
3249          */
3250         CYRILLIC,
3251 
3252         /**
3253          * Unicode script "Armenian".
3254          */
3255         ARMENIAN,
3256 
3257         /**
3258          * Unicode script "Hebrew".
3259          */
3260         HEBREW,
3261 
3262         /**
3263          * Unicode script "Arabic".
3264          */
3265         ARABIC,
3266 
3267         /**
3268          * Unicode script "Syriac".
3269          */
3270         SYRIAC,
3271 
3272         /**
3273          * Unicode script "Thaana".
3274          */
3275         THAANA,
3276 
3277         /**
3278          * Unicode script "Devanagari".
3279          */
3280         DEVANAGARI,
3281 
3282         /**
3283          * Unicode script "Bengali".
3284          */
3285         BENGALI,
3286 
3287         /**
3288          * Unicode script "Gurmukhi".
3289          */
3290         GURMUKHI,
3291 
3292         /**
3293          * Unicode script "Gujarati".
3294          */
3295         GUJARATI,
3296 
3297         /**
3298          * Unicode script "Oriya".
3299          */
3300         ORIYA,
3301 
3302         /**
3303          * Unicode script "Tamil".
3304          */
3305         TAMIL,
3306 
3307         /**
3308          * Unicode script "Telugu".
3309          */
3310         TELUGU,
3311 
3312         /**
3313          * Unicode script "Kannada".
3314          */
3315         KANNADA,
3316 
3317         /**
3318          * Unicode script "Malayalam".
3319          */
3320         MALAYALAM,
3321 
3322         /**
3323          * Unicode script "Sinhala".
3324          */
3325         SINHALA,
3326 
3327         /**
3328          * Unicode script "Thai".
3329          */
3330         THAI,
3331 
3332         /**
3333          * Unicode script "Lao".
3334          */
3335         LAO,
3336 
3337         /**
3338          * Unicode script "Tibetan".
3339          */
3340         TIBETAN,
3341 
3342         /**
3343          * Unicode script "Myanmar".
3344          */
3345         MYANMAR,
3346 
3347         /**
3348          * Unicode script "Georgian".
3349          */
3350         GEORGIAN,
3351 
3352         /**
3353          * Unicode script "Hangul".
3354          */
3355         HANGUL,
3356 
3357         /**
3358          * Unicode script "Ethiopic".
3359          */
3360         ETHIOPIC,
3361 
3362         /**
3363          * Unicode script "Cherokee".
3364          */
3365         CHEROKEE,
3366 
3367         /**
3368          * Unicode script "Canadian_Aboriginal".
3369          */
3370         CANADIAN_ABORIGINAL,
3371 
3372         /**
3373          * Unicode script "Ogham".
3374          */
3375         OGHAM,
3376 
3377         /**
3378          * Unicode script "Runic".
3379          */
3380         RUNIC,
3381 
3382         /**
3383          * Unicode script "Khmer".
3384          */
3385         KHMER,
3386 
3387         /**
3388          * Unicode script "Mongolian".
3389          */
3390         MONGOLIAN,
3391 
3392         /**
3393          * Unicode script "Hiragana".
3394          */
3395         HIRAGANA,
3396 
3397         /**
3398          * Unicode script "Katakana".
3399          */
3400         KATAKANA,
3401 
3402         /**
3403          * Unicode script "Bopomofo".
3404          */
3405         BOPOMOFO,
3406 
3407         /**
3408          * Unicode script "Han".
3409          */
3410         HAN,
3411 
3412         /**
3413          * Unicode script "Yi".
3414          */
3415         YI,
3416 
3417         /**
3418          * Unicode script "Old_Italic".
3419          */
3420         OLD_ITALIC,
3421 
3422         /**
3423          * Unicode script "Gothic".
3424          */
3425         GOTHIC,
3426 
3427         /**
3428          * Unicode script "Deseret".
3429          */
3430         DESERET,
3431 
3432         /**
3433          * Unicode script "Inherited".
3434          */
3435         INHERITED,
3436 
3437         /**
3438          * Unicode script "Tagalog".
3439          */
3440         TAGALOG,
3441 
3442         /**
3443          * Unicode script "Hanunoo".
3444          */
3445         HANUNOO,
3446 
3447         /**
3448          * Unicode script "Buhid".
3449          */
3450         BUHID,
3451 
3452         /**
3453          * Unicode script "Tagbanwa".
3454          */
3455         TAGBANWA,
3456 
3457         /**
3458          * Unicode script "Limbu".
3459          */
3460         LIMBU,
3461 
3462         /**
3463          * Unicode script "Tai_Le".
3464          */
3465         TAI_LE,
3466 
3467         /**
3468          * Unicode script "Linear_B".
3469          */
3470         LINEAR_B,
3471 
3472         /**
3473          * Unicode script "Ugaritic".
3474          */
3475         UGARITIC,
3476 
3477         /**
3478          * Unicode script "Shavian".
3479          */
3480         SHAVIAN,
3481 
3482         /**
3483          * Unicode script "Osmanya".
3484          */
3485         OSMANYA,
3486 
3487         /**
3488          * Unicode script "Cypriot".
3489          */
3490         CYPRIOT,
3491 
3492         /**
3493          * Unicode script "Braille".
3494          */
3495         BRAILLE,
3496 
3497         /**
3498          * Unicode script "Buginese".
3499          */
3500         BUGINESE,
3501 
3502         /**
3503          * Unicode script "Coptic".
3504          */
3505         COPTIC,
3506 
3507         /**
3508          * Unicode script "New_Tai_Lue".
3509          */
3510         NEW_TAI_LUE,
3511 
3512         /**
3513          * Unicode script "Glagolitic".
3514          */
3515         GLAGOLITIC,
3516 
3517         /**
3518          * Unicode script "Tifinagh".
3519          */
3520         TIFINAGH,
3521 
3522         /**
3523          * Unicode script "Syloti_Nagri".
3524          */
3525         SYLOTI_NAGRI,
3526 
3527         /**
3528          * Unicode script "Old_Persian".
3529          */
3530         OLD_PERSIAN,
3531 
3532         /**
3533          * Unicode script "Kharoshthi".
3534          */
3535         KHAROSHTHI,
3536 
3537         /**
3538          * Unicode script "Balinese".
3539          */
3540         BALINESE,
3541 
3542         /**
3543          * Unicode script "Cuneiform".
3544          */
3545         CUNEIFORM,
3546 
3547         /**
3548          * Unicode script "Phoenician".
3549          */
3550         PHOENICIAN,
3551 
3552         /**
3553          * Unicode script "Phags_Pa".
3554          */
3555         PHAGS_PA,
3556 
3557         /**
3558          * Unicode script "Nko".
3559          */
3560         NKO,
3561 
3562         /**
3563          * Unicode script "Sundanese".
3564          */
3565         SUNDANESE,
3566 
3567         /**
3568          * Unicode script "Batak".
3569          */
3570         BATAK,
3571 
3572         /**
3573          * Unicode script "Lepcha".
3574          */
3575         LEPCHA,
3576 
3577         /**
3578          * Unicode script "Ol_Chiki".
3579          */
3580         OL_CHIKI,
3581 
3582         /**
3583          * Unicode script "Vai".
3584          */
3585         VAI,
3586 
3587         /**
3588          * Unicode script "Saurashtra".
3589          */
3590         SAURASHTRA,
3591 
3592         /**
3593          * Unicode script "Kayah_Li".
3594          */
3595         KAYAH_LI,
3596 
3597         /**
3598          * Unicode script "Rejang".
3599          */
3600         REJANG,
3601 
3602         /**
3603          * Unicode script "Lycian".
3604          */
3605         LYCIAN,
3606 
3607         /**
3608          * Unicode script "Carian".
3609          */
3610         CARIAN,
3611 
3612         /**
3613          * Unicode script "Lydian".
3614          */
3615         LYDIAN,
3616 
3617         /**
3618          * Unicode script "Cham".
3619          */
3620         CHAM,
3621 
3622         /**
3623          * Unicode script "Tai_Tham".
3624          */
3625         TAI_THAM,
3626 
3627         /**
3628          * Unicode script "Tai_Viet".
3629          */
3630         TAI_VIET,
3631 
3632         /**
3633          * Unicode script "Avestan".
3634          */
3635         AVESTAN,
3636 
3637         /**
3638          * Unicode script "Egyptian_Hieroglyphs".
3639          */
3640         EGYPTIAN_HIEROGLYPHS,
3641 
3642         /**
3643          * Unicode script "Samaritan".
3644          */
3645         SAMARITAN,
3646 
3647         /**
3648          * Unicode script "Mandaic".
3649          */
3650         MANDAIC,
3651 
3652         /**
3653          * Unicode script "Lisu".
3654          */
3655         LISU,
3656 
3657         /**
3658          * Unicode script "Bamum".
3659          */
3660         BAMUM,
3661 
3662         /**
3663          * Unicode script "Javanese".
3664          */
3665         JAVANESE,
3666 
3667         /**
3668          * Unicode script "Meetei_Mayek".
3669          */
3670         MEETEI_MAYEK,
3671 
3672         /**
3673          * Unicode script "Imperial_Aramaic".
3674          */
3675         IMPERIAL_ARAMAIC,
3676 
3677         /**
3678          * Unicode script "Old_South_Arabian".
3679          */
3680         OLD_SOUTH_ARABIAN,
3681 
3682         /**
3683          * Unicode script "Inscriptional_Parthian".
3684          */
3685         INSCRIPTIONAL_PARTHIAN,
3686 
3687         /**
3688          * Unicode script "Inscriptional_Pahlavi".
3689          */
3690         INSCRIPTIONAL_PAHLAVI,
3691 
3692         /**
3693          * Unicode script "Old_Turkic".
3694          */
3695         OLD_TURKIC,
3696 
3697         /**
3698          * Unicode script "Brahmi".
3699          */
3700         BRAHMI,
3701 
3702         /**
3703          * Unicode script "Kaithi".
3704          */
3705         KAITHI,
3706 
3707         /**
         * Unicode script "Meroitic_Hieroglyphs".
3709          */
3710         MEROITIC_HIEROGLYPHS,
3711 
3712         /**
         * Unicode script "Meroitic_Cursive".
3714          */
3715         MEROITIC_CURSIVE,
3716 
3717         /**
         * Unicode script "Sora_Sompeng".
3719          */
3720         SORA_SOMPENG,
3721 
3722         /**
3723          * Unicode script "Chakma".
3724          */
3725         CHAKMA,
3726 
3727         /**
3728          * Unicode script "Sharada".
3729          */
3730         SHARADA,
3731 
3732         /**
3733          * Unicode script "Takri".
3734          */
3735         TAKRI,
3736 
3737         /**
3738          * Unicode script "Miao".
3739          */
3740         MIAO,
3741 
3742         /**
3743          * Unicode script "Unknown".
3744          */
3745         UNKNOWN;
3746 
3747         private static final int[] scriptStarts = {
3748             0x0000,   // 0000..0040; COMMON
3749             0x0041,   // 0041..005A; LATIN
3750             0x005B,   // 005B..0060; COMMON
3751             0x0061,   // 0061..007A; LATIN
3752             0x007B,   // 007B..00A9; COMMON
3753             0x00AA,   // 00AA..00AA; LATIN
3754             0x00AB,   // 00AB..00B9; COMMON
3755             0x00BA,   // 00BA..00BA; LATIN
3756             0x00BB,   // 00BB..00BF; COMMON
3757             0x00C0,   // 00C0..00D6; LATIN
3758             0x00D7,   // 00D7..00D7; COMMON
3759             0x00D8,   // 00D8..00F6; LATIN
3760             0x00F7,   // 00F7..00F7; COMMON
3761             0x00F8,   // 00F8..02B8; LATIN
3762             0x02B9,   // 02B9..02DF; COMMON
3763             0x02E0,   // 02E0..02E4; LATIN
3764             0x02E5,   // 02E5..02E9; COMMON
3765             0x02EA,   // 02EA..02EB; BOPOMOFO
3766             0x02EC,   // 02EC..02FF; COMMON
3767             0x0300,   // 0300..036F; INHERITED
3768             0x0370,   // 0370..0373; GREEK
3769             0x0374,   // 0374..0374; COMMON
3770             0x0375,   // 0375..037D; GREEK
3771             0x037E,   // 037E..0383; COMMON
3772             0x0384,   // 0384..0384; GREEK
3773             0x0385,   // 0385..0385; COMMON
3774             0x0386,   // 0386..0386; GREEK
3775             0x0387,   // 0387..0387; COMMON
3776             0x0388,   // 0388..03E1; GREEK
3777             0x03E2,   // 03E2..03EF; COPTIC
3778             0x03F0,   // 03F0..03FF; GREEK
3779             0x0400,   // 0400..0484; CYRILLIC
3780             0x0485,   // 0485..0486; INHERITED
3781             0x0487,   // 0487..0530; CYRILLIC
3782             0x0531,   // 0531..0588; ARMENIAN
3783             0x0589,   // 0589..0589; COMMON
3784             0x058A,   // 058A..0590; ARMENIAN
3785             0x0591,   // 0591..05FF; HEBREW
3786             0x0600,   // 0600..060B; ARABIC
3787             0x060C,   // 060C..060C; COMMON
3788             0x060D,   // 060D..061A; ARABIC
3789             0x061B,   // 061B..061D; COMMON
3790             0x061E,   // 061E..061E; ARABIC
3791             0x061F,   // 061F..061F; COMMON
3792             0x0620,   // 0620..063F; ARABIC
3793             0x0640,   // 0640..0640; COMMON
3794             0x0641,   // 0641..064A; ARABIC
3795             0x064B,   // 064B..0655; INHERITED
3796             0x0656,   // 0656..065F; ARABIC
3797             0x0660,   // 0660..0669; COMMON
3798             0x066A,   // 066A..066F; ARABIC
3799             0x0670,   // 0670..0670; INHERITED
3800             0x0671,   // 0671..06DC; ARABIC
3801             0x06DD,   // 06DD..06DD; COMMON
3802             0x06DE,   // 06DE..06FF; ARABIC
3803             0x0700,   // 0700..074F; SYRIAC
3804             0x0750,   // 0750..077F; ARABIC
3805             0x0780,   // 0780..07BF; THAANA
3806             0x07C0,   // 07C0..07FF; NKO
3807             0x0800,   // 0800..083F; SAMARITAN
3808             0x0840,   // 0840..089F; MANDAIC
3809             0x08A0,   // 08A0..08FF; ARABIC
3810             0x0900,   // 0900..0950; DEVANAGARI
3811             0x0951,   // 0951..0952; INHERITED
3812             0x0953,   // 0953..0963; DEVANAGARI
3813             0x0964,   // 0964..0965; COMMON
3814             0x0966,   // 0966..0980; DEVANAGARI
3815             0x0981,   // 0981..0A00; BENGALI
3816             0x0A01,   // 0A01..0A80; GURMUKHI
3817             0x0A81,   // 0A81..0B00; GUJARATI
3818             0x0B01,   // 0B01..0B81; ORIYA
3819             0x0B82,   // 0B82..0C00; TAMIL
3820             0x0C01,   // 0C01..0C81; TELUGU
3821             0x0C82,   // 0C82..0CF0; KANNADA
3822             0x0D02,   // 0D02..0D81; MALAYALAM
3823             0x0D82,   // 0D82..0E00; SINHALA
3824             0x0E01,   // 0E01..0E3E; THAI
3825             0x0E3F,   // 0E3F..0E3F; COMMON
3826             0x0E40,   // 0E40..0E80; THAI
3827             0x0E81,   // 0E81..0EFF; LAO
3828             0x0F00,   // 0F00..0FD4; TIBETAN
3829             0x0FD5,   // 0FD5..0FD8; COMMON
3830             0x0FD9,   // 0FD9..0FFF; TIBETAN
3831             0x1000,   // 1000..109F; MYANMAR
3832             0x10A0,   // 10A0..10FA; GEORGIAN
3833             0x10FB,   // 10FB..10FB; COMMON
3834             0x10FC,   // 10FC..10FF; GEORGIAN
3835             0x1100,   // 1100..11FF; HANGUL
3836             0x1200,   // 1200..139F; ETHIOPIC
3837             0x13A0,   // 13A0..13FF; CHEROKEE
3838             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3839             0x1680,   // 1680..169F; OGHAM
3840             0x16A0,   // 16A0..16EA; RUNIC
3841             0x16EB,   // 16EB..16ED; COMMON
3842             0x16EE,   // 16EE..16FF; RUNIC
3843             0x1700,   // 1700..171F; TAGALOG
3844             0x1720,   // 1720..1734; HANUNOO
3845             0x1735,   // 1735..173F; COMMON
3846             0x1740,   // 1740..175F; BUHID
3847             0x1760,   // 1760..177F; TAGBANWA
3848             0x1780,   // 1780..17FF; KHMER
3849             0x1800,   // 1800..1801; MONGOLIAN
3850             0x1802,   // 1802..1803; COMMON
3851             0x1804,   // 1804..1804; MONGOLIAN
3852             0x1805,   // 1805..1805; COMMON
3853             0x1806,   // 1806..18AF; MONGOLIAN
3854             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3855             0x1900,   // 1900..194F; LIMBU
3856             0x1950,   // 1950..197F; TAI_LE
3857             0x1980,   // 1980..19DF; NEW_TAI_LUE
3858             0x19E0,   // 19E0..19FF; KHMER
3859             0x1A00,   // 1A00..1A1F; BUGINESE
3860             0x1A20,   // 1A20..1AFF; TAI_THAM
3861             0x1B00,   // 1B00..1B7F; BALINESE
3862             0x1B80,   // 1B80..1BBF; SUNDANESE
3863             0x1BC0,   // 1BC0..1BFF; BATAK
3864             0x1C00,   // 1C00..1C4F; LEPCHA
3865             0x1C50,   // 1C50..1CBF; OL_CHIKI
3866             0x1CC0,   // 1CC0..1CCF; SUNDANESE
3867             0x1CD0,   // 1CD0..1CD2; INHERITED
3868             0x1CD3,   // 1CD3..1CD3; COMMON
3869             0x1CD4,   // 1CD4..1CE0; INHERITED
3870             0x1CE1,   // 1CE1..1CE1; COMMON
3871             0x1CE2,   // 1CE2..1CE8; INHERITED
3872             0x1CE9,   // 1CE9..1CEC; COMMON
3873             0x1CED,   // 1CED..1CED; INHERITED
3874             0x1CEE,   // 1CEE..1CF3; COMMON
3875             0x1CF4,   // 1CF4..1CF4; INHERITED
3876             0x1CF5,   // 1CF5..1CFF; COMMON
3877             0x1D00,   // 1D00..1D25; LATIN
3878             0x1D26,   // 1D26..1D2A; GREEK
3879             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3880             0x1D2C,   // 1D2C..1D5C; LATIN
3881             0x1D5D,   // 1D5D..1D61; GREEK
3882             0x1D62,   // 1D62..1D65; LATIN
3883             0x1D66,   // 1D66..1D6A; GREEK
3884             0x1D6B,   // 1D6B..1D77; LATIN
3885             0x1D78,   // 1D78..1D78; CYRILLIC
3886             0x1D79,   // 1D79..1DBE; LATIN
3887             0x1DBF,   // 1DBF..1DBF; GREEK
3888             0x1DC0,   // 1DC0..1DFF; INHERITED
3889             0x1E00,   // 1E00..1EFF; LATIN
3890             0x1F00,   // 1F00..1FFF; GREEK
3891             0x2000,   // 2000..200B; COMMON
3892             0x200C,   // 200C..200D; INHERITED
3893             0x200E,   // 200E..2070; COMMON
3894             0x2071,   // 2071..2073; LATIN
3895             0x2074,   // 2074..207E; COMMON
3896             0x207F,   // 207F..207F; LATIN
3897             0x2080,   // 2080..208F; COMMON
3898             0x2090,   // 2090..209F; LATIN
3899             0x20A0,   // 20A0..20CF; COMMON
3900             0x20D0,   // 20D0..20FF; INHERITED
3901             0x2100,   // 2100..2125; COMMON
3902             0x2126,   // 2126..2126; GREEK
3903             0x2127,   // 2127..2129; COMMON
3904             0x212A,   // 212A..212B; LATIN
3905             0x212C,   // 212C..2131; COMMON
3906             0x2132,   // 2132..2132; LATIN
3907             0x2133,   // 2133..214D; COMMON
3908             0x214E,   // 214E..214E; LATIN
3909             0x214F,   // 214F..215F; COMMON
3910             0x2160,   // 2160..2188; LATIN
3911             0x2189,   // 2189..27FF; COMMON
3912             0x2800,   // 2800..28FF; BRAILLE
3913             0x2900,   // 2900..2BFF; COMMON
3914             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3915             0x2C60,   // 2C60..2C7F; LATIN
3916             0x2C80,   // 2C80..2CFF; COPTIC
3917             0x2D00,   // 2D00..2D2F; GEORGIAN
3918             0x2D30,   // 2D30..2D7F; TIFINAGH
3919             0x2D80,   // 2D80..2DDF; ETHIOPIC
3920             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3921             0x2E00,   // 2E00..2E7F; COMMON
3922             0x2E80,   // 2E80..2FEF; HAN
3923             0x2FF0,   // 2FF0..3004; COMMON
3924             0x3005,   // 3005..3005; HAN
3925             0x3006,   // 3006..3006; COMMON
3926             0x3007,   // 3007..3007; HAN
3927             0x3008,   // 3008..3020; COMMON
3928             0x3021,   // 3021..3029; HAN
3929             0x302A,   // 302A..302D; INHERITED
3930             0x302E,   // 302E..302F; HANGUL
3931             0x3030,   // 3030..3037; COMMON
3932             0x3038,   // 3038..303B; HAN
3933             0x303C,   // 303C..3040; COMMON
3934             0x3041,   // 3041..3098; HIRAGANA
3935             0x3099,   // 3099..309A; INHERITED
3936             0x309B,   // 309B..309C; COMMON
3937             0x309D,   // 309D..309F; HIRAGANA
3938             0x30A0,   // 30A0..30A0; COMMON
3939             0x30A1,   // 30A1..30FA; KATAKANA
3940             0x30FB,   // 30FB..30FC; COMMON
3941             0x30FD,   // 30FD..3104; KATAKANA
3942             0x3105,   // 3105..3130; BOPOMOFO
3943             0x3131,   // 3131..318F; HANGUL
3944             0x3190,   // 3190..319F; COMMON
3945             0x31A0,   // 31A0..31BF; BOPOMOFO
3946             0x31C0,   // 31C0..31EF; COMMON
3947             0x31F0,   // 31F0..31FF; KATAKANA
3948             0x3200,   // 3200..321F; HANGUL
3949             0x3220,   // 3220..325F; COMMON
3950             0x3260,   // 3260..327E; HANGUL
3951             0x327F,   // 327F..32CF; COMMON
3952             0x32D0,   // 32D0..3357; KATAKANA
3953             0x3358,   // 3358..33FF; COMMON
3954             0x3400,   // 3400..4DBF; HAN
3955             0x4DC0,   // 4DC0..4DFF; COMMON
3956             0x4E00,   // 4E00..9FFF; HAN
3957             0xA000,   // A000..A4CF; YI
3958             0xA4D0,   // A4D0..A4FF; LISU
3959             0xA500,   // A500..A63F; VAI
3960             0xA640,   // A640..A69F; CYRILLIC
3961             0xA6A0,   // A6A0..A6FF; BAMUM
3962             0xA700,   // A700..A721; COMMON
3963             0xA722,   // A722..A787; LATIN
3964             0xA788,   // A788..A78A; COMMON
3965             0xA78B,   // A78B..A7FF; LATIN
3966             0xA800,   // A800..A82F; SYLOTI_NAGRI
3967             0xA830,   // A830..A83F; COMMON
3968             0xA840,   // A840..A87F; PHAGS_PA
3969             0xA880,   // A880..A8DF; SAURASHTRA
3970             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3971             0xA900,   // A900..A92F; KAYAH_LI
3972             0xA930,   // A930..A95F; REJANG
3973             0xA960,   // A960..A97F; HANGUL
3974             0xA980,   // A980..A9FF; JAVANESE
3975             0xAA00,   // AA00..AA5F; CHAM
3976             0xAA60,   // AA60..AA7F; MYANMAR
3977             0xAA80,   // AA80..AADF; TAI_VIET
3978             0xAAE0,   // AAE0..AB00; MEETEI_MAYEK
3979             0xAB01,   // AB01..ABBF; ETHIOPIC
3980             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3981             0xAC00,   // AC00..D7FB; HANGUL
3982             0xD7FC,   // D7FC..F8FF; UNKNOWN
3983             0xF900,   // F900..FAFF; HAN
3984             0xFB00,   // FB00..FB12; LATIN
3985             0xFB13,   // FB13..FB1C; ARMENIAN
3986             0xFB1D,   // FB1D..FB4F; HEBREW
3987             0xFB50,   // FB50..FD3D; ARABIC
3988             0xFD3E,   // FD3E..FD4F; COMMON
3989             0xFD50,   // FD50..FDFC; ARABIC
3990             0xFDFD,   // FDFD..FDFF; COMMON
3991             0xFE00,   // FE00..FE0F; INHERITED
3992             0xFE10,   // FE10..FE1F; COMMON
3993             0xFE20,   // FE20..FE2F; INHERITED
3994             0xFE30,   // FE30..FE6F; COMMON
3995             0xFE70,   // FE70..FEFE; ARABIC
3996             0xFEFF,   // FEFF..FF20; COMMON
3997             0xFF21,   // FF21..FF3A; LATIN
3998             0xFF3B,   // FF3B..FF40; COMMON
3999             0xFF41,   // FF41..FF5A; LATIN
4000             0xFF5B,   // FF5B..FF65; COMMON
4001             0xFF66,   // FF66..FF6F; KATAKANA
4002             0xFF70,   // FF70..FF70; COMMON
4003             0xFF71,   // FF71..FF9D; KATAKANA
4004             0xFF9E,   // FF9E..FF9F; COMMON
4005             0xFFA0,   // FFA0..FFDF; HANGUL
4006             0xFFE0,   // FFE0..FFFF; COMMON
4007             0x10000,  // 10000..100FF; LINEAR_B
4008             0x10100,  // 10100..1013F; COMMON
4009             0x10140,  // 10140..1018F; GREEK
4010             0x10190,  // 10190..101FC; COMMON
4011             0x101FD,  // 101FD..1027F; INHERITED
4012             0x10280,  // 10280..1029F; LYCIAN
4013             0x102A0,  // 102A0..102FF; CARIAN
4014             0x10300,  // 10300..1032F; OLD_ITALIC
4015             0x10330,  // 10330..1037F; GOTHIC
4016             0x10380,  // 10380..1039F; UGARITIC
4017             0x103A0,  // 103A0..103FF; OLD_PERSIAN
4018             0x10400,  // 10400..1044F; DESERET
4019             0x10450,  // 10450..1047F; SHAVIAN
4020             0x10480,  // 10480..107FF; OSMANYA
4021             0x10800,  // 10800..1083F; CYPRIOT
4022             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
4023             0x10900,  // 10900..1091F; PHOENICIAN
4024             0x10920,  // 10920..1097F; LYDIAN
4025             0x10980,  // 10980..1099F; MEROITIC_HIEROGLYPHS
4026             0x109A0,  // 109A0..109FF; MEROITIC_CURSIVE
4027             0x10A00,  // 10A00..10A5F; KHAROSHTHI
4028             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
4029             0x10B00,  // 10B00..10B3F; AVESTAN
4030             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
4031             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
4032             0x10C00,  // 10C00..10E5F; OLD_TURKIC
4033             0x10E60,  // 10E60..10FFF; ARABIC
4034             0x11000,  // 11000..1107F; BRAHMI
4035             0x11080,  // 11080..110CF; KAITHI
4036             0x110D0,  // 110D0..110FF; SORA_SOMPENG
4037             0x11100,  // 11100..1117F; CHAKMA
4038             0x11180,  // 11180..1167F; SHARADA
4039             0x11680,  // 11680..116CF; TAKRI
4040             0x12000,  // 12000..12FFF; CUNEIFORM
4041             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
4042             0x16800,  // 16800..16A38; BAMUM
4043             0x16F00,  // 16F00..16F9F; MIAO
4044             0x1B000,  // 1B000..1B000; KATAKANA
4045             0x1B001,  // 1B001..1CFFF; HIRAGANA
4046             0x1D000,  // 1D000..1D166; COMMON
4047             0x1D167,  // 1D167..1D169; INHERITED
4048             0x1D16A,  // 1D16A..1D17A; COMMON
4049             0x1D17B,  // 1D17B..1D182; INHERITED
4050             0x1D183,  // 1D183..1D184; COMMON
4051             0x1D185,  // 1D185..1D18B; INHERITED
4052             0x1D18C,  // 1D18C..1D1A9; COMMON
4053             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
4054             0x1D1AE,  // 1D1AE..1D1FF; COMMON
4055             0x1D200,  // 1D200..1D2FF; GREEK
4056             0x1D300,  // 1D300..1EDFF; COMMON
4057             0x1EE00,  // 1EE00..1EFFF; ARABIC
4058             0x1F000,  // 1F000..1F1FF; COMMON
4059             0x1F200,  // 1F200..1F200; HIRAGANA
            0x1F201,  // 1F201..1FFFF; COMMON
4061             0x20000,  // 20000..E0000; HAN
4062             0xE0001,  // E0001..E00FF; COMMON
4063             0xE0100,  // E0100..E01EF; INHERITED
4064             0xE01F0   // E01F0..10FFFF; UNKNOWN
4065 
4066         };
4067 
        // Script lookup table, index-aligned with scriptStarts: of(int) binary
        // searches scriptStarts for the range containing a code point and then
        // returns the element of this array at the resulting index. The order
        // of the entries is therefore significant and must not be changed
        // independently of scriptStarts.
        private static final UnicodeScript[] scripts = {
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BOPOMOFO,
            COMMON,
            INHERITED,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COMMON,
            GREEK,
            COPTIC,
            GREEK,
            CYRILLIC,
            INHERITED,
            CYRILLIC,
            ARMENIAN,
            COMMON,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            INHERITED,
            ARABIC,
            COMMON,
            ARABIC,
            SYRIAC,
            ARABIC,
            THAANA,
            NKO,
            SAMARITAN,
            MANDAIC,
            ARABIC,
            DEVANAGARI,
            INHERITED,
            DEVANAGARI,
            COMMON,
            DEVANAGARI,
            BENGALI,
            GURMUKHI,
            GUJARATI,
            ORIYA,
            TAMIL,
            TELUGU,
            KANNADA,
            MALAYALAM,
            SINHALA,
            THAI,
            COMMON,
            THAI,
            LAO,
            TIBETAN,
            COMMON,
            TIBETAN,
            MYANMAR,
            GEORGIAN,
            COMMON,
            GEORGIAN,
            HANGUL,
            ETHIOPIC,
            CHEROKEE,
            CANADIAN_ABORIGINAL,
            OGHAM,
            RUNIC,
            COMMON,
            RUNIC,
            TAGALOG,
            HANUNOO,
            COMMON,
            BUHID,
            TAGBANWA,
            KHMER,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            COMMON,
            MONGOLIAN,
            CANADIAN_ABORIGINAL,
            LIMBU,
            TAI_LE,
            NEW_TAI_LUE,
            KHMER,
            BUGINESE,
            TAI_THAM,
            BALINESE,
            SUNDANESE,
            BATAK,
            LEPCHA,
            OL_CHIKI,
            SUNDANESE,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            GREEK,
            CYRILLIC,
            LATIN,
            GREEK,
            LATIN,
            GREEK,
            LATIN,
            CYRILLIC,
            LATIN,
            GREEK,
            INHERITED,
            LATIN,
            GREEK,
            COMMON,
            INHERITED,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            BRAILLE,
            COMMON,
            GLAGOLITIC,
            LATIN,
            COPTIC,
            GEORGIAN,
            TIFINAGH,
            ETHIOPIC,
            CYRILLIC,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            COMMON,
            HAN,
            INHERITED,
            HANGUL,
            COMMON,
            HAN,
            COMMON,
            HIRAGANA,
            INHERITED,
            COMMON,
            HIRAGANA,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            BOPOMOFO,
            HANGUL,
            COMMON,
            BOPOMOFO,
            COMMON,
            KATAKANA,
            HANGUL,
            COMMON,
            HANGUL,
            COMMON,
            KATAKANA,
            COMMON,
            HAN,
            COMMON,
            HAN,
            YI,
            LISU,
            VAI,
            CYRILLIC,
            BAMUM,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            SYLOTI_NAGRI,
            COMMON,
            PHAGS_PA,
            SAURASHTRA,
            DEVANAGARI,
            KAYAH_LI,
            REJANG,
            HANGUL,
            JAVANESE,
            CHAM,
            MYANMAR,
            TAI_VIET,
            MEETEI_MAYEK,
            ETHIOPIC,
            MEETEI_MAYEK,
            HANGUL,
            UNKNOWN     ,
            HAN,
            LATIN,
            ARMENIAN,
            HEBREW,
            ARABIC,
            COMMON,
            ARABIC,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            ARABIC,
            COMMON,
            LATIN,
            COMMON,
            LATIN,
            COMMON,
            KATAKANA,
            COMMON,
            KATAKANA,
            COMMON,
            HANGUL,
            COMMON,
            LINEAR_B,
            COMMON,
            GREEK,
            COMMON,
            INHERITED,
            LYCIAN,
            CARIAN,
            OLD_ITALIC,
            GOTHIC,
            UGARITIC,
            OLD_PERSIAN,
            DESERET,
            SHAVIAN,
            OSMANYA,
            CYPRIOT,
            IMPERIAL_ARAMAIC,
            PHOENICIAN,
            LYDIAN,
            MEROITIC_HIEROGLYPHS,
            MEROITIC_CURSIVE,
            KHAROSHTHI,
            OLD_SOUTH_ARABIAN,
            AVESTAN,
            INSCRIPTIONAL_PARTHIAN,
            INSCRIPTIONAL_PAHLAVI,
            OLD_TURKIC,
            ARABIC,
            BRAHMI,
            KAITHI,
            SORA_SOMPENG,
            CHAKMA,
            SHARADA,
            TAKRI,
            CUNEIFORM,
            EGYPTIAN_HIEROGLYPHS,
            BAMUM,
            MIAO,
            KATAKANA,
            HIRAGANA,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            INHERITED,
            COMMON,
            GREEK,
            COMMON,
            ARABIC,
            COMMON,
            HIRAGANA,
            COMMON,
            HAN,
            COMMON,
            INHERITED,
            UNKNOWN
        };
4387 
4388         private static HashMap<String, Character.UnicodeScript> aliases;
4389         static {
4390             aliases = new HashMap<>(128);
4391             aliases.put("ARAB", ARABIC);
4392             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4393             aliases.put("ARMN", ARMENIAN);
4394             aliases.put("AVST", AVESTAN);
4395             aliases.put("BALI", BALINESE);
4396             aliases.put("BAMU", BAMUM);
4397             aliases.put("BATK", BATAK);
4398             aliases.put("BENG", BENGALI);
4399             aliases.put("BOPO", BOPOMOFO);
4400             aliases.put("BRAI", BRAILLE);
4401             aliases.put("BRAH", BRAHMI);
4402             aliases.put("BUGI", BUGINESE);
4403             aliases.put("BUHD", BUHID);
4404             aliases.put("CAKM", CHAKMA);
4405             aliases.put("CANS", CANADIAN_ABORIGINAL);
4406             aliases.put("CARI", CARIAN);
4407             aliases.put("CHAM", CHAM);
4408             aliases.put("CHER", CHEROKEE);
4409             aliases.put("COPT", COPTIC);
4410             aliases.put("CPRT", CYPRIOT);
4411             aliases.put("CYRL", CYRILLIC);
4412             aliases.put("DEVA", DEVANAGARI);
4413             aliases.put("DSRT", DESERET);
4414             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4415             aliases.put("ETHI", ETHIOPIC);
4416             aliases.put("GEOR", GEORGIAN);
4417             aliases.put("GLAG", GLAGOLITIC);
4418             aliases.put("GOTH", GOTHIC);
4419             aliases.put("GREK", GREEK);
4420             aliases.put("GUJR", GUJARATI);
4421             aliases.put("GURU", GURMUKHI);
4422             aliases.put("HANG", HANGUL);
4423             aliases.put("HANI", HAN);
4424             aliases.put("HANO", HANUNOO);
4425             aliases.put("HEBR", HEBREW);
4426             aliases.put("HIRA", HIRAGANA);
4427             // it appears we don't have the KATAKANA_OR_HIRAGANA
4428             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4429             aliases.put("ITAL", OLD_ITALIC);
4430             aliases.put("JAVA", JAVANESE);
4431             aliases.put("KALI", KAYAH_LI);
4432             aliases.put("KANA", KATAKANA);
4433             aliases.put("KHAR", KHAROSHTHI);
4434             aliases.put("KHMR", KHMER);
4435             aliases.put("KNDA", KANNADA);
4436             aliases.put("KTHI", KAITHI);
4437             aliases.put("LANA", TAI_THAM);
4438             aliases.put("LAOO", LAO);
4439             aliases.put("LATN", LATIN);
4440             aliases.put("LEPC", LEPCHA);
4441             aliases.put("LIMB", LIMBU);
4442             aliases.put("LINB", LINEAR_B);
4443             aliases.put("LISU", LISU);
4444             aliases.put("LYCI", LYCIAN);
4445             aliases.put("LYDI", LYDIAN);
4446             aliases.put("MAND", MANDAIC);
4447             aliases.put("MERC", MEROITIC_CURSIVE);
4448             aliases.put("MERO", MEROITIC_HIEROGLYPHS);
4449             aliases.put("MLYM", MALAYALAM);
4450             aliases.put("MONG", MONGOLIAN);
4451             aliases.put("MTEI", MEETEI_MAYEK);
4452             aliases.put("MYMR", MYANMAR);
4453             aliases.put("NKOO", NKO);
4454             aliases.put("OGAM", OGHAM);
4455             aliases.put("OLCK", OL_CHIKI);
4456             aliases.put("ORKH", OLD_TURKIC);
4457             aliases.put("ORYA", ORIYA);
4458             aliases.put("OSMA", OSMANYA);
4459             aliases.put("PHAG", PHAGS_PA);
4460             aliases.put("PLRD", MIAO);
4461             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4462             aliases.put("PHNX", PHOENICIAN);
4463             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4464             aliases.put("RJNG", REJANG);
4465             aliases.put("RUNR", RUNIC);
4466             aliases.put("SAMR", SAMARITAN);
4467             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4468             aliases.put("SAUR", SAURASHTRA);
4469             aliases.put("SHAW", SHAVIAN);
4470             aliases.put("SHRD", SHARADA);
4471             aliases.put("SINH", SINHALA);
4472             aliases.put("SORA", SORA_SOMPENG);
4473             aliases.put("SUND", SUNDANESE);
4474             aliases.put("SYLO", SYLOTI_NAGRI);
4475             aliases.put("SYRC", SYRIAC);
4476             aliases.put("TAGB", TAGBANWA);
4477             aliases.put("TALE", TAI_LE);
4478             aliases.put("TAKR", TAKRI);
4479             aliases.put("TALU", NEW_TAI_LUE);
4480             aliases.put("TAML", TAMIL);
4481             aliases.put("TAVT", TAI_VIET);
4482             aliases.put("TELU", TELUGU);
4483             aliases.put("TFNG", TIFINAGH);
4484             aliases.put("TGLG", TAGALOG);
4485             aliases.put("THAA", THAANA);
4486             aliases.put("THAI", THAI);
4487             aliases.put("TIBT", TIBETAN);
4488             aliases.put("UGAR", UGARITIC);
4489             aliases.put("VAII", VAI);
4490             aliases.put("XPEO", OLD_PERSIAN);
4491             aliases.put("XSUX", CUNEIFORM);
4492             aliases.put("YIII", YI);
4493             aliases.put("ZINH", INHERITED);
4494             aliases.put("ZYYY", COMMON);
4495             aliases.put("ZZZZ", UNKNOWN);
4496         }
4497 
4498         /**
         * Returns the enum constant representing the Unicode script to which
         * the given character (Unicode code point) is assigned.
         *
         * @param   codePoint the character (Unicode code point) in question.
         * @return  The {@code UnicodeScript} constant representing the
         *          Unicode script to which this character is assigned.
4505          *
4506          * @exception IllegalArgumentException if the specified
4507          * {@code codePoint} is an invalid Unicode code point.
4508          * @see Character#isValidCodePoint(int)
4509          *
4510          */
of(int codePoint)4511         public static UnicodeScript of(int codePoint) {
4512             if (!isValidCodePoint(codePoint))
4513                 throw new IllegalArgumentException();
4514             int type = getType(codePoint);
4515             // leave SURROGATE and PRIVATE_USE for table lookup
4516             if (type == UNASSIGNED)
4517                 return UNKNOWN;
4518             int index = Arrays.binarySearch(scriptStarts, codePoint);
4519             if (index < 0)
4520                 index = -index - 2;
4521             return scripts[index];
4522         }
4523 
4524         /**
4525          * Returns the UnicodeScript constant with the given Unicode script
4526          * name or the script name alias. Script names and their aliases are
4527          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4528          * and PropertyValueAliases&lt;version&gt;.txt define script names
4529          * and the script name aliases for a particular version of the
4530          * standard. The {@link Character} class specifies the version of
4531          * the standard that it supports.
4532          * <p>
4533          * Character case is ignored for all of the valid script names.
4534          * The en_US locale's case mapping rules are used to provide
4535          * case-insensitive string comparisons for script name validation.
         *
4538          * @param scriptName A {@code UnicodeScript} name.
4539          * @return The {@code UnicodeScript} constant identified
4540          *         by {@code scriptName}
4541          * @throws IllegalArgumentException if {@code scriptName} is an
4542          *         invalid name
4543          * @throws NullPointerException if {@code scriptName} is null
4544          */
forName(String scriptName)4545         public static final UnicodeScript forName(String scriptName) {
4546             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4547                                  //.replace(' ', '_'));
4548             UnicodeScript sc = aliases.get(scriptName);
4549             if (sc != null)
4550                 return sc;
4551             return valueOf(scriptName);
4552         }
4553     }
4554 
    /**
     * The primitive {@code char} value wrapped by this {@code Character}.
     * It is assigned once, by the constructor, and never changes afterwards.
     *
     * @serial
     */
    private final char value;

    /** Use the serialVersionUID from JDK 1.0.2 for interoperability. */
    private static final long serialVersionUID = 3786198910865385080L;
4564 
4565     /**
4566      * Constructs a newly allocated {@code Character} object that
4567      * represents the specified {@code char} value.
4568      *
4569      * @param  value   the value to be represented by the
4570      *                  {@code Character} object.
4571      */
    public Character(char value) {
        // Store the wrapped primitive; the field is final, so this is its
        // only assignment.
        this.value = value;
    }
4575 
4576     private static class CharacterCache {
CharacterCache()4577         private CharacterCache(){}
4578 
4579         static final Character cache[] = new Character[127 + 1];
4580 
4581         static {
4582             for (int i = 0; i < cache.length; i++)
4583                 cache[i] = new Character((char)i);
4584         }
4585     }
4586 
4587     /**
     * Returns a {@code Character} instance representing the specified
     * {@code char} value.
     * If a new {@code Character} instance is not required, this method
     * should generally be used in preference to the constructor
     * {@link #Character(char)}, as this method is likely to yield
     * significantly better space and time performance by caching
     * frequently requested values.
4595      *
4596      * This method will always cache values in the range {@code
4597      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4598      * cache other values outside of this range.
4599      *
4600      * @param  c a char value.
     * @return a {@code Character} instance representing {@code c}.
4602      * @since  1.5
4603      */
valueOf(char c)4604     public static Character valueOf(char c) {
4605         if (c <= 127) { // must cache
4606             return CharacterCache.cache[(int)c];
4607         }
4608         return new Character(c);
4609     }
4610 
4611     /**
4612      * Returns the value of this {@code Character} object.
4613      * @return  the primitive {@code char} value represented by
4614      *          this object.
4615      */
charValue()4616     public char charValue() {
4617         return value;
4618     }
4619 
4620     /**
4621      * Returns a hash code for this {@code Character}; equal to the result
4622      * of invoking {@code charValue()}.
4623      *
4624      * @return a hash code value for this {@code Character}
4625      */
4626     @Override
hashCode()4627     public int hashCode() {
4628         return Character.hashCode(value);
4629     }
4630 
4631     /**
4632      * Returns a hash code for a {@code char} value; compatible with
4633      * {@code Character.hashCode()}.
4634      *
4635      * @since 1.8
4636      *
4637      * @param value The {@code char} for which to return a hash code.
4638      * @return a hash code value for a {@code char} value.
4639      */
hashCode(char value)4640     public static int hashCode(char value) {
4641         return (int)value;
4642     }
4643 
4644     /**
4645      * Compares this object against the specified object.
4646      * The result is {@code true} if and only if the argument is not
4647      * {@code null} and is a {@code Character} object that
4648      * represents the same {@code char} value as this object.
4649      *
4650      * @param   obj   the object to compare with.
4651      * @return  {@code true} if the objects are the same;
4652      *          {@code false} otherwise.
4653      */
equals(Object obj)4654     public boolean equals(Object obj) {
4655         if (obj instanceof Character) {
4656             return value == ((Character)obj).charValue();
4657         }
4658         return false;
4659     }
4660 
4661     /**
4662      * Returns a {@code String} object representing this
4663      * {@code Character}'s value.  The result is a string of
4664      * length 1 whose sole component is the primitive
4665      * {@code char} value represented by this
4666      * {@code Character} object.
4667      *
4668      * @return  a string representation of this object.
4669      */
toString()4670     public String toString() {
4671         char buf[] = {value};
4672         return String.valueOf(buf);
4673     }
4674 
4675     /**
4676      * Returns a {@code String} object representing the
4677      * specified {@code char}.  The result is a string of length
4678      * 1 consisting solely of the specified {@code char}.
4679      *
4680      * @param c the {@code char} to be converted
4681      * @return the string representation of the specified {@code char}
4682      * @since 1.4
4683      */
toString(char c)4684     public static String toString(char c) {
4685         return String.valueOf(c);
4686     }
4687 
4688     /**
4689      * Determines whether the specified code point is a valid
4690      * <a href="http://www.unicode.org/glossary/#code_point">
4691      * Unicode code point value</a>.
4692      *
4693      * @param  codePoint the Unicode code point to be tested
4694      * @return {@code true} if the specified code point value is between
4695      *         {@link #MIN_CODE_POINT} and
4696      *         {@link #MAX_CODE_POINT} inclusive;
4697      *         {@code false} otherwise.
4698      * @since  1.5
4699      */
isValidCodePoint(int codePoint)4700     public static boolean isValidCodePoint(int codePoint) {
4701         // Optimized form of:
4702         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4703         int plane = codePoint >>> 16;
4704         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4705     }
4706 
4707     /**
4708      * Determines whether the specified character (Unicode code point)
4709      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4710      * Such code points can be represented using a single {@code char}.
4711      *
4712      * @param  codePoint the character (Unicode code point) to be tested
4713      * @return {@code true} if the specified code point is between
4714      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4715      *         {@code false} otherwise.
4716      * @since  1.7
4717      */
isBmpCodePoint(int codePoint)4718     public static boolean isBmpCodePoint(int codePoint) {
4719         return codePoint >>> 16 == 0;
4720         // Optimized form of:
4721         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4722         // We consistently use logical shift (>>>) to facilitate
4723         // additional runtime optimizations.
4724     }
4725 
4726     /**
4727      * Determines whether the specified character (Unicode code point)
4728      * is in the <a href="#supplementary">supplementary character</a> range.
4729      *
4730      * @param  codePoint the character (Unicode code point) to be tested
4731      * @return {@code true} if the specified code point is between
4732      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4733      *         {@link #MAX_CODE_POINT} inclusive;
4734      *         {@code false} otherwise.
4735      * @since  1.5
4736      */
isSupplementaryCodePoint(int codePoint)4737     public static boolean isSupplementaryCodePoint(int codePoint) {
4738         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4739             && codePoint <  MAX_CODE_POINT + 1;
4740     }
4741 
4742     /**
4743      * Determines if the given {@code char} value is a
4744      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4745      * Unicode high-surrogate code unit</a>
4746      * (also known as <i>leading-surrogate code unit</i>).
4747      *
4748      * <p>Such values do not represent characters by themselves,
4749      * but are used in the representation of
4750      * <a href="#supplementary">supplementary characters</a>
4751      * in the UTF-16 encoding.
4752      *
4753      * @param  ch the {@code char} value to be tested.
4754      * @return {@code true} if the {@code char} value is between
4755      *         {@link #MIN_HIGH_SURROGATE} and
4756      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4757      *         {@code false} otherwise.
4758      * @see    Character#isLowSurrogate(char)
4759      * @see    Character.UnicodeBlock#of(int)
4760      * @since  1.5
4761      */
isHighSurrogate(char ch)4762     public static boolean isHighSurrogate(char ch) {
4763         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4764         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4765     }
4766 
4767     /**
4768      * Determines if the given {@code char} value is a
4769      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4770      * Unicode low-surrogate code unit</a>
4771      * (also known as <i>trailing-surrogate code unit</i>).
4772      *
4773      * <p>Such values do not represent characters by themselves,
4774      * but are used in the representation of
4775      * <a href="#supplementary">supplementary characters</a>
4776      * in the UTF-16 encoding.
4777      *
4778      * @param  ch the {@code char} value to be tested.
4779      * @return {@code true} if the {@code char} value is between
4780      *         {@link #MIN_LOW_SURROGATE} and
4781      *         {@link #MAX_LOW_SURROGATE} inclusive;
4782      *         {@code false} otherwise.
4783      * @see    Character#isHighSurrogate(char)
4784      * @since  1.5
4785      */
isLowSurrogate(char ch)4786     public static boolean isLowSurrogate(char ch) {
4787         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4788     }
4789 
4790     /**
4791      * Determines if the given {@code char} value is a Unicode
4792      * <i>surrogate code unit</i>.
4793      *
4794      * <p>Such values do not represent characters by themselves,
4795      * but are used in the representation of
4796      * <a href="#supplementary">supplementary characters</a>
4797      * in the UTF-16 encoding.
4798      *
4799      * <p>A char value is a surrogate code unit if and only if it is either
4800      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4801      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4802      *
4803      * @param  ch the {@code char} value to be tested.
4804      * @return {@code true} if the {@code char} value is between
4805      *         {@link #MIN_SURROGATE} and
4806      *         {@link #MAX_SURROGATE} inclusive;
4807      *         {@code false} otherwise.
4808      * @since  1.7
4809      */
isSurrogate(char ch)4810     public static boolean isSurrogate(char ch) {
4811         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4812     }
4813 
4814     /**
4815      * Determines whether the specified pair of {@code char}
4816      * values is a valid
4817      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4818      * Unicode surrogate pair</a>.
     *
4820      * <p>This method is equivalent to the expression:
4821      * <blockquote><pre>{@code
4822      * isHighSurrogate(high) && isLowSurrogate(low)
4823      * }</pre></blockquote>
4824      *
4825      * @param  high the high-surrogate code value to be tested
4826      * @param  low the low-surrogate code value to be tested
4827      * @return {@code true} if the specified high and
4828      * low-surrogate code values represent a valid surrogate pair;
4829      * {@code false} otherwise.
4830      * @since  1.5
4831      */
isSurrogatePair(char high, char low)4832     public static boolean isSurrogatePair(char high, char low) {
4833         return isHighSurrogate(high) && isLowSurrogate(low);
4834     }
4835 
4836     /**
4837      * Determines the number of {@code char} values needed to
4838      * represent the specified character (Unicode code point). If the
4839      * specified character is equal to or greater than 0x10000, then
4840      * the method returns 2. Otherwise, the method returns 1.
4841      *
4842      * <p>This method doesn't validate the specified character to be a
4843      * valid Unicode code point. The caller must validate the
4844      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4845      * if necessary.
4846      *
4847      * @param   codePoint the character (Unicode code point) to be tested.
4848      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4849      * @see     Character#isSupplementaryCodePoint(int)
4850      * @since   1.5
4851      */
charCount(int codePoint)4852     public static int charCount(int codePoint) {
4853         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4854     }
4855 
4856     /**
4857      * Converts the specified surrogate pair to its supplementary code
4858      * point value. This method does not validate the specified
4859      * surrogate pair. The caller must validate it using {@link
4860      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4861      *
4862      * @param  high the high-surrogate code unit
4863      * @param  low the low-surrogate code unit
4864      * @return the supplementary code point composed from the
4865      *         specified surrogate pair.
4866      * @since  1.5
4867      */
toCodePoint(char high, char low)4868     public static int toCodePoint(char high, char low) {
4869         // Optimized form of:
4870         // return ((high - MIN_HIGH_SURROGATE) << 10)
4871         //         + (low - MIN_LOW_SURROGATE)
4872         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4873         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4874                                        - (MIN_HIGH_SURROGATE << 10)
4875                                        - MIN_LOW_SURROGATE);
4876     }
4877 
4878     /**
4879      * Returns the code point at the given index of the
4880      * {@code CharSequence}. If the {@code char} value at
4881      * the given index in the {@code CharSequence} is in the
4882      * high-surrogate range, the following index is less than the
4883      * length of the {@code CharSequence}, and the
4884      * {@code char} value at the following index is in the
4885      * low-surrogate range, then the supplementary code point
4886      * corresponding to this surrogate pair is returned. Otherwise,
4887      * the {@code char} value at the given index is returned.
4888      *
4889      * @param seq a sequence of {@code char} values (Unicode code
4890      * units)
4891      * @param index the index to the {@code char} values (Unicode
4892      * code units) in {@code seq} to be converted
4893      * @return the Unicode code point at the given index
4894      * @exception NullPointerException if {@code seq} is null.
4895      * @exception IndexOutOfBoundsException if the value
4896      * {@code index} is negative or not less than
4897      * {@link CharSequence#length() seq.length()}.
4898      * @since  1.5
4899      */
codePointAt(CharSequence seq, int index)4900     public static int codePointAt(CharSequence seq, int index) {
4901         char c1 = seq.charAt(index);
4902         if (isHighSurrogate(c1) && ++index < seq.length()) {
4903             char c2 = seq.charAt(index);
4904             if (isLowSurrogate(c2)) {
4905                 return toCodePoint(c1, c2);
4906             }
4907         }
4908         return c1;
4909     }
4910 
4911     /**
4912      * Returns the code point at the given index of the
4913      * {@code char} array. If the {@code char} value at
4914      * the given index in the {@code char} array is in the
4915      * high-surrogate range, the following index is less than the
4916      * length of the {@code char} array, and the
4917      * {@code char} value at the following index is in the
4918      * low-surrogate range, then the supplementary code point
4919      * corresponding to this surrogate pair is returned. Otherwise,
4920      * the {@code char} value at the given index is returned.
4921      *
4922      * @param a the {@code char} array
4923      * @param index the index to the {@code char} values (Unicode
4924      * code units) in the {@code char} array to be converted
4925      * @return the Unicode code point at the given index
4926      * @exception NullPointerException if {@code a} is null.
4927      * @exception IndexOutOfBoundsException if the value
4928      * {@code index} is negative or not less than
4929      * the length of the {@code char} array.
4930      * @since  1.5
4931      */
codePointAt(char[] a, int index)4932     public static int codePointAt(char[] a, int index) {
4933         return codePointAtImpl(a, index, a.length);
4934     }
4935 
4936     /**
4937      * Returns the code point at the given index of the
4938      * {@code char} array, where only array elements with
4939      * {@code index} less than {@code limit} can be used. If
4940      * the {@code char} value at the given index in the
4941      * {@code char} array is in the high-surrogate range, the
4942      * following index is less than the {@code limit}, and the
4943      * {@code char} value at the following index is in the
4944      * low-surrogate range, then the supplementary code point
4945      * corresponding to this surrogate pair is returned. Otherwise,
4946      * the {@code char} value at the given index is returned.
4947      *
4948      * @param a the {@code char} array
4949      * @param index the index to the {@code char} values (Unicode
4950      * code units) in the {@code char} array to be converted
4951      * @param limit the index after the last array element that
4952      * can be used in the {@code char} array
4953      * @return the Unicode code point at the given index
4954      * @exception NullPointerException if {@code a} is null.
4955      * @exception IndexOutOfBoundsException if the {@code index}
4956      * argument is negative or not less than the {@code limit}
4957      * argument, or if the {@code limit} argument is negative or
4958      * greater than the length of the {@code char} array.
4959      * @since  1.5
4960      */
codePointAt(char[] a, int index, int limit)4961     public static int codePointAt(char[] a, int index, int limit) {
4962         if (index >= limit || limit < 0 || limit > a.length) {
4963             throw new IndexOutOfBoundsException();
4964         }
4965         return codePointAtImpl(a, index, limit);
4966     }
4967 
4968     // throws ArrayIndexOutOfBoundsException if index out of bounds
codePointAtImpl(char[] a, int index, int limit)4969     static int codePointAtImpl(char[] a, int index, int limit) {
4970         char c1 = a[index];
4971         if (isHighSurrogate(c1) && ++index < limit) {
4972             char c2 = a[index];
4973             if (isLowSurrogate(c2)) {
4974                 return toCodePoint(c1, c2);
4975             }
4976         }
4977         return c1;
4978     }
4979 
4980     /**
4981      * Returns the code point preceding the given index of the
4982      * {@code CharSequence}. If the {@code char} value at
4983      * {@code (index - 1)} in the {@code CharSequence} is in
4984      * the low-surrogate range, {@code (index - 2)} is not
4985      * negative, and the {@code char} value at {@code (index - 2)}
4986      * in the {@code CharSequence} is in the
4987      * high-surrogate range, then the supplementary code point
4988      * corresponding to this surrogate pair is returned. Otherwise,
4989      * the {@code char} value at {@code (index - 1)} is
4990      * returned.
4991      *
4992      * @param seq the {@code CharSequence} instance
4993      * @param index the index following the code point that should be returned
4994      * @return the Unicode code point value before the given index.
4995      * @exception NullPointerException if {@code seq} is null.
4996      * @exception IndexOutOfBoundsException if the {@code index}
4997      * argument is less than 1 or greater than {@link
4998      * CharSequence#length() seq.length()}.
4999      * @since  1.5
5000      */
codePointBefore(CharSequence seq, int index)5001     public static int codePointBefore(CharSequence seq, int index) {
5002         char c2 = seq.charAt(--index);
5003         if (isLowSurrogate(c2) && index > 0) {
5004             char c1 = seq.charAt(--index);
5005             if (isHighSurrogate(c1)) {
5006                 return toCodePoint(c1, c2);
5007             }
5008         }
5009         return c2;
5010     }
5011 
5012     /**
5013      * Returns the code point preceding the given index of the
5014      * {@code char} array. If the {@code char} value at
5015      * {@code (index - 1)} in the {@code char} array is in
5016      * the low-surrogate range, {@code (index - 2)} is not
5017      * negative, and the {@code char} value at {@code (index - 2)}
5018      * in the {@code char} array is in the
5019      * high-surrogate range, then the supplementary code point
5020      * corresponding to this surrogate pair is returned. Otherwise,
5021      * the {@code char} value at {@code (index - 1)} is
5022      * returned.
5023      *
5024      * @param a the {@code char} array
5025      * @param index the index following the code point that should be returned
5026      * @return the Unicode code point value before the given index.
5027      * @exception NullPointerException if {@code a} is null.
5028      * @exception IndexOutOfBoundsException if the {@code index}
5029      * argument is less than 1 or greater than the length of the
5030      * {@code char} array
5031      * @since  1.5
5032      */
codePointBefore(char[] a, int index)5033     public static int codePointBefore(char[] a, int index) {
5034         return codePointBeforeImpl(a, index, 0);
5035     }
5036 
5037     /**
5038      * Returns the code point preceding the given index of the
5039      * {@code char} array, where only array elements with
5040      * {@code index} greater than or equal to {@code start}
5041      * can be used. If the {@code char} value at {@code (index - 1)}
5042      * in the {@code char} array is in the
5043      * low-surrogate range, {@code (index - 2)} is not less than
5044      * {@code start}, and the {@code char} value at
5045      * {@code (index - 2)} in the {@code char} array is in
5046      * the high-surrogate range, then the supplementary code point
5047      * corresponding to this surrogate pair is returned. Otherwise,
5048      * the {@code char} value at {@code (index - 1)} is
5049      * returned.
5050      *
5051      * @param a the {@code char} array
5052      * @param index the index following the code point that should be returned
5053      * @param start the index of the first array element in the
5054      * {@code char} array
5055      * @return the Unicode code point value before the given index.
5056      * @exception NullPointerException if {@code a} is null.
5057      * @exception IndexOutOfBoundsException if the {@code index}
5058      * argument is not greater than the {@code start} argument or
5059      * is greater than the length of the {@code char} array, or
5060      * if the {@code start} argument is negative or not less than
5061      * the length of the {@code char} array.
5062      * @since  1.5
5063      */
codePointBefore(char[] a, int index, int start)5064     public static int codePointBefore(char[] a, int index, int start) {
5065         if (index <= start || start < 0 || start >= a.length) {
5066             throw new IndexOutOfBoundsException();
5067         }
5068         return codePointBeforeImpl(a, index, start);
5069     }
5070 
5071     // throws ArrayIndexOutOfBoundsException if index-1 out of bounds
codePointBeforeImpl(char[] a, int index, int start)5072     static int codePointBeforeImpl(char[] a, int index, int start) {
5073         char c2 = a[--index];
5074         if (isLowSurrogate(c2) && index > start) {
5075             char c1 = a[--index];
5076             if (isHighSurrogate(c1)) {
5077                 return toCodePoint(c1, c2);
5078             }
5079         }
5080         return c2;
5081     }
5082 
5083     /**
5084      * Returns the leading surrogate (a
5085      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
5086      * high surrogate code unit</a>) of the
5087      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5088      * surrogate pair</a>
5089      * representing the specified supplementary character (Unicode
5090      * code point) in the UTF-16 encoding.  If the specified character
5091      * is not a
5092      * <a href="Character.html#supplementary">supplementary character</a>,
5093      * an unspecified {@code char} is returned.
5094      *
5095      * <p>If
5096      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5097      * is {@code true}, then
5098      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
5099      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
5100      * are also always {@code true}.
5101      *
5102      * @param   codePoint a supplementary character (Unicode code point)
5103      * @return  the leading surrogate code unit used to represent the
5104      *          character in the UTF-16 encoding
5105      * @since   1.7
5106      */
highSurrogate(int codePoint)5107     public static char highSurrogate(int codePoint) {
5108         return (char) ((codePoint >>> 10)
5109             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
5110     }
5111 
5112     /**
5113      * Returns the trailing surrogate (a
5114      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
5115      * low surrogate code unit</a>) of the
5116      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
5117      * surrogate pair</a>
5118      * representing the specified supplementary character (Unicode
5119      * code point) in the UTF-16 encoding.  If the specified character
5120      * is not a
5121      * <a href="Character.html#supplementary">supplementary character</a>,
5122      * an unspecified {@code char} is returned.
5123      *
5124      * <p>If
5125      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
5126      * is {@code true}, then
5127      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
5128      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
5129      * are also always {@code true}.
5130      *
5131      * @param   codePoint a supplementary character (Unicode code point)
5132      * @return  the trailing surrogate code unit used to represent the
5133      *          character in the UTF-16 encoding
5134      * @since   1.7
5135      */
lowSurrogate(int codePoint)5136     public static char lowSurrogate(int codePoint) {
5137         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
5138     }
5139 
5140     /**
5141      * Converts the specified character (Unicode code point) to its
5142      * UTF-16 representation. If the specified code point is a BMP
5143      * (Basic Multilingual Plane or Plane 0) value, the same value is
5144      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
5145      * specified code point is a supplementary character, its
5146      * surrogate values are stored in {@code dst[dstIndex]}
5147      * (high-surrogate) and {@code dst[dstIndex+1]}
5148      * (low-surrogate), and 2 is returned.
5149      *
5150      * @param  codePoint the character (Unicode code point) to be converted.
5151      * @param  dst an array of {@code char} in which the
5152      * {@code codePoint}'s UTF-16 value is stored.
5153      * @param dstIndex the start index into the {@code dst}
5154      * array where the converted value is stored.
5155      * @return 1 if the code point is a BMP code point, 2 if the
5156      * code point is a supplementary code point.
5157      * @exception IllegalArgumentException if the specified
5158      * {@code codePoint} is not a valid Unicode code point.
5159      * @exception NullPointerException if the specified {@code dst} is null.
5160      * @exception IndexOutOfBoundsException if {@code dstIndex}
5161      * is negative or not less than {@code dst.length}, or if
5162      * {@code dst} at {@code dstIndex} doesn't have enough
5163      * array element(s) to store the resulting {@code char}
5164      * value(s). (If {@code dstIndex} is equal to
5165      * {@code dst.length-1} and the specified
5166      * {@code codePoint} is a supplementary character, the
5167      * high-surrogate value is not stored in
5168      * {@code dst[dstIndex]}.)
5169      * @since  1.5
5170      */
toChars(int codePoint, char[] dst, int dstIndex)5171     public static int toChars(int codePoint, char[] dst, int dstIndex) {
5172         if (isBmpCodePoint(codePoint)) {
5173             dst[dstIndex] = (char) codePoint;
5174             return 1;
5175         } else if (isValidCodePoint(codePoint)) {
5176             toSurrogates(codePoint, dst, dstIndex);
5177             return 2;
5178         } else {
5179             throw new IllegalArgumentException();
5180         }
5181     }
5182 
5183     /**
5184      * Converts the specified character (Unicode code point) to its
5185      * UTF-16 representation stored in a {@code char} array. If
5186      * the specified code point is a BMP (Basic Multilingual Plane or
5187      * Plane 0) value, the resulting {@code char} array has
5188      * the same value as {@code codePoint}. If the specified code
5189      * point is a supplementary code point, the resulting
5190      * {@code char} array has the corresponding surrogate pair.
5191      *
5192      * @param  codePoint a Unicode code point
5193      * @return a {@code char} array having
5194      *         {@code codePoint}'s UTF-16 representation.
5195      * @exception IllegalArgumentException if the specified
5196      * {@code codePoint} is not a valid Unicode code point.
5197      * @since  1.5
5198      */
toChars(int codePoint)5199     public static char[] toChars(int codePoint) {
5200         if (isBmpCodePoint(codePoint)) {
5201             return new char[] { (char) codePoint };
5202         } else if (isValidCodePoint(codePoint)) {
5203             char[] result = new char[2];
5204             toSurrogates(codePoint, result, 0);
5205             return result;
5206         } else {
5207             throw new IllegalArgumentException();
5208         }
5209     }
5210 
toSurrogates(int codePoint, char[] dst, int index)5211     static void toSurrogates(int codePoint, char[] dst, int index) {
5212         // We write elements "backwards" to guarantee all-or-nothing
5213         dst[index+1] = lowSurrogate(codePoint);
5214         dst[index] = highSurrogate(codePoint);
5215     }
5216 
5217     /**
5218      * Returns the number of Unicode code points in the text range of
5219      * the specified char sequence. The text range begins at the
5220      * specified {@code beginIndex} and extends to the
5221      * {@code char} at index {@code endIndex - 1}. Thus the
5222      * length (in {@code char}s) of the text range is
5223      * {@code endIndex-beginIndex}. Unpaired surrogates within
5224      * the text range count as one code point each.
5225      *
5226      * @param seq the char sequence
5227      * @param beginIndex the index to the first {@code char} of
5228      * the text range.
5229      * @param endIndex the index after the last {@code char} of
5230      * the text range.
5231      * @return the number of Unicode code points in the specified text
5232      * range
5233      * @exception NullPointerException if {@code seq} is null.
5234      * @exception IndexOutOfBoundsException if the
5235      * {@code beginIndex} is negative, or {@code endIndex}
5236      * is larger than the length of the given sequence, or
5237      * {@code beginIndex} is larger than {@code endIndex}.
5238      * @since  1.5
5239      */
codePointCount(CharSequence seq, int beginIndex, int endIndex)5240     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5241         int length = seq.length();
5242         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5243             throw new IndexOutOfBoundsException();
5244         }
5245         int n = endIndex - beginIndex;
5246         for (int i = beginIndex; i < endIndex; ) {
5247             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5248                 isLowSurrogate(seq.charAt(i))) {
5249                 n--;
5250                 i++;
5251             }
5252         }
5253         return n;
5254     }
5255 
5256     /**
5257      * Returns the number of Unicode code points in a subarray of the
5258      * {@code char} array argument. The {@code offset}
5259      * argument is the index of the first {@code char} of the
5260      * subarray and the {@code count} argument specifies the
5261      * length of the subarray in {@code char}s. Unpaired
5262      * surrogates within the subarray count as one code point each.
5263      *
5264      * @param a the {@code char} array
5265      * @param offset the index of the first {@code char} in the
5266      * given {@code char} array
5267      * @param count the length of the subarray in {@code char}s
5268      * @return the number of Unicode code points in the specified subarray
5269      * @exception NullPointerException if {@code a} is null.
5270      * @exception IndexOutOfBoundsException if {@code offset} or
5271      * {@code count} is negative, or if {@code offset +
5272      * count} is larger than the length of the given array.
5273      * @since  1.5
5274      */
codePointCount(char[] a, int offset, int count)5275     public static int codePointCount(char[] a, int offset, int count) {
5276         if (count > a.length - offset || offset < 0 || count < 0) {
5277             throw new IndexOutOfBoundsException();
5278         }
5279         return codePointCountImpl(a, offset, count);
5280     }
5281 
codePointCountImpl(char[] a, int offset, int count)5282     static int codePointCountImpl(char[] a, int offset, int count) {
5283         int endIndex = offset + count;
5284         int n = count;
5285         for (int i = offset; i < endIndex; ) {
5286             if (isHighSurrogate(a[i++]) && i < endIndex &&
5287                 isLowSurrogate(a[i])) {
5288                 n--;
5289                 i++;
5290             }
5291         }
5292         return n;
5293     }
5294 
5295     /**
5296      * Returns the index within the given char sequence that is offset
5297      * from the given {@code index} by {@code codePointOffset}
5298      * code points. Unpaired surrogates within the text range given by
5299      * {@code index} and {@code codePointOffset} count as
5300      * one code point each.
5301      *
5302      * @param seq the char sequence
5303      * @param index the index to be offset
5304      * @param codePointOffset the offset in code points
5305      * @return the index within the char sequence
5306      * @exception NullPointerException if {@code seq} is null.
5307      * @exception IndexOutOfBoundsException if {@code index}
5308      *   is negative or larger than the length of the char sequence,
5309      *   or if {@code codePointOffset} is positive and the
5310      *   subsequence starting with {@code index} has fewer than
5311      *   {@code codePointOffset} code points, or if
5312      *   {@code codePointOffset} is negative and the subsequence
5313      *   before {@code index} has fewer than the absolute value
5314      *   of {@code codePointOffset} code points.
5315      * @since 1.5
5316      */
offsetByCodePoints(CharSequence seq, int index, int codePointOffset)5317     public static int offsetByCodePoints(CharSequence seq, int index,
5318                                          int codePointOffset) {
5319         int length = seq.length();
5320         if (index < 0 || index > length) {
5321             throw new IndexOutOfBoundsException();
5322         }
5323 
5324         int x = index;
5325         if (codePointOffset >= 0) {
5326             int i;
5327             for (i = 0; x < length && i < codePointOffset; i++) {
5328                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5329                     isLowSurrogate(seq.charAt(x))) {
5330                     x++;
5331                 }
5332             }
5333             if (i < codePointOffset) {
5334                 throw new IndexOutOfBoundsException();
5335             }
5336         } else {
5337             int i;
5338             for (i = codePointOffset; x > 0 && i < 0; i++) {
5339                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5340                     isHighSurrogate(seq.charAt(x-1))) {
5341                     x--;
5342                 }
5343             }
5344             if (i < 0) {
5345                 throw new IndexOutOfBoundsException();
5346             }
5347         }
5348         return x;
5349     }
5350 
5351     /**
5352      * Returns the index within the given {@code char} subarray
5353      * that is offset from the given {@code index} by
5354      * {@code codePointOffset} code points. The
5355      * {@code start} and {@code count} arguments specify a
5356      * subarray of the {@code char} array. Unpaired surrogates
5357      * within the text range given by {@code index} and
5358      * {@code codePointOffset} count as one code point each.
5359      *
5360      * @param a the {@code char} array
5361      * @param start the index of the first {@code char} of the
5362      * subarray
5363      * @param count the length of the subarray in {@code char}s
5364      * @param index the index to be offset
5365      * @param codePointOffset the offset in code points
5366      * @return the index within the subarray
5367      * @exception NullPointerException if {@code a} is null.
5368      * @exception IndexOutOfBoundsException
5369      *   if {@code start} or {@code count} is negative,
5370      *   or if {@code start + count} is larger than the length of
5371      *   the given array,
5372      *   or if {@code index} is less than {@code start} or
5373      *   larger than {@code start + count},
5374      *   or if {@code codePointOffset} is positive and the text range
5375      *   starting with {@code index} and ending with {@code start + count - 1}
5376      *   has fewer than {@code codePointOffset} code
5377      *   points,
5378      *   or if {@code codePointOffset} is negative and the text range
5379      *   starting with {@code start} and ending with {@code index - 1}
5380      *   has fewer than the absolute value of
5381      *   {@code codePointOffset} code points.
5382      * @since 1.5
5383      */
offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset)5384     public static int offsetByCodePoints(char[] a, int start, int count,
5385                                          int index, int codePointOffset) {
5386         if (count > a.length-start || start < 0 || count < 0
5387             || index < start || index > start+count) {
5388             throw new IndexOutOfBoundsException();
5389         }
5390         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5391     }
5392 
offsetByCodePointsImpl(char[]a, int start, int count, int index, int codePointOffset)5393     static int offsetByCodePointsImpl(char[]a, int start, int count,
5394                                       int index, int codePointOffset) {
5395         int x = index;
5396         if (codePointOffset >= 0) {
5397             int limit = start + count;
5398             int i;
5399             for (i = 0; x < limit && i < codePointOffset; i++) {
5400                 if (isHighSurrogate(a[x++]) && x < limit &&
5401                     isLowSurrogate(a[x])) {
5402                     x++;
5403                 }
5404             }
5405             if (i < codePointOffset) {
5406                 throw new IndexOutOfBoundsException();
5407             }
5408         } else {
5409             int i;
5410             for (i = codePointOffset; x > start && i < 0; i++) {
5411                 if (isLowSurrogate(a[--x]) && x > start &&
5412                     isHighSurrogate(a[x-1])) {
5413                     x--;
5414                 }
5415             }
5416             if (i < 0) {
5417                 throw new IndexOutOfBoundsException();
5418             }
5419         }
5420         return x;
5421     }
5422 
5423     /**
5424      * Determines if the specified character is a lowercase character.
5425      * <p>
5426      * A character is lowercase if its general category type, provided
5427      * by {@code Character.getType(ch)}, is
5428      * {@code LOWERCASE_LETTER}, or it has contributory property
5429      * Other_Lowercase as defined by the Unicode Standard.
5430      * <p>
5431      * The following are examples of lowercase characters:
5432      * <blockquote><pre>
5433      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5434      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5435      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5436      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5437      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5438      * </pre></blockquote>
5439      * <p> Many other Unicode characters are lowercase too.
5440      *
5441      * <p><b>Note:</b> This method cannot handle <a
5442      * href="#supplementary"> supplementary characters</a>. To support
5443      * all Unicode characters, including supplementary characters, use
5444      * the {@link #isLowerCase(int)} method.
5445      *
5446      * @param   ch   the character to be tested.
5447      * @return  {@code true} if the character is lowercase;
5448      *          {@code false} otherwise.
5449      * @see     Character#isLowerCase(char)
5450      * @see     Character#isTitleCase(char)
5451      * @see     Character#toLowerCase(char)
5452      * @see     Character#getType(char)
5453      */
isLowerCase(char ch)5454     public static boolean isLowerCase(char ch) {
5455         return isLowerCase((int)ch);
5456     }
5457 
5458     /**
5459      * Determines if the specified character (Unicode code point) is a
5460      * lowercase character.
5461      * <p>
5462      * A character is lowercase if its general category type, provided
5463      * by {@link Character#getType getType(codePoint)}, is
5464      * {@code LOWERCASE_LETTER}, or it has contributory property
5465      * Other_Lowercase as defined by the Unicode Standard.
5466      * <p>
5467      * The following are examples of lowercase characters:
5468      * <blockquote><pre>
5469      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5470      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5471      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5472      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5473      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5474      * </pre></blockquote>
5475      * <p> Many other Unicode characters are lowercase too.
5476      *
5477      * @param   codePoint the character (Unicode code point) to be tested.
5478      * @return  {@code true} if the character is lowercase;
5479      *          {@code false} otherwise.
5480      * @see     Character#isLowerCase(int)
5481      * @see     Character#isTitleCase(int)
5482      * @see     Character#toLowerCase(int)
5483      * @see     Character#getType(int)
5484      * @since   1.5
5485      */
5486     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
5487     /*
5488     public static boolean isLowerCase(int codePoint) {
5489         return getType(codePoint) == Character.LOWERCASE_LETTER ||
5490                CharacterData.of(codePoint).isOtherLowercase(codePoint);
5491     }
5492     */
isLowerCase(int codePoint)5493     public static boolean isLowerCase(int codePoint) {
5494         return isLowerCaseImpl(codePoint);
5495     }
5496 
5497     @FastNative
isLowerCaseImpl(int codePoint)5498     static native boolean isLowerCaseImpl(int codePoint);
5499     // END Android-changed: Reimplement methods natively on top of ICU4C.
5500 
5501     /**
5502      * Determines if the specified character is an uppercase character.
5503      * <p>
5504      * A character is uppercase if its general category type, provided by
5505      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER},
5506      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5507      * <p>
5508      * The following are examples of uppercase characters:
5509      * <blockquote><pre>
5510      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5511      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5512      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5513      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5514      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5515      * </pre></blockquote>
5516      * <p> Many other Unicode characters are uppercase too.
5517      *
5518      * <p><b>Note:</b> This method cannot handle <a
5519      * href="#supplementary"> supplementary characters</a>. To support
5520      * all Unicode characters, including supplementary characters, use
5521      * the {@link #isUpperCase(int)} method.
5522      *
5523      * @param   ch   the character to be tested.
5524      * @return  {@code true} if the character is uppercase;
5525      *          {@code false} otherwise.
5526      * @see     Character#isLowerCase(char)
5527      * @see     Character#isTitleCase(char)
5528      * @see     Character#toUpperCase(char)
5529      * @see     Character#getType(char)
5530      * @since   1.0
5531      */
isUpperCase(char ch)5532     public static boolean isUpperCase(char ch) {
5533         return isUpperCase((int)ch);
5534     }
5535 
5536     /**
5537      * Determines if the specified character (Unicode code point) is an uppercase character.
5538      * <p>
5539      * A character is uppercase if its general category type, provided by
5540      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5541      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5542      * <p>
5543      * The following are examples of uppercase characters:
5544      * <blockquote><pre>
5545      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5546      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5547      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5548      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5549      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5550      * </pre></blockquote>
5551      * <p> Many other Unicode characters are uppercase too.
5552      *
5553      * @param   codePoint the character (Unicode code point) to be tested.
5554      * @return  {@code true} if the character is uppercase;
5555      *          {@code false} otherwise.
5556      * @see     Character#isLowerCase(int)
5557      * @see     Character#isTitleCase(int)
5558      * @see     Character#toUpperCase(int)
5559      * @see     Character#getType(int)
5560      * @since   1.5
5561      */
5562     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
5563     /*
5564     public static boolean isUpperCase(int codePoint) {
5565         return getType(codePoint) == Character.UPPERCASE_LETTER ||
5566                CharacterData.of(codePoint).isOtherUppercase(codePoint);
5567     }
5568     */
isUpperCase(int codePoint)5569     public static boolean isUpperCase(int codePoint) {
5570         return isUpperCaseImpl(codePoint);
5571     }
5572 
5573     @FastNative
isUpperCaseImpl(int codePoint)5574     static native boolean isUpperCaseImpl(int codePoint);
5575     // END Android-changed: Reimplement methods natively on top of ICU4C.
5576 
5577     /**
5578      * Determines if the specified character is a titlecase character.
5579      * <p>
5580      * A character is a titlecase character if its general
5581      * category type, provided by {@code Character.getType(ch)},
5582      * is {@code TITLECASE_LETTER}.
5583      * <p>
5584      * Some characters look like pairs of Latin letters. For example, there
5585      * is an uppercase letter that looks like "LJ" and has a corresponding
5586      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5587      * is the appropriate form to use when rendering a word in lowercase
5588      * with initial capitals, as for a book title.
5589      * <p>
5590      * These are some of the Unicode characters for which this method returns
5591      * {@code true}:
5592      * <ul>
5593      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5594      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5595      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5596      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5597      * </ul>
5598      * <p> Many other Unicode characters are titlecase too.
5599      *
5600      * <p><b>Note:</b> This method cannot handle <a
5601      * href="#supplementary"> supplementary characters</a>. To support
5602      * all Unicode characters, including supplementary characters, use
5603      * the {@link #isTitleCase(int)} method.
5604      *
5605      * @param   ch   the character to be tested.
5606      * @return  {@code true} if the character is titlecase;
5607      *          {@code false} otherwise.
5608      * @see     Character#isLowerCase(char)
5609      * @see     Character#isUpperCase(char)
5610      * @see     Character#toTitleCase(char)
5611      * @see     Character#getType(char)
5612      * @since   1.0.2
5613      */
isTitleCase(char ch)5614     public static boolean isTitleCase(char ch) {
5615         return isTitleCase((int)ch);
5616     }
5617 
5618     /**
5619      * Determines if the specified character (Unicode code point) is a titlecase character.
5620      * <p>
5621      * A character is a titlecase character if its general
5622      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5623      * is {@code TITLECASE_LETTER}.
5624      * <p>
5625      * Some characters look like pairs of Latin letters. For example, there
5626      * is an uppercase letter that looks like "LJ" and has a corresponding
5627      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5628      * is the appropriate form to use when rendering a word in lowercase
5629      * with initial capitals, as for a book title.
5630      * <p>
5631      * These are some of the Unicode characters for which this method returns
5632      * {@code true}:
5633      * <ul>
5634      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5635      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5636      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5637      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5638      * </ul>
     * <p> Many other Unicode characters are titlecase too.
5640      *
5641      * @param   codePoint the character (Unicode code point) to be tested.
5642      * @return  {@code true} if the character is titlecase;
5643      *          {@code false} otherwise.
5644      * @see     Character#isLowerCase(int)
5645      * @see     Character#isUpperCase(int)
5646      * @see     Character#toTitleCase(int)
5647      * @see     Character#getType(int)
5648      * @since   1.5
5649      */
5650     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
5651     /*
5652     public static boolean isTitleCase(int codePoint) {
5653         return getType(codePoint) == Character.TITLECASE_LETTER;
5654     }
5655     */
isTitleCase(int codePoint)5656     public static boolean isTitleCase(int codePoint) {
5657         return isTitleCaseImpl(codePoint);
5658     }
5659 
5660     @FastNative
isTitleCaseImpl(int codePoint)5661     static native boolean isTitleCaseImpl(int codePoint);
5662     // END Android-changed: Reimplement methods natively on top of ICU4C.
5663 
5664     /**
5665      * Determines if the specified character is a digit.
5666      * <p>
5667      * A character is a digit if its general category type, provided
5668      * by {@code Character.getType(ch)}, is
5669      * {@code DECIMAL_DIGIT_NUMBER}.
5670      * <p>
5671      * Some Unicode character ranges that contain digits:
5672      * <ul>
5673      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5674      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5675      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5676      *     Arabic-Indic digits
5677      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5678      *     Extended Arabic-Indic digits
5679      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5680      *     Devanagari digits
5681      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5682      *     Fullwidth digits
5683      * </ul>
5684      *
5685      * Many other character ranges contain digits as well.
5686      *
5687      * <p><b>Note:</b> This method cannot handle <a
5688      * href="#supplementary"> supplementary characters</a>. To support
5689      * all Unicode characters, including supplementary characters, use
5690      * the {@link #isDigit(int)} method.
5691      *
5692      * @param   ch   the character to be tested.
5693      * @return  {@code true} if the character is a digit;
5694      *          {@code false} otherwise.
5695      * @see     Character#digit(char, int)
5696      * @see     Character#forDigit(int, int)
5697      * @see     Character#getType(char)
5698      */
isDigit(char ch)5699     public static boolean isDigit(char ch) {
5700         return isDigit((int)ch);
5701     }
5702 
5703     /**
5704      * Determines if the specified character (Unicode code point) is a digit.
5705      * <p>
5706      * A character is a digit if its general category type, provided
5707      * by {@link Character#getType(int) getType(codePoint)}, is
5708      * {@code DECIMAL_DIGIT_NUMBER}.
5709      * <p>
5710      * Some Unicode character ranges that contain digits:
5711      * <ul>
5712      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5713      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5714      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5715      *     Arabic-Indic digits
5716      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5717      *     Extended Arabic-Indic digits
5718      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5719      *     Devanagari digits
5720      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5721      *     Fullwidth digits
5722      * </ul>
5723      *
5724      * Many other character ranges contain digits as well.
5725      *
5726      * @param   codePoint the character (Unicode code point) to be tested.
5727      * @return  {@code true} if the character is a digit;
5728      *          {@code false} otherwise.
5729      * @see     Character#forDigit(int, int)
5730      * @see     Character#getType(int)
5731      * @since   1.5
5732      */
5733     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
5734     /*
5735     public static boolean isDigit(int codePoint) {
5736         return getType(codePoint) == Character.DECIMAL_DIGIT_NUMBER;
5737     }
5738     */
isDigit(int codePoint)5739     public static boolean isDigit(int codePoint) {
5740         return isDigitImpl(codePoint);
5741     }
5742 
5743     @FastNative
isDigitImpl(int codePoint)5744     static native boolean isDigitImpl(int codePoint);
5745     // END Android-changed: Reimplement methods natively on top of ICU4C.
5746 
5747     /**
5748      * Determines if a character is defined in Unicode.
5749      * <p>
5750      * A character is defined if at least one of the following is true:
5751      * <ul>
5752      * <li>It has an entry in the UnicodeData file.
5753      * <li>It has a value in a range defined by the UnicodeData file.
5754      * </ul>
5755      *
5756      * <p><b>Note:</b> This method cannot handle <a
5757      * href="#supplementary"> supplementary characters</a>. To support
5758      * all Unicode characters, including supplementary characters, use
5759      * the {@link #isDefined(int)} method.
5760      *
5761      * @param   ch   the character to be tested
5762      * @return  {@code true} if the character has a defined meaning
5763      *          in Unicode; {@code false} otherwise.
5764      * @see     Character#isDigit(char)
5765      * @see     Character#isLetter(char)
5766      * @see     Character#isLetterOrDigit(char)
5767      * @see     Character#isLowerCase(char)
5768      * @see     Character#isTitleCase(char)
5769      * @see     Character#isUpperCase(char)
5770      * @since   1.0.2
5771      */
isDefined(char ch)5772     public static boolean isDefined(char ch) {
5773         return isDefined((int)ch);
5774     }
5775 
5776     /**
5777      * Determines if a character (Unicode code point) is defined in Unicode.
5778      * <p>
5779      * A character is defined if at least one of the following is true:
5780      * <ul>
5781      * <li>It has an entry in the UnicodeData file.
5782      * <li>It has a value in a range defined by the UnicodeData file.
5783      * </ul>
5784      *
5785      * @param   codePoint the character (Unicode code point) to be tested.
5786      * @return  {@code true} if the character has a defined meaning
5787      *          in Unicode; {@code false} otherwise.
5788      * @see     Character#isDigit(int)
5789      * @see     Character#isLetter(int)
5790      * @see     Character#isLetterOrDigit(int)
5791      * @see     Character#isLowerCase(int)
5792      * @see     Character#isTitleCase(int)
5793      * @see     Character#isUpperCase(int)
5794      * @since   1.5
5795      */
5796     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
5797     /*
5798     public static boolean isDefined(int codePoint) {
5799         return getType(codePoint) != Character.UNASSIGNED;
5800     }
5801     */
isDefined(int codePoint)5802     public static boolean isDefined(int codePoint) {
5803         return isDefinedImpl(codePoint);
5804     }
5805 
5806     @FastNative
isDefinedImpl(int codePoint)5807     static native boolean isDefinedImpl(int codePoint);
5808     // END Android-changed: Reimplement methods natively on top of ICU4C.
5809 
5810     /**
5811      * Determines if the specified character is a letter.
5812      * <p>
5813      * A character is considered to be a letter if its general
5814      * category type, provided by {@code Character.getType(ch)},
5815      * is any of the following:
5816      * <ul>
5817      * <li> {@code UPPERCASE_LETTER}
5818      * <li> {@code LOWERCASE_LETTER}
5819      * <li> {@code TITLECASE_LETTER}
5820      * <li> {@code MODIFIER_LETTER}
5821      * <li> {@code OTHER_LETTER}
5822      * </ul>
5823      *
5824      * Not all letters have case. Many characters are
5825      * letters but are neither uppercase nor lowercase nor titlecase.
5826      *
5827      * <p><b>Note:</b> This method cannot handle <a
5828      * href="#supplementary"> supplementary characters</a>. To support
5829      * all Unicode characters, including supplementary characters, use
5830      * the {@link #isLetter(int)} method.
5831      *
5832      * @param   ch   the character to be tested.
5833      * @return  {@code true} if the character is a letter;
5834      *          {@code false} otherwise.
5835      * @see     Character#isDigit(char)
5836      * @see     Character#isJavaIdentifierStart(char)
5837      * @see     Character#isJavaLetter(char)
5838      * @see     Character#isJavaLetterOrDigit(char)
5839      * @see     Character#isLetterOrDigit(char)
5840      * @see     Character#isLowerCase(char)
5841      * @see     Character#isTitleCase(char)
5842      * @see     Character#isUnicodeIdentifierStart(char)
5843      * @see     Character#isUpperCase(char)
5844      */
isLetter(char ch)5845     public static boolean isLetter(char ch) {
5846         return isLetter((int)ch);
5847     }
5848 
5849     /**
5850      * Determines if the specified character (Unicode code point) is a letter.
5851      * <p>
5852      * A character is considered to be a letter if its general
5853      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5854      * is any of the following:
5855      * <ul>
5856      * <li> {@code UPPERCASE_LETTER}
5857      * <li> {@code LOWERCASE_LETTER}
5858      * <li> {@code TITLECASE_LETTER}
5859      * <li> {@code MODIFIER_LETTER}
5860      * <li> {@code OTHER_LETTER}
5861      * </ul>
5862      *
5863      * Not all letters have case. Many characters are
5864      * letters but are neither uppercase nor lowercase nor titlecase.
5865      *
5866      * @param   codePoint the character (Unicode code point) to be tested.
5867      * @return  {@code true} if the character is a letter;
5868      *          {@code false} otherwise.
5869      * @see     Character#isDigit(int)
5870      * @see     Character#isJavaIdentifierStart(int)
5871      * @see     Character#isLetterOrDigit(int)
5872      * @see     Character#isLowerCase(int)
5873      * @see     Character#isTitleCase(int)
5874      * @see     Character#isUnicodeIdentifierStart(int)
5875      * @see     Character#isUpperCase(int)
5876      * @since   1.5
5877      */
5878     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
5879     /*
5880     public static boolean isLetter(int codePoint) {
5881         return ((((1 << Character.UPPERCASE_LETTER) |
5882             (1 << Character.LOWERCASE_LETTER) |
5883             (1 << Character.TITLECASE_LETTER) |
5884             (1 << Character.MODIFIER_LETTER) |
5885             (1 << Character.OTHER_LETTER)) >> getType(codePoint)) & 1)
5886             != 0;
5887     }
5888     */
isLetter(int codePoint)5889     public static boolean isLetter(int codePoint) {
5890         return isLetterImpl(codePoint);
5891     }
5892 
5893     @FastNative
isLetterImpl(int codePoint)5894     static native boolean isLetterImpl(int codePoint);
5895     // END Android-changed: Reimplement methods natively on top of ICU4C.
5896 
5897     /**
5898      * Determines if the specified character is a letter or digit.
5899      * <p>
5900      * A character is considered to be a letter or digit if either
5901      * {@code Character.isLetter(char ch)} or
5902      * {@code Character.isDigit(char ch)} returns
5903      * {@code true} for the character.
5904      *
5905      * <p><b>Note:</b> This method cannot handle <a
5906      * href="#supplementary"> supplementary characters</a>. To support
5907      * all Unicode characters, including supplementary characters, use
5908      * the {@link #isLetterOrDigit(int)} method.
5909      *
5910      * @param   ch   the character to be tested.
5911      * @return  {@code true} if the character is a letter or digit;
5912      *          {@code false} otherwise.
5913      * @see     Character#isDigit(char)
5914      * @see     Character#isJavaIdentifierPart(char)
5915      * @see     Character#isJavaLetter(char)
5916      * @see     Character#isJavaLetterOrDigit(char)
5917      * @see     Character#isLetter(char)
5918      * @see     Character#isUnicodeIdentifierPart(char)
5919      * @since   1.0.2
5920      */
isLetterOrDigit(char ch)5921     public static boolean isLetterOrDigit(char ch) {
5922         return isLetterOrDigit((int)ch);
5923     }
5924 
5925     /**
5926      * Determines if the specified character (Unicode code point) is a letter or digit.
5927      * <p>
5928      * A character is considered to be a letter or digit if either
5929      * {@link #isLetter(int) isLetter(codePoint)} or
5930      * {@link #isDigit(int) isDigit(codePoint)} returns
5931      * {@code true} for the character.
5932      *
5933      * @param   codePoint the character (Unicode code point) to be tested.
5934      * @return  {@code true} if the character is a letter or digit;
5935      *          {@code false} otherwise.
5936      * @see     Character#isDigit(int)
5937      * @see     Character#isJavaIdentifierPart(int)
5938      * @see     Character#isLetter(int)
5939      * @see     Character#isUnicodeIdentifierPart(int)
5940      * @since   1.5
5941      */
5942     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
5943     /*
5944     public static boolean isLetterOrDigit(int codePoint) {
5945         return ((((1 << Character.UPPERCASE_LETTER) |
5946             (1 << Character.LOWERCASE_LETTER) |
5947             (1 << Character.TITLECASE_LETTER) |
5948             (1 << Character.MODIFIER_LETTER) |
5949             (1 << Character.OTHER_LETTER) |
5950             (1 << Character.DECIMAL_DIGIT_NUMBER)) >> getType(codePoint)) & 1)
5951             != 0;
5952     }
5953     */
isLetterOrDigit(int codePoint)5954     public static boolean isLetterOrDigit(int codePoint) {
5955         return isLetterOrDigitImpl(codePoint);
5956     }
5957 
5958     @FastNative
isLetterOrDigitImpl(int codePoint)5959     static native boolean isLetterOrDigitImpl(int codePoint);
5960     // END Android-changed: Reimplement methods natively on top of ICU4C.
5961 
5962     /**
5963      * Determines if the specified character is permissible as the first
5964      * character in a Java identifier.
5965      * <p>
5966      * A character may start a Java identifier if and only if
5967      * one of the following is true:
5968      * <ul>
5969      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5970      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5971      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5972      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5973      * </ul>
5974      *
5975      * @param   ch the character to be tested.
5976      * @return  {@code true} if the character may start a Java
5977      *          identifier; {@code false} otherwise.
5978      * @see     Character#isJavaLetterOrDigit(char)
5979      * @see     Character#isJavaIdentifierStart(char)
5980      * @see     Character#isJavaIdentifierPart(char)
5981      * @see     Character#isLetter(char)
5982      * @see     Character#isLetterOrDigit(char)
5983      * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
5985      * @deprecated Replaced by isJavaIdentifierStart(char).
5986      */
5987     @Deprecated
isJavaLetter(char ch)5988     public static boolean isJavaLetter(char ch) {
5989         return isJavaIdentifierStart(ch);
5990     }
5991 
5992     /**
5993      * Determines if the specified character may be part of a Java
5994      * identifier as other than the first character.
5995      * <p>
5996      * A character may be part of a Java identifier if and only if any
5997      * of the following are true:
5998      * <ul>
5999      * <li>  it is a letter
6000      * <li>  it is a currency symbol (such as {@code '$'})
6001      * <li>  it is a connecting punctuation character (such as {@code '_'})
6002      * <li>  it is a digit
6003      * <li>  it is a numeric letter (such as a Roman numeral character)
6004      * <li>  it is a combining mark
6005      * <li>  it is a non-spacing mark
6006      * <li> {@code isIdentifierIgnorable} returns
6007      * {@code true} for the character.
6008      * </ul>
6009      *
6010      * @param   ch the character to be tested.
6011      * @return  {@code true} if the character may be part of a
6012      *          Java identifier; {@code false} otherwise.
6013      * @see     Character#isJavaLetter(char)
6014      * @see     Character#isJavaIdentifierStart(char)
6015      * @see     Character#isJavaIdentifierPart(char)
6016      * @see     Character#isLetter(char)
6017      * @see     Character#isLetterOrDigit(char)
6018      * @see     Character#isUnicodeIdentifierPart(char)
6019      * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
6021      * @deprecated Replaced by isJavaIdentifierPart(char).
6022      */
6023     @Deprecated
isJavaLetterOrDigit(char ch)6024     public static boolean isJavaLetterOrDigit(char ch) {
6025         return isJavaIdentifierPart(ch);
6026     }
6027 
6028     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
6030      * <p>
6031      * A character is considered to be alphabetic if its general category type,
6032      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
6033      * the following:
6034      * <ul>
6035      * <li> <code>UPPERCASE_LETTER</code>
6036      * <li> <code>LOWERCASE_LETTER</code>
6037      * <li> <code>TITLECASE_LETTER</code>
6038      * <li> <code>MODIFIER_LETTER</code>
6039      * <li> <code>OTHER_LETTER</code>
6040      * <li> <code>LETTER_NUMBER</code>
6041      * </ul>
6042      * or it has contributory property Other_Alphabetic as defined by the
6043      * Unicode Standard.
6044      *
6045      * @param   codePoint the character (Unicode code point) to be tested.
     * @return  <code>true</code> if the character is a Unicode alphabetic
6047      *          character, <code>false</code> otherwise.
6048      * @since   1.7
6049      */
6050     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
6051     /*
6052     public static boolean isAlphabetic(int codePoint) {
6053         return (((((1 << Character.UPPERCASE_LETTER) |
6054             (1 << Character.LOWERCASE_LETTER) |
6055             (1 << Character.TITLECASE_LETTER) |
6056             (1 << Character.MODIFIER_LETTER) |
6057             (1 << Character.OTHER_LETTER) |
6058             (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) ||
6059             CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
6060     }
6061     */
isAlphabetic(int codePoint)6062     public static boolean isAlphabetic(int codePoint) {
6063         return isAlphabeticImpl(codePoint);
6064     }
6065 
6066     @FastNative
isAlphabeticImpl(int codePoint)6067     static native boolean isAlphabeticImpl(int codePoint);
6068     // END Android-changed: Reimplement methods natively on top of ICU4C.
6069 
6070     /**
6071      * Determines if the specified character (Unicode code point) is a CJKV
6072      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
6073      * the Unicode Standard.
6074      *
6075      * @param   codePoint the character (Unicode code point) to be tested.
6076      * @return  <code>true</code> if the character is a Unicode ideograph
6077      *          character, <code>false</code> otherwise.
6078      * @since   1.7
6079      */
6080     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
6081     /*
6082     public static boolean isIdeographic(int codePoint) {
6083         return CharacterData.of(codePoint).isIdeographic(codePoint);
6084     }
6085     */
isIdeographic(int codePoint)6086     public static boolean isIdeographic(int codePoint) {
6087         return isIdeographicImpl(codePoint);
6088     }
6089     @FastNative
isIdeographicImpl(int codePoint)6090     static native boolean isIdeographicImpl(int codePoint);
6091     // END Android-changed: Reimplement methods natively on top of ICU4C.
6092 
6093     // Android-changed: Removed @see tag (target does not exist on Android):
6094     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6095     /**
6096      * Determines if the specified character is
6097      * permissible as the first character in a Java identifier.
6098      * <p>
6099      * A character may start a Java identifier if and only if
6100      * one of the following conditions is true:
6101      * <ul>
6102      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6103      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
6104      * <li> {@code ch} is a currency symbol (such as {@code '$'})
6105      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
6106      * </ul>
6107      *
6108      * <p><b>Note:</b> This method cannot handle <a
6109      * href="#supplementary"> supplementary characters</a>. To support
6110      * all Unicode characters, including supplementary characters, use
6111      * the {@link #isJavaIdentifierStart(int)} method.
6112      *
6113      * @param   ch the character to be tested.
6114      * @return  {@code true} if the character may start a Java identifier;
6115      *          {@code false} otherwise.
6116      * @see     Character#isJavaIdentifierPart(char)
6117      * @see     Character#isLetter(char)
6118      * @see     Character#isUnicodeIdentifierStart(char)
6119      * @since   1.1
6120      */
isJavaIdentifierStart(char ch)6121     public static boolean isJavaIdentifierStart(char ch) {
6122         return isJavaIdentifierStart((int)ch);
6123     }
6124 
6125     // Android-changed: Removed @see tag (target does not exist on Android):
6126     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6127     /**
6128      * Determines if the character (Unicode code point) is
6129      * permissible as the first character in a Java identifier.
6130      * <p>
6131      * A character may start a Java identifier if and only if
6132      * one of the following conditions is true:
6133      * <ul>
6134      * <li> {@link #isLetter(int) isLetter(codePoint)}
6135      *      returns {@code true}
6136      * <li> {@link #getType(int) getType(codePoint)}
6137      *      returns {@code LETTER_NUMBER}
6138      * <li> the referenced character is a currency symbol (such as {@code '$'})
6139      * <li> the referenced character is a connecting punctuation character
6140      *      (such as {@code '_'}).
6141      * </ul>
6142      *
6143      * @param   codePoint the character (Unicode code point) to be tested.
6144      * @return  {@code true} if the character may start a Java identifier;
6145      *          {@code false} otherwise.
6146      * @see     Character#isJavaIdentifierPart(int)
6147      * @see     Character#isLetter(int)
6148      * @see     Character#isUnicodeIdentifierStart(int)
6149      * @since   1.5
6150      */
6151     // BEGIN Android-changed: Use ICU.
6152     /*
6153     public static boolean isJavaIdentifierStart(int codePoint) {
6154         return CharacterData.of(codePoint).isJavaIdentifierStart(codePoint);
6155     }
6156     */
isJavaIdentifierStart(int codePoint)6157     public static boolean isJavaIdentifierStart(int codePoint) {
6158         // Use precomputed bitmasks to optimize the ASCII range.
6159         if (codePoint < 64) {
6160             return (codePoint == '$'); // There's only one character in this range.
6161         } else if (codePoint < 128) {
6162             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
6163         }
6164         return ((1 << getType(codePoint))
6165                 & ((1 << UPPERCASE_LETTER)
6166                    | (1 << LOWERCASE_LETTER)
6167                    | (1  << TITLECASE_LETTER)
6168                    | (1  << MODIFIER_LETTER)
6169                    | (1  << OTHER_LETTER)
6170                    | (1  << CURRENCY_SYMBOL)
6171                    | (1  << CONNECTOR_PUNCTUATION)
6172                    | (1  << LETTER_NUMBER))) != 0;
6173     }
6174     // END Android-changed: Use ICU.
6175 
6176     // Android-changed: Removed @see tag (target does not exist on Android):
6177     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6178     /**
6179      * Determines if the specified character may be part of a Java
6180      * identifier as other than the first character.
6181      * <p>
6182      * A character may be part of a Java identifier if any of the following
6183      * are true:
6184      * <ul>
6185      * <li>  it is a letter
6186      * <li>  it is a currency symbol (such as {@code '$'})
6187      * <li>  it is a connecting punctuation character (such as {@code '_'})
6188      * <li>  it is a digit
6189      * <li>  it is a numeric letter (such as a Roman numeral character)
6190      * <li>  it is a combining mark
6191      * <li>  it is a non-spacing mark
6192      * <li> {@code isIdentifierIgnorable} returns
6193      * {@code true} for the character
6194      * </ul>
6195      *
6196      * <p><b>Note:</b> This method cannot handle <a
6197      * href="#supplementary"> supplementary characters</a>. To support
6198      * all Unicode characters, including supplementary characters, use
6199      * the {@link #isJavaIdentifierPart(int)} method.
6200      *
6201      * @param   ch      the character to be tested.
6202      * @return {@code true} if the character may be part of a
6203      *          Java identifier; {@code false} otherwise.
6204      * @see     Character#isIdentifierIgnorable(char)
6205      * @see     Character#isJavaIdentifierStart(char)
6206      * @see     Character#isLetterOrDigit(char)
6207      * @see     Character#isUnicodeIdentifierPart(char)
6208      * @since   1.1
6209      */
isJavaIdentifierPart(char ch)6210     public static boolean isJavaIdentifierPart(char ch) {
6211         return isJavaIdentifierPart((int)ch);
6212     }
6213 
6214     // Android-changed: Removed @see tag (target does not exist on Android):
6215     // @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
6216     /**
6217      * Determines if the character (Unicode code point) may be part of a Java
6218      * identifier as other than the first character.
6219      * <p>
6220      * A character may be part of a Java identifier if any of the following
6221      * are true:
6222      * <ul>
6223      * <li>  it is a letter
6224      * <li>  it is a currency symbol (such as {@code '$'})
6225      * <li>  it is a connecting punctuation character (such as {@code '_'})
6226      * <li>  it is a digit
6227      * <li>  it is a numeric letter (such as a Roman numeral character)
6228      * <li>  it is a combining mark
6229      * <li>  it is a non-spacing mark
6230      * <li> {@link #isIdentifierIgnorable(int)
6231      * isIdentifierIgnorable(codePoint)} returns {@code true} for
6232      * the character
6233      * </ul>
6234      *
6235      * @param   codePoint the character (Unicode code point) to be tested.
6236      * @return {@code true} if the character may be part of a
6237      *          Java identifier; {@code false} otherwise.
6238      * @see     Character#isIdentifierIgnorable(int)
6239      * @see     Character#isJavaIdentifierStart(int)
6240      * @see     Character#isLetterOrDigit(int)
6241      * @see     Character#isUnicodeIdentifierPart(int)
6242      * @since   1.5
6243      */
6244     // BEGIN Android-changed: Use ICU.
6245     /*
6246     public static boolean isJavaIdentifierPart(int codePoint) {
6247         return CharacterData.of(codePoint).isJavaIdentifierPart(codePoint);
6248     }
6249     */
isJavaIdentifierPart(int codePoint)6250     public static boolean isJavaIdentifierPart(int codePoint) {
6251         // Use precomputed bitmasks to optimize the ASCII range.
6252         if (codePoint < 64) {
6253             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
6254         } else if (codePoint < 128) {
6255             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
6256         }
6257         return ((1 << getType(codePoint))
6258                 & ((1 << UPPERCASE_LETTER)
6259                    | (1 << LOWERCASE_LETTER)
6260                    | (1 << TITLECASE_LETTER)
6261                    | (1 << MODIFIER_LETTER)
6262                    | (1 << OTHER_LETTER)
6263                    | (1 << CURRENCY_SYMBOL)
6264                    | (1 << CONNECTOR_PUNCTUATION)
6265                    | (1 << DECIMAL_DIGIT_NUMBER)
6266                    | (1 << LETTER_NUMBER)
6267                    | (1 << FORMAT)
6268                    | (1 << COMBINING_SPACING_MARK)
6269                    | (1 << NON_SPACING_MARK))) != 0
6270                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
6271                 || (codePoint >= 0x7f && codePoint <= 0x9f);
6272     }
6273     // END Android-changed: Use ICU.
6274 
6275     /**
6276      * Determines if the specified character is permissible as the
6277      * first character in a Unicode identifier.
6278      * <p>
6279      * A character may start a Unicode identifier if and only if
6280      * one of the following conditions is true:
6281      * <ul>
6282      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
6283      * <li> {@link #getType(char) getType(ch)} returns
6284      *      {@code LETTER_NUMBER}.
6285      * </ul>
6286      *
6287      * <p><b>Note:</b> This method cannot handle <a
6288      * href="#supplementary"> supplementary characters</a>. To support
6289      * all Unicode characters, including supplementary characters, use
6290      * the {@link #isUnicodeIdentifierStart(int)} method.
6291      *
6292      * @param   ch      the character to be tested.
6293      * @return  {@code true} if the character may start a Unicode
6294      *          identifier; {@code false} otherwise.
6295      * @see     Character#isJavaIdentifierStart(char)
6296      * @see     Character#isLetter(char)
6297      * @see     Character#isUnicodeIdentifierPart(char)
6298      * @since   1.1
6299      */
isUnicodeIdentifierStart(char ch)6300     public static boolean isUnicodeIdentifierStart(char ch) {
6301         return isUnicodeIdentifierStart((int)ch);
6302     }
6303 
6304     /**
6305      * Determines if the specified character (Unicode code point) is permissible as the
6306      * first character in a Unicode identifier.
6307      * <p>
6308      * A character may start a Unicode identifier if and only if
6309      * one of the following conditions is true:
6310      * <ul>
6311      * <li> {@link #isLetter(int) isLetter(codePoint)}
6312      *      returns {@code true}
6313      * <li> {@link #getType(int) getType(codePoint)}
6314      *      returns {@code LETTER_NUMBER}.
6315      * </ul>
6316      * @param   codePoint the character (Unicode code point) to be tested.
6317      * @return  {@code true} if the character may start a Unicode
6318      *          identifier; {@code false} otherwise.
6319      * @see     Character#isJavaIdentifierStart(int)
6320      * @see     Character#isLetter(int)
6321      * @see     Character#isUnicodeIdentifierPart(int)
6322      * @since   1.5
6323      */
6324     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
6325     /*
6326     public static boolean isUnicodeIdentifierStart(int codePoint) {
6327         return CharacterData.of(codePoint).isUnicodeIdentifierStart(codePoint);
6328     }
6329     */
isUnicodeIdentifierStart(int codePoint)6330     public static boolean isUnicodeIdentifierStart(int codePoint) {
6331         return isUnicodeIdentifierStartImpl(codePoint);
6332     }
6333 
    // Package-private native method that isUnicodeIdentifierStart(int)
    // delegates to; it receives the code point exactly as the caller passed it.
    @FastNative
    static native boolean isUnicodeIdentifierStartImpl(int codePoint);
6336     // END Android-changed: Reimplement methods natively on top of ICU4C.
6337 
6338     /**
6339      * Determines if the specified character may be part of a Unicode
6340      * identifier as other than the first character.
6341      * <p>
6342      * A character may be part of a Unicode identifier if and only if
6343      * one of the following statements is true:
6344      * <ul>
6345      * <li>  it is a letter
6346      * <li>  it is a connecting punctuation character (such as {@code '_'})
6347      * <li>  it is a digit
6348      * <li>  it is a numeric letter (such as a Roman numeral character)
6349      * <li>  it is a combining mark
6350      * <li>  it is a non-spacing mark
6351      * <li> {@code isIdentifierIgnorable} returns
6352      * {@code true} for this character.
6353      * </ul>
6354      *
6355      * <p><b>Note:</b> This method cannot handle <a
6356      * href="#supplementary"> supplementary characters</a>. To support
6357      * all Unicode characters, including supplementary characters, use
6358      * the {@link #isUnicodeIdentifierPart(int)} method.
6359      *
6360      * @param   ch      the character to be tested.
6361      * @return  {@code true} if the character may be part of a
6362      *          Unicode identifier; {@code false} otherwise.
6363      * @see     Character#isIdentifierIgnorable(char)
6364      * @see     Character#isJavaIdentifierPart(char)
6365      * @see     Character#isLetterOrDigit(char)
6366      * @see     Character#isUnicodeIdentifierStart(char)
6367      * @since   1.1
6368      */
isUnicodeIdentifierPart(char ch)6369     public static boolean isUnicodeIdentifierPart(char ch) {
6370         return isUnicodeIdentifierPart((int)ch);
6371     }
6372 
6373     /**
6374      * Determines if the specified character (Unicode code point) may be part of a Unicode
6375      * identifier as other than the first character.
6376      * <p>
6377      * A character may be part of a Unicode identifier if and only if
6378      * one of the following statements is true:
6379      * <ul>
6380      * <li>  it is a letter
6381      * <li>  it is a connecting punctuation character (such as {@code '_'})
6382      * <li>  it is a digit
6383      * <li>  it is a numeric letter (such as a Roman numeral character)
6384      * <li>  it is a combining mark
6385      * <li>  it is a non-spacing mark
6386      * <li> {@code isIdentifierIgnorable} returns
6387      * {@code true} for this character.
6388      * </ul>
6389      * @param   codePoint the character (Unicode code point) to be tested.
6390      * @return  {@code true} if the character may be part of a
6391      *          Unicode identifier; {@code false} otherwise.
6392      * @see     Character#isIdentifierIgnorable(int)
6393      * @see     Character#isJavaIdentifierPart(int)
6394      * @see     Character#isLetterOrDigit(int)
6395      * @see     Character#isUnicodeIdentifierStart(int)
6396      * @since   1.5
6397      */
6398     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
6399     /*
6400     public static boolean isUnicodeIdentifierPart(int codePoint) {
6401         return CharacterData.of(codePoint).isUnicodeIdentifierPart(codePoint);
6402     }
6403     */
isUnicodeIdentifierPart(int codePoint)6404     public static boolean isUnicodeIdentifierPart(int codePoint) {
6405         return isUnicodeIdentifierPartImpl(codePoint);
6406     }
6407 
    // Package-private native method that isUnicodeIdentifierPart(int)
    // delegates to; it receives the code point exactly as the caller passed it.
    @FastNative
    static native boolean isUnicodeIdentifierPartImpl(int codePoint);
6410     // END Android-changed: Reimplement methods natively on top of ICU4C.
6411 
6412     /**
6413      * Determines if the specified character should be regarded as
6414      * an ignorable character in a Java identifier or a Unicode identifier.
6415      * <p>
6416      * The following Unicode characters are ignorable in a Java identifier
6417      * or a Unicode identifier:
6418      * <ul>
6419      * <li>ISO control characters that are not whitespace
6420      * <ul>
6421      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6422      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6423      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6424      * </ul>
6425      *
6426      * <li>all characters that have the {@code FORMAT} general
6427      * category value
6428      * </ul>
6429      *
6430      * <p><b>Note:</b> This method cannot handle <a
6431      * href="#supplementary"> supplementary characters</a>. To support
6432      * all Unicode characters, including supplementary characters, use
6433      * the {@link #isIdentifierIgnorable(int)} method.
6434      *
6435      * @param   ch      the character to be tested.
6436      * @return  {@code true} if the character is an ignorable control
6437      *          character that may be part of a Java or Unicode identifier;
6438      *           {@code false} otherwise.
6439      * @see     Character#isJavaIdentifierPart(char)
6440      * @see     Character#isUnicodeIdentifierPart(char)
6441      * @since   1.1
6442      */
isIdentifierIgnorable(char ch)6443     public static boolean isIdentifierIgnorable(char ch) {
6444         return isIdentifierIgnorable((int)ch);
6445     }
6446 
6447     /**
6448      * Determines if the specified character (Unicode code point) should be regarded as
6449      * an ignorable character in a Java identifier or a Unicode identifier.
6450      * <p>
6451      * The following Unicode characters are ignorable in a Java identifier
6452      * or a Unicode identifier:
6453      * <ul>
6454      * <li>ISO control characters that are not whitespace
6455      * <ul>
6456      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6457      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6458      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6459      * </ul>
6460      *
6461      * <li>all characters that have the {@code FORMAT} general
6462      * category value
6463      * </ul>
6464      *
6465      * @param   codePoint the character (Unicode code point) to be tested.
6466      * @return  {@code true} if the character is an ignorable control
6467      *          character that may be part of a Java or Unicode identifier;
6468      *          {@code false} otherwise.
6469      * @see     Character#isJavaIdentifierPart(int)
6470      * @see     Character#isUnicodeIdentifierPart(int)
6471      * @since   1.5
6472      */
6473     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
6474     /*
6475     public static boolean isIdentifierIgnorable(int codePoint) {
6476         return CharacterData.of(codePoint).isIdentifierIgnorable(codePoint);
6477     }
6478     */
isIdentifierIgnorable(int codePoint)6479     public static boolean isIdentifierIgnorable(int codePoint) {
6480         return isIdentifierIgnorableImpl(codePoint);
6481     }
6482 
    // Package-private native method that isIdentifierIgnorable(int) delegates
    // to; it receives the code point exactly as the caller passed it.
    @FastNative
    static native boolean isIdentifierIgnorableImpl(int codePoint);
6485     // END Android-changed: Reimplement methods natively on top of ICU4C.
6486 
6487     /**
6488      * Converts the character argument to lowercase using case
6489      * mapping information from the UnicodeData file.
6490      * <p>
6491      * Note that
6492      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6493      * does not always return {@code true} for some ranges of
6494      * characters, particularly those that are symbols or ideographs.
6495      *
6496      * <p>In general, {@link String#toLowerCase()} should be used to map
6497      * characters to lowercase. {@code String} case mapping methods
6498      * have several benefits over {@code Character} case mapping methods.
6499      * {@code String} case mapping methods can perform locale-sensitive
6500      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6501      * the {@code Character} case mapping methods cannot.
6502      *
6503      * <p><b>Note:</b> This method cannot handle <a
6504      * href="#supplementary"> supplementary characters</a>. To support
6505      * all Unicode characters, including supplementary characters, use
6506      * the {@link #toLowerCase(int)} method.
6507      *
6508      * @param   ch   the character to be converted.
6509      * @return  the lowercase equivalent of the character, if any;
6510      *          otherwise, the character itself.
6511      * @see     Character#isLowerCase(char)
6512      * @see     String#toLowerCase()
6513      */
toLowerCase(char ch)6514     public static char toLowerCase(char ch) {
6515         return (char)toLowerCase((int)ch);
6516     }
6517 
6518     /**
6519      * Converts the character (Unicode code point) argument to
6520      * lowercase using case mapping information from the UnicodeData
6521      * file.
6522      *
6523      * <p> Note that
6524      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6525      * does not always return {@code true} for some ranges of
6526      * characters, particularly those that are symbols or ideographs.
6527      *
6528      * <p>In general, {@link String#toLowerCase()} should be used to map
6529      * characters to lowercase. {@code String} case mapping methods
6530      * have several benefits over {@code Character} case mapping methods.
6531      * {@code String} case mapping methods can perform locale-sensitive
6532      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6533      * the {@code Character} case mapping methods cannot.
6534      *
6535      * @param   codePoint   the character (Unicode code point) to be converted.
6536      * @return  the lowercase equivalent of the character (Unicode code
6537      *          point), if any; otherwise, the character itself.
6538      * @see     Character#isLowerCase(int)
6539      * @see     String#toLowerCase()
6540      *
6541      * @since   1.5
6542      */
6543     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
6544     /*
6545     public static int toLowerCase(int codePoint) {
6546         return CharacterData.of(codePoint).toLowerCase(codePoint);
6547     }
6548     */
toLowerCase(int codePoint)6549     public static int toLowerCase(int codePoint) {
6550         if (codePoint >= 'A' && codePoint <= 'Z') {
6551             return codePoint + ('a' - 'A');
6552         }
6553 
6554         // All ASCII codepoints except the ones above remain unchanged.
6555         if (codePoint < 0x80) {
6556             return codePoint;
6557         }
6558 
6559         return toLowerCaseImpl(codePoint);
6560     }
6561 
    // Package-private native method behind toLowerCase(int), which only
    // forwards code points >= 0x80 here and answers smaller values itself.
    @FastNative
    static native int toLowerCaseImpl(int codePoint);
6564     // END Android-changed: Reimplement methods natively on top of ICU4C.
6565 
6566     /**
6567      * Converts the character argument to uppercase using case mapping
6568      * information from the UnicodeData file.
6569      * <p>
6570      * Note that
6571      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6572      * does not always return {@code true} for some ranges of
6573      * characters, particularly those that are symbols or ideographs.
6574      *
6575      * <p>In general, {@link String#toUpperCase()} should be used to map
6576      * characters to uppercase. {@code String} case mapping methods
6577      * have several benefits over {@code Character} case mapping methods.
6578      * {@code String} case mapping methods can perform locale-sensitive
6579      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6580      * the {@code Character} case mapping methods cannot.
6581      *
6582      * <p><b>Note:</b> This method cannot handle <a
6583      * href="#supplementary"> supplementary characters</a>. To support
6584      * all Unicode characters, including supplementary characters, use
6585      * the {@link #toUpperCase(int)} method.
6586      *
6587      * @param   ch   the character to be converted.
6588      * @return  the uppercase equivalent of the character, if any;
6589      *          otherwise, the character itself.
6590      * @see     Character#isUpperCase(char)
6591      * @see     String#toUpperCase()
6592      */
toUpperCase(char ch)6593     public static char toUpperCase(char ch) {
6594         return (char)toUpperCase((int)ch);
6595     }
6596 
6597     /**
6598      * Converts the character (Unicode code point) argument to
6599      * uppercase using case mapping information from the UnicodeData
6600      * file.
6601      *
6602      * <p>Note that
6603      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6604      * does not always return {@code true} for some ranges of
6605      * characters, particularly those that are symbols or ideographs.
6606      *
6607      * <p>In general, {@link String#toUpperCase()} should be used to map
6608      * characters to uppercase. {@code String} case mapping methods
6609      * have several benefits over {@code Character} case mapping methods.
6610      * {@code String} case mapping methods can perform locale-sensitive
6611      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6612      * the {@code Character} case mapping methods cannot.
6613      *
6614      * @param   codePoint   the character (Unicode code point) to be converted.
6615      * @return  the uppercase equivalent of the character, if any;
6616      *          otherwise, the character itself.
6617      * @see     Character#isUpperCase(int)
6618      * @see     String#toUpperCase()
6619      *
6620      * @since   1.5
6621      */
6622     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
6623     /*
6624     public static int toUpperCase(int codePoint) {
6625         return CharacterData.of(codePoint).toUpperCase(codePoint);
6626     }
6627     */
toUpperCase(int codePoint)6628     public static int toUpperCase(int codePoint) {
6629         if (codePoint >= 'a' && codePoint <= 'z') {
6630             return codePoint - ('a' - 'A');
6631         }
6632 
6633         // All ASCII codepoints except the ones above remain unchanged.
6634         if (codePoint < 0x80) {
6635             return codePoint;
6636         }
6637 
6638         return toUpperCaseImpl(codePoint);
6639     }
6640 
    // Package-private native method behind toUpperCase(int), which only
    // forwards code points >= 0x80 here and answers smaller values itself.
    @FastNative
    static native int toUpperCaseImpl(int codePoint);
6643     // END Android-changed: Reimplement methods natively on top of ICU4C.
6644 
6645     /**
6646      * Converts the character argument to titlecase using case mapping
6647      * information from the UnicodeData file. If a character has no
6648      * explicit titlecase mapping and is not itself a titlecase char
6649      * according to UnicodeData, then the uppercase mapping is
6650      * returned as an equivalent titlecase mapping. If the
6651      * {@code char} argument is already a titlecase
6652      * {@code char}, the same {@code char} value will be
6653      * returned.
6654      * <p>
6655      * Note that
6656      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6657      * does not always return {@code true} for some ranges of
6658      * characters.
6659      *
6660      * <p><b>Note:</b> This method cannot handle <a
6661      * href="#supplementary"> supplementary characters</a>. To support
6662      * all Unicode characters, including supplementary characters, use
6663      * the {@link #toTitleCase(int)} method.
6664      *
6665      * @param   ch   the character to be converted.
6666      * @return  the titlecase equivalent of the character, if any;
6667      *          otherwise, the character itself.
6668      * @see     Character#isTitleCase(char)
6669      * @see     Character#toLowerCase(char)
6670      * @see     Character#toUpperCase(char)
6671      * @since   1.0.2
6672      */
toTitleCase(char ch)6673     public static char toTitleCase(char ch) {
6674         return (char)toTitleCase((int)ch);
6675     }
6676 
6677     /**
6678      * Converts the character (Unicode code point) argument to titlecase using case mapping
6679      * information from the UnicodeData file. If a character has no
6680      * explicit titlecase mapping and is not itself a titlecase char
6681      * according to UnicodeData, then the uppercase mapping is
6682      * returned as an equivalent titlecase mapping. If the
6683      * character argument is already a titlecase
6684      * character, the same character value will be
6685      * returned.
6686      *
6687      * <p>Note that
6688      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6689      * does not always return {@code true} for some ranges of
6690      * characters.
6691      *
6692      * @param   codePoint   the character (Unicode code point) to be converted.
6693      * @return  the titlecase equivalent of the character, if any;
6694      *          otherwise, the character itself.
6695      * @see     Character#isTitleCase(int)
6696      * @see     Character#toLowerCase(int)
6697      * @see     Character#toUpperCase(int)
6698      * @since   1.5
6699      */
6700     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
6701     /*
6702     public static int toTitleCase(int codePoint) {
6703         return CharacterData.of(codePoint).toTitleCase(codePoint);
6704     }
6705     */
toTitleCase(int codePoint)6706     public static int toTitleCase(int codePoint) {
6707         return toTitleCaseImpl(codePoint);
6708     }
6709 
    // Package-private native method that toTitleCase(int) delegates to; it
    // receives the code point exactly as the caller passed it.
    @FastNative
    static native int toTitleCaseImpl(int codePoint);
6712     // END Android-changed: Reimplement methods natively on top of ICU4C.
6713 
6714     /**
6715      * Returns the numeric value of the character {@code ch} in the
6716      * specified radix.
6717      * <p>
6718      * If the radix is not in the range {@code MIN_RADIX} &le;
6719      * {@code radix} &le; {@code MAX_RADIX} or if the
6720      * value of {@code ch} is not a valid digit in the specified
6721      * radix, {@code -1} is returned. A character is a valid digit
6722      * if at least one of the following is true:
6723      * <ul>
6724      * <li>The method {@code isDigit} is {@code true} of the character
6725      *     and the Unicode decimal digit value of the character (or its
6726      *     single-character decomposition) is less than the specified radix.
6727      *     In this case the decimal digit value is returned.
6728      * <li>The character is one of the uppercase Latin letters
6729      *     {@code 'A'} through {@code 'Z'} and its code is less than
6730      *     {@code radix + 'A' - 10}.
6731      *     In this case, {@code ch - 'A' + 10}
6732      *     is returned.
6733      * <li>The character is one of the lowercase Latin letters
6734      *     {@code 'a'} through {@code 'z'} and its code is less than
6735      *     {@code radix + 'a' - 10}.
6736      *     In this case, {@code ch - 'a' + 10}
6737      *     is returned.
6738      * <li>The character is one of the fullwidth uppercase Latin letters A
6739      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6740      *     and its code is less than
6741      *     {@code radix + '\u005CuFF21' - 10}.
6742      *     In this case, {@code ch - '\u005CuFF21' + 10}
6743      *     is returned.
6744      * <li>The character is one of the fullwidth lowercase Latin letters a
6745      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6746      *     and its code is less than
6747      *     {@code radix + '\u005CuFF41' - 10}.
6748      *     In this case, {@code ch - '\u005CuFF41' + 10}
6749      *     is returned.
6750      * </ul>
6751      *
6752      * <p><b>Note:</b> This method cannot handle <a
6753      * href="#supplementary"> supplementary characters</a>. To support
6754      * all Unicode characters, including supplementary characters, use
6755      * the {@link #digit(int, int)} method.
6756      *
6757      * @param   ch      the character to be converted.
6758      * @param   radix   the radix.
6759      * @return  the numeric value represented by the character in the
6760      *          specified radix.
6761      * @see     Character#forDigit(int, int)
6762      * @see     Character#isDigit(char)
6763      */
digit(char ch, int radix)6764     public static int digit(char ch, int radix) {
6765         return digit((int)ch, radix);
6766     }
6767 
6768     /**
6769      * Returns the numeric value of the specified character (Unicode
6770      * code point) in the specified radix.
6771      *
6772      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6773      * {@code radix} &le; {@code MAX_RADIX} or if the
6774      * character is not a valid digit in the specified
6775      * radix, {@code -1} is returned. A character is a valid digit
6776      * if at least one of the following is true:
6777      * <ul>
6778      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6779      *     and the Unicode decimal digit value of the character (or its
6780      *     single-character decomposition) is less than the specified radix.
6781      *     In this case the decimal digit value is returned.
6782      * <li>The character is one of the uppercase Latin letters
6783      *     {@code 'A'} through {@code 'Z'} and its code is less than
6784      *     {@code radix + 'A' - 10}.
6785      *     In this case, {@code codePoint - 'A' + 10}
6786      *     is returned.
6787      * <li>The character is one of the lowercase Latin letters
6788      *     {@code 'a'} through {@code 'z'} and its code is less than
6789      *     {@code radix + 'a' - 10}.
6790      *     In this case, {@code codePoint - 'a' + 10}
6791      *     is returned.
6792      * <li>The character is one of the fullwidth uppercase Latin letters A
6793      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6794      *     and its code is less than
6795      *     {@code radix + '\u005CuFF21' - 10}.
6796      *     In this case,
6797      *     {@code codePoint - '\u005CuFF21' + 10}
6798      *     is returned.
6799      * <li>The character is one of the fullwidth lowercase Latin letters a
6800      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6801      *     and its code is less than
6802      *     {@code radix + '\u005CuFF41' - 10}.
6803      *     In this case,
6804      *     {@code codePoint - '\u005CuFF41' + 10}
6805      *     is returned.
6806      * </ul>
6807      *
6808      * @param   codePoint the character (Unicode code point) to be converted.
6809      * @param   radix   the radix.
6810      * @return  the numeric value represented by the character in the
6811      *          specified radix.
6812      * @see     Character#forDigit(int, int)
6813      * @see     Character#isDigit(int)
6814      * @since   1.5
6815      */
6816     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
6817     /*
6818     public static int digit(int codePoint, int radix) {
6819         return CharacterData.of(codePoint).digit(codePoint, radix);
6820     }
6821     */
digit(int codePoint, int radix)6822     public static int digit(int codePoint, int radix) {
6823         if (radix < MIN_RADIX || radix > MAX_RADIX) {
6824             return -1;
6825         }
6826         if (codePoint < 128) {
6827             // Optimized for ASCII
6828             int result = -1;
6829             if ('0' <= codePoint && codePoint <= '9') {
6830                 result = codePoint - '0';
6831             } else if ('a' <= codePoint && codePoint <= 'z') {
6832                 result = 10 + (codePoint - 'a');
6833             } else if ('A' <= codePoint && codePoint <= 'Z') {
6834                 result = 10 + (codePoint - 'A');
6835             }
6836             return result < radix ? result : -1;
6837         }
6838         return digitImpl(codePoint, radix);
6839     }
6840 
6841     @FastNative
digitImpl(int codePoint, int radix)6842     native static int digitImpl(int codePoint, int radix);
6843     // END Android-changed: Reimplement methods natively on top of ICU4C.
6844 
6845     /**
6846      * Returns the {@code int} value that the specified Unicode
6847      * character represents. For example, the character
6848      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
6849      * an int with a value of 50.
6850      * <p>
6851      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6852      * {@code '\u005Cu005A'}), lowercase
6853      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6854      * full width variant ({@code '\u005CuFF21'} through
6855      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6856      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6857      * through 35. This is independent of the Unicode specification,
6858      * which does not assign numeric values to these {@code char}
6859      * values.
6860      * <p>
6861      * If the character does not have a numeric value, then -1 is returned.
6862      * If the character has a numeric value that cannot be represented as a
6863      * nonnegative integer (for example, a fractional value), then -2
6864      * is returned.
6865      *
6866      * <p><b>Note:</b> This method cannot handle <a
6867      * href="#supplementary"> supplementary characters</a>. To support
6868      * all Unicode characters, including supplementary characters, use
6869      * the {@link #getNumericValue(int)} method.
6870      *
6871      * @param   ch      the character to be converted.
6872      * @return  the numeric value of the character, as a nonnegative {@code int}
6873      *           value; -2 if the character has a numeric value that is not a
6874      *          nonnegative integer; -1 if the character has no numeric value.
6875      * @see     Character#forDigit(int, int)
6876      * @see     Character#isDigit(char)
6877      * @since   1.1
6878      */
getNumericValue(char ch)6879     public static int getNumericValue(char ch) {
6880         return getNumericValue((int)ch);
6881     }
6882 
6883     /**
6884      * Returns the {@code int} value that the specified
6885      * character (Unicode code point) represents. For example, the character
6886      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6887      * an {@code int} with a value of 50.
6888      * <p>
6889      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6890      * {@code '\u005Cu005A'}), lowercase
6891      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6892      * full width variant ({@code '\u005CuFF21'} through
6893      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6894      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6895      * through 35. This is independent of the Unicode specification,
6896      * which does not assign numeric values to these {@code char}
6897      * values.
6898      * <p>
6899      * If the character does not have a numeric value, then -1 is returned.
6900      * If the character has a numeric value that cannot be represented as a
6901      * nonnegative integer (for example, a fractional value), then -2
6902      * is returned.
6903      *
6904      * @param   codePoint the character (Unicode code point) to be converted.
6905      * @return  the numeric value of the character, as a nonnegative {@code int}
6906      *          value; -2 if the character has a numeric value that is not a
6907      *          nonnegative integer; -1 if the character has no numeric value.
6908      * @see     Character#forDigit(int, int)
6909      * @see     Character#isDigit(int)
6910      * @since   1.5
6911      */
6912     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
6913     /*
6914     public static int getNumericValue(int codePoint) {
6915         return CharacterData.of(codePoint).getNumericValue(codePoint);
6916     }
6917     */
getNumericValue(int codePoint)6918     public static int getNumericValue(int codePoint) {
6919         // This is both an optimization and papers over differences between Java and ICU.
6920         if (codePoint < 128) {
6921             if (codePoint >= '0' && codePoint <= '9') {
6922                 return codePoint - '0';
6923             }
6924             if (codePoint >= 'a' && codePoint <= 'z') {
6925                 return codePoint - ('a' - 10);
6926             }
6927             if (codePoint >= 'A' && codePoint <= 'Z') {
6928                 return codePoint - ('A' - 10);
6929             }
6930             return -1;
6931         }
6932         // Full-width uppercase A-Z.
6933         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
6934             return codePoint - 0xff17;
6935         }
6936         // Full-width lowercase a-z.
6937         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
6938             return codePoint - 0xff37;
6939         }
6940         return getNumericValueImpl(codePoint);
6941     }
6942 
6943     @FastNative
getNumericValueImpl(int codePoint)6944     native static int getNumericValueImpl(int codePoint);
6945     // END Android-changed: Reimplement methods natively on top of ICU4C.
6946 
6947     /**
6948      * Determines if the specified character is ISO-LATIN-1 white space.
6949      * This method returns {@code true} for the following five
6950      * characters only:
6951      * <table summary="truechars">
6952      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6953      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6954      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6955      *     <td>{@code NEW LINE}</td></tr>
6956      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6957      *     <td>{@code FORM FEED}</td></tr>
6958      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6959      *     <td>{@code CARRIAGE RETURN}</td></tr>
6960      * <tr><td>{@code ' '}</td>             <td>{@code U+0020}</td>
6961      *     <td>{@code SPACE}</td></tr>
6962      * </table>
6963      *
6964      * @param      ch   the character to be tested.
6965      * @return     {@code true} if the character is ISO-LATIN-1 white
6966      *             space; {@code false} otherwise.
6967      * @see        Character#isSpaceChar(char)
6968      * @see        Character#isWhitespace(char)
6969      * @deprecated Replaced by isWhitespace(char).
6970      */
6971     @Deprecated
isSpace(char ch)6972     public static boolean isSpace(char ch) {
6973         return (ch <= 0x0020) &&
6974             (((((1L << 0x0009) |
6975             (1L << 0x000A) |
6976             (1L << 0x000C) |
6977             (1L << 0x000D) |
6978             (1L << 0x0020)) >> ch) & 1L) != 0);
6979     }
6980 
6981 
6982     /**
6983      * Determines if the specified character is a Unicode space character.
6984      * A character is considered to be a space character if and only if
6985      * it is specified to be a space character by the Unicode Standard. This
6986      * method returns true if the character's general category type is any of
6987      * the following:
6988      * <ul>
6989      * <li> {@code SPACE_SEPARATOR}
6990      * <li> {@code LINE_SEPARATOR}
6991      * <li> {@code PARAGRAPH_SEPARATOR}
6992      * </ul>
6993      *
6994      * <p><b>Note:</b> This method cannot handle <a
6995      * href="#supplementary"> supplementary characters</a>. To support
6996      * all Unicode characters, including supplementary characters, use
6997      * the {@link #isSpaceChar(int)} method.
6998      *
6999      * @param   ch      the character to be tested.
7000      * @return  {@code true} if the character is a space character;
7001      *          {@code false} otherwise.
7002      * @see     Character#isWhitespace(char)
7003      * @since   1.1
7004      */
isSpaceChar(char ch)7005     public static boolean isSpaceChar(char ch) {
7006         return isSpaceChar((int)ch);
7007     }
7008 
7009     /**
7010      * Determines if the specified character (Unicode code point) is a
7011      * Unicode space character.  A character is considered to be a
7012      * space character if and only if it is specified to be a space
7013      * character by the Unicode Standard. This method returns true if
7014      * the character's general category type is any of the following:
7015      *
7016      * <ul>
7017      * <li> {@link #SPACE_SEPARATOR}
7018      * <li> {@link #LINE_SEPARATOR}
7019      * <li> {@link #PARAGRAPH_SEPARATOR}
7020      * </ul>
7021      *
7022      * @param   codePoint the character (Unicode code point) to be tested.
7023      * @return  {@code true} if the character is a space character;
7024      *          {@code false} otherwise.
7025      * @see     Character#isWhitespace(int)
7026      * @since   1.5
7027      */
7028     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
7029     /*
7030     public static boolean isSpaceChar(int codePoint) {
7031         return ((((1 << Character.SPACE_SEPARATOR) |
7032                   (1 << Character.LINE_SEPARATOR) |
7033                   (1 << Character.PARAGRAPH_SEPARATOR)) >> getType(codePoint)) & 1)
7034             != 0;
7035     }
7036     */
isSpaceChar(int codePoint)7037     public static boolean isSpaceChar(int codePoint) {
7038         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
7039         // SPACE or NO-BREAK SPACE?
7040         if (codePoint == 0x20 || codePoint == 0xa0) {
7041             return true;
7042         }
7043         if (codePoint < 0x1000) {
7044             return false;
7045         }
7046         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
7047         if (codePoint == 0x1680 || codePoint == 0x180e) {
7048             return true;
7049         }
7050         if (codePoint < 0x2000) {
7051             return false;
7052         }
7053         if (codePoint <= 0xffff) {
7054             // Other whitespace from General Punctuation...
7055             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
7056                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
7057         }
7058         // Let icu4c worry about non-BMP code points.
7059         return isSpaceCharImpl(codePoint);
7060     }
7061 
    // Native counterpart of isSpaceChar(int), which consults it only for
    // code points above U+FFFF.
    @FastNative
    static native boolean isSpaceCharImpl(int codePoint);
7064     // END Android-changed: Reimplement methods natively on top of ICU4C.
7065 
7066     /**
7067      * Determines if the specified character is white space according to Java.
7068      * A character is a Java whitespace character if and only if it satisfies
7069      * one of the following criteria:
7070      * <ul>
7071      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
7072      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
7073      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
7074      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
7075      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
7076      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
7077      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
7078      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
7079      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
7080      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
7081      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
7082      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
7083      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
7084      * </ul>
7085      *
7086      * <p><b>Note:</b> This method cannot handle <a
7087      * href="#supplementary"> supplementary characters</a>. To support
7088      * all Unicode characters, including supplementary characters, use
7089      * the {@link #isWhitespace(int)} method.
7090      *
7091      * @param   ch the character to be tested.
7092      * @return  {@code true} if the character is a Java whitespace
7093      *          character; {@code false} otherwise.
7094      * @see     Character#isSpaceChar(char)
7095      * @since   1.1
7096      */
isWhitespace(char ch)7097     public static boolean isWhitespace(char ch) {
7098         return isWhitespace((int)ch);
7099     }
7100 
7101     /**
7102      * Determines if the specified character (Unicode code point) is
7103      * white space according to Java.  A character is a Java
7104      * whitespace character if and only if it satisfies one of the
7105      * following criteria:
7106      * <ul>
7107      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
7108      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
7109      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
7110      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
7111      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
7112      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
7113      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
7114      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
7115      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
7116      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
7117      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
7118      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
7119      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
7120      * </ul>
7121      * <p>
7122      *
7123      * @param   codePoint the character (Unicode code point) to be tested.
7124      * @return  {@code true} if the character is a Java whitespace
7125      *          character; {@code false} otherwise.
7126      * @see     Character#isSpaceChar(int)
7127      * @since   1.5
7128      */
7129     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
7130     /*
7131     public static boolean isWhitespace(int codePoint) {
7132         return CharacterData.of(codePoint).isWhitespace(codePoint);
7133     }
7134     */
isWhitespace(int codePoint)7135     public static boolean isWhitespace(int codePoint) {
7136         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
7137         // Any ASCII whitespace character?
7138         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
7139             return true;
7140         }
7141         if (codePoint < 0x1000) {
7142             return false;
7143         }
7144         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
7145         if (codePoint == 0x1680 || codePoint == 0x180e) {
7146             return true;
7147         }
7148         if (codePoint < 0x2000) {
7149             return false;
7150         }
7151         // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
7152         if (codePoint == 0x2007 || codePoint == 0x202f) {
7153             return false;
7154         }
7155         if (codePoint <= 0xffff) {
7156             // Other whitespace from General Punctuation...
7157             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
7158                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
7159         }
7160         // Let icu4c worry about non-BMP code points.
7161         return isWhitespaceImpl(codePoint);
7162     }
7163 
7164     @FastNative
isWhitespaceImpl(int codePoint)7165     native static boolean isWhitespaceImpl(int codePoint);
7166     // END Android-changed: Reimplement methods natively on top of ICU4C.
7167 
7168     /**
7169      * Determines if the specified character is an ISO control
7170      * character.  A character is considered to be an ISO control
7171      * character if its code is in the range {@code '\u005Cu0000'}
7172      * through {@code '\u005Cu001F'} or in the range
7173      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
7174      *
7175      * <p><b>Note:</b> This method cannot handle <a
7176      * href="#supplementary"> supplementary characters</a>. To support
7177      * all Unicode characters, including supplementary characters, use
7178      * the {@link #isISOControl(int)} method.
7179      *
7180      * @param   ch      the character to be tested.
7181      * @return  {@code true} if the character is an ISO control character;
7182      *          {@code false} otherwise.
7183      *
7184      * @see     Character#isSpaceChar(char)
7185      * @see     Character#isWhitespace(char)
7186      * @since   1.1
7187      */
isISOControl(char ch)7188     public static boolean isISOControl(char ch) {
7189         return isISOControl((int)ch);
7190     }
7191 
7192     /**
7193      * Determines if the referenced character (Unicode code point) is an ISO control
7194      * character.  A character is considered to be an ISO control
7195      * character if its code is in the range {@code '\u005Cu0000'}
7196      * through {@code '\u005Cu001F'} or in the range
7197      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
7198      *
7199      * @param   codePoint the character (Unicode code point) to be tested.
7200      * @return  {@code true} if the character is an ISO control character;
7201      *          {@code false} otherwise.
7202      * @see     Character#isSpaceChar(int)
7203      * @see     Character#isWhitespace(int)
7204      * @since   1.5
7205      */
isISOControl(int codePoint)7206     public static boolean isISOControl(int codePoint) {
7207         // Optimized form of:
7208         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
7209         //     (codePoint >= 0x7F && codePoint <= 0x9F);
7210         return codePoint <= 0x9F &&
7211             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
7212     }
7213 
7214     /**
7215      * Returns a value indicating a character's general category.
7216      *
7217      * <p><b>Note:</b> This method cannot handle <a
7218      * href="#supplementary"> supplementary characters</a>. To support
7219      * all Unicode characters, including supplementary characters, use
7220      * the {@link #getType(int)} method.
7221      *
7222      * @param   ch      the character to be tested.
7223      * @return  a value of type {@code int} representing the
7224      *          character's general category.
7225      * @see     Character#COMBINING_SPACING_MARK
7226      * @see     Character#CONNECTOR_PUNCTUATION
7227      * @see     Character#CONTROL
7228      * @see     Character#CURRENCY_SYMBOL
7229      * @see     Character#DASH_PUNCTUATION
7230      * @see     Character#DECIMAL_DIGIT_NUMBER
7231      * @see     Character#ENCLOSING_MARK
7232      * @see     Character#END_PUNCTUATION
7233      * @see     Character#FINAL_QUOTE_PUNCTUATION
7234      * @see     Character#FORMAT
7235      * @see     Character#INITIAL_QUOTE_PUNCTUATION
7236      * @see     Character#LETTER_NUMBER
7237      * @see     Character#LINE_SEPARATOR
7238      * @see     Character#LOWERCASE_LETTER
7239      * @see     Character#MATH_SYMBOL
7240      * @see     Character#MODIFIER_LETTER
7241      * @see     Character#MODIFIER_SYMBOL
7242      * @see     Character#NON_SPACING_MARK
7243      * @see     Character#OTHER_LETTER
7244      * @see     Character#OTHER_NUMBER
7245      * @see     Character#OTHER_PUNCTUATION
7246      * @see     Character#OTHER_SYMBOL
7247      * @see     Character#PARAGRAPH_SEPARATOR
7248      * @see     Character#PRIVATE_USE
7249      * @see     Character#SPACE_SEPARATOR
7250      * @see     Character#START_PUNCTUATION
7251      * @see     Character#SURROGATE
7252      * @see     Character#TITLECASE_LETTER
7253      * @see     Character#UNASSIGNED
7254      * @see     Character#UPPERCASE_LETTER
7255      * @since   1.1
7256      */
getType(char ch)7257     public static int getType(char ch) {
7258         return getType((int)ch);
7259     }
7260 
7261     /**
7262      * Returns a value indicating a character's general category.
7263      *
7264      * @param   codePoint the character (Unicode code point) to be tested.
7265      * @return  a value of type {@code int} representing the
7266      *          character's general category.
7267      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
7268      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
7269      * @see     Character#CONTROL CONTROL
7270      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
7271      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
7272      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
7273      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
7274      * @see     Character#END_PUNCTUATION END_PUNCTUATION
7275      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
7276      * @see     Character#FORMAT FORMAT
7277      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
7278      * @see     Character#LETTER_NUMBER LETTER_NUMBER
7279      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
7280      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
7281      * @see     Character#MATH_SYMBOL MATH_SYMBOL
7282      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
7283      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
7284      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
7285      * @see     Character#OTHER_LETTER OTHER_LETTER
7286      * @see     Character#OTHER_NUMBER OTHER_NUMBER
7287      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
7288      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
7289      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
7290      * @see     Character#PRIVATE_USE PRIVATE_USE
7291      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
7292      * @see     Character#START_PUNCTUATION START_PUNCTUATION
7293      * @see     Character#SURROGATE SURROGATE
7294      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
7295      * @see     Character#UNASSIGNED UNASSIGNED
7296      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
7297      * @since   1.5
7298      */
7299     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
7300     /*
7301     public static int getType(int codePoint) {
7302         return CharacterData.of(codePoint).getType(codePoint);
7303     }
7304     */
getType(int codePoint)7305     public static int getType(int codePoint) {
7306         int type = getTypeImpl(codePoint);
7307         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
7308         if (type <= Character.FORMAT) {
7309             return type;
7310         }
7311         return (type + 1);
7312     }
7313 
    // Native source of the category value. The result is not RI-compatible as
    // returned; getType(int) adjusts it before handing it to callers.
    @FastNative
    static native int getTypeImpl(int codePoint);
7316     // END Android-changed: Reimplement methods natively on top of ICU4C.
7317 
7318     /**
7319      * Determines the character representation for a specific digit in
7320      * the specified radix. If the value of {@code radix} is not a
7321      * valid radix, or the value of {@code digit} is not a valid
7322      * digit in the specified radix, the null character
7323      * ({@code '\u005Cu0000'}) is returned.
7324      * <p>
7325      * The {@code radix} argument is valid if it is greater than or
7326      * equal to {@code MIN_RADIX} and less than or equal to
7327      * {@code MAX_RADIX}. The {@code digit} argument is valid if
7328      * {@code 0 <= digit < radix}.
7329      * <p>
7330      * If the digit is less than 10, then
7331      * {@code '0' + digit} is returned. Otherwise, the value
7332      * {@code 'a' + digit - 10} is returned.
7333      *
7334      * @param   digit   the number to convert to a character.
7335      * @param   radix   the radix.
7336      * @return  the {@code char} representation of the specified digit
7337      *          in the specified radix.
7338      * @see     Character#MIN_RADIX
7339      * @see     Character#MAX_RADIX
7340      * @see     Character#digit(char, int)
7341      */
forDigit(int digit, int radix)7342     public static char forDigit(int digit, int radix) {
7343         if ((digit >= radix) || (digit < 0)) {
7344             return '\0';
7345         }
7346         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
7347             return '\0';
7348         }
7349         if (digit < 10) {
7350             return (char)('0' + digit);
7351         }
7352         return (char)('a' - 10 + digit);
7353     }
7354 
7355     /**
7356      * Returns the Unicode directionality property for the given
7357      * character.  Character directionality is used to calculate the
7358      * visual ordering of text. The directionality value of undefined
7359      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
7360      *
7361      * <p><b>Note:</b> This method cannot handle <a
7362      * href="#supplementary"> supplementary characters</a>. To support
7363      * all Unicode characters, including supplementary characters, use
7364      * the {@link #getDirectionality(int)} method.
7365      *
7366      * @param  ch {@code char} for which the directionality property
7367      *            is requested.
7368      * @return the directionality property of the {@code char} value.
7369      *
7370      * @see Character#DIRECTIONALITY_UNDEFINED
7371      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
7372      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
7373      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7374      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
7375      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7376      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7377      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
7378      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7379      * @see Character#DIRECTIONALITY_NONSPACING_MARK
7380      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
7381      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
7382      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
7383      * @see Character#DIRECTIONALITY_WHITESPACE
7384      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
7385      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7386      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7387      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7388      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7389      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7390      * @since 1.4
7391      */
getDirectionality(char ch)7392     public static byte getDirectionality(char ch) {
7393         return getDirectionality((int)ch);
7394     }
7395 
7396     /**
7397      * Returns the Unicode directionality property for the given
7398      * character (Unicode code point).  Character directionality is
7399      * used to calculate the visual ordering of text. The
7400      * directionality value of undefined character is {@link
7401      * #DIRECTIONALITY_UNDEFINED}.
7402      *
7403      * @param   codePoint the character (Unicode code point) for which
7404      *          the directionality property is requested.
7405      * @return the directionality property of the character.
7406      *
7407      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7408      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7409      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7410      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7411      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7412      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7413      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7414      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7415      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7416      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7417      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7418      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7419      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7420      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7421      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7422      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7423      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7424      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7425      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7426      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7427      * @since    1.5
7428      */
7429     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
7430     /*
7431     public static byte getDirectionality(int codePoint) {
7432         return CharacterData.of(codePoint).getDirectionality(codePoint);
7433     }
7434     */
getDirectionality(int codePoint)7435     public static byte getDirectionality(int codePoint) {
7436         if (getType(codePoint) == Character.UNASSIGNED) {
7437             return Character.DIRECTIONALITY_UNDEFINED;
7438         }
7439 
7440         byte directionality = getDirectionalityImpl(codePoint);
7441         if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
7442             return DIRECTIONALITY[directionality];
7443         }
7444         return Character.DIRECTIONALITY_UNDEFINED;
7445     }
7446 
7447     @FastNative
getDirectionalityImpl(int codePoint)7448     native static byte getDirectionalityImpl(int codePoint);
7449     // END Android-changed: Reimplement methods natively on top of ICU4C.
7450 
7451     /**
7452      * Determines whether the character is mirrored according to the
7453      * Unicode specification.  Mirrored characters should have their
7454      * glyphs horizontally mirrored when displayed in text that is
7455      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7456      * PARENTHESIS is semantically defined to be an <i>opening
7457      * parenthesis</i>.  This will appear as a "(" in text that is
7458      * left-to-right but as a ")" in text that is right-to-left.
7459      *
7460      * <p><b>Note:</b> This method cannot handle <a
7461      * href="#supplementary"> supplementary characters</a>. To support
7462      * all Unicode characters, including supplementary characters, use
7463      * the {@link #isMirrored(int)} method.
7464      *
7465      * @param  ch {@code char} for which the mirrored property is requested
7466      * @return {@code true} if the char is mirrored, {@code false}
7467      *         if the {@code char} is not mirrored or is not defined.
7468      * @since 1.4
7469      */
isMirrored(char ch)7470     public static boolean isMirrored(char ch) {
7471         return isMirrored((int)ch);
7472     }
7473 
7474     /**
7475      * Determines whether the specified character (Unicode code point)
7476      * is mirrored according to the Unicode specification.  Mirrored
7477      * characters should have their glyphs horizontally mirrored when
7478      * displayed in text that is right-to-left.  For example,
7479      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7480      * defined to be an <i>opening parenthesis</i>.  This will appear
7481      * as a "(" in text that is left-to-right but as a ")" in text
7482      * that is right-to-left.
7483      *
7484      * @param   codePoint the character (Unicode code point) to be tested.
7485      * @return  {@code true} if the character is mirrored, {@code false}
7486      *          if the character is not mirrored or is not defined.
7487      * @since   1.5
7488      */
7489     // BEGIN Android-changed: Reimplement methods natively on top of ICU4C.
7490     /*
7491     public static boolean isMirrored(int codePoint) {
7492         return CharacterData.of(codePoint).isMirrored(codePoint);
7493     }
7494     */
isMirrored(int codePoint)7495     public static boolean isMirrored(int codePoint) {
7496         return isMirroredImpl(codePoint);
7497     }
7498 
7499     @FastNative
isMirroredImpl(int codePoint)7500     native static boolean isMirroredImpl(int codePoint);
7501     // END Android-changed: Reimplement methods natively on top of ICU4C.
7502 
7503     /**
7504      * Compares two {@code Character} objects numerically.
7505      *
7506      * @param   anotherCharacter   the {@code Character} to be compared.
     *
7508      * @return  the value {@code 0} if the argument {@code Character}
7509      *          is equal to this {@code Character}; a value less than
7510      *          {@code 0} if this {@code Character} is numerically less
7511      *          than the {@code Character} argument; and a value greater than
7512      *          {@code 0} if this {@code Character} is numerically greater
7513      *          than the {@code Character} argument (unsigned comparison).
7514      *          Note that this is strictly a numerical comparison; it is not
7515      *          locale-dependent.
7516      * @since   1.2
7517      */
compareTo(Character anotherCharacter)7518     public int compareTo(Character anotherCharacter) {
7519         return compare(this.value, anotherCharacter.value);
7520     }
7521 
7522     /**
7523      * Compares two {@code char} values numerically.
7524      * The value returned is identical to what would be returned by:
7525      * <pre>
7526      *    Character.valueOf(x).compareTo(Character.valueOf(y))
7527      * </pre>
7528      *
7529      * @param  x the first {@code char} to compare
7530      * @param  y the second {@code char} to compare
7531      * @return the value {@code 0} if {@code x == y};
7532      *         a value less than {@code 0} if {@code x < y}; and
7533      *         a value greater than {@code 0} if {@code x > y}
7534      * @since 1.7
7535      */
compare(char x, char y)7536     public static int compare(char x, char y) {
7537         return x - y;
7538     }
7539 
7540     // BEGIN Android-removed: Use ICU.
7541     /**
7542      * Converts the character (Unicode code point) argument to uppercase using
7543      * information from the UnicodeData file.
7544      * <p>
7545      *
7546      * @param   codePoint   the character (Unicode code point) to be converted.
7547      * @return  either the uppercase equivalent of the character, if
7548      *          any, or an error flag ({@code Character.ERROR})
7549      *          that indicates that a 1:M {@code char} mapping exists.
7550      * @see     Character#isLowerCase(char)
7551      * @see     Character#isUpperCase(char)
7552      * @see     Character#toLowerCase(char)
7553      * @see     Character#toTitleCase(char)
7554      * @since 1.4
7555      *
7556     static int toUpperCaseEx(int codePoint) {
7557         assert isValidCodePoint(codePoint);
7558         return CharacterData.of(codePoint).toUpperCaseEx(codePoint);
7559     }
7560 
7561     /**
7562      * Converts the character (Unicode code point) argument to uppercase using case
7563      * mapping information from the SpecialCasing file in the Unicode
7564      * specification. If a character has no explicit uppercase
7565      * mapping, then the {@code char} itself is returned in the
7566      * {@code char[]}.
7567      *
7568      * @param   codePoint   the character (Unicode code point) to be converted.
7569      * @return a {@code char[]} with the uppercased character.
7570      * @since 1.4
7571      *
7572     static char[] toUpperCaseCharArray(int codePoint) {
7573         // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
7574         assert isBmpCodePoint(codePoint);
7575         return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
7576     }
7577     */
7578     // END Android-removed: Use ICU.
7579 
7580     /**
7581      * The number of bits used to represent a <tt>char</tt> value in unsigned
7582      * binary form, constant {@code 16}.
7583      *
7584      * @since 1.5
7585      */
7586     public static final int SIZE = 16;
7587 
7588     /**
7589      * The number of bytes used to represent a {@code char} value in unsigned
7590      * binary form.
7591      *
7592      * @since 1.8
7593      */
7594     public static final int BYTES = SIZE / Byte.SIZE;
7595 
7596     /**
7597      * Returns the value obtained by reversing the order of the bytes in the
7598      * specified <tt>char</tt> value.
7599      *
7600      * @param ch The {@code char} of which to reverse the byte order.
7601      * @return the value obtained by reversing (or, equivalently, swapping)
7602      *     the bytes in the specified <tt>char</tt> value.
7603      * @since 1.5
7604      */
reverseBytes(char ch)7605     public static char reverseBytes(char ch) {
7606         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7607     }
7608 
7609     /**
7610      * Returns the Unicode name of the specified character
7611      * {@code codePoint}, or null if the code point is
7612      * {@link #UNASSIGNED unassigned}.
7613      * <p>
7614      * Note: if the specified character is not assigned a name by
7615      * the <i>UnicodeData</i> file (part of the Unicode Character
7616      * Database maintained by the Unicode Consortium), the returned
7617      * name is the same as the result of expression.
7618      *
7619      * <blockquote>{@code
7620      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7621      *     + " "
7622      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7623      *
7624      * }</blockquote>
7625      *
7626      * @param  codePoint the character (Unicode code point)
7627      *
7628      * @return the Unicode name of the specified character, or null if
7629      *         the code point is unassigned.
7630      *
7631      * @exception IllegalArgumentException if the specified
7632      *            {@code codePoint} is not a valid Unicode
7633      *            code point.
7634      *
7635      * @since 1.7
7636      */
getName(int codePoint)7637     public static String getName(int codePoint) {
7638         if (!isValidCodePoint(codePoint)) {
7639             throw new IllegalArgumentException();
7640         }
7641         // Android-changed: Use ICU.
7642         // String name = CharacterName.get(codePoint);
7643         String name = getNameImpl(codePoint);
7644         if (name != null)
7645             return name;
7646         if (getType(codePoint) == UNASSIGNED)
7647             return null;
7648         UnicodeBlock block = UnicodeBlock.of(codePoint);
7649         if (block != null)
7650             return block.toString().replace('_', ' ') + " "
7651                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7652         // should never come here
7653         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7654     }
7655 
7656     // Android-added: Use ICU.
7657     // Implement getNameImpl() natively.
getNameImpl(int codePoint)7658     private static native String getNameImpl(int codePoint);
7659 }
7660