• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2014 The Android Open Source Project
3  * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This code is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 only, as
8  * published by the Free Software Foundation.  Oracle designates this
9  * particular file as subject to the "Classpath" exception as provided
10  * by Oracle in the LICENSE file that accompanied this code.
11  *
12  * This code is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15  * version 2 for more details (a copy is included in the LICENSE file that
16  * accompanied this code).
17  *
18  * You should have received a copy of the GNU General Public License version
19  * 2 along with this work; if not, write to the Free Software Foundation,
20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
21  *
22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
23  * or visit www.oracle.com if you need additional information or have any
24  * questions.
25  */
26 
27 package java.lang;
28 
29 import java.util.Arrays;
30 import java.util.HashMap;
31 import java.util.Locale;
32 import java.util.Map;
33 
34 /**
35  * The {@code Character} class wraps a value of the primitive
36  * type {@code char} in an object. An object of type
37  * {@code Character} contains a single field whose type is
38  * {@code char}.
39  * <p>
40  * In addition, this class provides several methods for determining
41  * a character's category (lowercase letter, digit, etc.) and for converting
42  * characters from uppercase to lowercase and vice versa.
43  * <p>
44  * Character information is based on the Unicode Standard, version 6.0.0.
45  * <p>
46  * The methods and data of class {@code Character} are defined by
47  * the information in the <i>UnicodeData</i> file that is part of the
48  * Unicode Character Database maintained by the Unicode
49  * Consortium. This file specifies various properties including name
50  * and general category for every defined Unicode code point or
51  * character range.
52  * <p>
53  * The file and its description are available from the Unicode Consortium at:
54  * <ul>
55  * <li><a href="http://www.unicode.org">http://www.unicode.org</a>
56  * </ul>
57  *
58  * <h4><a name="unicode">Unicode Character Representations</a></h4>
59  *
60  * <p>The {@code char} data type (and therefore the value that a
61  * {@code Character} object encapsulates) is based on the
62  * original Unicode specification, which defined characters as
63  * fixed-width 16-bit entities. The Unicode Standard has since been
64  * changed to allow for characters whose representation requires more
65  * than 16 bits.  The range of legal <em>code point</em>s is now
66  * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
67  * (Refer to the <a
68  * href="http://www.unicode.org/reports/tr27/#notation"><i>
69  * definition</i></a> of the U+<i>n</i> notation in the Unicode
70  * Standard.)
71  *
72  * <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
73  * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
74  * <a name="supplementary">Characters</a> whose code points are greater
75  * than U+FFFF are called <em>supplementary character</em>s.  The Java
76  * platform uses the UTF-16 representation in {@code char} arrays and
77  * in the {@code String} and {@code StringBuffer} classes. In
78  * this representation, supplementary characters are represented as a pair
79  * of {@code char} values, the first from the <em>high-surrogates</em>
80  * range (&#92;uD800-&#92;uDBFF), the second from the
81  * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
82  *
83  * <p>A {@code char} value, therefore, represents Basic
84  * Multilingual Plane (BMP) code points, including the surrogate
85  * code points, or code units of the UTF-16 encoding. An
86  * {@code int} value represents all Unicode code points,
87  * including supplementary code points. The lower (least significant)
88  * 21 bits of {@code int} are used to represent Unicode code
89  * points and the upper (most significant) 11 bits must be zero.
90  * Unless otherwise specified, the behavior with respect to
91  * supplementary characters and surrogate {@code char} values is
92  * as follows:
93  *
94  * <ul>
95  * <li>The methods that only accept a {@code char} value cannot support
96  * supplementary characters. They treat {@code char} values from the
97  * surrogate ranges as undefined characters. For example,
98  * {@code Character.isLetter('\u005CuD840')} returns {@code false}, even though
99  * this specific value if followed by any low-surrogate value in a string
100  * would represent a letter.
101  *
102  * <li>The methods that accept an {@code int} value support all
103  * Unicode characters, including supplementary characters. For
104  * example, {@code Character.isLetter(0x2F81A)} returns
105  * {@code true} because the code point value represents a letter
106  * (a CJK ideograph).
107  * </ul>
108  *
109  * <p>In the Java SE API documentation, <em>Unicode code point</em> is
110  * used for character values in the range between U+0000 and U+10FFFF,
111  * and <em>Unicode code unit</em> is used for 16-bit
112  * {@code char} values that are code units of the <em>UTF-16</em>
113  * encoding. For more information on Unicode terminology, refer to the
114  * <a href="http://www.unicode.org/glossary/">Unicode Glossary</a>.
115  *
116  * @author  Lee Boynton
117  * @author  Guy Steele
118  * @author  Akira Tanaka
119  * @author  Martin Buchholz
120  * @author  Ulf Zibis
121  * @since   1.0
122  */
123 public final
124 class Character implements java.io.Serializable, Comparable<Character> {
125     /**
126      * The minimum radix available for conversion to and from strings.
127      * The constant value of this field is the smallest value permitted
128      * for the radix argument in radix-conversion methods such as the
129      * {@code digit} method, the {@code forDigit} method, and the
130      * {@code toString} method of class {@code Integer}.
131      *
132      * @see     Character#digit(char, int)
133      * @see     Character#forDigit(int, int)
134      * @see     Integer#toString(int, int)
135      * @see     Integer#valueOf(String)
136      */
137     public static final int MIN_RADIX = 2;
138 
139     /**
140      * The maximum radix available for conversion to and from strings.
141      * The constant value of this field is the largest value permitted
142      * for the radix argument in radix-conversion methods such as the
143      * {@code digit} method, the {@code forDigit} method, and the
144      * {@code toString} method of class {@code Integer}.
145      *
146      * @see     Character#digit(char, int)
147      * @see     Character#forDigit(int, int)
148      * @see     Integer#toString(int, int)
149      * @see     Integer#valueOf(String)
150      */
151     public static final int MAX_RADIX = 36;
152 
153     /**
154      * The constant value of this field is the smallest value of type
155      * {@code char}, {@code '\u005Cu0000'}.
156      *
157      * @since   1.0.2
158      */
159     public static final char MIN_VALUE = '\u0000';
160 
161     /**
162      * The constant value of this field is the largest value of type
163      * {@code char}, {@code '\u005CuFFFF'}.
164      *
165      * @since   1.0.2
166      */
167     public static final char MAX_VALUE = '\uFFFF';
168 
169     /**
170      * The {@code Class} instance representing the primitive type
171      * {@code char}.
172      *
173      * @since   1.1
174      */
175     @SuppressWarnings("unchecked")
176     /* ----- BEGIN android -----
177     public static final Class<Character> TYPE = Class.getPrimitiveClass("char");
178     */
179     public static final Class<Character> TYPE = (Class<Character>) char[].class.getComponentType();
180     // ----- END android -----
181 
182     /*
183      * Normative general types
184      */
185 
186     /*
187      * General character types
188      */
189 
190     /**
191      * General category "Cn" in the Unicode specification.
192      * @since   1.1
193      */
194     public static final byte UNASSIGNED = 0;
195 
196     /**
197      * General category "Lu" in the Unicode specification.
198      * @since   1.1
199      */
200     public static final byte UPPERCASE_LETTER = 1;
201 
202     /**
203      * General category "Ll" in the Unicode specification.
204      * @since   1.1
205      */
206     public static final byte LOWERCASE_LETTER = 2;
207 
208     /**
209      * General category "Lt" in the Unicode specification.
210      * @since   1.1
211      */
212     public static final byte TITLECASE_LETTER = 3;
213 
214     /**
215      * General category "Lm" in the Unicode specification.
216      * @since   1.1
217      */
218     public static final byte MODIFIER_LETTER = 4;
219 
220     /**
221      * General category "Lo" in the Unicode specification.
222      * @since   1.1
223      */
224     public static final byte OTHER_LETTER = 5;
225 
226     /**
227      * General category "Mn" in the Unicode specification.
228      * @since   1.1
229      */
230     public static final byte NON_SPACING_MARK = 6;
231 
232     /**
233      * General category "Me" in the Unicode specification.
234      * @since   1.1
235      */
236     public static final byte ENCLOSING_MARK = 7;
237 
238     /**
239      * General category "Mc" in the Unicode specification.
240      * @since   1.1
241      */
242     public static final byte COMBINING_SPACING_MARK = 8;
243 
244     /**
245      * General category "Nd" in the Unicode specification.
246      * @since   1.1
247      */
248     public static final byte DECIMAL_DIGIT_NUMBER        = 9;
249 
250     /**
251      * General category "Nl" in the Unicode specification.
252      * @since   1.1
253      */
254     public static final byte LETTER_NUMBER = 10;
255 
256     /**
257      * General category "No" in the Unicode specification.
258      * @since   1.1
259      */
260     public static final byte OTHER_NUMBER = 11;
261 
262     /**
263      * General category "Zs" in the Unicode specification.
264      * @since   1.1
265      */
266     public static final byte SPACE_SEPARATOR = 12;
267 
268     /**
269      * General category "Zl" in the Unicode specification.
270      * @since   1.1
271      */
272     public static final byte LINE_SEPARATOR = 13;
273 
274     /**
275      * General category "Zp" in the Unicode specification.
276      * @since   1.1
277      */
278     public static final byte PARAGRAPH_SEPARATOR = 14;
279 
280     /**
281      * General category "Cc" in the Unicode specification.
282      * @since   1.1
283      */
284     public static final byte CONTROL = 15;
285 
286     /**
287      * General category "Cf" in the Unicode specification.
288      * @since   1.1
289      */
290     public static final byte FORMAT = 16;
291 
292     /**
293      * General category "Co" in the Unicode specification.
294      * @since   1.1
295      */
296     public static final byte PRIVATE_USE = 18;
297 
298     /**
299      * General category "Cs" in the Unicode specification.
300      * @since   1.1
301      */
302     public static final byte SURROGATE = 19;
303 
304     /**
305      * General category "Pd" in the Unicode specification.
306      * @since   1.1
307      */
308     public static final byte DASH_PUNCTUATION = 20;
309 
310     /**
311      * General category "Ps" in the Unicode specification.
312      * @since   1.1
313      */
314     public static final byte START_PUNCTUATION = 21;
315 
316     /**
317      * General category "Pe" in the Unicode specification.
318      * @since   1.1
319      */
320     public static final byte END_PUNCTUATION = 22;
321 
322     /**
323      * General category "Pc" in the Unicode specification.
324      * @since   1.1
325      */
326     public static final byte CONNECTOR_PUNCTUATION = 23;
327 
328     /**
329      * General category "Po" in the Unicode specification.
330      * @since   1.1
331      */
332     public static final byte OTHER_PUNCTUATION = 24;
333 
334     /**
335      * General category "Sm" in the Unicode specification.
336      * @since   1.1
337      */
338     public static final byte MATH_SYMBOL = 25;
339 
340     /**
341      * General category "Sc" in the Unicode specification.
342      * @since   1.1
343      */
344     public static final byte CURRENCY_SYMBOL = 26;
345 
346     /**
347      * General category "Sk" in the Unicode specification.
348      * @since   1.1
349      */
350     public static final byte MODIFIER_SYMBOL = 27;
351 
352     /**
353      * General category "So" in the Unicode specification.
354      * @since   1.1
355      */
356     public static final byte OTHER_SYMBOL = 28;
357 
358     /**
359      * General category "Pi" in the Unicode specification.
360      * @since   1.4
361      */
362     public static final byte INITIAL_QUOTE_PUNCTUATION = 29;
363 
364     /**
365      * General category "Pf" in the Unicode specification.
366      * @since   1.4
367      */
368     public static final byte FINAL_QUOTE_PUNCTUATION = 30;
369 
370     /**
371      * Error flag. Use int (code point) to avoid confusion with U+FFFF.
372      */
373     static final int ERROR = 0xFFFFFFFF;
374 
375 
376     /**
377      * Undefined bidirectional character type. Undefined {@code char}
378      * values have undefined directionality in the Unicode specification.
379      * @since 1.4
380      */
381     public static final byte DIRECTIONALITY_UNDEFINED = -1;
382 
383     /**
384      * Strong bidirectional character type "L" in the Unicode specification.
385      * @since 1.4
386      */
387     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;
388 
389     /**
390      * Strong bidirectional character type "R" in the Unicode specification.
391      * @since 1.4
392      */
393     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;
394 
395     /**
396      * Strong bidirectional character type "AL" in the Unicode specification.
397      * @since 1.4
398      */
399     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;
400 
401     /**
402      * Weak bidirectional character type "EN" in the Unicode specification.
403      * @since 1.4
404      */
405     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;
406 
407     /**
408      * Weak bidirectional character type "ES" in the Unicode specification.
409      * @since 1.4
410      */
411     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;
412 
413     /**
414      * Weak bidirectional character type "ET" in the Unicode specification.
415      * @since 1.4
416      */
417     public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;
418 
419     /**
420      * Weak bidirectional character type "AN" in the Unicode specification.
421      * @since 1.4
422      */
423     public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;
424 
425     /**
426      * Weak bidirectional character type "CS" in the Unicode specification.
427      * @since 1.4
428      */
429     public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;
430 
431     /**
432      * Weak bidirectional character type "NSM" in the Unicode specification.
433      * @since 1.4
434      */
435     public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;
436 
437     /**
438      * Weak bidirectional character type "BN" in the Unicode specification.
439      * @since 1.4
440      */
441     public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9;
442 
443     /**
444      * Neutral bidirectional character type "B" in the Unicode specification.
445      * @since 1.4
446      */
447     public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10;
448 
449     /**
450      * Neutral bidirectional character type "S" in the Unicode specification.
451      * @since 1.4
452      */
453     public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11;
454 
455     /**
456      * Neutral bidirectional character type "WS" in the Unicode specification.
457      * @since 1.4
458      */
459     public static final byte DIRECTIONALITY_WHITESPACE = 12;
460 
461     /**
462      * Neutral bidirectional character type "ON" in the Unicode specification.
463      * @since 1.4
464      */
465     public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13;
466 
467     /**
468      * Strong bidirectional character type "LRE" in the Unicode specification.
469      * @since 1.4
470      */
471     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14;
472 
473     /**
474      * Strong bidirectional character type "LRO" in the Unicode specification.
475      * @since 1.4
476      */
477     public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15;
478 
479     /**
480      * Strong bidirectional character type "RLE" in the Unicode specification.
481      * @since 1.4
482      */
483     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16;
484 
485     /**
486      * Strong bidirectional character type "RLO" in the Unicode specification.
487      * @since 1.4
488      */
489     public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17;
490 
491     /**
492      * Weak bidirectional character type "PDF" in the Unicode specification.
493      * @since 1.4
494      */
495     public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
496 
497     /**
498      * The minimum value of a
499      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
500      * Unicode high-surrogate code unit</a>
501      * in the UTF-16 encoding, constant {@code '\u005CuD800'}.
502      * A high-surrogate is also known as a <i>leading-surrogate</i>.
503      *
504      * @since 1.5
505      */
506     public static final char MIN_HIGH_SURROGATE = '\uD800';
507 
508     /**
509      * The maximum value of a
510      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
511      * Unicode high-surrogate code unit</a>
512      * in the UTF-16 encoding, constant {@code '\u005CuDBFF'}.
513      * A high-surrogate is also known as a <i>leading-surrogate</i>.
514      *
515      * @since 1.5
516      */
517     public static final char MAX_HIGH_SURROGATE = '\uDBFF';
518 
519     /**
520      * The minimum value of a
521      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
522      * Unicode low-surrogate code unit</a>
523      * in the UTF-16 encoding, constant {@code '\u005CuDC00'}.
524      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
525      *
526      * @since 1.5
527      */
528     public static final char MIN_LOW_SURROGATE  = '\uDC00';
529 
530     /**
531      * The maximum value of a
532      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
533      * Unicode low-surrogate code unit</a>
534      * in the UTF-16 encoding, constant {@code '\u005CuDFFF'}.
535      * A low-surrogate is also known as a <i>trailing-surrogate</i>.
536      *
537      * @since 1.5
538      */
539     public static final char MAX_LOW_SURROGATE  = '\uDFFF';
540 
541     /**
542      * The minimum value of a Unicode surrogate code unit in the
543      * UTF-16 encoding, constant {@code '\u005CuD800'}.
544      *
545      * @since 1.5
546      */
547     public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
548 
549     /**
550      * The maximum value of a Unicode surrogate code unit in the
551      * UTF-16 encoding, constant {@code '\u005CuDFFF'}.
552      *
553      * @since 1.5
554      */
555     public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
556 
557     /**
558      * The minimum value of a
559      * <a href="http://www.unicode.org/glossary/#supplementary_code_point">
560      * Unicode supplementary code point</a>, constant {@code U+10000}.
561      *
562      * @since 1.5
563      */
564     public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
565 
566     /**
567      * The minimum value of a
568      * <a href="http://www.unicode.org/glossary/#code_point">
569      * Unicode code point</a>, constant {@code U+0000}.
570      *
571      * @since 1.5
572      */
573     public static final int MIN_CODE_POINT = 0x000000;
574 
575     /**
576      * The maximum value of a
577      * <a href="http://www.unicode.org/glossary/#code_point">
578      * Unicode code point</a>, constant {@code U+10FFFF}.
579      *
580      * @since 1.5
581      */
582     public static final int MAX_CODE_POINT = 0X10FFFF;
583 
584     private static final byte[] DIRECTIONALITY = new byte[] {
585             DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT,
586             DIRECTIONALITY_EUROPEAN_NUMBER,
587             DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR,
588             DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR,
589             DIRECTIONALITY_ARABIC_NUMBER,
590             DIRECTIONALITY_COMMON_NUMBER_SEPARATOR,
591             DIRECTIONALITY_PARAGRAPH_SEPARATOR,
592             DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE,
593             DIRECTIONALITY_OTHER_NEUTRALS,
594             DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING,
595             DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE,
596             DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC,
597             DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING,
598             DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE,
599             DIRECTIONALITY_POP_DIRECTIONAL_FORMAT,
600             DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL };
601 
602     /**
603      * Instances of this class represent particular subsets of the Unicode
604      * character set.  The only family of subsets defined in the
605      * {@code Character} class is {@link Character.UnicodeBlock}.
606      * Other portions of the Java API may define other subsets for their
607      * own purposes.
608      *
609      * @since 1.2
610      */
611     public static class Subset  {
612 
613         private String name;
614 
615         /**
616          * Constructs a new {@code Subset} instance.
617          *
618          * @param  name  The name of this subset
619          * @exception NullPointerException if name is {@code null}
620          */
Subset(String name)621         protected Subset(String name) {
622             if (name == null) {
623                 throw new NullPointerException("name");
624             }
625             this.name = name;
626         }
627 
628         /**
629          * Compares two {@code Subset} objects for equality.
630          * This method returns {@code true} if and only if
631          * {@code this} and the argument refer to the same
632          * object; since this method is {@code final}, this
633          * guarantee holds for all subclasses.
634          */
equals(Object obj)635         public final boolean equals(Object obj) {
636             return (this == obj);
637         }
638 
639         /**
640          * Returns the standard hash code as defined by the
641          * {@link Object#hashCode} method.  This method
642          * is {@code final} in order to ensure that the
643          * {@code equals} and {@code hashCode} methods will
644          * be consistent in all subclasses.
645          */
hashCode()646         public final int hashCode() {
647             return super.hashCode();
648         }
649 
650         /**
651          * Returns the name of this subset.
652          */
toString()653         public final String toString() {
654             return name;
655         }
656     }
657 
658     // See http://www.unicode.org/Public/UNIDATA/Blocks.txt
659     // for the latest specification of Unicode Blocks.
660 
661     /**
662      * A family of character subsets representing the character blocks in the
663      * Unicode specification. Character blocks generally define characters
664      * used for a specific script or purpose. A character is contained by
665      * at most one Unicode block.
666      *
667      * @since 1.2
668      */
669     public static final class UnicodeBlock extends Subset {
670 
671         private static Map<String, UnicodeBlock> map = new HashMap<>(256);
672 
673         /**
674          * Creates a UnicodeBlock with the given identifier name.
675          * This name must be the same as the block identifier.
676          */
UnicodeBlock(String idName)677         private UnicodeBlock(String idName) {
678             this(idName, true);
679         }
680 
UnicodeBlock(String idName, boolean isMap)681         private UnicodeBlock(String idName, boolean isMap) {
682             super(idName);
683             if (isMap) {
684                 map.put(idName, this);
685             }
686         }
687 
688         /**
689          * Creates a UnicodeBlock with the given identifier name and
690          * alias name.
691          */
UnicodeBlock(String idName, String alias)692         private UnicodeBlock(String idName, String alias) {
693             this(idName, true);
694             map.put(alias, this);
695         }
696 
697         /**
698          * Creates a UnicodeBlock with the given identifier name and
699          * alias names.
700          */
UnicodeBlock(String idName, String... aliases)701         private UnicodeBlock(String idName, String... aliases) {
702             this(idName, true);
703             for (String alias : aliases)
704                 map.put(alias, this);
705         }
706 
707         /**
708          * Constant for the "Basic Latin" Unicode character block.
709          * @since 1.2
710          */
711         public static final UnicodeBlock  BASIC_LATIN =
712             new UnicodeBlock("BASIC_LATIN",
713                              "BASIC LATIN",
714                              "BASICLATIN");
715 
716         /**
717          * Constant for the "Latin-1 Supplement" Unicode character block.
718          * @since 1.2
719          */
720         public static final UnicodeBlock LATIN_1_SUPPLEMENT =
721             new UnicodeBlock("LATIN_1_SUPPLEMENT",
722                              "LATIN-1 SUPPLEMENT",
723                              "LATIN-1SUPPLEMENT");
724 
725         /**
726          * Constant for the "Latin Extended-A" Unicode character block.
727          * @since 1.2
728          */
729         public static final UnicodeBlock LATIN_EXTENDED_A =
730             new UnicodeBlock("LATIN_EXTENDED_A",
731                              "LATIN EXTENDED-A",
732                              "LATINEXTENDED-A");
733 
734         /**
735          * Constant for the "Latin Extended-B" Unicode character block.
736          * @since 1.2
737          */
738         public static final UnicodeBlock LATIN_EXTENDED_B =
739             new UnicodeBlock("LATIN_EXTENDED_B",
740                              "LATIN EXTENDED-B",
741                              "LATINEXTENDED-B");
742 
743         /**
744          * Constant for the "IPA Extensions" Unicode character block.
745          * @since 1.2
746          */
747         public static final UnicodeBlock IPA_EXTENSIONS =
748             new UnicodeBlock("IPA_EXTENSIONS",
749                              "IPA EXTENSIONS",
750                              "IPAEXTENSIONS");
751 
752         /**
753          * Constant for the "Spacing Modifier Letters" Unicode character block.
754          * @since 1.2
755          */
756         public static final UnicodeBlock SPACING_MODIFIER_LETTERS =
757             new UnicodeBlock("SPACING_MODIFIER_LETTERS",
758                              "SPACING MODIFIER LETTERS",
759                              "SPACINGMODIFIERLETTERS");
760 
761         /**
762          * Constant for the "Combining Diacritical Marks" Unicode character block.
763          * @since 1.2
764          */
765         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS =
766             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS",
767                              "COMBINING DIACRITICAL MARKS",
768                              "COMBININGDIACRITICALMARKS");
769 
770         /**
771          * Constant for the "Greek and Coptic" Unicode character block.
772          * <p>
773          * This block was previously known as the "Greek" block.
774          *
775          * @since 1.2
776          */
777         public static final UnicodeBlock GREEK =
778             new UnicodeBlock("GREEK",
779                              "GREEK AND COPTIC",
780                              "GREEKANDCOPTIC");
781 
782         /**
783          * Constant for the "Cyrillic" Unicode character block.
784          * @since 1.2
785          */
786         public static final UnicodeBlock CYRILLIC =
787             new UnicodeBlock("CYRILLIC");
788 
789         /**
790          * Constant for the "Armenian" Unicode character block.
791          * @since 1.2
792          */
793         public static final UnicodeBlock ARMENIAN =
794             new UnicodeBlock("ARMENIAN");
795 
796         /**
797          * Constant for the "Hebrew" Unicode character block.
798          * @since 1.2
799          */
800         public static final UnicodeBlock HEBREW =
801             new UnicodeBlock("HEBREW");
802 
803         /**
804          * Constant for the "Arabic" Unicode character block.
805          * @since 1.2
806          */
807         public static final UnicodeBlock ARABIC =
808             new UnicodeBlock("ARABIC");
809 
810         /**
811          * Constant for the "Devanagari" Unicode character block.
812          * @since 1.2
813          */
814         public static final UnicodeBlock DEVANAGARI =
815             new UnicodeBlock("DEVANAGARI");
816 
817         /**
818          * Constant for the "Bengali" Unicode character block.
819          * @since 1.2
820          */
821         public static final UnicodeBlock BENGALI =
822             new UnicodeBlock("BENGALI");
823 
824         /**
825          * Constant for the "Gurmukhi" Unicode character block.
826          * @since 1.2
827          */
828         public static final UnicodeBlock GURMUKHI =
829             new UnicodeBlock("GURMUKHI");
830 
831         /**
832          * Constant for the "Gujarati" Unicode character block.
833          * @since 1.2
834          */
835         public static final UnicodeBlock GUJARATI =
836             new UnicodeBlock("GUJARATI");
837 
838         /**
839          * Constant for the "Oriya" Unicode character block.
840          * @since 1.2
841          */
842         public static final UnicodeBlock ORIYA =
843             new UnicodeBlock("ORIYA");
844 
845         /**
846          * Constant for the "Tamil" Unicode character block.
847          * @since 1.2
848          */
849         public static final UnicodeBlock TAMIL =
850             new UnicodeBlock("TAMIL");
851 
852         /**
853          * Constant for the "Telugu" Unicode character block.
854          * @since 1.2
855          */
856         public static final UnicodeBlock TELUGU =
857             new UnicodeBlock("TELUGU");
858 
859         /**
860          * Constant for the "Kannada" Unicode character block.
861          * @since 1.2
862          */
863         public static final UnicodeBlock KANNADA =
864             new UnicodeBlock("KANNADA");
865 
866         /**
867          * Constant for the "Malayalam" Unicode character block.
868          * @since 1.2
869          */
870         public static final UnicodeBlock MALAYALAM =
871             new UnicodeBlock("MALAYALAM");
872 
873         /**
874          * Constant for the "Thai" Unicode character block.
875          * @since 1.2
876          */
877         public static final UnicodeBlock THAI =
878             new UnicodeBlock("THAI");
879 
880         /**
881          * Constant for the "Lao" Unicode character block.
882          * @since 1.2
883          */
884         public static final UnicodeBlock LAO =
885             new UnicodeBlock("LAO");
886 
887         /**
888          * Constant for the "Tibetan" Unicode character block.
889          * @since 1.2
890          */
891         public static final UnicodeBlock TIBETAN =
892             new UnicodeBlock("TIBETAN");
893 
894         /**
895          * Constant for the "Georgian" Unicode character block.
896          * @since 1.2
897          */
898         public static final UnicodeBlock GEORGIAN =
899             new UnicodeBlock("GEORGIAN");
900 
901         /**
902          * Constant for the "Hangul Jamo" Unicode character block.
903          * @since 1.2
904          */
905         public static final UnicodeBlock HANGUL_JAMO =
906             new UnicodeBlock("HANGUL_JAMO",
907                              "HANGUL JAMO",
908                              "HANGULJAMO");
909 
910         /**
911          * Constant for the "Latin Extended Additional" Unicode character block.
912          * @since 1.2
913          */
914         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL =
915             new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL",
916                              "LATIN EXTENDED ADDITIONAL",
917                              "LATINEXTENDEDADDITIONAL");
918 
919         /**
920          * Constant for the "Greek Extended" Unicode character block.
921          * @since 1.2
922          */
923         public static final UnicodeBlock GREEK_EXTENDED =
924             new UnicodeBlock("GREEK_EXTENDED",
925                              "GREEK EXTENDED",
926                              "GREEKEXTENDED");
927 
928         /**
929          * Constant for the "General Punctuation" Unicode character block.
930          * @since 1.2
931          */
932         public static final UnicodeBlock GENERAL_PUNCTUATION =
933             new UnicodeBlock("GENERAL_PUNCTUATION",
934                              "GENERAL PUNCTUATION",
935                              "GENERALPUNCTUATION");
936 
937         /**
938          * Constant for the "Superscripts and Subscripts" Unicode character
939          * block.
940          * @since 1.2
941          */
942         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS =
943             new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS",
944                              "SUPERSCRIPTS AND SUBSCRIPTS",
945                              "SUPERSCRIPTSANDSUBSCRIPTS");
946 
947         /**
948          * Constant for the "Currency Symbols" Unicode character block.
949          * @since 1.2
950          */
951         public static final UnicodeBlock CURRENCY_SYMBOLS =
952             new UnicodeBlock("CURRENCY_SYMBOLS",
953                              "CURRENCY SYMBOLS",
954                              "CURRENCYSYMBOLS");
955 
956         /**
957          * Constant for the "Combining Diacritical Marks for Symbols" Unicode
958          * character block.
959          * <p>
960          * This block was previously known as "Combining Marks for Symbols".
961          * @since 1.2
962          */
963         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS =
964             new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS",
965                              "COMBINING DIACRITICAL MARKS FOR SYMBOLS",
966                              "COMBININGDIACRITICALMARKSFORSYMBOLS",
967                              "COMBINING MARKS FOR SYMBOLS",
968                              "COMBININGMARKSFORSYMBOLS");
969 
970         /**
971          * Constant for the "Letterlike Symbols" Unicode character block.
972          * @since 1.2
973          */
974         public static final UnicodeBlock LETTERLIKE_SYMBOLS =
975             new UnicodeBlock("LETTERLIKE_SYMBOLS",
976                              "LETTERLIKE SYMBOLS",
977                              "LETTERLIKESYMBOLS");
978 
979         /**
980          * Constant for the "Number Forms" Unicode character block.
981          * @since 1.2
982          */
983         public static final UnicodeBlock NUMBER_FORMS =
984             new UnicodeBlock("NUMBER_FORMS",
985                              "NUMBER FORMS",
986                              "NUMBERFORMS");
987 
988         /**
989          * Constant for the "Arrows" Unicode character block.
990          * @since 1.2
991          */
992         public static final UnicodeBlock ARROWS =
993             new UnicodeBlock("ARROWS");
994 
995         /**
996          * Constant for the "Mathematical Operators" Unicode character block.
997          * @since 1.2
998          */
999         public static final UnicodeBlock MATHEMATICAL_OPERATORS =
1000             new UnicodeBlock("MATHEMATICAL_OPERATORS",
1001                              "MATHEMATICAL OPERATORS",
1002                              "MATHEMATICALOPERATORS");
1003 
1004         /**
1005          * Constant for the "Miscellaneous Technical" Unicode character block.
1006          * @since 1.2
1007          */
1008         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL =
1009             new UnicodeBlock("MISCELLANEOUS_TECHNICAL",
1010                              "MISCELLANEOUS TECHNICAL",
1011                              "MISCELLANEOUSTECHNICAL");
1012 
1013         /**
1014          * Constant for the "Control Pictures" Unicode character block.
1015          * @since 1.2
1016          */
1017         public static final UnicodeBlock CONTROL_PICTURES =
1018             new UnicodeBlock("CONTROL_PICTURES",
1019                              "CONTROL PICTURES",
1020                              "CONTROLPICTURES");
1021 
1022         /**
1023          * Constant for the "Optical Character Recognition" Unicode character block.
1024          * @since 1.2
1025          */
1026         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION =
1027             new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION",
1028                              "OPTICAL CHARACTER RECOGNITION",
1029                              "OPTICALCHARACTERRECOGNITION");
1030 
1031         /**
1032          * Constant for the "Enclosed Alphanumerics" Unicode character block.
1033          * @since 1.2
1034          */
1035         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS =
1036             new UnicodeBlock("ENCLOSED_ALPHANUMERICS",
1037                              "ENCLOSED ALPHANUMERICS",
1038                              "ENCLOSEDALPHANUMERICS");
1039 
1040         /**
1041          * Constant for the "Box Drawing" Unicode character block.
1042          * @since 1.2
1043          */
1044         public static final UnicodeBlock BOX_DRAWING =
1045             new UnicodeBlock("BOX_DRAWING",
1046                              "BOX DRAWING",
1047                              "BOXDRAWING");
1048 
1049         /**
1050          * Constant for the "Block Elements" Unicode character block.
1051          * @since 1.2
1052          */
1053         public static final UnicodeBlock BLOCK_ELEMENTS =
1054             new UnicodeBlock("BLOCK_ELEMENTS",
1055                              "BLOCK ELEMENTS",
1056                              "BLOCKELEMENTS");
1057 
1058         /**
1059          * Constant for the "Geometric Shapes" Unicode character block.
1060          * @since 1.2
1061          */
1062         public static final UnicodeBlock GEOMETRIC_SHAPES =
1063             new UnicodeBlock("GEOMETRIC_SHAPES",
1064                              "GEOMETRIC SHAPES",
1065                              "GEOMETRICSHAPES");
1066 
1067         /**
1068          * Constant for the "Miscellaneous Symbols" Unicode character block.
1069          * @since 1.2
1070          */
1071         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS =
1072             new UnicodeBlock("MISCELLANEOUS_SYMBOLS",
1073                              "MISCELLANEOUS SYMBOLS",
1074                              "MISCELLANEOUSSYMBOLS");
1075 
1076         /**
1077          * Constant for the "Dingbats" Unicode character block.
1078          * @since 1.2
1079          */
1080         public static final UnicodeBlock DINGBATS =
1081             new UnicodeBlock("DINGBATS");
1082 
1083         /**
1084          * Constant for the "CJK Symbols and Punctuation" Unicode character block.
1085          * @since 1.2
1086          */
1087         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION =
1088             new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION",
1089                              "CJK SYMBOLS AND PUNCTUATION",
1090                              "CJKSYMBOLSANDPUNCTUATION");
1091 
1092         /**
1093          * Constant for the "Hiragana" Unicode character block.
1094          * @since 1.2
1095          */
1096         public static final UnicodeBlock HIRAGANA =
1097             new UnicodeBlock("HIRAGANA");
1098 
1099         /**
1100          * Constant for the "Katakana" Unicode character block.
1101          * @since 1.2
1102          */
1103         public static final UnicodeBlock KATAKANA =
1104             new UnicodeBlock("KATAKANA");
1105 
1106         /**
1107          * Constant for the "Bopomofo" Unicode character block.
1108          * @since 1.2
1109          */
1110         public static final UnicodeBlock BOPOMOFO =
1111             new UnicodeBlock("BOPOMOFO");
1112 
1113         /**
1114          * Constant for the "Hangul Compatibility Jamo" Unicode character block.
1115          * @since 1.2
1116          */
1117         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO =
1118             new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO",
1119                              "HANGUL COMPATIBILITY JAMO",
1120                              "HANGULCOMPATIBILITYJAMO");
1121 
1122         /**
1123          * Constant for the "Kanbun" Unicode character block.
1124          * @since 1.2
1125          */
1126         public static final UnicodeBlock KANBUN =
1127             new UnicodeBlock("KANBUN");
1128 
1129         /**
1130          * Constant for the "Enclosed CJK Letters and Months" Unicode character block.
1131          * @since 1.2
1132          */
1133         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS =
1134             new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1135                              "ENCLOSED CJK LETTERS AND MONTHS",
1136                              "ENCLOSEDCJKLETTERSANDMONTHS");
1137 
1138         /**
1139          * Constant for the "CJK Compatibility" Unicode character block.
1140          * @since 1.2
1141          */
1142         public static final UnicodeBlock CJK_COMPATIBILITY =
1143             new UnicodeBlock("CJK_COMPATIBILITY",
1144                              "CJK COMPATIBILITY",
1145                              "CJKCOMPATIBILITY");
1146 
1147         /**
1148          * Constant for the "CJK Unified Ideographs" Unicode character block.
1149          * @since 1.2
1150          */
1151         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS =
1152             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS",
1153                              "CJK UNIFIED IDEOGRAPHS",
1154                              "CJKUNIFIEDIDEOGRAPHS");
1155 
1156         /**
1157          * Constant for the "Hangul Syllables" Unicode character block.
1158          * @since 1.2
1159          */
1160         public static final UnicodeBlock HANGUL_SYLLABLES =
1161             new UnicodeBlock("HANGUL_SYLLABLES",
1162                              "HANGUL SYLLABLES",
1163                              "HANGULSYLLABLES");
1164 
1165         /**
1166          * Constant for the "Private Use Area" Unicode character block.
1167          * @since 1.2
1168          */
1169         public static final UnicodeBlock PRIVATE_USE_AREA =
1170             new UnicodeBlock("PRIVATE_USE_AREA",
1171                              "PRIVATE USE AREA",
1172                              "PRIVATEUSEAREA");
1173 
1174         /**
1175          * Constant for the "CJK Compatibility Ideographs" Unicode character
1176          * block.
1177          * @since 1.2
1178          */
1179         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS =
1180             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS",
1181                              "CJK COMPATIBILITY IDEOGRAPHS",
1182                              "CJKCOMPATIBILITYIDEOGRAPHS");
1183 
1184         /**
1185          * Constant for the "Alphabetic Presentation Forms" Unicode character block.
1186          * @since 1.2
1187          */
1188         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS =
1189             new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS",
1190                              "ALPHABETIC PRESENTATION FORMS",
1191                              "ALPHABETICPRESENTATIONFORMS");
1192 
1193         /**
1194          * Constant for the "Arabic Presentation Forms-A" Unicode character
1195          * block.
1196          * @since 1.2
1197          */
1198         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A =
1199             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A",
1200                              "ARABIC PRESENTATION FORMS-A",
1201                              "ARABICPRESENTATIONFORMS-A");
1202 
1203         /**
1204          * Constant for the "Combining Half Marks" Unicode character block.
1205          * @since 1.2
1206          */
1207         public static final UnicodeBlock COMBINING_HALF_MARKS =
1208             new UnicodeBlock("COMBINING_HALF_MARKS",
1209                              "COMBINING HALF MARKS",
1210                              "COMBININGHALFMARKS");
1211 
1212         /**
1213          * Constant for the "CJK Compatibility Forms" Unicode character block.
1214          * @since 1.2
1215          */
1216         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS =
1217             new UnicodeBlock("CJK_COMPATIBILITY_FORMS",
1218                              "CJK COMPATIBILITY FORMS",
1219                              "CJKCOMPATIBILITYFORMS");
1220 
1221         /**
1222          * Constant for the "Small Form Variants" Unicode character block.
1223          * @since 1.2
1224          */
1225         public static final UnicodeBlock SMALL_FORM_VARIANTS =
1226             new UnicodeBlock("SMALL_FORM_VARIANTS",
1227                              "SMALL FORM VARIANTS",
1228                              "SMALLFORMVARIANTS");
1229 
1230         /**
1231          * Constant for the "Arabic Presentation Forms-B" Unicode character block.
1232          * @since 1.2
1233          */
1234         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B =
1235             new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B",
1236                              "ARABIC PRESENTATION FORMS-B",
1237                              "ARABICPRESENTATIONFORMS-B");
1238 
1239         /**
1240          * Constant for the "Halfwidth and Fullwidth Forms" Unicode character
1241          * block.
1242          * @since 1.2
1243          */
1244         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS =
1245             new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS",
1246                              "HALFWIDTH AND FULLWIDTH FORMS",
1247                              "HALFWIDTHANDFULLWIDTHFORMS");
1248 
1249         /**
1250          * Constant for the "Specials" Unicode character block.
1251          * @since 1.2
1252          */
1253         public static final UnicodeBlock SPECIALS =
1254             new UnicodeBlock("SPECIALS");
1255 
1256         /**
1257          * Deprecated constant for the "Surrogates Area" character block.
1258          * @deprecated As of J2SE 5, use {@link #HIGH_SURROGATES},
1259          *             {@link #HIGH_PRIVATE_USE_SURROGATES}, and
1260          *             {@link #LOW_SURROGATES}, which match the block
1261          *             definitions of the Unicode Standard. {@link #of(char)}
1262          *             and {@link #of(int)} return those, not SURROGATES_AREA.
1263          */
1264         @Deprecated
1265         public static final UnicodeBlock SURROGATES_AREA =
1266             new UnicodeBlock("SURROGATES_AREA", false); // NOTE(review): 'false' presumably keeps this constant out of name lookup - confirm
1267 
1268         /**
1269          * Constant for the "Syriac" Unicode character block.
1270          * @since 1.4
1271          */
1272         public static final UnicodeBlock SYRIAC =
1273             new UnicodeBlock("SYRIAC");
1274 
1275         /**
1276          * Constant for the "Thaana" Unicode character block.
1277          * @since 1.4
1278          */
1279         public static final UnicodeBlock THAANA =
1280             new UnicodeBlock("THAANA");
1281 
1282         /**
1283          * Constant for the "Sinhala" Unicode character block.
1284          * @since 1.4
1285          */
1286         public static final UnicodeBlock SINHALA =
1287             new UnicodeBlock("SINHALA");
1288 
1289         /**
1290          * Constant for the "Myanmar" Unicode character block.
1291          * @since 1.4
1292          */
1293         public static final UnicodeBlock MYANMAR =
1294             new UnicodeBlock("MYANMAR");
1295 
1296         /**
1297          * Constant for the "Ethiopic" Unicode character block.
1298          * @since 1.4
1299          */
1300         public static final UnicodeBlock ETHIOPIC =
1301             new UnicodeBlock("ETHIOPIC");
1302 
1303         /**
1304          * Constant for the "Cherokee" Unicode character block.
1305          * @since 1.4
1306          */
1307         public static final UnicodeBlock CHEROKEE =
1308             new UnicodeBlock("CHEROKEE");
1309 
1310         /**
1311          * Constant for the "Unified Canadian Aboriginal Syllabics" Unicode character block.
1312          * @since 1.4
1313          */
1314         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =
1315             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1316                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS",
1317                              "UNIFIEDCANADIANABORIGINALSYLLABICS");
1318 
1319         /**
1320          * Constant for the "Ogham" Unicode character block.
1321          * @since 1.4
1322          */
1323         public static final UnicodeBlock OGHAM =
1324             new UnicodeBlock("OGHAM");
1325 
1326         /**
1327          * Constant for the "Runic" Unicode character block.
1328          * @since 1.4
1329          */
1330         public static final UnicodeBlock RUNIC =
1331             new UnicodeBlock("RUNIC");
1332 
1333         /**
1334          * Constant for the "Khmer" Unicode character block.
1335          * @since 1.4
1336          */
1337         public static final UnicodeBlock KHMER =
1338             new UnicodeBlock("KHMER");
1339 
1340         /**
1341          * Constant for the "Mongolian" Unicode character block.
1342          * @since 1.4
1343          */
1344         public static final UnicodeBlock MONGOLIAN =
1345             new UnicodeBlock("MONGOLIAN");
1346 
1347         /**
1348          * Constant for the "Braille Patterns" Unicode character block.
1349          * @since 1.4
1350          */
1351         public static final UnicodeBlock BRAILLE_PATTERNS =
1352             new UnicodeBlock("BRAILLE_PATTERNS",
1353                              "BRAILLE PATTERNS",
1354                              "BRAILLEPATTERNS");
1355 
1356         /**
1357          * Constant for the "CJK Radicals Supplement" Unicode character block.
1358          * @since 1.4
1359          */
1360         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT =
1361             new UnicodeBlock("CJK_RADICALS_SUPPLEMENT",
1362                              "CJK RADICALS SUPPLEMENT",
1363                              "CJKRADICALSSUPPLEMENT");
1364 
1365         /**
1366          * Constant for the "Kangxi Radicals" Unicode character block.
1367          * @since 1.4
1368          */
1369         public static final UnicodeBlock KANGXI_RADICALS =
1370             new UnicodeBlock("KANGXI_RADICALS",
1371                              "KANGXI RADICALS",
1372                              "KANGXIRADICALS");
1373 
1374         /**
1375          * Constant for the "Ideographic Description Characters" Unicode character block.
1376          * @since 1.4
1377          */
1378         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS =
1379             new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1380                              "IDEOGRAPHIC DESCRIPTION CHARACTERS",
1381                              "IDEOGRAPHICDESCRIPTIONCHARACTERS");
1382 
1383         /**
1384          * Constant for the "Bopomofo Extended" Unicode character block.
1385          * @since 1.4
1386          */
1387         public static final UnicodeBlock BOPOMOFO_EXTENDED =
1388             new UnicodeBlock("BOPOMOFO_EXTENDED",
1389                              "BOPOMOFO EXTENDED",
1390                              "BOPOMOFOEXTENDED");
1391 
1392         /**
1393          * Constant for the "CJK Unified Ideographs Extension A" Unicode character block.
1394          * @since 1.4
1395          */
1396         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =
1397             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1398                              "CJK UNIFIED IDEOGRAPHS EXTENSION A",
1399                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONA");
1400 
1401         /**
1402          * Constant for the "Yi Syllables" Unicode character block.
1403          * @since 1.4
1404          */
1405         public static final UnicodeBlock YI_SYLLABLES =
1406             new UnicodeBlock("YI_SYLLABLES",
1407                              "YI SYLLABLES",
1408                              "YISYLLABLES");
1409 
1410         /**
1411          * Constant for the "Yi Radicals" Unicode character block.
1412          * @since 1.4
1413          */
1414         public static final UnicodeBlock YI_RADICALS =
1415             new UnicodeBlock("YI_RADICALS",
1416                              "YI RADICALS",
1417                              "YIRADICALS");
1418 
1419         /**
1420          * Constant for the "Cyrillic Supplement" (formerly "Cyrillic Supplementary") Unicode character block.
1421          * @since 1.5
1422          */
1423         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY =
1424             new UnicodeBlock("CYRILLIC_SUPPLEMENTARY",
1425                              "CYRILLIC SUPPLEMENTARY",
1426                              "CYRILLICSUPPLEMENTARY",
1427                              "CYRILLIC SUPPLEMENT",
1428                              "CYRILLICSUPPLEMENT");
1429 
1430         /**
1431          * Constant for the "Tagalog" Unicode character block.
1432          * @since 1.5
1433          */
1434         public static final UnicodeBlock TAGALOG =
1435             new UnicodeBlock("TAGALOG");
1436 
1437         /**
1438          * Constant for the "Hanunoo" Unicode character block.
1439          * @since 1.5
1440          */
1441         public static final UnicodeBlock HANUNOO =
1442             new UnicodeBlock("HANUNOO");
1443 
1444         /**
1445          * Constant for the "Buhid" Unicode character block.
1446          * @since 1.5
1447          */
1448         public static final UnicodeBlock BUHID =
1449             new UnicodeBlock("BUHID");
1450 
1451         /**
1452          * Constant for the "Tagbanwa" Unicode character block.
1453          * @since 1.5
1454          */
1455         public static final UnicodeBlock TAGBANWA =
1456             new UnicodeBlock("TAGBANWA");
1457 
1458         /**
1459          * Constant for the "Limbu" Unicode character block.
1460          * @since 1.5
1461          */
1462         public static final UnicodeBlock LIMBU =
1463             new UnicodeBlock("LIMBU");
1464 
1465         /**
1466          * Constant for the "Tai Le" Unicode character block.
1467          * @since 1.5
1468          */
1469         public static final UnicodeBlock TAI_LE =
1470             new UnicodeBlock("TAI_LE",
1471                              "TAI LE",
1472                              "TAILE");
1473 
1474         /**
1475          * Constant for the "Khmer Symbols" Unicode character block.
1476          * @since 1.5
1477          */
1478         public static final UnicodeBlock KHMER_SYMBOLS =
1479             new UnicodeBlock("KHMER_SYMBOLS",
1480                              "KHMER SYMBOLS",
1481                              "KHMERSYMBOLS");
1482 
1483         /**
1484          * Constant for the "Phonetic Extensions" Unicode character block.
1485          * @since 1.5
1486          */
1487         public static final UnicodeBlock PHONETIC_EXTENSIONS =
1488             new UnicodeBlock("PHONETIC_EXTENSIONS",
1489                              "PHONETIC EXTENSIONS",
1490                              "PHONETICEXTENSIONS");
1491 
1492         /**
1493          * Constant for the "Miscellaneous Mathematical Symbols-A" Unicode character block.
1494          * @since 1.5
1495          */
1496         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A =
1497             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1498                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-A",
1499                              "MISCELLANEOUSMATHEMATICALSYMBOLS-A");
1500 
1501         /**
1502          * Constant for the "Supplemental Arrows-A" Unicode character block.
1503          * @since 1.5
1504          */
1505         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A =
1506             new UnicodeBlock("SUPPLEMENTAL_ARROWS_A",
1507                              "SUPPLEMENTAL ARROWS-A",
1508                              "SUPPLEMENTALARROWS-A");
1509 
1510         /**
1511          * Constant for the "Supplemental Arrows-B" Unicode character block.
1512          * @since 1.5
1513          */
1514         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B =
1515             new UnicodeBlock("SUPPLEMENTAL_ARROWS_B",
1516                              "SUPPLEMENTAL ARROWS-B",
1517                              "SUPPLEMENTALARROWS-B");
1518 
1519         /**
1520          * Constant for the "Miscellaneous Mathematical Symbols-B" Unicode
1521          * character block.
1522          * @since 1.5
1523          */
1524         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B =
1525             new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1526                              "MISCELLANEOUS MATHEMATICAL SYMBOLS-B",
1527                              "MISCELLANEOUSMATHEMATICALSYMBOLS-B");
1528 
1529         /**
1530          * Constant for the "Supplemental Mathematical Operators" Unicode
1531          * character block.
1532          * @since 1.5
1533          */
1534         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS =
1535             new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1536                              "SUPPLEMENTAL MATHEMATICAL OPERATORS",
1537                              "SUPPLEMENTALMATHEMATICALOPERATORS");
1538 
1539         /**
1540          * Constant for the "Miscellaneous Symbols and Arrows" Unicode character
1541          * block.
1542          * @since 1.5
1543          */
1544         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS =
1545             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1546                              "MISCELLANEOUS SYMBOLS AND ARROWS",
1547                              "MISCELLANEOUSSYMBOLSANDARROWS");
1548 
1549         /**
1550          * Constant for the "Katakana Phonetic Extensions" Unicode character
1551          * block.
1552          * @since 1.5
1553          */
1554         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS =
1555             new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS",
1556                              "KATAKANA PHONETIC EXTENSIONS",
1557                              "KATAKANAPHONETICEXTENSIONS");
1558 
1559         /**
1560          * Constant for the "Yijing Hexagram Symbols" Unicode character block.
1561          * @since 1.5
1562          */
1563         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS =
1564             new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS",
1565                              "YIJING HEXAGRAM SYMBOLS",
1566                              "YIJINGHEXAGRAMSYMBOLS");
1567 
1568         /**
1569          * Constant for the "Variation Selectors" Unicode character block.
1570          * @since 1.5
1571          */
1572         public static final UnicodeBlock VARIATION_SELECTORS =
1573             new UnicodeBlock("VARIATION_SELECTORS",
1574                              "VARIATION SELECTORS",
1575                              "VARIATIONSELECTORS");
1576 
1577         /**
1578          * Constant for the "Linear B Syllabary" Unicode character block.
1579          * @since 1.5
1580          */
1581         public static final UnicodeBlock LINEAR_B_SYLLABARY =
1582             new UnicodeBlock("LINEAR_B_SYLLABARY",
1583                              "LINEAR B SYLLABARY",
1584                              "LINEARBSYLLABARY");
1585 
1586         /**
1587          * Constant for the "Linear B Ideograms" Unicode character block.
1588          * @since 1.5
1589          */
1590         public static final UnicodeBlock LINEAR_B_IDEOGRAMS =
1591             new UnicodeBlock("LINEAR_B_IDEOGRAMS",
1592                              "LINEAR B IDEOGRAMS",
1593                              "LINEARBIDEOGRAMS");
1594 
1595         /**
1596          * Constant for the "Aegean Numbers" Unicode character block.
1597          * @since 1.5
1598          */
1599         public static final UnicodeBlock AEGEAN_NUMBERS =
1600             new UnicodeBlock("AEGEAN_NUMBERS",
1601                              "AEGEAN NUMBERS",
1602                              "AEGEANNUMBERS");
1603 
1604         /**
1605          * Constant for the "Old Italic" Unicode character block.
1606          * @since 1.5
1607          */
1608         public static final UnicodeBlock OLD_ITALIC =
1609             new UnicodeBlock("OLD_ITALIC",
1610                              "OLD ITALIC",
1611                              "OLDITALIC");
1612 
1613         /**
1614          * Constant for the "Gothic" Unicode character block.
1615          * @since 1.5
1616          */
1617         public static final UnicodeBlock GOTHIC =
1618             new UnicodeBlock("GOTHIC");
1619 
1620         /**
1621          * Constant for the "Ugaritic" Unicode character block.
1622          * @since 1.5
1623          */
1624         public static final UnicodeBlock UGARITIC =
1625             new UnicodeBlock("UGARITIC");
1626 
1627         /**
1628          * Constant for the "Deseret" Unicode character block.
1629          * @since 1.5
1630          */
1631         public static final UnicodeBlock DESERET =
1632             new UnicodeBlock("DESERET");
1633 
1634         /**
1635          * Constant for the "Shavian" Unicode character block.
1636          * @since 1.5
1637          */
1638         public static final UnicodeBlock SHAVIAN =
1639             new UnicodeBlock("SHAVIAN");
1640 
1641         /**
1642          * Constant for the "Osmanya" Unicode character block.
1643          * @since 1.5
1644          */
1645         public static final UnicodeBlock OSMANYA =
1646             new UnicodeBlock("OSMANYA");
1647 
1648         /**
1649          * Constant for the "Cypriot Syllabary" Unicode character block.
1650          * @since 1.5
1651          */
1652         public static final UnicodeBlock CYPRIOT_SYLLABARY =
1653             new UnicodeBlock("CYPRIOT_SYLLABARY",  // same text as the field name
1654                              "CYPRIOT SYLLABARY",  // underscores replaced by spaces
1655                              "CYPRIOTSYLLABARY");  // spaces dropped
1656 
1657         /**
1658          * Constant for the "Byzantine Musical Symbols" Unicode character block.
1659          * @since 1.5
1660          */
1661         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS =
1662             new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS",  // same text as the field name
1663                              "BYZANTINE MUSICAL SYMBOLS",  // underscores replaced by spaces
1664                              "BYZANTINEMUSICALSYMBOLS");  // spaces dropped
1665 
1666         /**
1667          * Constant for the "Musical Symbols" Unicode character block.
1668          * @since 1.5
1669          */
1670         public static final UnicodeBlock MUSICAL_SYMBOLS =
1671             new UnicodeBlock("MUSICAL_SYMBOLS",  // same text as the field name
1672                              "MUSICAL SYMBOLS",  // underscores replaced by spaces
1673                              "MUSICALSYMBOLS");  // spaces dropped
1674 
1675         /**
1676          * Constant for the "Tai Xuan Jing Symbols" Unicode character block.
1677          * @since 1.5
1678          */
1679         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS =
1680             new UnicodeBlock("TAI_XUAN_JING_SYMBOLS",  // same text as the field name
1681                              "TAI XUAN JING SYMBOLS",  // underscores replaced by spaces
1682                              "TAIXUANJINGSYMBOLS");  // spaces dropped
1683 
1684         /**
1685          * Constant for the "Mathematical Alphanumeric Symbols" Unicode
1686          * character block.
1687          * @since 1.5
1688          */
1689         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS =
1690             new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",  // same text as the field name
1691                              "MATHEMATICAL ALPHANUMERIC SYMBOLS",  // underscores replaced by spaces
1692                              "MATHEMATICALALPHANUMERICSYMBOLS");  // spaces dropped
1693 
1694         /**
1695          * Constant for the "CJK Unified Ideographs Extension B" Unicode
1696          * character block.
1697          * @since 1.5
1698          */
1699         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B =
1700             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",  // same text as the field name
1701                              "CJK UNIFIED IDEOGRAPHS EXTENSION B",  // underscores replaced by spaces
1702                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONB");  // spaces dropped
1703 
1704         /**
1705          * Constant for the "CJK Compatibility Ideographs Supplement" Unicode character block.
1706          * @since 1.5
1707          */
1708         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT =
1709             new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",  // same text as the field name
1710                              "CJK COMPATIBILITY IDEOGRAPHS SUPPLEMENT",  // underscores replaced by spaces
1711                              "CJKCOMPATIBILITYIDEOGRAPHSSUPPLEMENT");  // spaces dropped
1712 
1713         /**
1714          * Constant for the "Tags" Unicode character block.
1715          * @since 1.5
1716          */
1717         public static final UnicodeBlock TAGS =
1718             new UnicodeBlock("TAGS");  // single argument: same text as the field name
1719 
1720         /**
1721          * Constant for the "Variation Selectors Supplement" Unicode character
1722          * block.
1723          * @since 1.5
1724          */
1725         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT =
1726             new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT",  // same text as the field name
1727                              "VARIATION SELECTORS SUPPLEMENT",  // underscores replaced by spaces
1728                              "VARIATIONSELECTORSSUPPLEMENT");  // spaces dropped
1729 
1730         /**
1731          * Constant for the "Supplementary Private Use Area-A" Unicode character
1732          * block.
1733          * @since 1.5
1734          */
1735         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A =
1736             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",  // same text as the field name
1737                              "SUPPLEMENTARY PRIVATE USE AREA-A",  // underscores replaced by spaces, the last one by a hyphen
1738                              "SUPPLEMENTARYPRIVATEUSEAREA-A");  // spaces dropped, hyphen kept
1739 
1740         /**
1741          * Constant for the "Supplementary Private Use Area-B" Unicode character
1742          * block.
1743          * @since 1.5
1744          */
1745         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B =
1746             new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",  // same text as the field name
1747                              "SUPPLEMENTARY PRIVATE USE AREA-B",  // underscores replaced by spaces, the last one by a hyphen
1748                              "SUPPLEMENTARYPRIVATEUSEAREA-B");  // spaces dropped, hyphen kept
1749 
1750         /**
1751          * Constant for the "High Surrogates" Unicode character block.
1752          * This block represents code point values in the high surrogate
1753          * range: U+D800 through U+DB7F.
1754          *
1755          * @since 1.5
1756          */
1757         public static final UnicodeBlock HIGH_SURROGATES =
1758             new UnicodeBlock("HIGH_SURROGATES",  // same text as the field name
1759                              "HIGH SURROGATES",  // underscores replaced by spaces
1760                              "HIGHSURROGATES");  // spaces dropped
1761 
1762         /**
1763          * Constant for the "High Private Use Surrogates" Unicode character
1764          * block.
1765          * This block represents code point values in the private use high
1766          * surrogate range: U+DB80 through U+DBFF.
1767          *
1768          * @since 1.5
1769          */
1770         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES =
1771             new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES",  // same text as the field name
1772                              "HIGH PRIVATE USE SURROGATES",  // underscores replaced by spaces
1773                              "HIGHPRIVATEUSESURROGATES");  // spaces dropped
1774 
1775         /**
1776          * Constant for the "Low Surrogates" Unicode character block.
1777          * This block represents code point values in the low surrogate
1778          * range: U+DC00 through U+DFFF.
1779          *
1780          * @since 1.5
1781          */
1782         public static final UnicodeBlock LOW_SURROGATES =
1783             new UnicodeBlock("LOW_SURROGATES",  // same text as the field name
1784                              "LOW SURROGATES",  // underscores replaced by spaces
1785                              "LOWSURROGATES");  // spaces dropped
1786 
1787         /**
1788          * Constant for the "Arabic Supplement" Unicode character block.
1789          * @since 1.7
1790          */
1791         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1792             new UnicodeBlock("ARABIC_SUPPLEMENT",  // same text as the field name
1793                              "ARABIC SUPPLEMENT",  // underscores replaced by spaces
1794                              "ARABICSUPPLEMENT");  // spaces dropped
1795 
1796         /**
1797          * Constant for the "NKo" Unicode character block.
1798          * @since 1.7
1799          */
1800         public static final UnicodeBlock NKO =
1801             new UnicodeBlock("NKO");  // single argument: same text as the field name
1802 
1803         /**
1804          * Constant for the "Samaritan" Unicode character block.
1805          * @since 1.7
1806          */
1807         public static final UnicodeBlock SAMARITAN =
1808             new UnicodeBlock("SAMARITAN");  // single argument: same text as the field name
1809 
1810         /**
1811          * Constant for the "Mandaic" Unicode character block.
1812          * @since 1.7
1813          */
1814         public static final UnicodeBlock MANDAIC =
1815             new UnicodeBlock("MANDAIC");  // single argument: same text as the field name
1816 
1817         /**
1818          * Constant for the "Ethiopic Supplement" Unicode character block.
1819          * @since 1.7
1820          */
1821         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1822             new UnicodeBlock("ETHIOPIC_SUPPLEMENT",  // same text as the field name
1823                              "ETHIOPIC SUPPLEMENT",  // underscores replaced by spaces
1824                              "ETHIOPICSUPPLEMENT");  // spaces dropped
1825 
1826         /**
1827          * Constant for the "Unified Canadian Aboriginal Syllabics Extended"
1828          * Unicode character block.
1829          * @since 1.7
1830          */
1831         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1832             new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",  // same text as the field name
1833                              "UNIFIED CANADIAN ABORIGINAL SYLLABICS EXTENDED",  // underscores replaced by spaces
1834                              "UNIFIEDCANADIANABORIGINALSYLLABICSEXTENDED");  // spaces dropped
1835 
1836         /**
1837          * Constant for the "New Tai Lue" Unicode character block.
1838          * @since 1.7
1839          */
1840         public static final UnicodeBlock NEW_TAI_LUE =
1841             new UnicodeBlock("NEW_TAI_LUE",  // same text as the field name
1842                              "NEW TAI LUE",  // underscores replaced by spaces
1843                              "NEWTAILUE");  // spaces dropped
1844 
1845         /**
1846          * Constant for the "Buginese" Unicode character block.
1847          * @since 1.7
1848          */
1849         public static final UnicodeBlock BUGINESE =
1850             new UnicodeBlock("BUGINESE");  // single argument: same text as the field name
1851 
1852         /**
1853          * Constant for the "Tai Tham" Unicode character block.
1854          * @since 1.7
1855          */
1856         public static final UnicodeBlock TAI_THAM =
1857             new UnicodeBlock("TAI_THAM",  // same text as the field name
1858                              "TAI THAM",  // underscores replaced by spaces
1859                              "TAITHAM");  // spaces dropped
1860 
1861         /**
1862          * Constant for the "Balinese" Unicode character block.
1863          * @since 1.7
1864          */
1865         public static final UnicodeBlock BALINESE =
1866             new UnicodeBlock("BALINESE");  // single argument: same text as the field name
1867 
1868         /**
1869          * Constant for the "Sundanese" Unicode character block.
1870          * @since 1.7
1871          */
1872         public static final UnicodeBlock SUNDANESE =
1873             new UnicodeBlock("SUNDANESE");  // single argument: same text as the field name
1874 
1875         /**
1876          * Constant for the "Batak" Unicode character block.
1877          * @since 1.7
1878          */
1879         public static final UnicodeBlock BATAK =
1880             new UnicodeBlock("BATAK");  // single argument: same text as the field name
1881 
1882         /**
1883          * Constant for the "Lepcha" Unicode character block.
1884          * @since 1.7
1885          */
1886         public static final UnicodeBlock LEPCHA =
1887             new UnicodeBlock("LEPCHA");  // single argument: same text as the field name
1888 
1889         /**
1890          * Constant for the "Ol Chiki" Unicode character block.
1891          * @since 1.7
1892          */
1893         public static final UnicodeBlock OL_CHIKI =
1894             new UnicodeBlock("OL_CHIKI",  // same text as the field name
1895                              "OL CHIKI",  // underscores replaced by spaces
1896                              "OLCHIKI");  // spaces dropped
1897 
1898         /**
1899          * Constant for the "Vedic Extensions" Unicode character block.
1900          * @since 1.7
1901          */
1902         public static final UnicodeBlock VEDIC_EXTENSIONS =
1903             new UnicodeBlock("VEDIC_EXTENSIONS",  // same text as the field name
1904                              "VEDIC EXTENSIONS",  // underscores replaced by spaces
1905                              "VEDICEXTENSIONS");  // spaces dropped
1906 
1907         /**
1908          * Constant for the "Phonetic Extensions Supplement" Unicode character
1909          * block.
1910          * @since 1.7
1911          */
1912         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1913             new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",  // same text as the field name
1914                              "PHONETIC EXTENSIONS SUPPLEMENT",  // underscores replaced by spaces
1915                              "PHONETICEXTENSIONSSUPPLEMENT");  // spaces dropped
1916 
1917         /**
1918          * Constant for the "Combining Diacritical Marks Supplement" Unicode
1919          * character block.
1920          * @since 1.7
1921          */
1922         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1923             new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",  // same text as the field name
1924                              "COMBINING DIACRITICAL MARKS SUPPLEMENT",  // underscores replaced by spaces
1925                              "COMBININGDIACRITICALMARKSSUPPLEMENT");  // spaces dropped
1926 
1927         /**
1928          * Constant for the "Glagolitic" Unicode character block.
1929          * @since 1.7
1930          */
1931         public static final UnicodeBlock GLAGOLITIC =
1932             new UnicodeBlock("GLAGOLITIC");  // single argument: same text as the field name
1933 
1934         /**
1935          * Constant for the "Latin Extended-C" Unicode character block.
1936          * @since 1.7
1937          */
1938         public static final UnicodeBlock LATIN_EXTENDED_C =
1939             new UnicodeBlock("LATIN_EXTENDED_C",  // same text as the field name
1940                              "LATIN EXTENDED-C",  // underscores replaced by spaces, the last one by a hyphen
1941                              "LATINEXTENDED-C");  // spaces dropped, hyphen kept
1942 
1943         /**
1944          * Constant for the "Coptic" Unicode character block.
1945          * @since 1.7
1946          */
1947         public static final UnicodeBlock COPTIC =
1948             new UnicodeBlock("COPTIC");  // single argument: same text as the field name
1949 
1950         /**
1951          * Constant for the "Georgian Supplement" Unicode character block.
1952          * @since 1.7
1953          */
1954         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1955             new UnicodeBlock("GEORGIAN_SUPPLEMENT",  // same text as the field name
1956                              "GEORGIAN SUPPLEMENT",  // underscores replaced by spaces
1957                              "GEORGIANSUPPLEMENT");  // spaces dropped
1958 
1959         /**
1960          * Constant for the "Tifinagh" Unicode character block.
1961          * @since 1.7
1962          */
1963         public static final UnicodeBlock TIFINAGH =
1964             new UnicodeBlock("TIFINAGH");  // single argument: same text as the field name
1965 
1966         /**
1967          * Constant for the "Ethiopic Extended" Unicode character block.
1968          * @since 1.7
1969          */
1970         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1971             new UnicodeBlock("ETHIOPIC_EXTENDED",  // same text as the field name
1972                              "ETHIOPIC EXTENDED",  // underscores replaced by spaces
1973                              "ETHIOPICEXTENDED");  // spaces dropped
1974 
1975         /**
1976          * Constant for the "Cyrillic Extended-A" Unicode character block.
1977          * @since 1.7
1978          */
1979         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1980             new UnicodeBlock("CYRILLIC_EXTENDED_A",  // same text as the field name
1981                              "CYRILLIC EXTENDED-A",  // underscores replaced by spaces, the last one by a hyphen
1982                              "CYRILLICEXTENDED-A");  // spaces dropped, hyphen kept
1983 
1984         /**
1985          * Constant for the "Supplemental Punctuation" Unicode character block.
1986          * @since 1.7
1987          */
1988         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1989             new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION",  // same text as the field name
1990                              "SUPPLEMENTAL PUNCTUATION",  // underscores replaced by spaces
1991                              "SUPPLEMENTALPUNCTUATION");  // spaces dropped
1992 
1993         /**
1994          * Constant for the "CJK Strokes" Unicode character block.
1995          * @since 1.7
1996          */
1997         public static final UnicodeBlock CJK_STROKES =
1998             new UnicodeBlock("CJK_STROKES",  // same text as the field name
1999                              "CJK STROKES",  // underscores replaced by spaces
2000                              "CJKSTROKES");  // spaces dropped
2001 
2002         /**
2003          * Constant for the "Lisu" Unicode character block.
2004          * @since 1.7
2005          */
2006         public static final UnicodeBlock LISU =
2007             new UnicodeBlock("LISU");  // single argument: same text as the field name
2008 
2009         /**
2010          * Constant for the "Vai" Unicode character block.
2011          * @since 1.7
2012          */
2013         public static final UnicodeBlock VAI =
2014             new UnicodeBlock("VAI");  // single argument: same text as the field name
2015 
2016         /**
2017          * Constant for the "Cyrillic Extended-B" Unicode character block.
2018          * @since 1.7
2019          */
2020         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2021             new UnicodeBlock("CYRILLIC_EXTENDED_B",  // same text as the field name
2022                              "CYRILLIC EXTENDED-B",  // underscores replaced by spaces, the last one by a hyphen
2023                              "CYRILLICEXTENDED-B");  // spaces dropped, hyphen kept
2024 
2025         /**
2026          * Constant for the "Bamum" Unicode character block.
2027          * @since 1.7
2028          */
2029         public static final UnicodeBlock BAMUM =
2030             new UnicodeBlock("BAMUM");  // single argument: same text as the field name
2031 
2032         /**
2033          * Constant for the "Modifier Tone Letters" Unicode character block.
2034          * @since 1.7
2035          */
2036         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2037             new UnicodeBlock("MODIFIER_TONE_LETTERS",  // same text as the field name
2038                              "MODIFIER TONE LETTERS",  // underscores replaced by spaces
2039                              "MODIFIERTONELETTERS");  // spaces dropped
2040 
2041         /**
2042          * Constant for the "Latin Extended-D" Unicode character block.
2043          * @since 1.7
2044          */
2045         public static final UnicodeBlock LATIN_EXTENDED_D =
2046             new UnicodeBlock("LATIN_EXTENDED_D",  // same text as the field name
2047                              "LATIN EXTENDED-D",  // underscores replaced by spaces, the last one by a hyphen
2048                              "LATINEXTENDED-D");  // spaces dropped, hyphen kept
2049 
2050         /**
2051          * Constant for the "Syloti Nagri" Unicode character block.
2052          * @since 1.7
2053          */
2054         public static final UnicodeBlock SYLOTI_NAGRI =
2055             new UnicodeBlock("SYLOTI_NAGRI",  // same text as the field name
2056                              "SYLOTI NAGRI",  // underscores replaced by spaces
2057                              "SYLOTINAGRI");  // spaces dropped
2058 
2059         /**
2060          * Constant for the "Common Indic Number Forms" Unicode character block.
2061          * @since 1.7
2062          */
2063         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2064             new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS",  // same text as the field name
2065                              "COMMON INDIC NUMBER FORMS",  // underscores replaced by spaces
2066                              "COMMONINDICNUMBERFORMS");  // spaces dropped
2067 
2068         /**
2069          * Constant for the "Phags-pa" Unicode character block.
2070          * @since 1.7
2071          */
2072         public static final UnicodeBlock PHAGS_PA =
2073             new UnicodeBlock("PHAGS_PA",  // same text as the field name
2074                              "PHAGS-PA");  // underscore replaced by a hyphen; no third spelling is passed
2075 
2076         /**
2077          * Constant for the "Saurashtra" Unicode character block.
2078          * @since 1.7
2079          */
2080         public static final UnicodeBlock SAURASHTRA =
2081             new UnicodeBlock("SAURASHTRA");  // single argument: same text as the field name
2082 
2083         /**
2084          * Constant for the "Devanagari Extended" Unicode character block.
2085          * @since 1.7
2086          */
2087         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2088             new UnicodeBlock("DEVANAGARI_EXTENDED",  // same text as the field name
2089                              "DEVANAGARI EXTENDED",  // underscores replaced by spaces
2090                              "DEVANAGARIEXTENDED");  // spaces dropped
2091 
2092         /**
2093          * Constant for the "Kayah Li" Unicode character block.
2094          * @since 1.7
2095          */
2096         public static final UnicodeBlock KAYAH_LI =
2097             new UnicodeBlock("KAYAH_LI",  // same text as the field name
2098                              "KAYAH LI",  // underscores replaced by spaces
2099                              "KAYAHLI");  // spaces dropped
2100 
2101         /**
2102          * Constant for the "Rejang" Unicode character block.
2103          * @since 1.7
2104          */
2105         public static final UnicodeBlock REJANG =
2106             new UnicodeBlock("REJANG");  // single argument: same text as the field name
2107 
2108         /**
2109          * Constant for the "Hangul Jamo Extended-A" Unicode character block.
2110          * @since 1.7
2111          */
2112         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2113             new UnicodeBlock("HANGUL_JAMO_EXTENDED_A",  // same text as the field name
2114                              "HANGUL JAMO EXTENDED-A",  // underscores replaced by spaces, the last one by a hyphen
2115                              "HANGULJAMOEXTENDED-A");  // spaces dropped, hyphen kept
2116 
2117         /**
2118          * Constant for the "Javanese" Unicode character block.
2119          * @since 1.7
2120          */
2121         public static final UnicodeBlock JAVANESE =
2122             new UnicodeBlock("JAVANESE");  // single argument: same text as the field name
2123 
2124         /**
2125          * Constant for the "Cham" Unicode character block.
2126          * @since 1.7
2127          */
2128         public static final UnicodeBlock CHAM =
2129             new UnicodeBlock("CHAM");  // single argument: same text as the field name
2130 
2131         /**
2132          * Constant for the "Myanmar Extended-A" Unicode character block.
2133          * @since 1.7
2134          */
2135         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2136             new UnicodeBlock("MYANMAR_EXTENDED_A",  // same text as the field name
2137                              "MYANMAR EXTENDED-A",  // underscores replaced by spaces, the last one by a hyphen
2138                              "MYANMAREXTENDED-A");  // spaces dropped, hyphen kept
2139 
2140         /**
2141          * Constant for the "Tai Viet" Unicode character block.
2142          * @since 1.7
2143          */
2144         public static final UnicodeBlock TAI_VIET =
2145             new UnicodeBlock("TAI_VIET",  // same text as the field name
2146                              "TAI VIET",  // underscores replaced by spaces
2147                              "TAIVIET");  // spaces dropped
2148 
2149         /**
2150          * Constant for the "Ethiopic Extended-A" Unicode character block.
2151          * @since 1.7
2152          */
2153         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2154             new UnicodeBlock("ETHIOPIC_EXTENDED_A",  // same text as the field name
2155                              "ETHIOPIC EXTENDED-A",  // underscores replaced by spaces, the last one by a hyphen
2156                              "ETHIOPICEXTENDED-A");  // spaces dropped, hyphen kept
2157 
2158         /**
2159          * Constant for the "Meetei Mayek" Unicode character block.
2160          * @since 1.7
2161          */
2162         public static final UnicodeBlock MEETEI_MAYEK =
2163             new UnicodeBlock("MEETEI_MAYEK",  // same text as the field name
2164                              "MEETEI MAYEK",  // underscores replaced by spaces
2165                              "MEETEIMAYEK");  // spaces dropped
2166 
2167         /**
2168          * Constant for the "Hangul Jamo Extended-B" Unicode character block.
2169          * @since 1.7
2170          */
2171         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2172             new UnicodeBlock("HANGUL_JAMO_EXTENDED_B",  // same text as the field name
2173                              "HANGUL JAMO EXTENDED-B",  // underscores replaced by spaces, the last one by a hyphen
2174                              "HANGULJAMOEXTENDED-B");  // spaces dropped, hyphen kept
2175 
2176         /**
2177          * Constant for the "Vertical Forms" Unicode character block.
2178          * @since 1.7
2179          */
2180         public static final UnicodeBlock VERTICAL_FORMS =
2181             new UnicodeBlock("VERTICAL_FORMS",  // same text as the field name
2182                              "VERTICAL FORMS",  // underscores replaced by spaces
2183                              "VERTICALFORMS");  // spaces dropped
2184 
2185         /**
2186          * Constant for the "Ancient Greek Numbers" Unicode character block.
2187          * @since 1.7
2188          */
2189         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2190             new UnicodeBlock("ANCIENT_GREEK_NUMBERS",  // same text as the field name
2191                              "ANCIENT GREEK NUMBERS",  // underscores replaced by spaces
2192                              "ANCIENTGREEKNUMBERS");  // spaces dropped
2193 
2194         /**
2195          * Constant for the "Ancient Symbols" Unicode character block.
2196          * @since 1.7
2197          */
2198         public static final UnicodeBlock ANCIENT_SYMBOLS =
2199             new UnicodeBlock("ANCIENT_SYMBOLS",  // same text as the field name
2200                              "ANCIENT SYMBOLS",  // underscores replaced by spaces
2201                              "ANCIENTSYMBOLS");  // spaces dropped
2202 
2203         /**
2204          * Constant for the "Phaistos Disc" Unicode character block.
2205          * @since 1.7
2206          */
2207         public static final UnicodeBlock PHAISTOS_DISC =
2208             new UnicodeBlock("PHAISTOS_DISC",  // same text as the field name
2209                              "PHAISTOS DISC",  // underscores replaced by spaces
2210                              "PHAISTOSDISC");  // spaces dropped
2211 
2212         /**
2213          * Constant for the "Lycian" Unicode character block.
2214          * @since 1.7
2215          */
2216         public static final UnicodeBlock LYCIAN =
2217             new UnicodeBlock("LYCIAN");  // single argument: same text as the field name
2218 
2219         /**
2220          * Constant for the "Carian" Unicode character block.
2221          * @since 1.7
2222          */
2223         public static final UnicodeBlock CARIAN =
2224             new UnicodeBlock("CARIAN");  // single argument: same text as the field name
2225 
2226         /**
2227          * Constant for the "Old Persian" Unicode character block.
2228          * @since 1.7
2229          */
2230         public static final UnicodeBlock OLD_PERSIAN =
2231             new UnicodeBlock("OLD_PERSIAN",  // same text as the field name
2232                              "OLD PERSIAN",  // underscores replaced by spaces
2233                              "OLDPERSIAN");  // spaces dropped
2234 
2235         /**
2236          * Constant for the "Imperial Aramaic" Unicode character block.
2237          * @since 1.7
2238          */
2239         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2240             new UnicodeBlock("IMPERIAL_ARAMAIC",  // same text as the field name
2241                              "IMPERIAL ARAMAIC",  // underscores replaced by spaces
2242                              "IMPERIALARAMAIC");  // spaces dropped
2243 
2244         /**
2245          * Constant for the "Phoenician" Unicode character block.
2246          * @since 1.7
2247          */
2248         public static final UnicodeBlock PHOENICIAN =
2249             new UnicodeBlock("PHOENICIAN");  // single argument: same text as the field name
2250 
2251         /**
2252          * Constant for the "Lydian" Unicode character block.
2253          * @since 1.7
2254          */
2255         public static final UnicodeBlock LYDIAN =
2256             new UnicodeBlock("LYDIAN");  // single argument: same text as the field name
2257 
2258         /**
2259          * Constant for the "Kharoshthi" Unicode character block.
2260          * @since 1.7
2261          */
2262         public static final UnicodeBlock KHAROSHTHI =
2263             new UnicodeBlock("KHAROSHTHI");  // single argument: same text as the field name
2264 
2265         /**
2266          * Constant for the "Old South Arabian" Unicode character block.
2267          * @since 1.7
2268          */
2269         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2270             new UnicodeBlock("OLD_SOUTH_ARABIAN",  // same text as the field name
2271                              "OLD SOUTH ARABIAN",  // underscores replaced by spaces
2272                              "OLDSOUTHARABIAN");  // spaces dropped
2273 
2274         /**
2275          * Constant for the "Avestan" Unicode character block.
2276          * @since 1.7
2277          */
2278         public static final UnicodeBlock AVESTAN =
2279             new UnicodeBlock("AVESTAN");  // single argument: same text as the field name
2280 
2281         /**
2282          * Constant for the "Inscriptional Parthian" Unicode character block.
2283          * @since 1.7
2284          */
2285         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2286             new UnicodeBlock("INSCRIPTIONAL_PARTHIAN",  // same text as the field name
2287                              "INSCRIPTIONAL PARTHIAN",  // underscores replaced by spaces
2288                              "INSCRIPTIONALPARTHIAN");  // spaces dropped
2289 
2290         /**
2291          * Constant for the "Inscriptional Pahlavi" Unicode character block.
2292          * @since 1.7
2293          */
2294         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2295             new UnicodeBlock("INSCRIPTIONAL_PAHLAVI",  // same text as the field name
2296                              "INSCRIPTIONAL PAHLAVI",  // underscores replaced by spaces
2297                              "INSCRIPTIONALPAHLAVI");  // spaces dropped
2298 
2299         /**
2300          * Constant for the "Old Turkic" Unicode character block.
2301          * @since 1.7
2302          */
2303         public static final UnicodeBlock OLD_TURKIC =
2304             new UnicodeBlock("OLD_TURKIC",  // same text as the field name
2305                              "OLD TURKIC",  // underscores replaced by spaces
2306                              "OLDTURKIC");  // spaces dropped
2307 
2308         /**
2309          * Constant for the "Rumi Numeral Symbols" Unicode character block.
2310          * @since 1.7
2311          */
2312         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2313             new UnicodeBlock("RUMI_NUMERAL_SYMBOLS",  // same text as the field name
2314                              "RUMI NUMERAL SYMBOLS",  // underscores replaced by spaces
2315                              "RUMINUMERALSYMBOLS");  // spaces dropped
2316 
2317         /**
2318          * Constant for the "Brahmi" Unicode character block.
2319          * @since 1.7
2320          */
2321         public static final UnicodeBlock BRAHMI =
2322             new UnicodeBlock("BRAHMI");  // single argument: same text as the field name
2323 
2324         /**
2325          * Constant for the "Kaithi" Unicode character block.
2326          * @since 1.7
2327          */
2328         public static final UnicodeBlock KAITHI =
2329             new UnicodeBlock("KAITHI");  // single argument: same text as the field name
2330 
2331         /**
2332          * Constant for the "Cuneiform" Unicode character block.
2333          * @since 1.7
2334          */
2335         public static final UnicodeBlock CUNEIFORM =
2336             new UnicodeBlock("CUNEIFORM");  // single argument: same text as the field name
2337 
2338         /**
2339          * Constant for the "Cuneiform Numbers and Punctuation" Unicode
2340          * character block.
2341          * @since 1.7
2342          */
2343         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2344             new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",  // same text as the field name
2345                              "CUNEIFORM NUMBERS AND PUNCTUATION",  // underscores replaced by spaces
2346                              "CUNEIFORMNUMBERSANDPUNCTUATION");  // spaces dropped
2347 
2348         /**
2349          * Constant for the "Egyptian Hieroglyphs" Unicode character block.
2350          * @since 1.7
2351          */
2352         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2353             new UnicodeBlock("EGYPTIAN_HIEROGLYPHS",  // same text as the field name
2354                              "EGYPTIAN HIEROGLYPHS",  // underscores replaced by spaces
2355                              "EGYPTIANHIEROGLYPHS");  // spaces dropped
2356 
2357         /**
2358          * Constant for the "Bamum Supplement" Unicode character block.
2359          * @since 1.7
2360          */
2361         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2362             new UnicodeBlock("BAMUM_SUPPLEMENT",  // same text as the field name
2363                              "BAMUM SUPPLEMENT",  // underscores replaced by spaces
2364                              "BAMUMSUPPLEMENT");  // spaces dropped
2365 
2366         /**
2367          * Constant for the "Kana Supplement" Unicode character block.
2368          * @since 1.7
2369          */
2370         public static final UnicodeBlock KANA_SUPPLEMENT =
2371             new UnicodeBlock("KANA_SUPPLEMENT",  // same text as the field name
2372                              "KANA SUPPLEMENT",  // underscores replaced by spaces
2373                              "KANASUPPLEMENT");  // spaces dropped
2374 
2375         /**
2376          * Constant for the "Ancient Greek Musical Notation" Unicode character
2377          * block.
2378          * @since 1.7
2379          */
2380         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2381             new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",  // same text as the field name
2382                              "ANCIENT GREEK MUSICAL NOTATION",  // underscores replaced by spaces
2383                              "ANCIENTGREEKMUSICALNOTATION");  // spaces dropped
2384 
2385         /**
2386          * Constant for the "Counting Rod Numerals" Unicode character block.
2387          * @since 1.7
2388          */
2389         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2390             new UnicodeBlock("COUNTING_ROD_NUMERALS",  // same text as the field name
2391                              "COUNTING ROD NUMERALS",  // underscores replaced by spaces
2392                              "COUNTINGRODNUMERALS");  // spaces dropped
2393 
2394         /**
2395          * Constant for the "Mahjong Tiles" Unicode character block.
2396          * @since 1.7
2397          */
2398         public static final UnicodeBlock MAHJONG_TILES =
2399             new UnicodeBlock("MAHJONG_TILES",  // same text as the field name
2400                              "MAHJONG TILES",  // underscores replaced by spaces
2401                              "MAHJONGTILES");  // spaces dropped
2402 
2403         /**
2404          * Constant for the "Domino Tiles" Unicode character block.
2405          * @since 1.7
2406          */
2407         public static final UnicodeBlock DOMINO_TILES =
2408             new UnicodeBlock("DOMINO_TILES",  // same text as the field name
2409                              "DOMINO TILES",  // underscores replaced by spaces
2410                              "DOMINOTILES");  // spaces dropped
2411 
2412         /**
2413          * Constant for the "Playing Cards" Unicode character block.
2414          * @since 1.7
2415          */
2416         public static final UnicodeBlock PLAYING_CARDS =
2417             new UnicodeBlock("PLAYING_CARDS",  // same text as the field name
2418                              "PLAYING CARDS",  // underscores replaced by spaces
2419                              "PLAYINGCARDS");  // spaces dropped
2420 
2421         /**
2422          * Constant for the "Enclosed Alphanumeric Supplement" Unicode character
2423          * block.
2424          * @since 1.7
2425          */
2426         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2427             new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",  // same text as the field name
2428                              "ENCLOSED ALPHANUMERIC SUPPLEMENT",  // underscores replaced by spaces
2429                              "ENCLOSEDALPHANUMERICSUPPLEMENT");  // spaces dropped
2430 
2431         /**
2432          * Constant for the "Enclosed Ideographic Supplement" Unicode character
2433          * block.
2434          * @since 1.7
2435          */
2436         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2437             new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",  // same text as the field name
2438                              "ENCLOSED IDEOGRAPHIC SUPPLEMENT",  // underscores replaced by spaces
2439                              "ENCLOSEDIDEOGRAPHICSUPPLEMENT");  // spaces dropped
2440 
2441         /**
2442          * Constant for the "Miscellaneous Symbols And Pictographs" Unicode
2443          * character block.
2444          * @since 1.7
2445          */
2446         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2447             new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2448                              "MISCELLANEOUS SYMBOLS AND PICTOGRAPHS",
2449                              "MISCELLANEOUSSYMBOLSANDPICTOGRAPHS");
2450 
2451         /**
2452          * Constant for the "Emoticons" Unicode character block.
2453          * @since 1.7
2454          */
2455         public static final UnicodeBlock EMOTICONS =
2456             new UnicodeBlock("EMOTICONS");
2457 
2458         /**
2459          * Constant for the "Transport And Map Symbols" Unicode character block.
2460          * @since 1.7
2461          */
2462         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2463             new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS",
2464                              "TRANSPORT AND MAP SYMBOLS",
2465                              "TRANSPORTANDMAPSYMBOLS");
2466 
2467         /**
2468          * Constant for the "Alchemical Symbols" Unicode character block.
2469          * @since 1.7
2470          */
2471         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2472             new UnicodeBlock("ALCHEMICAL_SYMBOLS",
2473                              "ALCHEMICAL SYMBOLS",
2474                              "ALCHEMICALSYMBOLS");
2475 
2476         /**
2477          * Constant for the "CJK Unified Ideographs Extension C" Unicode
2478          * character block.
2479          * @since 1.7
2480          */
2481         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2482             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2483                              "CJK UNIFIED IDEOGRAPHS EXTENSION C",
2484                              "CJKUNIFIEDIDEOGRAPHSEXTENSIONC");
2485 
2486         /**
2487          * Constant for the "CJK Unified Ideographs Extension D" Unicode
2488          * character block.
2489          * @since 1.7
2490          */
2491         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2492             new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2493                              "CJK UNIFIED IDEOGRAPHS EXTENSION D",
2494                              "CJKUNIFIEDIDEOGRAPHSEXTENSIOND");
2495 
2496         private static final int blockStarts[] = {
2497             0x0000,   // 0000..007F; Basic Latin
2498             0x0080,   // 0080..00FF; Latin-1 Supplement
2499             0x0100,   // 0100..017F; Latin Extended-A
2500             0x0180,   // 0180..024F; Latin Extended-B
2501             0x0250,   // 0250..02AF; IPA Extensions
2502             0x02B0,   // 02B0..02FF; Spacing Modifier Letters
2503             0x0300,   // 0300..036F; Combining Diacritical Marks
2504             0x0370,   // 0370..03FF; Greek and Coptic
2505             0x0400,   // 0400..04FF; Cyrillic
2506             0x0500,   // 0500..052F; Cyrillic Supplement
2507             0x0530,   // 0530..058F; Armenian
2508             0x0590,   // 0590..05FF; Hebrew
2509             0x0600,   // 0600..06FF; Arabic
2510             0x0700,   // 0700..074F; Syriac
2511             0x0750,   // 0750..077F; Arabic Supplement
2512             0x0780,   // 0780..07BF; Thaana
2513             0x07C0,   // 07C0..07FF; NKo
2514             0x0800,   // 0800..083F; Samaritan
2515             0x0840,   // 0840..085F; Mandaic
2516             0x0860,   //             unassigned
2517             0x0900,   // 0900..097F; Devanagari
2518             0x0980,   // 0980..09FF; Bengali
2519             0x0A00,   // 0A00..0A7F; Gurmukhi
2520             0x0A80,   // 0A80..0AFF; Gujarati
2521             0x0B00,   // 0B00..0B7F; Oriya
2522             0x0B80,   // 0B80..0BFF; Tamil
2523             0x0C00,   // 0C00..0C7F; Telugu
2524             0x0C80,   // 0C80..0CFF; Kannada
2525             0x0D00,   // 0D00..0D7F; Malayalam
2526             0x0D80,   // 0D80..0DFF; Sinhala
2527             0x0E00,   // 0E00..0E7F; Thai
2528             0x0E80,   // 0E80..0EFF; Lao
2529             0x0F00,   // 0F00..0FFF; Tibetan
2530             0x1000,   // 1000..109F; Myanmar
2531             0x10A0,   // 10A0..10FF; Georgian
2532             0x1100,   // 1100..11FF; Hangul Jamo
2533             0x1200,   // 1200..137F; Ethiopic
2534             0x1380,   // 1380..139F; Ethiopic Supplement
2535             0x13A0,   // 13A0..13FF; Cherokee
2536             0x1400,   // 1400..167F; Unified Canadian Aboriginal Syllabics
2537             0x1680,   // 1680..169F; Ogham
2538             0x16A0,   // 16A0..16FF; Runic
2539             0x1700,   // 1700..171F; Tagalog
2540             0x1720,   // 1720..173F; Hanunoo
2541             0x1740,   // 1740..175F; Buhid
2542             0x1760,   // 1760..177F; Tagbanwa
2543             0x1780,   // 1780..17FF; Khmer
2544             0x1800,   // 1800..18AF; Mongolian
2545             0x18B0,   // 18B0..18FF; Unified Canadian Aboriginal Syllabics Extended
2546             0x1900,   // 1900..194F; Limbu
2547             0x1950,   // 1950..197F; Tai Le
2548             0x1980,   // 1980..19DF; New Tai Lue
2549             0x19E0,   // 19E0..19FF; Khmer Symbols
2550             0x1A00,   // 1A00..1A1F; Buginese
2551             0x1A20,   // 1A20..1AAF; Tai Tham
2552             0x1AB0,   //             unassigned
2553             0x1B00,   // 1B00..1B7F; Balinese
2554             0x1B80,   // 1B80..1BBF; Sundanese
2555             0x1BC0,   // 1BC0..1BFF; Batak
2556             0x1C00,   // 1C00..1C4F; Lepcha
2557             0x1C50,   // 1C50..1C7F; Ol Chiki
2558             0x1C80,   //             unassigned
2559             0x1CD0,   // 1CD0..1CFF; Vedic Extensions
2560             0x1D00,   // 1D00..1D7F; Phonetic Extensions
2561             0x1D80,   // 1D80..1DBF; Phonetic Extensions Supplement
2562             0x1DC0,   // 1DC0..1DFF; Combining Diacritical Marks Supplement
2563             0x1E00,   // 1E00..1EFF; Latin Extended Additional
2564             0x1F00,   // 1F00..1FFF; Greek Extended
2565             0x2000,   // 2000..206F; General Punctuation
2566             0x2070,   // 2070..209F; Superscripts and Subscripts
2567             0x20A0,   // 20A0..20CF; Currency Symbols
2568             0x20D0,   // 20D0..20FF; Combining Diacritical Marks for Symbols
2569             0x2100,   // 2100..214F; Letterlike Symbols
2570             0x2150,   // 2150..218F; Number Forms
2571             0x2190,   // 2190..21FF; Arrows
2572             0x2200,   // 2200..22FF; Mathematical Operators
2573             0x2300,   // 2300..23FF; Miscellaneous Technical
2574             0x2400,   // 2400..243F; Control Pictures
2575             0x2440,   // 2440..245F; Optical Character Recognition
2576             0x2460,   // 2460..24FF; Enclosed Alphanumerics
2577             0x2500,   // 2500..257F; Box Drawing
2578             0x2580,   // 2580..259F; Block Elements
2579             0x25A0,   // 25A0..25FF; Geometric Shapes
2580             0x2600,   // 2600..26FF; Miscellaneous Symbols
2581             0x2700,   // 2700..27BF; Dingbats
2582             0x27C0,   // 27C0..27EF; Miscellaneous Mathematical Symbols-A
2583             0x27F0,   // 27F0..27FF; Supplemental Arrows-A
2584             0x2800,   // 2800..28FF; Braille Patterns
2585             0x2900,   // 2900..297F; Supplemental Arrows-B
2586             0x2980,   // 2980..29FF; Miscellaneous Mathematical Symbols-B
2587             0x2A00,   // 2A00..2AFF; Supplemental Mathematical Operators
2588             0x2B00,   // 2B00..2BFF; Miscellaneous Symbols and Arrows
2589             0x2C00,   // 2C00..2C5F; Glagolitic
2590             0x2C60,   // 2C60..2C7F; Latin Extended-C
2591             0x2C80,   // 2C80..2CFF; Coptic
2592             0x2D00,   // 2D00..2D2F; Georgian Supplement
2593             0x2D30,   // 2D30..2D7F; Tifinagh
2594             0x2D80,   // 2D80..2DDF; Ethiopic Extended
2595             0x2DE0,   // 2DE0..2DFF; Cyrillic Extended-A
2596             0x2E00,   // 2E00..2E7F; Supplemental Punctuation
2597             0x2E80,   // 2E80..2EFF; CJK Radicals Supplement
2598             0x2F00,   // 2F00..2FDF; Kangxi Radicals
2599             0x2FE0,   //             unassigned
2600             0x2FF0,   // 2FF0..2FFF; Ideographic Description Characters
2601             0x3000,   // 3000..303F; CJK Symbols and Punctuation
2602             0x3040,   // 3040..309F; Hiragana
2603             0x30A0,   // 30A0..30FF; Katakana
2604             0x3100,   // 3100..312F; Bopomofo
2605             0x3130,   // 3130..318F; Hangul Compatibility Jamo
2606             0x3190,   // 3190..319F; Kanbun
2607             0x31A0,   // 31A0..31BF; Bopomofo Extended
2608             0x31C0,   // 31C0..31EF; CJK Strokes
2609             0x31F0,   // 31F0..31FF; Katakana Phonetic Extensions
2610             0x3200,   // 3200..32FF; Enclosed CJK Letters and Months
2611             0x3300,   // 3300..33FF; CJK Compatibility
2612             0x3400,   // 3400..4DBF; CJK Unified Ideographs Extension A
2613             0x4DC0,   // 4DC0..4DFF; Yijing Hexagram Symbols
2614             0x4E00,   // 4E00..9FFF; CJK Unified Ideographs
2615             0xA000,   // A000..A48F; Yi Syllables
2616             0xA490,   // A490..A4CF; Yi Radicals
2617             0xA4D0,   // A4D0..A4FF; Lisu
2618             0xA500,   // A500..A63F; Vai
2619             0xA640,   // A640..A69F; Cyrillic Extended-B
2620             0xA6A0,   // A6A0..A6FF; Bamum
2621             0xA700,   // A700..A71F; Modifier Tone Letters
2622             0xA720,   // A720..A7FF; Latin Extended-D
2623             0xA800,   // A800..A82F; Syloti Nagri
2624             0xA830,   // A830..A83F; Common Indic Number Forms
2625             0xA840,   // A840..A87F; Phags-pa
2626             0xA880,   // A880..A8DF; Saurashtra
2627             0xA8E0,   // A8E0..A8FF; Devanagari Extended
2628             0xA900,   // A900..A92F; Kayah Li
2629             0xA930,   // A930..A95F; Rejang
2630             0xA960,   // A960..A97F; Hangul Jamo Extended-A
2631             0xA980,   // A980..A9DF; Javanese
2632             0xA9E0,   //             unassigned
2633             0xAA00,   // AA00..AA5F; Cham
2634             0xAA60,   // AA60..AA7F; Myanmar Extended-A
2635             0xAA80,   // AA80..AADF; Tai Viet
2636             0xAAE0,   //             unassigned
2637             0xAB00,   // AB00..AB2F; Ethiopic Extended-A
2638             0xAB30,   //             unassigned
2639             0xABC0,   // ABC0..ABFF; Meetei Mayek
2640             0xAC00,   // AC00..D7AF; Hangul Syllables
2641             0xD7B0,   // D7B0..D7FF; Hangul Jamo Extended-B
2642             0xD800,   // D800..DB7F; High Surrogates
2643             0xDB80,   // DB80..DBFF; High Private Use Surrogates
2644             0xDC00,   // DC00..DFFF; Low Surrogates
2645             0xE000,   // E000..F8FF; Private Use Area
2646             0xF900,   // F900..FAFF; CJK Compatibility Ideographs
2647             0xFB00,   // FB00..FB4F; Alphabetic Presentation Forms
2648             0xFB50,   // FB50..FDFF; Arabic Presentation Forms-A
2649             0xFE00,   // FE00..FE0F; Variation Selectors
2650             0xFE10,   // FE10..FE1F; Vertical Forms
2651             0xFE20,   // FE20..FE2F; Combining Half Marks
2652             0xFE30,   // FE30..FE4F; CJK Compatibility Forms
2653             0xFE50,   // FE50..FE6F; Small Form Variants
2654             0xFE70,   // FE70..FEFF; Arabic Presentation Forms-B
2655             0xFF00,   // FF00..FFEF; Halfwidth and Fullwidth Forms
2656             0xFFF0,   // FFF0..FFFF; Specials
2657             0x10000,  // 10000..1007F; Linear B Syllabary
2658             0x10080,  // 10080..100FF; Linear B Ideograms
2659             0x10100,  // 10100..1013F; Aegean Numbers
2660             0x10140,  // 10140..1018F; Ancient Greek Numbers
2661             0x10190,  // 10190..101CF; Ancient Symbols
2662             0x101D0,  // 101D0..101FF; Phaistos Disc
2663             0x10200,  //               unassigned
2664             0x10280,  // 10280..1029F; Lycian
2665             0x102A0,  // 102A0..102DF; Carian
2666             0x102E0,  //               unassigned
2667             0x10300,  // 10300..1032F; Old Italic
2668             0x10330,  // 10330..1034F; Gothic
2669             0x10350,  //               unassigned
2670             0x10380,  // 10380..1039F; Ugaritic
2671             0x103A0,  // 103A0..103DF; Old Persian
2672             0x103E0,  //               unassigned
2673             0x10400,  // 10400..1044F; Deseret
2674             0x10450,  // 10450..1047F; Shavian
2675             0x10480,  // 10480..104AF; Osmanya
2676             0x104B0,  //               unassigned
2677             0x10800,  // 10800..1083F; Cypriot Syllabary
2678             0x10840,  // 10840..1085F; Imperial Aramaic
2679             0x10860,  //               unassigned
2680             0x10900,  // 10900..1091F; Phoenician
2681             0x10920,  // 10920..1093F; Lydian
2682             0x10940,  //               unassigned
2683             0x10A00,  // 10A00..10A5F; Kharoshthi
2684             0x10A60,  // 10A60..10A7F; Old South Arabian
2685             0x10A80,  //               unassigned
2686             0x10B00,  // 10B00..10B3F; Avestan
2687             0x10B40,  // 10B40..10B5F; Inscriptional Parthian
2688             0x10B60,  // 10B60..10B7F; Inscriptional Pahlavi
2689             0x10B80,  //               unassigned
2690             0x10C00,  // 10C00..10C4F; Old Turkic
2691             0x10C50,  //               unassigned
2692             0x10E60,  // 10E60..10E7F; Rumi Numeral Symbols
2693             0x10E80,  //               unassigned
2694             0x11000,  // 11000..1107F; Brahmi
2695             0x11080,  // 11080..110CF; Kaithi
2696             0x110D0,  //               unassigned
2697             0x12000,  // 12000..123FF; Cuneiform
2698             0x12400,  // 12400..1247F; Cuneiform Numbers and Punctuation
2699             0x12480,  //               unassigned
2700             0x13000,  // 13000..1342F; Egyptian Hieroglyphs
2701             0x13430,  //               unassigned
2702             0x16800,  // 16800..16A3F; Bamum Supplement
2703             0x16A40,  //               unassigned
2704             0x1B000,  // 1B000..1B0FF; Kana Supplement
2705             0x1B100,  //               unassigned
2706             0x1D000,  // 1D000..1D0FF; Byzantine Musical Symbols
2707             0x1D100,  // 1D100..1D1FF; Musical Symbols
2708             0x1D200,  // 1D200..1D24F; Ancient Greek Musical Notation
2709             0x1D250,  //               unassigned
2710             0x1D300,  // 1D300..1D35F; Tai Xuan Jing Symbols
2711             0x1D360,  // 1D360..1D37F; Counting Rod Numerals
2712             0x1D380,  //               unassigned
2713             0x1D400,  // 1D400..1D7FF; Mathematical Alphanumeric Symbols
2714             0x1D800,  //               unassigned
2715             0x1F000,  // 1F000..1F02F; Mahjong Tiles
2716             0x1F030,  // 1F030..1F09F; Domino Tiles
2717             0x1F0A0,  // 1F0A0..1F0FF; Playing Cards
2718             0x1F100,  // 1F100..1F1FF; Enclosed Alphanumeric Supplement
2719             0x1F200,  // 1F200..1F2FF; Enclosed Ideographic Supplement
2720             0x1F300,  // 1F300..1F5FF; Miscellaneous Symbols And Pictographs
2721             0x1F600,  // 1F600..1F64F; Emoticons
2722             0x1F650,  //               unassigned
2723             0x1F680,  // 1F680..1F6FF; Transport And Map Symbols
2724             0x1F700,  // 1F700..1F77F; Alchemical Symbols
2725             0x1F780,  //               unassigned
2726             0x20000,  // 20000..2A6DF; CJK Unified Ideographs Extension B
2727             0x2A6E0,  //               unassigned
2728             0x2A700,  // 2A700..2B73F; CJK Unified Ideographs Extension C
2729             0x2B740,  // 2B740..2B81F; CJK Unified Ideographs Extension D
2730             0x2B820,  //               unassigned
2731             0x2F800,  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
2732             0x2FA20,  //               unassigned
2733             0xE0000,  // E0000..E007F; Tags
2734             0xE0080,  //               unassigned
2735             0xE0100,  // E0100..E01EF; Variation Selectors Supplement
2736             0xE01F0,  //               unassigned
2737             0xF0000,  // F0000..FFFFF; Supplementary Private Use Area-A
2738             0x100000  // 100000..10FFFF; Supplementary Private Use Area-B
2739         };
2740 
2741         private static final UnicodeBlock[] blocks = {
2742             BASIC_LATIN,
2743             LATIN_1_SUPPLEMENT,
2744             LATIN_EXTENDED_A,
2745             LATIN_EXTENDED_B,
2746             IPA_EXTENSIONS,
2747             SPACING_MODIFIER_LETTERS,
2748             COMBINING_DIACRITICAL_MARKS,
2749             GREEK,
2750             CYRILLIC,
2751             CYRILLIC_SUPPLEMENTARY,
2752             ARMENIAN,
2753             HEBREW,
2754             ARABIC,
2755             SYRIAC,
2756             ARABIC_SUPPLEMENT,
2757             THAANA,
2758             NKO,
2759             SAMARITAN,
2760             MANDAIC,
2761             null,
2762             DEVANAGARI,
2763             BENGALI,
2764             GURMUKHI,
2765             GUJARATI,
2766             ORIYA,
2767             TAMIL,
2768             TELUGU,
2769             KANNADA,
2770             MALAYALAM,
2771             SINHALA,
2772             THAI,
2773             LAO,
2774             TIBETAN,
2775             MYANMAR,
2776             GEORGIAN,
2777             HANGUL_JAMO,
2778             ETHIOPIC,
2779             ETHIOPIC_SUPPLEMENT,
2780             CHEROKEE,
2781             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS,
2782             OGHAM,
2783             RUNIC,
2784             TAGALOG,
2785             HANUNOO,
2786             BUHID,
2787             TAGBANWA,
2788             KHMER,
2789             MONGOLIAN,
2790             UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED,
2791             LIMBU,
2792             TAI_LE,
2793             NEW_TAI_LUE,
2794             KHMER_SYMBOLS,
2795             BUGINESE,
2796             TAI_THAM,
2797             null,
2798             BALINESE,
2799             SUNDANESE,
2800             BATAK,
2801             LEPCHA,
2802             OL_CHIKI,
2803             null,
2804             VEDIC_EXTENSIONS,
2805             PHONETIC_EXTENSIONS,
2806             PHONETIC_EXTENSIONS_SUPPLEMENT,
2807             COMBINING_DIACRITICAL_MARKS_SUPPLEMENT,
2808             LATIN_EXTENDED_ADDITIONAL,
2809             GREEK_EXTENDED,
2810             GENERAL_PUNCTUATION,
2811             SUPERSCRIPTS_AND_SUBSCRIPTS,
2812             CURRENCY_SYMBOLS,
2813             COMBINING_MARKS_FOR_SYMBOLS,
2814             LETTERLIKE_SYMBOLS,
2815             NUMBER_FORMS,
2816             ARROWS,
2817             MATHEMATICAL_OPERATORS,
2818             MISCELLANEOUS_TECHNICAL,
2819             CONTROL_PICTURES,
2820             OPTICAL_CHARACTER_RECOGNITION,
2821             ENCLOSED_ALPHANUMERICS,
2822             BOX_DRAWING,
2823             BLOCK_ELEMENTS,
2824             GEOMETRIC_SHAPES,
2825             MISCELLANEOUS_SYMBOLS,
2826             DINGBATS,
2827             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A,
2828             SUPPLEMENTAL_ARROWS_A,
2829             BRAILLE_PATTERNS,
2830             SUPPLEMENTAL_ARROWS_B,
2831             MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
2832             SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
2833             MISCELLANEOUS_SYMBOLS_AND_ARROWS,
2834             GLAGOLITIC,
2835             LATIN_EXTENDED_C,
2836             COPTIC,
2837             GEORGIAN_SUPPLEMENT,
2838             TIFINAGH,
2839             ETHIOPIC_EXTENDED,
2840             CYRILLIC_EXTENDED_A,
2841             SUPPLEMENTAL_PUNCTUATION,
2842             CJK_RADICALS_SUPPLEMENT,
2843             KANGXI_RADICALS,
2844             null,
2845             IDEOGRAPHIC_DESCRIPTION_CHARACTERS,
2846             CJK_SYMBOLS_AND_PUNCTUATION,
2847             HIRAGANA,
2848             KATAKANA,
2849             BOPOMOFO,
2850             HANGUL_COMPATIBILITY_JAMO,
2851             KANBUN,
2852             BOPOMOFO_EXTENDED,
2853             CJK_STROKES,
2854             KATAKANA_PHONETIC_EXTENSIONS,
2855             ENCLOSED_CJK_LETTERS_AND_MONTHS,
2856             CJK_COMPATIBILITY,
2857             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A,
2858             YIJING_HEXAGRAM_SYMBOLS,
2859             CJK_UNIFIED_IDEOGRAPHS,
2860             YI_SYLLABLES,
2861             YI_RADICALS,
2862             LISU,
2863             VAI,
2864             CYRILLIC_EXTENDED_B,
2865             BAMUM,
2866             MODIFIER_TONE_LETTERS,
2867             LATIN_EXTENDED_D,
2868             SYLOTI_NAGRI,
2869             COMMON_INDIC_NUMBER_FORMS,
2870             PHAGS_PA,
2871             SAURASHTRA,
2872             DEVANAGARI_EXTENDED,
2873             KAYAH_LI,
2874             REJANG,
2875             HANGUL_JAMO_EXTENDED_A,
2876             JAVANESE,
2877             null,
2878             CHAM,
2879             MYANMAR_EXTENDED_A,
2880             TAI_VIET,
2881             null,
2882             ETHIOPIC_EXTENDED_A,
2883             null,
2884             MEETEI_MAYEK,
2885             HANGUL_SYLLABLES,
2886             HANGUL_JAMO_EXTENDED_B,
2887             HIGH_SURROGATES,
2888             HIGH_PRIVATE_USE_SURROGATES,
2889             LOW_SURROGATES,
2890             PRIVATE_USE_AREA,
2891             CJK_COMPATIBILITY_IDEOGRAPHS,
2892             ALPHABETIC_PRESENTATION_FORMS,
2893             ARABIC_PRESENTATION_FORMS_A,
2894             VARIATION_SELECTORS,
2895             VERTICAL_FORMS,
2896             COMBINING_HALF_MARKS,
2897             CJK_COMPATIBILITY_FORMS,
2898             SMALL_FORM_VARIANTS,
2899             ARABIC_PRESENTATION_FORMS_B,
2900             HALFWIDTH_AND_FULLWIDTH_FORMS,
2901             SPECIALS,
2902             LINEAR_B_SYLLABARY,
2903             LINEAR_B_IDEOGRAMS,
2904             AEGEAN_NUMBERS,
2905             ANCIENT_GREEK_NUMBERS,
2906             ANCIENT_SYMBOLS,
2907             PHAISTOS_DISC,
2908             null,
2909             LYCIAN,
2910             CARIAN,
2911             null,
2912             OLD_ITALIC,
2913             GOTHIC,
2914             null,
2915             UGARITIC,
2916             OLD_PERSIAN,
2917             null,
2918             DESERET,
2919             SHAVIAN,
2920             OSMANYA,
2921             null,
2922             CYPRIOT_SYLLABARY,
2923             IMPERIAL_ARAMAIC,
2924             null,
2925             PHOENICIAN,
2926             LYDIAN,
2927             null,
2928             KHAROSHTHI,
2929             OLD_SOUTH_ARABIAN,
2930             null,
2931             AVESTAN,
2932             INSCRIPTIONAL_PARTHIAN,
2933             INSCRIPTIONAL_PAHLAVI,
2934             null,
2935             OLD_TURKIC,
2936             null,
2937             RUMI_NUMERAL_SYMBOLS,
2938             null,
2939             BRAHMI,
2940             KAITHI,
2941             null,
2942             CUNEIFORM,
2943             CUNEIFORM_NUMBERS_AND_PUNCTUATION,
2944             null,
2945             EGYPTIAN_HIEROGLYPHS,
2946             null,
2947             BAMUM_SUPPLEMENT,
2948             null,
2949             KANA_SUPPLEMENT,
2950             null,
2951             BYZANTINE_MUSICAL_SYMBOLS,
2952             MUSICAL_SYMBOLS,
2953             ANCIENT_GREEK_MUSICAL_NOTATION,
2954             null,
2955             TAI_XUAN_JING_SYMBOLS,
2956             COUNTING_ROD_NUMERALS,
2957             null,
2958             MATHEMATICAL_ALPHANUMERIC_SYMBOLS,
2959             null,
2960             MAHJONG_TILES,
2961             DOMINO_TILES,
2962             PLAYING_CARDS,
2963             ENCLOSED_ALPHANUMERIC_SUPPLEMENT,
2964             ENCLOSED_IDEOGRAPHIC_SUPPLEMENT,
2965             MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS,
2966             EMOTICONS,
2967             null,
2968             TRANSPORT_AND_MAP_SYMBOLS,
2969             ALCHEMICAL_SYMBOLS,
2970             null,
2971             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B,
2972             null,
2973             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C,
2974             CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D,
2975             null,
2976             CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT,
2977             null,
2978             TAGS,
2979             null,
2980             VARIATION_SELECTORS_SUPPLEMENT,
2981             null,
2982             SUPPLEMENTARY_PRIVATE_USE_AREA_A,
2983             SUPPLEMENTARY_PRIVATE_USE_AREA_B
2984         };
2985 
2986 
2987         /**
2988          * Returns the object representing the Unicode block containing the
2989          * given character, or {@code null} if the character is not a
2990          * member of a defined block.
2991          *
2992          * <p><b>Note:</b> This method cannot handle
2993          * <a href="Character.html#supplementary"> supplementary
2994          * characters</a>.  To support all Unicode characters, including
2995          * supplementary characters, use the {@link #of(int)} method.
2996          *
2997          * @param   c  The character in question
2998          * @return  The {@code UnicodeBlock} instance representing the
2999          *          Unicode block of which this character is a member, or
3000          *          {@code null} if the character is not a member of any
3001          *          Unicode block
3002          */
of(char c)3003         public static UnicodeBlock of(char c) {
3004             return of((int)c);
3005         }
3006 
3007         /**
3008          * Returns the object representing the Unicode block
3009          * containing the given character (Unicode code point), or
3010          * {@code null} if the character is not a member of a
3011          * defined block.
3012          *
3013          * @param   codePoint the character (Unicode code point) in question.
3014          * @return  The {@code UnicodeBlock} instance representing the
3015          *          Unicode block of which this character is a member, or
3016          *          {@code null} if the character is not a member of any
3017          *          Unicode block
3018          * @exception IllegalArgumentException if the specified
3019          * {@code codePoint} is an invalid Unicode code point.
3020          * @see Character#isValidCodePoint(int)
3021          * @since   1.5
3022          */
of(int codePoint)3023         public static UnicodeBlock of(int codePoint) {
3024             if (!isValidCodePoint(codePoint)) {
3025                 throw new IllegalArgumentException();
3026             }
3027 
3028             int top, bottom, current;
3029             bottom = 0;
3030             top = blockStarts.length;
3031             current = top/2;
3032 
3033             // invariant: top > current >= bottom && codePoint >= unicodeBlockStarts[bottom]
3034             while (top - bottom > 1) {
3035                 if (codePoint >= blockStarts[current]) {
3036                     bottom = current;
3037                 } else {
3038                     top = current;
3039                 }
3040                 current = (top + bottom) / 2;
3041             }
3042             return blocks[current];
3043         }
3044 
3045         /**
3046          * Returns the UnicodeBlock with the given name. Block
3047          * names are determined by The Unicode Standard. The file
3048          * Blocks-&lt;version&gt;.txt defines blocks for a particular
3049          * version of the standard. The {@link Character} class specifies
3050          * the version of the standard that it supports.
3051          * <p>
3052          * This method accepts block names in the following forms:
3053          * <ol>
3054          * <li> Canonical block names as defined by the Unicode Standard.
3055          * For example, the standard defines a "Basic Latin" block. Therefore, this
3056          * method accepts "Basic Latin" as a valid block name. The documentation of
3057          * each UnicodeBlock provides the canonical name.
3058          * <li>Canonical block names with all spaces removed. For example, "BasicLatin"
3059          * is a valid block name for the "Basic Latin" block.
3060          * <li>The text representation of each constant UnicodeBlock identifier.
3061          * For example, this method will return the {@link #BASIC_LATIN} block if
3062          * provided with the "BASIC_LATIN" name. This form replaces all spaces and
3063          * hyphens in the canonical name with underscores.
3064          * </ol>
3065          * Finally, character case is ignored for all of the valid block name forms.
3066          * For example, "BASIC_LATIN" and "basic_latin" are both valid block names.
3067          * The en_US locale's case mapping rules are used to provide case-insensitive
3068          * string comparisons for block name validation.
3069          * <p>
3070          * If the Unicode Standard changes block names, both the previous and
3071          * current names will be accepted.
3072          *
3073          * @param blockName A {@code UnicodeBlock} name.
3074          * @return The {@code UnicodeBlock} instance identified
3075          *         by {@code blockName}
3076          * @throws IllegalArgumentException if {@code blockName} is an
3077          *         invalid name
3078          * @throws NullPointerException if {@code blockName} is null
3079          * @since 1.5
3080          */
forName(String blockName)3081         public static final UnicodeBlock forName(String blockName) {
3082             UnicodeBlock block = map.get(blockName.toUpperCase(Locale.US));
3083             if (block == null) {
3084                 throw new IllegalArgumentException();
3085             }
3086             return block;
3087         }
3088     }
3089 
3090 
3091     /**
3092      * A family of character subsets representing the character scripts
3093      * defined in the <a href="http://www.unicode.org/reports/tr24/">
3094      * <i>Unicode Standard Annex #24: Script Names</i></a>. Every Unicode
3095      * character is assigned to a single Unicode script, either a specific
3096      * script, such as {@link Character.UnicodeScript#LATIN Latin}, or
3097      * one of the following three special values,
3098      * {@link Character.UnicodeScript#INHERITED Inherited},
3099      * {@link Character.UnicodeScript#COMMON Common} or
3100      * {@link Character.UnicodeScript#UNKNOWN Unknown}.
3101      *
3102      * @since 1.7
3103      */
3104     public static enum UnicodeScript {
3105         /**
3106          * Unicode script "Common".
3107          */
3108         COMMON,
3109 
3110         /**
3111          * Unicode script "Latin".
3112          */
3113         LATIN,
3114 
3115         /**
3116          * Unicode script "Greek".
3117          */
3118         GREEK,
3119 
3120         /**
3121          * Unicode script "Cyrillic".
3122          */
3123         CYRILLIC,
3124 
3125         /**
3126          * Unicode script "Armenian".
3127          */
3128         ARMENIAN,
3129 
3130         /**
3131          * Unicode script "Hebrew".
3132          */
3133         HEBREW,
3134 
3135         /**
3136          * Unicode script "Arabic".
3137          */
3138         ARABIC,
3139 
3140         /**
3141          * Unicode script "Syriac".
3142          */
3143         SYRIAC,
3144 
3145         /**
3146          * Unicode script "Thaana".
3147          */
3148         THAANA,
3149 
3150         /**
3151          * Unicode script "Devanagari".
3152          */
3153         DEVANAGARI,
3154 
3155         /**
3156          * Unicode script "Bengali".
3157          */
3158         BENGALI,
3159 
3160         /**
3161          * Unicode script "Gurmukhi".
3162          */
3163         GURMUKHI,
3164 
3165         /**
3166          * Unicode script "Gujarati".
3167          */
3168         GUJARATI,
3169 
3170         /**
3171          * Unicode script "Oriya".
3172          */
3173         ORIYA,
3174 
3175         /**
3176          * Unicode script "Tamil".
3177          */
3178         TAMIL,
3179 
3180         /**
3181          * Unicode script "Telugu".
3182          */
3183         TELUGU,
3184 
3185         /**
3186          * Unicode script "Kannada".
3187          */
3188         KANNADA,
3189 
3190         /**
3191          * Unicode script "Malayalam".
3192          */
3193         MALAYALAM,
3194 
3195         /**
3196          * Unicode script "Sinhala".
3197          */
3198         SINHALA,
3199 
3200         /**
3201          * Unicode script "Thai".
3202          */
3203         THAI,
3204 
3205         /**
3206          * Unicode script "Lao".
3207          */
3208         LAO,
3209 
3210         /**
3211          * Unicode script "Tibetan".
3212          */
3213         TIBETAN,
3214 
3215         /**
3216          * Unicode script "Myanmar".
3217          */
3218         MYANMAR,
3219 
3220         /**
3221          * Unicode script "Georgian".
3222          */
3223         GEORGIAN,
3224 
3225         /**
3226          * Unicode script "Hangul".
3227          */
3228         HANGUL,
3229 
3230         /**
3231          * Unicode script "Ethiopic".
3232          */
3233         ETHIOPIC,
3234 
3235         /**
3236          * Unicode script "Cherokee".
3237          */
3238         CHEROKEE,
3239 
3240         /**
3241          * Unicode script "Canadian_Aboriginal".
3242          */
3243         CANADIAN_ABORIGINAL,
3244 
3245         /**
3246          * Unicode script "Ogham".
3247          */
3248         OGHAM,
3249 
3250         /**
3251          * Unicode script "Runic".
3252          */
3253         RUNIC,
3254 
3255         /**
3256          * Unicode script "Khmer".
3257          */
3258         KHMER,
3259 
3260         /**
3261          * Unicode script "Mongolian".
3262          */
3263         MONGOLIAN,
3264 
3265         /**
3266          * Unicode script "Hiragana".
3267          */
3268         HIRAGANA,
3269 
3270         /**
3271          * Unicode script "Katakana".
3272          */
3273         KATAKANA,
3274 
3275         /**
3276          * Unicode script "Bopomofo".
3277          */
3278         BOPOMOFO,
3279 
3280         /**
3281          * Unicode script "Han".
3282          */
3283         HAN,
3284 
3285         /**
3286          * Unicode script "Yi".
3287          */
3288         YI,
3289 
3290         /**
3291          * Unicode script "Old_Italic".
3292          */
3293         OLD_ITALIC,
3294 
3295         /**
3296          * Unicode script "Gothic".
3297          */
3298         GOTHIC,
3299 
3300         /**
3301          * Unicode script "Deseret".
3302          */
3303         DESERET,
3304 
3305         /**
3306          * Unicode script "Inherited".
3307          */
3308         INHERITED,
3309 
3310         /**
3311          * Unicode script "Tagalog".
3312          */
3313         TAGALOG,
3314 
3315         /**
3316          * Unicode script "Hanunoo".
3317          */
3318         HANUNOO,
3319 
3320         /**
3321          * Unicode script "Buhid".
3322          */
3323         BUHID,
3324 
3325         /**
3326          * Unicode script "Tagbanwa".
3327          */
3328         TAGBANWA,
3329 
3330         /**
3331          * Unicode script "Limbu".
3332          */
3333         LIMBU,
3334 
3335         /**
3336          * Unicode script "Tai_Le".
3337          */
3338         TAI_LE,
3339 
3340         /**
3341          * Unicode script "Linear_B".
3342          */
3343         LINEAR_B,
3344 
3345         /**
3346          * Unicode script "Ugaritic".
3347          */
3348         UGARITIC,
3349 
3350         /**
3351          * Unicode script "Shavian".
3352          */
3353         SHAVIAN,
3354 
3355         /**
3356          * Unicode script "Osmanya".
3357          */
3358         OSMANYA,
3359 
3360         /**
3361          * Unicode script "Cypriot".
3362          */
3363         CYPRIOT,
3364 
3365         /**
3366          * Unicode script "Braille".
3367          */
3368         BRAILLE,
3369 
3370         /**
3371          * Unicode script "Buginese".
3372          */
3373         BUGINESE,
3374 
3375         /**
3376          * Unicode script "Coptic".
3377          */
3378         COPTIC,
3379 
3380         /**
3381          * Unicode script "New_Tai_Lue".
3382          */
3383         NEW_TAI_LUE,
3384 
3385         /**
3386          * Unicode script "Glagolitic".
3387          */
3388         GLAGOLITIC,
3389 
3390         /**
3391          * Unicode script "Tifinagh".
3392          */
3393         TIFINAGH,
3394 
3395         /**
3396          * Unicode script "Syloti_Nagri".
3397          */
3398         SYLOTI_NAGRI,
3399 
3400         /**
3401          * Unicode script "Old_Persian".
3402          */
3403         OLD_PERSIAN,
3404 
3405         /**
3406          * Unicode script "Kharoshthi".
3407          */
3408         KHAROSHTHI,
3409 
3410         /**
3411          * Unicode script "Balinese".
3412          */
3413         BALINESE,
3414 
3415         /**
3416          * Unicode script "Cuneiform".
3417          */
3418         CUNEIFORM,
3419 
3420         /**
3421          * Unicode script "Phoenician".
3422          */
3423         PHOENICIAN,
3424 
3425         /**
3426          * Unicode script "Phags_Pa".
3427          */
3428         PHAGS_PA,
3429 
3430         /**
3431          * Unicode script "Nko".
3432          */
3433         NKO,
3434 
3435         /**
3436          * Unicode script "Sundanese".
3437          */
3438         SUNDANESE,
3439 
3440         /**
3441          * Unicode script "Batak".
3442          */
3443         BATAK,
3444 
3445         /**
3446          * Unicode script "Lepcha".
3447          */
3448         LEPCHA,
3449 
3450         /**
3451          * Unicode script "Ol_Chiki".
3452          */
3453         OL_CHIKI,
3454 
3455         /**
3456          * Unicode script "Vai".
3457          */
3458         VAI,
3459 
3460         /**
3461          * Unicode script "Saurashtra".
3462          */
3463         SAURASHTRA,
3464 
3465         /**
3466          * Unicode script "Kayah_Li".
3467          */
3468         KAYAH_LI,
3469 
3470         /**
3471          * Unicode script "Rejang".
3472          */
3473         REJANG,
3474 
3475         /**
3476          * Unicode script "Lycian".
3477          */
3478         LYCIAN,
3479 
3480         /**
3481          * Unicode script "Carian".
3482          */
3483         CARIAN,
3484 
3485         /**
3486          * Unicode script "Lydian".
3487          */
3488         LYDIAN,
3489 
3490         /**
3491          * Unicode script "Cham".
3492          */
3493         CHAM,
3494 
3495         /**
3496          * Unicode script "Tai_Tham".
3497          */
3498         TAI_THAM,
3499 
3500         /**
3501          * Unicode script "Tai_Viet".
3502          */
3503         TAI_VIET,
3504 
3505         /**
3506          * Unicode script "Avestan".
3507          */
3508         AVESTAN,
3509 
3510         /**
3511          * Unicode script "Egyptian_Hieroglyphs".
3512          */
3513         EGYPTIAN_HIEROGLYPHS,
3514 
3515         /**
3516          * Unicode script "Samaritan".
3517          */
3518         SAMARITAN,
3519 
3520         /**
3521          * Unicode script "Mandaic".
3522          */
3523         MANDAIC,
3524 
3525         /**
3526          * Unicode script "Lisu".
3527          */
3528         LISU,
3529 
3530         /**
3531          * Unicode script "Bamum".
3532          */
3533         BAMUM,
3534 
3535         /**
3536          * Unicode script "Javanese".
3537          */
3538         JAVANESE,
3539 
3540         /**
3541          * Unicode script "Meetei_Mayek".
3542          */
3543         MEETEI_MAYEK,
3544 
3545         /**
3546          * Unicode script "Imperial_Aramaic".
3547          */
3548         IMPERIAL_ARAMAIC,
3549 
3550         /**
3551          * Unicode script "Old_South_Arabian".
3552          */
3553         OLD_SOUTH_ARABIAN,
3554 
3555         /**
3556          * Unicode script "Inscriptional_Parthian".
3557          */
3558         INSCRIPTIONAL_PARTHIAN,
3559 
3560         /**
3561          * Unicode script "Inscriptional_Pahlavi".
3562          */
3563         INSCRIPTIONAL_PAHLAVI,
3564 
3565         /**
3566          * Unicode script "Old_Turkic".
3567          */
3568         OLD_TURKIC,
3569 
3570         /**
3571          * Unicode script "Brahmi".
3572          */
3573         BRAHMI,
3574 
3575         /**
3576          * Unicode script "Kaithi".
3577          */
3578         KAITHI,
3579 
3580         /**
3581          * Unicode script "Unknown".
3582          */
3583         UNKNOWN;
3584 
3585         private static final int[] scriptStarts = {
                 // Table of range boundaries: each entry is the first code point
                 // of a run, listed in strictly ascending order. The trailing
                 // comment on every row gives the run's "start..end" extent and
                 // the script it belongs to; the constant at the same index of
                 // the scripts array names that same script, so the two arrays
                 // have to be edited together and kept the same length.
                 // NOTE(review): the rows for 0x0C82 and 0x16800 document an end
                 // that is not one below the next row's start; how the code
                 // points in between resolve depends on the lookup code, which
                 // is outside this table -- confirm before relying on it.
3586             0x0000,   // 0000..0040; COMMON
3587             0x0041,   // 0041..005A; LATIN
3588             0x005B,   // 005B..0060; COMMON
3589             0x0061,   // 0061..007A; LATIN
3590             0x007B,   // 007B..00A9; COMMON
3591             0x00AA,   // 00AA..00AA; LATIN
3592             0x00AB,   // 00AB..00B9; COMMON
3593             0x00BA,   // 00BA..00BA; LATIN
3594             0x00BB,   // 00BB..00BF; COMMON
3595             0x00C0,   // 00C0..00D6; LATIN
3596             0x00D7,   // 00D7..00D7; COMMON
3597             0x00D8,   // 00D8..00F6; LATIN
3598             0x00F7,   // 00F7..00F7; COMMON
3599             0x00F8,   // 00F8..02B8; LATIN
3600             0x02B9,   // 02B9..02DF; COMMON
3601             0x02E0,   // 02E0..02E4; LATIN
3602             0x02E5,   // 02E5..02E9; COMMON
3603             0x02EA,   // 02EA..02EB; BOPOMOFO
3604             0x02EC,   // 02EC..02FF; COMMON
3605             0x0300,   // 0300..036F; INHERITED
3606             0x0370,   // 0370..0373; GREEK
3607             0x0374,   // 0374..0374; COMMON
3608             0x0375,   // 0375..037D; GREEK
3609             0x037E,   // 037E..0383; COMMON
3610             0x0384,   // 0384..0384; GREEK
3611             0x0385,   // 0385..0385; COMMON
3612             0x0386,   // 0386..0386; GREEK
3613             0x0387,   // 0387..0387; COMMON
3614             0x0388,   // 0388..03E1; GREEK
3615             0x03E2,   // 03E2..03EF; COPTIC
3616             0x03F0,   // 03F0..03FF; GREEK
3617             0x0400,   // 0400..0484; CYRILLIC
3618             0x0485,   // 0485..0486; INHERITED
3619             0x0487,   // 0487..0530; CYRILLIC
3620             0x0531,   // 0531..0588; ARMENIAN
3621             0x0589,   // 0589..0589; COMMON
3622             0x058A,   // 058A..0590; ARMENIAN
3623             0x0591,   // 0591..05FF; HEBREW
3624             0x0600,   // 0600..060B; ARABIC
3625             0x060C,   // 060C..060C; COMMON
3626             0x060D,   // 060D..061A; ARABIC
3627             0x061B,   // 061B..061D; COMMON
3628             0x061E,   // 061E..061E; ARABIC
3629             0x061F,   // 061F..061F; COMMON
3630             0x0620,   // 0620..063F; ARABIC
3631             0x0640,   // 0640..0640; COMMON
3632             0x0641,   // 0641..064A; ARABIC
3633             0x064B,   // 064B..0655; INHERITED
3634             0x0656,   // 0656..065E; ARABIC
3635             0x065F,   // 065F..065F; INHERITED
3636             0x0660,   // 0660..0669; COMMON
3637             0x066A,   // 066A..066F; ARABIC
3638             0x0670,   // 0670..0670; INHERITED
3639             0x0671,   // 0671..06DC; ARABIC
3640             0x06DD,   // 06DD..06DD; COMMON
3641             0x06DE,   // 06DE..06FF; ARABIC
3642             0x0700,   // 0700..074F; SYRIAC
3643             0x0750,   // 0750..077F; ARABIC
3644             0x0780,   // 0780..07BF; THAANA
3645             0x07C0,   // 07C0..07FF; NKO
3646             0x0800,   // 0800..083F; SAMARITAN
3647             0x0840,   // 0840..08FF; MANDAIC
3648             0x0900,   // 0900..0950; DEVANAGARI
3649             0x0951,   // 0951..0952; INHERITED
3650             0x0953,   // 0953..0963; DEVANAGARI
3651             0x0964,   // 0964..0965; COMMON
3652             0x0966,   // 0966..096F; DEVANAGARI
3653             0x0970,   // 0970..0970; COMMON
3654             0x0971,   // 0971..0980; DEVANAGARI
3655             0x0981,   // 0981..0A00; BENGALI
3656             0x0A01,   // 0A01..0A80; GURMUKHI
3657             0x0A81,   // 0A81..0B00; GUJARATI
3658             0x0B01,   // 0B01..0B81; ORIYA
3659             0x0B82,   // 0B82..0C00; TAMIL
3660             0x0C01,   // 0C01..0C81; TELUGU
3661             0x0C82,   // 0C82..0CF0; KANNADA
3662             0x0D02,   // 0D02..0D81; MALAYALAM
3663             0x0D82,   // 0D82..0E00; SINHALA
3664             0x0E01,   // 0E01..0E3E; THAI
3665             0x0E3F,   // 0E3F..0E3F; COMMON
3666             0x0E40,   // 0E40..0E80; THAI
3667             0x0E81,   // 0E81..0EFF; LAO
3668             0x0F00,   // 0F00..0FD4; TIBETAN
3669             0x0FD5,   // 0FD5..0FD8; COMMON
3670             0x0FD9,   // 0FD9..0FFF; TIBETAN
3671             0x1000,   // 1000..109F; MYANMAR
3672             0x10A0,   // 10A0..10FA; GEORGIAN
3673             0x10FB,   // 10FB..10FB; COMMON
3674             0x10FC,   // 10FC..10FF; GEORGIAN
3675             0x1100,   // 1100..11FF; HANGUL
3676             0x1200,   // 1200..139F; ETHIOPIC
3677             0x13A0,   // 13A0..13FF; CHEROKEE
3678             0x1400,   // 1400..167F; CANADIAN_ABORIGINAL
3679             0x1680,   // 1680..169F; OGHAM
3680             0x16A0,   // 16A0..16EA; RUNIC
3681             0x16EB,   // 16EB..16ED; COMMON
3682             0x16EE,   // 16EE..16FF; RUNIC
3683             0x1700,   // 1700..171F; TAGALOG
3684             0x1720,   // 1720..1734; HANUNOO
3685             0x1735,   // 1735..173F; COMMON
3686             0x1740,   // 1740..175F; BUHID
3687             0x1760,   // 1760..177F; TAGBANWA
3688             0x1780,   // 1780..17FF; KHMER
3689             0x1800,   // 1800..1801; MONGOLIAN
3690             0x1802,   // 1802..1803; COMMON
3691             0x1804,   // 1804..1804; MONGOLIAN
3692             0x1805,   // 1805..1805; COMMON
3693             0x1806,   // 1806..18AF; MONGOLIAN
3694             0x18B0,   // 18B0..18FF; CANADIAN_ABORIGINAL
3695             0x1900,   // 1900..194F; LIMBU
3696             0x1950,   // 1950..197F; TAI_LE
3697             0x1980,   // 1980..19DF; NEW_TAI_LUE
3698             0x19E0,   // 19E0..19FF; KHMER
3699             0x1A00,   // 1A00..1A1F; BUGINESE
3700             0x1A20,   // 1A20..1AFF; TAI_THAM
3701             0x1B00,   // 1B00..1B7F; BALINESE
3702             0x1B80,   // 1B80..1BBF; SUNDANESE
3703             0x1BC0,   // 1BC0..1BFF; BATAK
3704             0x1C00,   // 1C00..1C4F; LEPCHA
3705             0x1C50,   // 1C50..1CCF; OL_CHIKI
3706             0x1CD0,   // 1CD0..1CD2; INHERITED
3707             0x1CD3,   // 1CD3..1CD3; COMMON
3708             0x1CD4,   // 1CD4..1CE0; INHERITED
3709             0x1CE1,   // 1CE1..1CE1; COMMON
3710             0x1CE2,   // 1CE2..1CE8; INHERITED
3711             0x1CE9,   // 1CE9..1CEC; COMMON
3712             0x1CED,   // 1CED..1CED; INHERITED
3713             0x1CEE,   // 1CEE..1CFF; COMMON
3714             0x1D00,   // 1D00..1D25; LATIN
3715             0x1D26,   // 1D26..1D2A; GREEK
3716             0x1D2B,   // 1D2B..1D2B; CYRILLIC
3717             0x1D2C,   // 1D2C..1D5C; LATIN
3718             0x1D5D,   // 1D5D..1D61; GREEK
3719             0x1D62,   // 1D62..1D65; LATIN
3720             0x1D66,   // 1D66..1D6A; GREEK
3721             0x1D6B,   // 1D6B..1D77; LATIN
3722             0x1D78,   // 1D78..1D78; CYRILLIC
3723             0x1D79,   // 1D79..1DBE; LATIN
3724             0x1DBF,   // 1DBF..1DBF; GREEK
3725             0x1DC0,   // 1DC0..1DFF; INHERITED
3726             0x1E00,   // 1E00..1EFF; LATIN
3727             0x1F00,   // 1F00..1FFF; GREEK
3728             0x2000,   // 2000..200B; COMMON
3729             0x200C,   // 200C..200D; INHERITED
3730             0x200E,   // 200E..2070; COMMON
3731             0x2071,   // 2071..2073; LATIN
3732             0x2074,   // 2074..207E; COMMON
3733             0x207F,   // 207F..207F; LATIN
3734             0x2080,   // 2080..208F; COMMON
3735             0x2090,   // 2090..209F; LATIN
3736             0x20A0,   // 20A0..20CF; COMMON
3737             0x20D0,   // 20D0..20FF; INHERITED
3738             0x2100,   // 2100..2125; COMMON
3739             0x2126,   // 2126..2126; GREEK
3740             0x2127,   // 2127..2129; COMMON
3741             0x212A,   // 212A..212B; LATIN
3742             0x212C,   // 212C..2131; COMMON
3743             0x2132,   // 2132..2132; LATIN
3744             0x2133,   // 2133..214D; COMMON
3745             0x214E,   // 214E..214E; LATIN
3746             0x214F,   // 214F..215F; COMMON
3747             0x2160,   // 2160..2188; LATIN
3748             0x2189,   // 2189..27FF; COMMON
3749             0x2800,   // 2800..28FF; BRAILLE
3750             0x2900,   // 2900..2BFF; COMMON
3751             0x2C00,   // 2C00..2C5F; GLAGOLITIC
3752             0x2C60,   // 2C60..2C7F; LATIN
3753             0x2C80,   // 2C80..2CFF; COPTIC
3754             0x2D00,   // 2D00..2D2F; GEORGIAN
3755             0x2D30,   // 2D30..2D7F; TIFINAGH
3756             0x2D80,   // 2D80..2DDF; ETHIOPIC
3757             0x2DE0,   // 2DE0..2DFF; CYRILLIC
3758             0x2E00,   // 2E00..2E7F; COMMON
3759             0x2E80,   // 2E80..2FEF; HAN
3760             0x2FF0,   // 2FF0..3004; COMMON
3761             0x3005,   // 3005..3005; HAN
3762             0x3006,   // 3006..3006; COMMON
3763             0x3007,   // 3007..3007; HAN
3764             0x3008,   // 3008..3020; COMMON
3765             0x3021,   // 3021..3029; HAN
3766             0x302A,   // 302A..302D; INHERITED
3767             0x302E,   // 302E..302F; HANGUL
3768             0x3030,   // 3030..3037; COMMON
3769             0x3038,   // 3038..303B; HAN
3770             0x303C,   // 303C..3040; COMMON
3771             0x3041,   // 3041..3098; HIRAGANA
3772             0x3099,   // 3099..309A; INHERITED
3773             0x309B,   // 309B..309C; COMMON
3774             0x309D,   // 309D..309F; HIRAGANA
3775             0x30A0,   // 30A0..30A0; COMMON
3776             0x30A1,   // 30A1..30FA; KATAKANA
3777             0x30FB,   // 30FB..30FC; COMMON
3778             0x30FD,   // 30FD..3104; KATAKANA
3779             0x3105,   // 3105..3130; BOPOMOFO
3780             0x3131,   // 3131..318F; HANGUL
3781             0x3190,   // 3190..319F; COMMON
3782             0x31A0,   // 31A0..31BF; BOPOMOFO
3783             0x31C0,   // 31C0..31EF; COMMON
3784             0x31F0,   // 31F0..31FF; KATAKANA
3785             0x3200,   // 3200..321F; HANGUL
3786             0x3220,   // 3220..325F; COMMON
3787             0x3260,   // 3260..327E; HANGUL
3788             0x327F,   // 327F..32CF; COMMON
3789             0x32D0,   // 32D0..3357; KATAKANA
3790             0x3358,   // 3358..33FF; COMMON
3791             0x3400,   // 3400..4DBF; HAN
3792             0x4DC0,   // 4DC0..4DFF; COMMON
3793             0x4E00,   // 4E00..9FFF; HAN
3794             0xA000,   // A000..A4CF; YI
3795             0xA4D0,   // A4D0..A4FF; LISU
3796             0xA500,   // A500..A63F; VAI
3797             0xA640,   // A640..A69F; CYRILLIC
3798             0xA6A0,   // A6A0..A6FF; BAMUM
3799             0xA700,   // A700..A721; COMMON
3800             0xA722,   // A722..A787; LATIN
3801             0xA788,   // A788..A78A; COMMON
3802             0xA78B,   // A78B..A7FF; LATIN
3803             0xA800,   // A800..A82F; SYLOTI_NAGRI
3804             0xA830,   // A830..A83F; COMMON
3805             0xA840,   // A840..A87F; PHAGS_PA
3806             0xA880,   // A880..A8DF; SAURASHTRA
3807             0xA8E0,   // A8E0..A8FF; DEVANAGARI
3808             0xA900,   // A900..A92F; KAYAH_LI
3809             0xA930,   // A930..A95F; REJANG
3810             0xA960,   // A960..A97F; HANGUL
3811             0xA980,   // A980..A9FF; JAVANESE
3812             0xAA00,   // AA00..AA5F; CHAM
3813             0xAA60,   // AA60..AA7F; MYANMAR
3814             0xAA80,   // AA80..AB00; TAI_VIET
3815             0xAB01,   // AB01..ABBF; ETHIOPIC
3816             0xABC0,   // ABC0..ABFF; MEETEI_MAYEK
3817             0xAC00,   // AC00..D7FB; HANGUL
3818             0xD7FC,   // D7FC..F8FF; UNKNOWN
3819             0xF900,   // F900..FAFF; HAN
3820             0xFB00,   // FB00..FB12; LATIN
3821             0xFB13,   // FB13..FB1C; ARMENIAN
3822             0xFB1D,   // FB1D..FB4F; HEBREW
3823             0xFB50,   // FB50..FD3D; ARABIC
3824             0xFD3E,   // FD3E..FD4F; COMMON
3825             0xFD50,   // FD50..FDFC; ARABIC
3826             0xFDFD,   // FDFD..FDFF; COMMON
3827             0xFE00,   // FE00..FE0F; INHERITED
3828             0xFE10,   // FE10..FE1F; COMMON
3829             0xFE20,   // FE20..FE2F; INHERITED
3830             0xFE30,   // FE30..FE6F; COMMON
3831             0xFE70,   // FE70..FEFE; ARABIC
3832             0xFEFF,   // FEFF..FF20; COMMON
3833             0xFF21,   // FF21..FF3A; LATIN
3834             0xFF3B,   // FF3B..FF40; COMMON
3835             0xFF41,   // FF41..FF5A; LATIN
3836             0xFF5B,   // FF5B..FF65; COMMON
3837             0xFF66,   // FF66..FF6F; KATAKANA
3838             0xFF70,   // FF70..FF70; COMMON
3839             0xFF71,   // FF71..FF9D; KATAKANA
3840             0xFF9E,   // FF9E..FF9F; COMMON
3841             0xFFA0,   // FFA0..FFDF; HANGUL
3842             0xFFE0,   // FFE0..FFFF; COMMON
3843             0x10000,  // 10000..100FF; LINEAR_B
3844             0x10100,  // 10100..1013F; COMMON
3845             0x10140,  // 10140..1018F; GREEK
3846             0x10190,  // 10190..101FC; COMMON
3847             0x101FD,  // 101FD..1027F; INHERITED
3848             0x10280,  // 10280..1029F; LYCIAN
3849             0x102A0,  // 102A0..102FF; CARIAN
3850             0x10300,  // 10300..1032F; OLD_ITALIC
3851             0x10330,  // 10330..1037F; GOTHIC
3852             0x10380,  // 10380..1039F; UGARITIC
3853             0x103A0,  // 103A0..103FF; OLD_PERSIAN
3854             0x10400,  // 10400..1044F; DESERET
3855             0x10450,  // 10450..1047F; SHAVIAN
3856             0x10480,  // 10480..107FF; OSMANYA
3857             0x10800,  // 10800..1083F; CYPRIOT
3858             0x10840,  // 10840..108FF; IMPERIAL_ARAMAIC
3859             0x10900,  // 10900..1091F; PHOENICIAN
3860             0x10920,  // 10920..109FF; LYDIAN
3861             0x10A00,  // 10A00..10A5F; KHAROSHTHI
3862             0x10A60,  // 10A60..10AFF; OLD_SOUTH_ARABIAN
3863             0x10B00,  // 10B00..10B3F; AVESTAN
3864             0x10B40,  // 10B40..10B5F; INSCRIPTIONAL_PARTHIAN
3865             0x10B60,  // 10B60..10BFF; INSCRIPTIONAL_PAHLAVI
3866             0x10C00,  // 10C00..10E5F; OLD_TURKIC
3867             0x10E60,  // 10E60..10FFF; ARABIC
3868             0x11000,  // 11000..1107F; BRAHMI
3869             0x11080,  // 11080..11FFF; KAITHI
3870             0x12000,  // 12000..12FFF; CUNEIFORM
3871             0x13000,  // 13000..167FF; EGYPTIAN_HIEROGLYPHS
3872             0x16800,  // 16800..16A38; BAMUM
3873             0x1B000,  // 1B000..1B000; KATAKANA
3874             0x1B001,  // 1B001..1CFFF; HIRAGANA
3875             0x1D000,  // 1D000..1D166; COMMON
3876             0x1D167,  // 1D167..1D169; INHERITED
3877             0x1D16A,  // 1D16A..1D17A; COMMON
3878             0x1D17B,  // 1D17B..1D182; INHERITED
3879             0x1D183,  // 1D183..1D184; COMMON
3880             0x1D185,  // 1D185..1D18B; INHERITED
3881             0x1D18C,  // 1D18C..1D1A9; COMMON
3882             0x1D1AA,  // 1D1AA..1D1AD; INHERITED
3883             0x1D1AE,  // 1D1AE..1D1FF; COMMON
3884             0x1D200,  // 1D200..1D2FF; GREEK
3885             0x1D300,  // 1D300..1F1FF; COMMON
3886             0x1F200,  // 1F200..1F200; HIRAGANA
3887             0x1F201,  // 1F201..1FFFF; COMMON
3888             0x20000,  // 20000..E0000; HAN
3889             0xE0001,  // E0001..E00FF; COMMON
3890             0xE0100,  // E0100..E01EF; INHERITED
3891             0xE01F0   // E01F0..10FFFF; UNKNOWN
3892 
3893         };
3894 
3895         private static final UnicodeScript[] scripts = {
                 // Script constant for each run: the entry at index i names the
                 // script of the run that begins at scriptStarts[i], following
                 // the same order as the range comments in that table. Adding,
                 // removing or reordering an entry here requires the matching
                 // change there so the two arrays stay index-aligned.
3896             COMMON,
3897             LATIN,
3898             COMMON,
3899             LATIN,
3900             COMMON,
3901             LATIN,
3902             COMMON,
3903             LATIN,
3904             COMMON,
3905             LATIN,
3906             COMMON,
3907             LATIN,
3908             COMMON,
3909             LATIN,
3910             COMMON,
3911             LATIN,
3912             COMMON,
3913             BOPOMOFO,
3914             COMMON,
3915             INHERITED,
3916             GREEK,
3917             COMMON,
3918             GREEK,
3919             COMMON,
3920             GREEK,
3921             COMMON,
3922             GREEK,
3923             COMMON,
3924             GREEK,
3925             COPTIC,
3926             GREEK,
3927             CYRILLIC,
3928             INHERITED,
3929             CYRILLIC,
3930             ARMENIAN,
3931             COMMON,
3932             ARMENIAN,
3933             HEBREW,
3934             ARABIC,
3935             COMMON,
3936             ARABIC,
3937             COMMON,
3938             ARABIC,
3939             COMMON,
3940             ARABIC,
3941             COMMON,
3942             ARABIC,
3943             INHERITED,
3944             ARABIC,
3945             INHERITED,
3946             COMMON,
3947             ARABIC,
3948             INHERITED,
3949             ARABIC,
3950             COMMON,
3951             ARABIC,
3952             SYRIAC,
3953             ARABIC,
3954             THAANA,
3955             NKO,
3956             SAMARITAN,
3957             MANDAIC,
3958             DEVANAGARI,
3959             INHERITED,
3960             DEVANAGARI,
3961             COMMON,
3962             DEVANAGARI,
3963             COMMON,
3964             DEVANAGARI,
3965             BENGALI,
3966             GURMUKHI,
3967             GUJARATI,
3968             ORIYA,
3969             TAMIL,
3970             TELUGU,
3971             KANNADA,
3972             MALAYALAM,
3973             SINHALA,
3974             THAI,
3975             COMMON,
3976             THAI,
3977             LAO,
3978             TIBETAN,
3979             COMMON,
3980             TIBETAN,
3981             MYANMAR,
3982             GEORGIAN,
3983             COMMON,
3984             GEORGIAN,
3985             HANGUL,
3986             ETHIOPIC,
3987             CHEROKEE,
3988             CANADIAN_ABORIGINAL,
3989             OGHAM,
3990             RUNIC,
3991             COMMON,
3992             RUNIC,
3993             TAGALOG,
3994             HANUNOO,
3995             COMMON,
3996             BUHID,
3997             TAGBANWA,
3998             KHMER,
3999             MONGOLIAN,
4000             COMMON,
4001             MONGOLIAN,
4002             COMMON,
4003             MONGOLIAN,
4004             CANADIAN_ABORIGINAL,
4005             LIMBU,
4006             TAI_LE,
4007             NEW_TAI_LUE,
4008             KHMER,
4009             BUGINESE,
4010             TAI_THAM,
4011             BALINESE,
4012             SUNDANESE,
4013             BATAK,
4014             LEPCHA,
4015             OL_CHIKI,
4016             INHERITED,
4017             COMMON,
4018             INHERITED,
4019             COMMON,
4020             INHERITED,
4021             COMMON,
4022             INHERITED,
4023             COMMON,
4024             LATIN,
4025             GREEK,
4026             CYRILLIC,
4027             LATIN,
4028             GREEK,
4029             LATIN,
4030             GREEK,
4031             LATIN,
4032             CYRILLIC,
4033             LATIN,
4034             GREEK,
4035             INHERITED,
4036             LATIN,
4037             GREEK,
4038             COMMON,
4039             INHERITED,
4040             COMMON,
4041             LATIN,
4042             COMMON,
4043             LATIN,
4044             COMMON,
4045             LATIN,
4046             COMMON,
4047             INHERITED,
4048             COMMON,
4049             GREEK,
4050             COMMON,
4051             LATIN,
4052             COMMON,
4053             LATIN,
4054             COMMON,
4055             LATIN,
4056             COMMON,
4057             LATIN,
4058             COMMON,
4059             BRAILLE,
4060             COMMON,
4061             GLAGOLITIC,
4062             LATIN,
4063             COPTIC,
4064             GEORGIAN,
4065             TIFINAGH,
4066             ETHIOPIC,
4067             CYRILLIC,
4068             COMMON,
4069             HAN,
4070             COMMON,
4071             HAN,
4072             COMMON,
4073             HAN,
4074             COMMON,
4075             HAN,
4076             INHERITED,
4077             HANGUL,
4078             COMMON,
4079             HAN,
4080             COMMON,
4081             HIRAGANA,
4082             INHERITED,
4083             COMMON,
4084             HIRAGANA,
4085             COMMON,
4086             KATAKANA,
4087             COMMON,
4088             KATAKANA,
4089             BOPOMOFO,
4090             HANGUL,
4091             COMMON,
4092             BOPOMOFO,
4093             COMMON,
4094             KATAKANA,
4095             HANGUL,
4096             COMMON,
4097             HANGUL,
4098             COMMON,
4099             KATAKANA,
4100             COMMON,
4101             HAN,
4102             COMMON,
4103             HAN,
4104             YI,
4105             LISU,
4106             VAI,
4107             CYRILLIC,
4108             BAMUM,
4109             COMMON,
4110             LATIN,
4111             COMMON,
4112             LATIN,
4113             SYLOTI_NAGRI,
4114             COMMON,
4115             PHAGS_PA,
4116             SAURASHTRA,
4117             DEVANAGARI,
4118             KAYAH_LI,
4119             REJANG,
4120             HANGUL,
4121             JAVANESE,
4122             CHAM,
4123             MYANMAR,
4124             TAI_VIET,
4125             ETHIOPIC,
4126             MEETEI_MAYEK,
4127             HANGUL,
4128             UNKNOWN,
4129             HAN,
4130             LATIN,
4131             ARMENIAN,
4132             HEBREW,
4133             ARABIC,
4134             COMMON,
4135             ARABIC,
4136             COMMON,
4137             INHERITED,
4138             COMMON,
4139             INHERITED,
4140             COMMON,
4141             ARABIC,
4142             COMMON,
4143             LATIN,
4144             COMMON,
4145             LATIN,
4146             COMMON,
4147             KATAKANA,
4148             COMMON,
4149             KATAKANA,
4150             COMMON,
4151             HANGUL,
4152             COMMON,
4153             LINEAR_B,
4154             COMMON,
4155             GREEK,
4156             COMMON,
4157             INHERITED,
4158             LYCIAN,
4159             CARIAN,
4160             OLD_ITALIC,
4161             GOTHIC,
4162             UGARITIC,
4163             OLD_PERSIAN,
4164             DESERET,
4165             SHAVIAN,
4166             OSMANYA,
4167             CYPRIOT,
4168             IMPERIAL_ARAMAIC,
4169             PHOENICIAN,
4170             LYDIAN,
4171             KHAROSHTHI,
4172             OLD_SOUTH_ARABIAN,
4173             AVESTAN,
4174             INSCRIPTIONAL_PARTHIAN,
4175             INSCRIPTIONAL_PAHLAVI,
4176             OLD_TURKIC,
4177             ARABIC,
4178             BRAHMI,
4179             KAITHI,
4180             CUNEIFORM,
4181             EGYPTIAN_HIEROGLYPHS,
4182             BAMUM,
4183             KATAKANA,
4184             HIRAGANA,
4185             COMMON,
4186             INHERITED,
4187             COMMON,
4188             INHERITED,
4189             COMMON,
4190             INHERITED,
4191             COMMON,
4192             INHERITED,
4193             COMMON,
4194             GREEK,
4195             COMMON,
4196             HIRAGANA,
4197             COMMON,
4198             HAN,
4199             COMMON,
4200             INHERITED,
4201             UNKNOWN
4202         };
4203 
4204         private static HashMap<String, Character.UnicodeScript> aliases;
4205         static {
4206             aliases = new HashMap<>(128);
4207             aliases.put("ARAB", ARABIC);
4208             aliases.put("ARMI", IMPERIAL_ARAMAIC);
4209             aliases.put("ARMN", ARMENIAN);
4210             aliases.put("AVST", AVESTAN);
4211             aliases.put("BALI", BALINESE);
4212             aliases.put("BAMU", BAMUM);
4213             aliases.put("BATK", BATAK);
4214             aliases.put("BENG", BENGALI);
4215             aliases.put("BOPO", BOPOMOFO);
4216             aliases.put("BRAI", BRAILLE);
4217             aliases.put("BRAH", BRAHMI);
4218             aliases.put("BUGI", BUGINESE);
4219             aliases.put("BUHD", BUHID);
4220             aliases.put("CANS", CANADIAN_ABORIGINAL);
4221             aliases.put("CARI", CARIAN);
4222             aliases.put("CHAM", CHAM);
4223             aliases.put("CHER", CHEROKEE);
4224             aliases.put("COPT", COPTIC);
4225             aliases.put("CPRT", CYPRIOT);
4226             aliases.put("CYRL", CYRILLIC);
4227             aliases.put("DEVA", DEVANAGARI);
4228             aliases.put("DSRT", DESERET);
4229             aliases.put("EGYP", EGYPTIAN_HIEROGLYPHS);
4230             aliases.put("ETHI", ETHIOPIC);
4231             aliases.put("GEOR", GEORGIAN);
4232             aliases.put("GLAG", GLAGOLITIC);
4233             aliases.put("GOTH", GOTHIC);
4234             aliases.put("GREK", GREEK);
4235             aliases.put("GUJR", GUJARATI);
4236             aliases.put("GURU", GURMUKHI);
4237             aliases.put("HANG", HANGUL);
4238             aliases.put("HANI", HAN);
4239             aliases.put("HANO", HANUNOO);
4240             aliases.put("HEBR", HEBREW);
4241             aliases.put("HIRA", HIRAGANA);
4242             // it appears we don't have the KATAKANA_OR_HIRAGANA
4243             //aliases.put("HRKT", KATAKANA_OR_HIRAGANA);
4244             aliases.put("ITAL", OLD_ITALIC);
4245             aliases.put("JAVA", JAVANESE);
4246             aliases.put("KALI", KAYAH_LI);
4247             aliases.put("KANA", KATAKANA);
4248             aliases.put("KHAR", KHAROSHTHI);
4249             aliases.put("KHMR", KHMER);
4250             aliases.put("KNDA", KANNADA);
4251             aliases.put("KTHI", KAITHI);
4252             aliases.put("LANA", TAI_THAM);
4253             aliases.put("LAOO", LAO);
4254             aliases.put("LATN", LATIN);
4255             aliases.put("LEPC", LEPCHA);
4256             aliases.put("LIMB", LIMBU);
4257             aliases.put("LINB", LINEAR_B);
4258             aliases.put("LISU", LISU);
4259             aliases.put("LYCI", LYCIAN);
4260             aliases.put("LYDI", LYDIAN);
4261             aliases.put("MAND", MANDAIC);
4262             aliases.put("MLYM", MALAYALAM);
4263             aliases.put("MONG", MONGOLIAN);
4264             aliases.put("MTEI", MEETEI_MAYEK);
4265             aliases.put("MYMR", MYANMAR);
4266             aliases.put("NKOO", NKO);
4267             aliases.put("OGAM", OGHAM);
4268             aliases.put("OLCK", OL_CHIKI);
4269             aliases.put("ORKH", OLD_TURKIC);
4270             aliases.put("ORYA", ORIYA);
4271             aliases.put("OSMA", OSMANYA);
4272             aliases.put("PHAG", PHAGS_PA);
4273             aliases.put("PHLI", INSCRIPTIONAL_PAHLAVI);
4274             aliases.put("PHNX", PHOENICIAN);
4275             aliases.put("PRTI", INSCRIPTIONAL_PARTHIAN);
4276             aliases.put("RJNG", REJANG);
4277             aliases.put("RUNR", RUNIC);
4278             aliases.put("SAMR", SAMARITAN);
4279             aliases.put("SARB", OLD_SOUTH_ARABIAN);
4280             aliases.put("SAUR", SAURASHTRA);
4281             aliases.put("SHAW", SHAVIAN);
4282             aliases.put("SINH", SINHALA);
4283             aliases.put("SUND", SUNDANESE);
4284             aliases.put("SYLO", SYLOTI_NAGRI);
4285             aliases.put("SYRC", SYRIAC);
4286             aliases.put("TAGB", TAGBANWA);
4287             aliases.put("TALE", TAI_LE);
4288             aliases.put("TALU", NEW_TAI_LUE);
4289             aliases.put("TAML", TAMIL);
4290             aliases.put("TAVT", TAI_VIET);
4291             aliases.put("TELU", TELUGU);
4292             aliases.put("TFNG", TIFINAGH);
4293             aliases.put("TGLG", TAGALOG);
4294             aliases.put("THAA", THAANA);
4295             aliases.put("THAI", THAI);
4296             aliases.put("TIBT", TIBETAN);
4297             aliases.put("UGAR", UGARITIC);
4298             aliases.put("VAII", VAI);
4299             aliases.put("XPEO", OLD_PERSIAN);
4300             aliases.put("XSUX", CUNEIFORM);
4301             aliases.put("YIII", YI);
4302             aliases.put("ZINH", INHERITED);
4303             aliases.put("ZYYY", COMMON);
4304             aliases.put("ZZZZ", UNKNOWN);
4305         }
4306 
4307         /**
4308          * Returns the enum constant representing the Unicode script of which
4309          * the given character (Unicode code point) is assigned to.
4310          *
4311          * @param   codePoint the character (Unicode code point) in question.
4312          * @return  The {@code UnicodeScript} constant representing the
4313          *          Unicode script of which this character is assigned to.
4314          *
4315          * @exception IllegalArgumentException if the specified
4316          * {@code codePoint} is an invalid Unicode code point.
4317          * @see Character#isValidCodePoint(int)
4318          *
4319          */
of(int codePoint)4320         public static UnicodeScript of(int codePoint) {
4321             if (!isValidCodePoint(codePoint))
4322                 throw new IllegalArgumentException();
4323             int type = getType(codePoint);
4324             // leave SURROGATE and PRIVATE_USE for table lookup
4325             if (type == UNASSIGNED)
4326                 return UNKNOWN;
4327             int index = Arrays.binarySearch(scriptStarts, codePoint);
4328             if (index < 0)
4329                 index = -index - 2;
4330             return scripts[index];
4331         }
4332 
4333         /**
4334          * Returns the UnicodeScript constant with the given Unicode script
4335          * name or the script name alias. Script names and their aliases are
4336          * determined by The Unicode Standard. The files Scripts&lt;version&gt;.txt
4337          * and PropertyValueAliases&lt;version&gt;.txt define script names
4338          * and the script name aliases for a particular version of the
4339          * standard. The {@link Character} class specifies the version of
4340          * the standard that it supports.
4341          * <p>
4342          * Character case is ignored for all of the valid script names.
4343          * The en_US locale's case mapping rules are used to provide
4344          * case-insensitive string comparisons for script name validation.
4345          * <p>
4346          *
4347          * @param scriptName A {@code UnicodeScript} name.
4348          * @return The {@code UnicodeScript} constant identified
4349          *         by {@code scriptName}
4350          * @throws IllegalArgumentException if {@code scriptName} is an
4351          *         invalid name
4352          * @throws NullPointerException if {@code scriptName} is null
4353          */
forName(String scriptName)4354         public static final UnicodeScript forName(String scriptName) {
4355             scriptName = scriptName.toUpperCase(Locale.ENGLISH);
4356                                  //.replace(' ', '_'));
4357             UnicodeScript sc = aliases.get(scriptName);
4358             if (sc != null)
4359                 return sc;
4360             return valueOf(scriptName);
4361         }
4362     }
4363 
    /**
     * The primitive {@code char} value wrapped by this {@code Character}.
     *
     * @serial
     */
    private final char value;
4370 
    /**
     * Serialization version identifier. The value is the one used by
     * JDK 1.0.2 and is kept for interoperability.
     */
    private static final long serialVersionUID = 3786198910865385080L;
4373 
    /**
     * Constructs a newly allocated {@code Character} object that
     * represents the specified {@code char} value. A new object is
     * created on every call; {@link #valueOf(char)} can return cached
     * instances instead.
     *
     * @param  value   the value to be represented by the
     *                  {@code Character} object.
     */
    public Character(char value) {
        this.value = value;
    }
4384 
4385     private static class CharacterCache {
CharacterCache()4386         private CharacterCache(){}
4387 
4388         static final Character cache[] = new Character[127 + 1];
4389 
4390         static {
4391             for (int i = 0; i < cache.length; i++)
4392                 cache[i] = new Character((char)i);
4393         }
4394     }
4395 
4396     /**
4397      * Returns a <tt>Character</tt> instance representing the specified
4398      * <tt>char</tt> value.
4399      * If a new <tt>Character</tt> instance is not required, this method
4400      * should generally be used in preference to the constructor
4401      * {@link #Character(char)}, as this method is likely to yield
4402      * significantly better space and time performance by caching
4403      * frequently requested values.
4404      *
4405      * This method will always cache values in the range {@code
4406      * '\u005Cu0000'} to {@code '\u005Cu007F'}, inclusive, and may
4407      * cache other values outside of this range.
4408      *
4409      * @param  c a char value.
4410      * @return a <tt>Character</tt> instance representing <tt>c</tt>.
4411      * @since  1.5
4412      */
valueOf(char c)4413     public static Character valueOf(char c) {
4414         if (c <= 127) { // must cache
4415             return CharacterCache.cache[(int)c];
4416         }
4417         return new Character(c);
4418     }
4419 
4420     /**
4421      * Returns the value of this {@code Character} object.
4422      * @return  the primitive {@code char} value represented by
4423      *          this object.
4424      */
charValue()4425     public char charValue() {
4426         return value;
4427     }
4428 
4429     /**
4430      * Returns a hash code for this {@code Character}; equal to the result
4431      * of invoking {@code charValue()}.
4432      *
4433      * @return a hash code value for this {@code Character}
4434      */
hashCode()4435     public int hashCode() {
4436         return Character.hashCode(value);
4437     }
4438 
4439     /**
4440      * Returns a hash code for a {@code char} value; compatible with
4441      * {@code Character.hashCode()}.
4442      *
4443      * @since 1.8
4444      *
4445      * @param value The {@code char} for which to return a hash code.
4446      * @return a hash code value for a {@code char} value.
4447      */
hashCode(char value)4448     public static int hashCode(char value) {
4449         return (int)value;
4450     }
4451 
4452     /**
4453      * Compares this object against the specified object.
4454      * The result is {@code true} if and only if the argument is not
4455      * {@code null} and is a {@code Character} object that
4456      * represents the same {@code char} value as this object.
4457      *
4458      * @param   obj   the object to compare with.
4459      * @return  {@code true} if the objects are the same;
4460      *          {@code false} otherwise.
4461      */
equals(Object obj)4462     public boolean equals(Object obj) {
4463         if (obj instanceof Character) {
4464             return value == ((Character)obj).charValue();
4465         }
4466         return false;
4467     }
4468 
4469     /**
4470      * Returns a {@code String} object representing this
4471      * {@code Character}'s value.  The result is a string of
4472      * length 1 whose sole component is the primitive
4473      * {@code char} value represented by this
4474      * {@code Character} object.
4475      *
4476      * @return  a string representation of this object.
4477      */
toString()4478     public String toString() {
4479         char buf[] = {value};
4480         return String.valueOf(buf);
4481     }
4482 
4483     /**
4484      * Returns a {@code String} object representing the
4485      * specified {@code char}.  The result is a string of length
4486      * 1 consisting solely of the specified {@code char}.
4487      *
4488      * @param c the {@code char} to be converted
4489      * @return the string representation of the specified {@code char}
4490      * @since 1.4
4491      */
toString(char c)4492     public static String toString(char c) {
4493         return String.valueOf(c);
4494     }
4495 
4496     /**
4497      * Determines whether the specified code point is a valid
4498      * <a href="http://www.unicode.org/glossary/#code_point">
4499      * Unicode code point value</a>.
4500      *
4501      * @param  codePoint the Unicode code point to be tested
4502      * @return {@code true} if the specified code point value is between
4503      *         {@link #MIN_CODE_POINT} and
4504      *         {@link #MAX_CODE_POINT} inclusive;
4505      *         {@code false} otherwise.
4506      * @since  1.5
4507      */
isValidCodePoint(int codePoint)4508     public static boolean isValidCodePoint(int codePoint) {
4509         // Optimized form of:
4510         //     codePoint >= MIN_CODE_POINT && codePoint <= MAX_CODE_POINT
4511         int plane = codePoint >>> 16;
4512         return plane < ((MAX_CODE_POINT + 1) >>> 16);
4513     }
4514 
4515     /**
4516      * Determines whether the specified character (Unicode code point)
4517      * is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
4518      * Such code points can be represented using a single {@code char}.
4519      *
4520      * @param  codePoint the character (Unicode code point) to be tested
4521      * @return {@code true} if the specified code point is between
4522      *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
4523      *         {@code false} otherwise.
4524      * @since  1.7
4525      */
isBmpCodePoint(int codePoint)4526     public static boolean isBmpCodePoint(int codePoint) {
4527         return codePoint >>> 16 == 0;
4528         // Optimized form of:
4529         //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
4530         // We consistently use logical shift (>>>) to facilitate
4531         // additional runtime optimizations.
4532     }
4533 
4534     /**
4535      * Determines whether the specified character (Unicode code point)
4536      * is in the <a href="#supplementary">supplementary character</a> range.
4537      *
4538      * @param  codePoint the character (Unicode code point) to be tested
4539      * @return {@code true} if the specified code point is between
4540      *         {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
4541      *         {@link #MAX_CODE_POINT} inclusive;
4542      *         {@code false} otherwise.
4543      * @since  1.5
4544      */
isSupplementaryCodePoint(int codePoint)4545     public static boolean isSupplementaryCodePoint(int codePoint) {
4546         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT
4547             && codePoint <  MAX_CODE_POINT + 1;
4548     }
4549 
4550     /**
4551      * Determines if the given {@code char} value is a
4552      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4553      * Unicode high-surrogate code unit</a>
4554      * (also known as <i>leading-surrogate code unit</i>).
4555      *
4556      * <p>Such values do not represent characters by themselves,
4557      * but are used in the representation of
4558      * <a href="#supplementary">supplementary characters</a>
4559      * in the UTF-16 encoding.
4560      *
4561      * @param  ch the {@code char} value to be tested.
4562      * @return {@code true} if the {@code char} value is between
4563      *         {@link #MIN_HIGH_SURROGATE} and
4564      *         {@link #MAX_HIGH_SURROGATE} inclusive;
4565      *         {@code false} otherwise.
4566      * @see    Character#isLowSurrogate(char)
4567      * @see    Character.UnicodeBlock#of(int)
4568      * @since  1.5
4569      */
isHighSurrogate(char ch)4570     public static boolean isHighSurrogate(char ch) {
4571         // Help VM constant-fold; MAX_HIGH_SURROGATE + 1 == MIN_LOW_SURROGATE
4572         return ch >= MIN_HIGH_SURROGATE && ch < (MAX_HIGH_SURROGATE + 1);
4573     }
4574 
4575     /**
4576      * Determines if the given {@code char} value is a
4577      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4578      * Unicode low-surrogate code unit</a>
4579      * (also known as <i>trailing-surrogate code unit</i>).
4580      *
4581      * <p>Such values do not represent characters by themselves,
4582      * but are used in the representation of
4583      * <a href="#supplementary">supplementary characters</a>
4584      * in the UTF-16 encoding.
4585      *
4586      * @param  ch the {@code char} value to be tested.
4587      * @return {@code true} if the {@code char} value is between
4588      *         {@link #MIN_LOW_SURROGATE} and
4589      *         {@link #MAX_LOW_SURROGATE} inclusive;
4590      *         {@code false} otherwise.
4591      * @see    Character#isHighSurrogate(char)
4592      * @since  1.5
4593      */
isLowSurrogate(char ch)4594     public static boolean isLowSurrogate(char ch) {
4595         return ch >= MIN_LOW_SURROGATE && ch < (MAX_LOW_SURROGATE + 1);
4596     }
4597 
4598     /**
4599      * Determines if the given {@code char} value is a Unicode
4600      * <i>surrogate code unit</i>.
4601      *
4602      * <p>Such values do not represent characters by themselves,
4603      * but are used in the representation of
4604      * <a href="#supplementary">supplementary characters</a>
4605      * in the UTF-16 encoding.
4606      *
4607      * <p>A char value is a surrogate code unit if and only if it is either
4608      * a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
4609      * a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
4610      *
4611      * @param  ch the {@code char} value to be tested.
4612      * @return {@code true} if the {@code char} value is between
4613      *         {@link #MIN_SURROGATE} and
4614      *         {@link #MAX_SURROGATE} inclusive;
4615      *         {@code false} otherwise.
4616      * @since  1.7
4617      */
isSurrogate(char ch)4618     public static boolean isSurrogate(char ch) {
4619         return ch >= MIN_SURROGATE && ch < (MAX_SURROGATE + 1);
4620     }
4621 
4622     /**
4623      * Determines whether the specified pair of {@code char}
4624      * values is a valid
4625      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4626      * Unicode surrogate pair</a>.
4627 
4628      * <p>This method is equivalent to the expression:
4629      * <blockquote><pre>
4630      * isHighSurrogate(high) && isLowSurrogate(low)
4631      * </pre></blockquote>
4632      *
4633      * @param  high the high-surrogate code value to be tested
4634      * @param  low the low-surrogate code value to be tested
4635      * @return {@code true} if the specified high and
4636      * low-surrogate code values represent a valid surrogate pair;
4637      * {@code false} otherwise.
4638      * @since  1.5
4639      */
isSurrogatePair(char high, char low)4640     public static boolean isSurrogatePair(char high, char low) {
4641         return isHighSurrogate(high) && isLowSurrogate(low);
4642     }
4643 
4644     /**
4645      * Determines the number of {@code char} values needed to
4646      * represent the specified character (Unicode code point). If the
4647      * specified character is equal to or greater than 0x10000, then
4648      * the method returns 2. Otherwise, the method returns 1.
4649      *
4650      * <p>This method doesn't validate the specified character to be a
4651      * valid Unicode code point. The caller must validate the
4652      * character value using {@link #isValidCodePoint(int) isValidCodePoint}
4653      * if necessary.
4654      *
4655      * @param   codePoint the character (Unicode code point) to be tested.
4656      * @return  2 if the character is a valid supplementary character; 1 otherwise.
4657      * @see     Character#isSupplementaryCodePoint(int)
4658      * @since   1.5
4659      */
charCount(int codePoint)4660     public static int charCount(int codePoint) {
4661         return codePoint >= MIN_SUPPLEMENTARY_CODE_POINT ? 2 : 1;
4662     }
4663 
4664     /**
4665      * Converts the specified surrogate pair to its supplementary code
4666      * point value. This method does not validate the specified
4667      * surrogate pair. The caller must validate it using {@link
4668      * #isSurrogatePair(char, char) isSurrogatePair} if necessary.
4669      *
4670      * @param  high the high-surrogate code unit
4671      * @param  low the low-surrogate code unit
4672      * @return the supplementary code point composed from the
4673      *         specified surrogate pair.
4674      * @since  1.5
4675      */
toCodePoint(char high, char low)4676     public static int toCodePoint(char high, char low) {
4677         // Optimized form of:
4678         // return ((high - MIN_HIGH_SURROGATE) << 10)
4679         //         + (low - MIN_LOW_SURROGATE)
4680         //         + MIN_SUPPLEMENTARY_CODE_POINT;
4681         return ((high << 10) + low) + (MIN_SUPPLEMENTARY_CODE_POINT
4682                                        - (MIN_HIGH_SURROGATE << 10)
4683                                        - MIN_LOW_SURROGATE);
4684     }
4685 
4686     /**
4687      * Returns the code point at the given index of the
4688      * {@code CharSequence}. If the {@code char} value at
4689      * the given index in the {@code CharSequence} is in the
4690      * high-surrogate range, the following index is less than the
4691      * length of the {@code CharSequence}, and the
4692      * {@code char} value at the following index is in the
4693      * low-surrogate range, then the supplementary code point
4694      * corresponding to this surrogate pair is returned. Otherwise,
4695      * the {@code char} value at the given index is returned.
4696      *
4697      * @param seq a sequence of {@code char} values (Unicode code
4698      * units)
4699      * @param index the index to the {@code char} values (Unicode
4700      * code units) in {@code seq} to be converted
4701      * @return the Unicode code point at the given index
4702      * @exception NullPointerException if {@code seq} is null.
4703      * @exception IndexOutOfBoundsException if the value
4704      * {@code index} is negative or not less than
4705      * {@link CharSequence#length() seq.length()}.
4706      * @since  1.5
4707      */
codePointAt(CharSequence seq, int index)4708     public static int codePointAt(CharSequence seq, int index) {
4709         char c1 = seq.charAt(index++);
4710         if (isHighSurrogate(c1)) {
4711             if (index < seq.length()) {
4712                 char c2 = seq.charAt(index);
4713                 if (isLowSurrogate(c2)) {
4714                     return toCodePoint(c1, c2);
4715                 }
4716             }
4717         }
4718         return c1;
4719     }
4720 
4721     /**
4722      * Returns the code point at the given index of the
4723      * {@code char} array. If the {@code char} value at
4724      * the given index in the {@code char} array is in the
4725      * high-surrogate range, the following index is less than the
4726      * length of the {@code char} array, and the
4727      * {@code char} value at the following index is in the
4728      * low-surrogate range, then the supplementary code point
4729      * corresponding to this surrogate pair is returned. Otherwise,
4730      * the {@code char} value at the given index is returned.
4731      *
4732      * @param a the {@code char} array
4733      * @param index the index to the {@code char} values (Unicode
4734      * code units) in the {@code char} array to be converted
4735      * @return the Unicode code point at the given index
4736      * @exception NullPointerException if {@code a} is null.
4737      * @exception IndexOutOfBoundsException if the value
4738      * {@code index} is negative or not less than
4739      * the length of the {@code char} array.
4740      * @since  1.5
4741      */
codePointAt(char[] a, int index)4742     public static int codePointAt(char[] a, int index) {
4743         return codePointAtImpl(a, index, a.length);
4744     }
4745 
4746     /**
4747      * Returns the code point at the given index of the
4748      * {@code char} array, where only array elements with
4749      * {@code index} less than {@code limit} can be used. If
4750      * the {@code char} value at the given index in the
4751      * {@code char} array is in the high-surrogate range, the
4752      * following index is less than the {@code limit}, and the
4753      * {@code char} value at the following index is in the
4754      * low-surrogate range, then the supplementary code point
4755      * corresponding to this surrogate pair is returned. Otherwise,
4756      * the {@code char} value at the given index is returned.
4757      *
4758      * @param a the {@code char} array
4759      * @param index the index to the {@code char} values (Unicode
4760      * code units) in the {@code char} array to be converted
4761      * @param limit the index after the last array element that
4762      * can be used in the {@code char} array
4763      * @return the Unicode code point at the given index
4764      * @exception NullPointerException if {@code a} is null.
4765      * @exception IndexOutOfBoundsException if the {@code index}
4766      * argument is negative or not less than the {@code limit}
4767      * argument, or if the {@code limit} argument is negative or
4768      * greater than the length of the {@code char} array.
4769      * @since  1.5
4770      */
codePointAt(char[] a, int index, int limit)4771     public static int codePointAt(char[] a, int index, int limit) {
4772         if (index >= limit || limit < 0 || limit > a.length) {
4773             throw new IndexOutOfBoundsException();
4774         }
4775         return codePointAtImpl(a, index, limit);
4776     }
4777 
4778     // throws ArrayIndexOutofBoundsException if index out of bounds
codePointAtImpl(char[] a, int index, int limit)4779     static int codePointAtImpl(char[] a, int index, int limit) {
4780         char c1 = a[index++];
4781         if (isHighSurrogate(c1)) {
4782             if (index < limit) {
4783                 char c2 = a[index];
4784                 if (isLowSurrogate(c2)) {
4785                     return toCodePoint(c1, c2);
4786                 }
4787             }
4788         }
4789         return c1;
4790     }
4791 
4792     /**
4793      * Returns the code point preceding the given index of the
4794      * {@code CharSequence}. If the {@code char} value at
4795      * {@code (index - 1)} in the {@code CharSequence} is in
4796      * the low-surrogate range, {@code (index - 2)} is not
4797      * negative, and the {@code char} value at {@code (index - 2)}
4798      * in the {@code CharSequence} is in the
4799      * high-surrogate range, then the supplementary code point
4800      * corresponding to this surrogate pair is returned. Otherwise,
4801      * the {@code char} value at {@code (index - 1)} is
4802      * returned.
4803      *
4804      * @param seq the {@code CharSequence} instance
4805      * @param index the index following the code point that should be returned
4806      * @return the Unicode code point value before the given index.
4807      * @exception NullPointerException if {@code seq} is null.
4808      * @exception IndexOutOfBoundsException if the {@code index}
4809      * argument is less than 1 or greater than {@link
4810      * CharSequence#length() seq.length()}.
4811      * @since  1.5
4812      */
codePointBefore(CharSequence seq, int index)4813     public static int codePointBefore(CharSequence seq, int index) {
4814         char c2 = seq.charAt(--index);
4815         if (isLowSurrogate(c2)) {
4816             if (index > 0) {
4817                 char c1 = seq.charAt(--index);
4818                 if (isHighSurrogate(c1)) {
4819                     return toCodePoint(c1, c2);
4820                 }
4821             }
4822         }
4823         return c2;
4824     }
4825 
4826     /**
4827      * Returns the code point preceding the given index of the
4828      * {@code char} array. If the {@code char} value at
4829      * {@code (index - 1)} in the {@code char} array is in
4830      * the low-surrogate range, {@code (index - 2)} is not
4831      * negative, and the {@code char} value at {@code (index - 2)}
4832      * in the {@code char} array is in the
4833      * high-surrogate range, then the supplementary code point
4834      * corresponding to this surrogate pair is returned. Otherwise,
4835      * the {@code char} value at {@code (index - 1)} is
4836      * returned.
4837      *
4838      * @param a the {@code char} array
4839      * @param index the index following the code point that should be returned
4840      * @return the Unicode code point value before the given index.
4841      * @exception NullPointerException if {@code a} is null.
4842      * @exception IndexOutOfBoundsException if the {@code index}
4843      * argument is less than 1 or greater than the length of the
4844      * {@code char} array
4845      * @since  1.5
4846      */
codePointBefore(char[] a, int index)4847     public static int codePointBefore(char[] a, int index) {
4848         return codePointBeforeImpl(a, index, 0);
4849     }
4850 
4851     /**
4852      * Returns the code point preceding the given index of the
4853      * {@code char} array, where only array elements with
4854      * {@code index} greater than or equal to {@code start}
4855      * can be used. If the {@code char} value at {@code (index - 1)}
4856      * in the {@code char} array is in the
4857      * low-surrogate range, {@code (index - 2)} is not less than
4858      * {@code start}, and the {@code char} value at
4859      * {@code (index - 2)} in the {@code char} array is in
4860      * the high-surrogate range, then the supplementary code point
4861      * corresponding to this surrogate pair is returned. Otherwise,
4862      * the {@code char} value at {@code (index - 1)} is
4863      * returned.
4864      *
4865      * @param a the {@code char} array
4866      * @param index the index following the code point that should be returned
4867      * @param start the index of the first array element in the
4868      * {@code char} array
4869      * @return the Unicode code point value before the given index.
4870      * @exception NullPointerException if {@code a} is null.
4871      * @exception IndexOutOfBoundsException if the {@code index}
4872      * argument is not greater than the {@code start} argument or
4873      * is greater than the length of the {@code char} array, or
4874      * if the {@code start} argument is negative or not less than
4875      * the length of the {@code char} array.
4876      * @since  1.5
4877      */
codePointBefore(char[] a, int index, int start)4878     public static int codePointBefore(char[] a, int index, int start) {
4879         if (index <= start || start < 0 || start >= a.length) {
4880             throw new IndexOutOfBoundsException();
4881         }
4882         return codePointBeforeImpl(a, index, start);
4883     }
4884 
4885     // throws ArrayIndexOutofBoundsException if index-1 out of bounds
codePointBeforeImpl(char[] a, int index, int start)4886     static int codePointBeforeImpl(char[] a, int index, int start) {
4887         char c2 = a[--index];
4888         if (isLowSurrogate(c2)) {
4889             if (index > start) {
4890                 char c1 = a[--index];
4891                 if (isHighSurrogate(c1)) {
4892                     return toCodePoint(c1, c2);
4893                 }
4894             }
4895         }
4896         return c2;
4897     }
4898 
4899     /**
4900      * Returns the leading surrogate (a
4901      * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
4902      * high surrogate code unit</a>) of the
4903      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4904      * surrogate pair</a>
4905      * representing the specified supplementary character (Unicode
4906      * code point) in the UTF-16 encoding.  If the specified character
4907      * is not a
4908      * <a href="Character.html#supplementary">supplementary character</a>,
4909      * an unspecified {@code char} is returned.
4910      *
4911      * <p>If
4912      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4913      * is {@code true}, then
4914      * {@link #isHighSurrogate isHighSurrogate}{@code (highSurrogate(x))} and
4915      * {@link #toCodePoint toCodePoint}{@code (highSurrogate(x), }{@link #lowSurrogate lowSurrogate}{@code (x)) == x}
4916      * are also always {@code true}.
4917      *
4918      * @param   codePoint a supplementary character (Unicode code point)
4919      * @return  the leading surrogate code unit used to represent the
4920      *          character in the UTF-16 encoding
4921      * @since   1.7
4922      */
highSurrogate(int codePoint)4923     public static char highSurrogate(int codePoint) {
4924         return (char) ((codePoint >>> 10)
4925             + (MIN_HIGH_SURROGATE - (MIN_SUPPLEMENTARY_CODE_POINT >>> 10)));
4926     }
4927 
4928     /**
4929      * Returns the trailing surrogate (a
4930      * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
4931      * low surrogate code unit</a>) of the
4932      * <a href="http://www.unicode.org/glossary/#surrogate_pair">
4933      * surrogate pair</a>
4934      * representing the specified supplementary character (Unicode
4935      * code point) in the UTF-16 encoding.  If the specified character
4936      * is not a
4937      * <a href="Character.html#supplementary">supplementary character</a>,
4938      * an unspecified {@code char} is returned.
4939      *
4940      * <p>If
4941      * {@link #isSupplementaryCodePoint isSupplementaryCodePoint(x)}
4942      * is {@code true}, then
4943      * {@link #isLowSurrogate isLowSurrogate}{@code (lowSurrogate(x))} and
4944      * {@link #toCodePoint toCodePoint}{@code (}{@link #highSurrogate highSurrogate}{@code (x), lowSurrogate(x)) == x}
4945      * are also always {@code true}.
4946      *
4947      * @param   codePoint a supplementary character (Unicode code point)
4948      * @return  the trailing surrogate code unit used to represent the
4949      *          character in the UTF-16 encoding
4950      * @since   1.7
4951      */
lowSurrogate(int codePoint)4952     public static char lowSurrogate(int codePoint) {
4953         return (char) ((codePoint & 0x3ff) + MIN_LOW_SURROGATE);
4954     }
4955 
4956     /**
4957      * Converts the specified character (Unicode code point) to its
4958      * UTF-16 representation. If the specified code point is a BMP
4959      * (Basic Multilingual Plane or Plane 0) value, the same value is
4960      * stored in {@code dst[dstIndex]}, and 1 is returned. If the
4961      * specified code point is a supplementary character, its
4962      * surrogate values are stored in {@code dst[dstIndex]}
4963      * (high-surrogate) and {@code dst[dstIndex+1]}
4964      * (low-surrogate), and 2 is returned.
4965      *
4966      * @param  codePoint the character (Unicode code point) to be converted.
4967      * @param  dst an array of {@code char} in which the
4968      * {@code codePoint}'s UTF-16 value is stored.
4969      * @param dstIndex the start index into the {@code dst}
4970      * array where the converted value is stored.
4971      * @return 1 if the code point is a BMP code point, 2 if the
4972      * code point is a supplementary code point.
4973      * @exception IllegalArgumentException if the specified
4974      * {@code codePoint} is not a valid Unicode code point.
4975      * @exception NullPointerException if the specified {@code dst} is null.
4976      * @exception IndexOutOfBoundsException if {@code dstIndex}
4977      * is negative or not less than {@code dst.length}, or if
4978      * {@code dst} at {@code dstIndex} doesn't have enough
4979      * array element(s) to store the resulting {@code char}
4980      * value(s). (If {@code dstIndex} is equal to
4981      * {@code dst.length-1} and the specified
4982      * {@code codePoint} is a supplementary character, the
4983      * high-surrogate value is not stored in
4984      * {@code dst[dstIndex]}.)
4985      * @since  1.5
4986      */
toChars(int codePoint, char[] dst, int dstIndex)4987     public static int toChars(int codePoint, char[] dst, int dstIndex) {
4988         if (isBmpCodePoint(codePoint)) {
4989             dst[dstIndex] = (char) codePoint;
4990             return 1;
4991         } else if (isValidCodePoint(codePoint)) {
4992             toSurrogates(codePoint, dst, dstIndex);
4993             return 2;
4994         } else {
4995             throw new IllegalArgumentException();
4996         }
4997     }
4998 
4999     /**
5000      * Converts the specified character (Unicode code point) to its
5001      * UTF-16 representation stored in a {@code char} array. If
5002      * the specified code point is a BMP (Basic Multilingual Plane or
5003      * Plane 0) value, the resulting {@code char} array has
5004      * the same value as {@code codePoint}. If the specified code
5005      * point is a supplementary code point, the resulting
5006      * {@code char} array has the corresponding surrogate pair.
5007      *
5008      * @param  codePoint a Unicode code point
5009      * @return a {@code char} array having
5010      *         {@code codePoint}'s UTF-16 representation.
5011      * @exception IllegalArgumentException if the specified
5012      * {@code codePoint} is not a valid Unicode code point.
5013      * @since  1.5
5014      */
toChars(int codePoint)5015     public static char[] toChars(int codePoint) {
5016         if (isBmpCodePoint(codePoint)) {
5017             return new char[] { (char) codePoint };
5018         } else if (isValidCodePoint(codePoint)) {
5019             char[] result = new char[2];
5020             toSurrogates(codePoint, result, 0);
5021             return result;
5022         } else {
5023             throw new IllegalArgumentException();
5024         }
5025     }
5026 
toSurrogates(int codePoint, char[] dst, int index)5027     static void toSurrogates(int codePoint, char[] dst, int index) {
5028         // We write elements "backwards" to guarantee all-or-nothing
5029         dst[index+1] = lowSurrogate(codePoint);
5030         dst[index] = highSurrogate(codePoint);
5031     }
5032 
5033     /**
5034      * Returns the number of Unicode code points in the text range of
5035      * the specified char sequence. The text range begins at the
5036      * specified {@code beginIndex} and extends to the
5037      * {@code char} at index {@code endIndex - 1}. Thus the
5038      * length (in {@code char}s) of the text range is
5039      * {@code endIndex-beginIndex}. Unpaired surrogates within
5040      * the text range count as one code point each.
5041      *
5042      * @param seq the char sequence
5043      * @param beginIndex the index to the first {@code char} of
5044      * the text range.
5045      * @param endIndex the index after the last {@code char} of
5046      * the text range.
5047      * @return the number of Unicode code points in the specified text
5048      * range
5049      * @exception NullPointerException if {@code seq} is null.
5050      * @exception IndexOutOfBoundsException if the
5051      * {@code beginIndex} is negative, or {@code endIndex}
5052      * is larger than the length of the given sequence, or
5053      * {@code beginIndex} is larger than {@code endIndex}.
5054      * @since  1.5
5055      */
codePointCount(CharSequence seq, int beginIndex, int endIndex)5056     public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) {
5057         int length = seq.length();
5058         if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) {
5059             throw new IndexOutOfBoundsException();
5060         }
5061         int n = endIndex - beginIndex;
5062         for (int i = beginIndex; i < endIndex; ) {
5063             if (isHighSurrogate(seq.charAt(i++)) && i < endIndex &&
5064                 isLowSurrogate(seq.charAt(i))) {
5065                 n--;
5066                 i++;
5067             }
5068         }
5069         return n;
5070     }
5071 
5072     /**
5073      * Returns the number of Unicode code points in a subarray of the
5074      * {@code char} array argument. The {@code offset}
5075      * argument is the index of the first {@code char} of the
5076      * subarray and the {@code count} argument specifies the
5077      * length of the subarray in {@code char}s. Unpaired
5078      * surrogates within the subarray count as one code point each.
5079      *
5080      * @param a the {@code char} array
5081      * @param offset the index of the first {@code char} in the
5082      * given {@code char} array
5083      * @param count the length of the subarray in {@code char}s
5084      * @return the number of Unicode code points in the specified subarray
5085      * @exception NullPointerException if {@code a} is null.
5086      * @exception IndexOutOfBoundsException if {@code offset} or
5087      * {@code count} is negative, or if {@code offset +
5088      * count} is larger than the length of the given array.
5089      * @since  1.5
5090      */
codePointCount(char[] a, int offset, int count)5091     public static int codePointCount(char[] a, int offset, int count) {
5092         if (count > a.length - offset || offset < 0 || count < 0) {
5093             throw new IndexOutOfBoundsException();
5094         }
5095         return codePointCountImpl(a, offset, count);
5096     }
5097 
codePointCountImpl(char[] a, int offset, int count)5098     static int codePointCountImpl(char[] a, int offset, int count) {
5099         int endIndex = offset + count;
5100         int n = count;
5101         for (int i = offset; i < endIndex; ) {
5102             if (isHighSurrogate(a[i++]) && i < endIndex &&
5103                 isLowSurrogate(a[i])) {
5104                 n--;
5105                 i++;
5106             }
5107         }
5108         return n;
5109     }
5110 
5111     /**
5112      * Returns the index within the given char sequence that is offset
5113      * from the given {@code index} by {@code codePointOffset}
5114      * code points. Unpaired surrogates within the text range given by
5115      * {@code index} and {@code codePointOffset} count as
5116      * one code point each.
5117      *
5118      * @param seq the char sequence
5119      * @param index the index to be offset
5120      * @param codePointOffset the offset in code points
5121      * @return the index within the char sequence
5122      * @exception NullPointerException if {@code seq} is null.
5123      * @exception IndexOutOfBoundsException if {@code index}
5124      *   is negative or larger then the length of the char sequence,
5125      *   or if {@code codePointOffset} is positive and the
5126      *   subsequence starting with {@code index} has fewer than
5127      *   {@code codePointOffset} code points, or if
5128      *   {@code codePointOffset} is negative and the subsequence
5129      *   before {@code index} has fewer than the absolute value
5130      *   of {@code codePointOffset} code points.
5131      * @since 1.5
5132      */
offsetByCodePoints(CharSequence seq, int index, int codePointOffset)5133     public static int offsetByCodePoints(CharSequence seq, int index,
5134                                          int codePointOffset) {
5135         int length = seq.length();
5136         if (index < 0 || index > length) {
5137             throw new IndexOutOfBoundsException();
5138         }
5139 
5140         int x = index;
5141         if (codePointOffset >= 0) {
5142             int i;
5143             for (i = 0; x < length && i < codePointOffset; i++) {
5144                 if (isHighSurrogate(seq.charAt(x++)) && x < length &&
5145                     isLowSurrogate(seq.charAt(x))) {
5146                     x++;
5147                 }
5148             }
5149             if (i < codePointOffset) {
5150                 throw new IndexOutOfBoundsException();
5151             }
5152         } else {
5153             int i;
5154             for (i = codePointOffset; x > 0 && i < 0; i++) {
5155                 if (isLowSurrogate(seq.charAt(--x)) && x > 0 &&
5156                     isHighSurrogate(seq.charAt(x-1))) {
5157                     x--;
5158                 }
5159             }
5160             if (i < 0) {
5161                 throw new IndexOutOfBoundsException();
5162             }
5163         }
5164         return x;
5165     }
5166 
5167     /**
5168      * Returns the index within the given {@code char} subarray
5169      * that is offset from the given {@code index} by
5170      * {@code codePointOffset} code points. The
5171      * {@code start} and {@code count} arguments specify a
5172      * subarray of the {@code char} array. Unpaired surrogates
5173      * within the text range given by {@code index} and
5174      * {@code codePointOffset} count as one code point each.
5175      *
5176      * @param a the {@code char} array
5177      * @param start the index of the first {@code char} of the
5178      * subarray
5179      * @param count the length of the subarray in {@code char}s
5180      * @param index the index to be offset
5181      * @param codePointOffset the offset in code points
5182      * @return the index within the subarray
5183      * @exception NullPointerException if {@code a} is null.
5184      * @exception IndexOutOfBoundsException
5185      *   if {@code start} or {@code count} is negative,
5186      *   or if {@code start + count} is larger than the length of
5187      *   the given array,
5188      *   or if {@code index} is less than {@code start} or
5189      *   larger then {@code start + count},
5190      *   or if {@code codePointOffset} is positive and the text range
5191      *   starting with {@code index} and ending with {@code start + count - 1}
5192      *   has fewer than {@code codePointOffset} code
5193      *   points,
5194      *   or if {@code codePointOffset} is negative and the text range
5195      *   starting with {@code start} and ending with {@code index - 1}
5196      *   has fewer than the absolute value of
5197      *   {@code codePointOffset} code points.
5198      * @since 1.5
5199      */
offsetByCodePoints(char[] a, int start, int count, int index, int codePointOffset)5200     public static int offsetByCodePoints(char[] a, int start, int count,
5201                                          int index, int codePointOffset) {
5202         if (count > a.length-start || start < 0 || count < 0
5203             || index < start || index > start+count) {
5204             throw new IndexOutOfBoundsException();
5205         }
5206         return offsetByCodePointsImpl(a, start, count, index, codePointOffset);
5207     }
5208 
offsetByCodePointsImpl(char[]a, int start, int count, int index, int codePointOffset)5209     static int offsetByCodePointsImpl(char[]a, int start, int count,
5210                                       int index, int codePointOffset) {
5211         int x = index;
5212         if (codePointOffset >= 0) {
5213             int limit = start + count;
5214             int i;
5215             for (i = 0; x < limit && i < codePointOffset; i++) {
5216                 if (isHighSurrogate(a[x++]) && x < limit &&
5217                     isLowSurrogate(a[x])) {
5218                     x++;
5219                 }
5220             }
5221             if (i < codePointOffset) {
5222                 throw new IndexOutOfBoundsException();
5223             }
5224         } else {
5225             int i;
5226             for (i = codePointOffset; x > start && i < 0; i++) {
5227                 if (isLowSurrogate(a[--x]) && x > start &&
5228                     isHighSurrogate(a[x-1])) {
5229                     x--;
5230                 }
5231             }
5232             if (i < 0) {
5233                 throw new IndexOutOfBoundsException();
5234             }
5235         }
5236         return x;
5237     }
5238 
5239     /**
5240      * Determines if the specified character is a lowercase character.
5241      * <p>
5242      * A character is lowercase if its general category type, provided
5243      * by {@code Character.getType(ch)}, is
5244      * {@code LOWERCASE_LETTER}, or it has contributory property
5245      * Other_Lowercase as defined by the Unicode Standard.
5246      * <p>
5247      * The following are examples of lowercase characters:
5248      * <p><blockquote><pre>
5249      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5250      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5251      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5252      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5253      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5254      * </pre></blockquote>
5255      * <p> Many other Unicode characters are lowercase too.
5256      *
5257      * <p><b>Note:</b> This method cannot handle <a
5258      * href="#supplementary"> supplementary characters</a>. To support
5259      * all Unicode characters, including supplementary characters, use
5260      * the {@link #isLowerCase(int)} method.
5261      *
5262      * @param   ch   the character to be tested.
5263      * @return  {@code true} if the character is lowercase;
5264      *          {@code false} otherwise.
5265      * @see     Character#isLowerCase(char)
5266      * @see     Character#isTitleCase(char)
5267      * @see     Character#toLowerCase(char)
5268      * @see     Character#getType(char)
5269      */
isLowerCase(char ch)5270     public static boolean isLowerCase(char ch) {
5271         return isLowerCase((int)ch);
5272     }
5273 
5274     /**
5275      * Determines if the specified character (Unicode code point) is a
5276      * lowercase character.
5277      * <p>
5278      * A character is lowercase if its general category type, provided
5279      * by {@link Character#getType getType(codePoint)}, is
5280      * {@code LOWERCASE_LETTER}, or it has contributory property
5281      * Other_Lowercase as defined by the Unicode Standard.
5282      * <p>
5283      * The following are examples of lowercase characters:
5284      * <p><blockquote><pre>
5285      * a b c d e f g h i j k l m n o p q r s t u v w x y z
5286      * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
5287      * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
5288      * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
5289      * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
5290      * </pre></blockquote>
5291      * <p> Many other Unicode characters are lowercase too.
5292      *
5293      * @param   codePoint the character (Unicode code point) to be tested.
5294      * @return  {@code true} if the character is lowercase;
5295      *          {@code false} otherwise.
5296      * @see     Character#isLowerCase(int)
5297      * @see     Character#isTitleCase(int)
5298      * @see     Character#toLowerCase(int)
5299      * @see     Character#getType(int)
5300      * @since   1.5
5301      */
isLowerCase(int codePoint)5302     public static boolean isLowerCase(int codePoint) {
5303         return isLowerCaseImpl(codePoint);
5304     }
5305 
isLowerCaseImpl(int codePoint)5306     static native boolean isLowerCaseImpl(int codePoint);
5307 
5308     /**
5309      * Determines if the specified character is an uppercase character.
5310      * <p>
5311      * A character is uppercase if its general category type, provided by
5312      * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
5313      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5314      * <p>
5315      * The following are examples of uppercase characters:
5316      * <p><blockquote><pre>
5317      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5318      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5319      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5320      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5321      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5322      * </pre></blockquote>
5323      * <p> Many other Unicode characters are uppercase too.<p>
5324      *
5325      * <p><b>Note:</b> This method cannot handle <a
5326      * href="#supplementary"> supplementary characters</a>. To support
5327      * all Unicode characters, including supplementary characters, use
5328      * the {@link #isUpperCase(int)} method.
5329      *
5330      * @param   ch   the character to be tested.
5331      * @return  {@code true} if the character is uppercase;
5332      *          {@code false} otherwise.
5333      * @see     Character#isLowerCase(char)
5334      * @see     Character#isTitleCase(char)
5335      * @see     Character#toUpperCase(char)
5336      * @see     Character#getType(char)
5337      * @since   1.0
5338      */
isUpperCase(char ch)5339     public static boolean isUpperCase(char ch) {
5340         return isUpperCase((int)ch);
5341     }
5342 
5343     /**
5344      * Determines if the specified character (Unicode code point) is an uppercase character.
5345      * <p>
5346      * A character is uppercase if its general category type, provided by
5347      * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
5348      * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
5349      * <p>
5350      * The following are examples of uppercase characters:
5351      * <p><blockquote><pre>
5352      * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
5353      * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
5354      * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
5355      * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
5356      * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
5357      * </pre></blockquote>
5358      * <p> Many other Unicode characters are uppercase too.<p>
5359      *
5360      * @param   codePoint the character (Unicode code point) to be tested.
5361      * @return  {@code true} if the character is uppercase;
5362      *          {@code false} otherwise.
5363      * @see     Character#isLowerCase(int)
5364      * @see     Character#isTitleCase(int)
5365      * @see     Character#toUpperCase(int)
5366      * @see     Character#getType(int)
5367      * @since   1.5
5368      */
isUpperCase(int codePoint)5369     public static boolean isUpperCase(int codePoint) {
5370         return isUpperCaseImpl(codePoint);
5371     }
5372 
isUpperCaseImpl(int codePoint)5373     static native boolean isUpperCaseImpl(int codePoint);
5374 
5375 
5376     /**
5377      * Determines if the specified character is a titlecase character.
5378      * <p>
5379      * A character is a titlecase character if its general
5380      * category type, provided by {@code Character.getType(ch)},
5381      * is {@code TITLECASE_LETTER}.
5382      * <p>
5383      * Some characters look like pairs of Latin letters. For example, there
5384      * is an uppercase letter that looks like "LJ" and has a corresponding
5385      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5386      * is the appropriate form to use when rendering a word in lowercase
5387      * with initial capitals, as for a book title.
5388      * <p>
5389      * These are some of the Unicode characters for which this method returns
5390      * {@code true}:
5391      * <ul>
5392      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5393      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5394      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5395      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5396      * </ul>
5397      * <p> Many other Unicode characters are titlecase too.<p>
5398      *
5399      * <p><b>Note:</b> This method cannot handle <a
5400      * href="#supplementary"> supplementary characters</a>. To support
5401      * all Unicode characters, including supplementary characters, use
5402      * the {@link #isTitleCase(int)} method.
5403      *
5404      * @param   ch   the character to be tested.
5405      * @return  {@code true} if the character is titlecase;
5406      *          {@code false} otherwise.
5407      * @see     Character#isLowerCase(char)
5408      * @see     Character#isUpperCase(char)
5409      * @see     Character#toTitleCase(char)
5410      * @see     Character#getType(char)
5411      * @since   1.0.2
5412      */
isTitleCase(char ch)5413     public static boolean isTitleCase(char ch) {
5414         return isTitleCase((int)ch);
5415     }
5416 
5417     /**
5418      * Determines if the specified character (Unicode code point) is a titlecase character.
5419      * <p>
5420      * A character is a titlecase character if its general
5421      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5422      * is {@code TITLECASE_LETTER}.
5423      * <p>
5424      * Some characters look like pairs of Latin letters. For example, there
5425      * is an uppercase letter that looks like "LJ" and has a corresponding
5426      * lowercase letter that looks like "lj". A third form, which looks like "Lj",
5427      * is the appropriate form to use when rendering a word in lowercase
5428      * with initial capitals, as for a book title.
5429      * <p>
5430      * These are some of the Unicode characters for which this method returns
5431      * {@code true}:
5432      * <ul>
5433      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON}
5434      * <li>{@code LATIN CAPITAL LETTER L WITH SMALL LETTER J}
5435      * <li>{@code LATIN CAPITAL LETTER N WITH SMALL LETTER J}
5436      * <li>{@code LATIN CAPITAL LETTER D WITH SMALL LETTER Z}
5437      * </ul>
5438      * <p> Many other Unicode characters are titlecase too.<p>
5439      *
5440      * @param   codePoint the character (Unicode code point) to be tested.
5441      * @return  {@code true} if the character is titlecase;
5442      *          {@code false} otherwise.
5443      * @see     Character#isLowerCase(int)
5444      * @see     Character#isUpperCase(int)
5445      * @see     Character#toTitleCase(int)
5446      * @see     Character#getType(int)
5447      * @since   1.5
5448      */
isTitleCase(int codePoint)5449     public static boolean isTitleCase(int codePoint) {
5450         return isTitleCaseImpl(codePoint);
5451     }
5452 
isTitleCaseImpl(int codePoint)5453     static native boolean isTitleCaseImpl(int codePoint);
5454 
5455     /**
5456      * Determines if the specified character is a digit.
5457      * <p>
5458      * A character is a digit if its general category type, provided
5459      * by {@code Character.getType(ch)}, is
5460      * {@code DECIMAL_DIGIT_NUMBER}.
5461      * <p>
5462      * Some Unicode character ranges that contain digits:
5463      * <ul>
5464      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5465      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5466      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5467      *     Arabic-Indic digits
5468      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5469      *     Extended Arabic-Indic digits
5470      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5471      *     Devanagari digits
5472      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5473      *     Fullwidth digits
5474      * </ul>
5475      *
5476      * Many other character ranges contain digits as well.
5477      *
5478      * <p><b>Note:</b> This method cannot handle <a
5479      * href="#supplementary"> supplementary characters</a>. To support
5480      * all Unicode characters, including supplementary characters, use
5481      * the {@link #isDigit(int)} method.
5482      *
5483      * @param   ch   the character to be tested.
5484      * @return  {@code true} if the character is a digit;
5485      *          {@code false} otherwise.
5486      * @see     Character#digit(char, int)
5487      * @see     Character#forDigit(int, int)
5488      * @see     Character#getType(char)
5489      */
isDigit(char ch)5490     public static boolean isDigit(char ch) {
5491         return isDigit((int)ch);
5492     }
5493 
5494     /**
5495      * Determines if the specified character (Unicode code point) is a digit.
5496      * <p>
5497      * A character is a digit if its general category type, provided
5498      * by {@link Character#getType(int) getType(codePoint)}, is
5499      * {@code DECIMAL_DIGIT_NUMBER}.
5500      * <p>
5501      * Some Unicode character ranges that contain digits:
5502      * <ul>
5503      * <li>{@code '\u005Cu0030'} through {@code '\u005Cu0039'},
5504      *     ISO-LATIN-1 digits ({@code '0'} through {@code '9'})
5505      * <li>{@code '\u005Cu0660'} through {@code '\u005Cu0669'},
5506      *     Arabic-Indic digits
5507      * <li>{@code '\u005Cu06F0'} through {@code '\u005Cu06F9'},
5508      *     Extended Arabic-Indic digits
5509      * <li>{@code '\u005Cu0966'} through {@code '\u005Cu096F'},
5510      *     Devanagari digits
5511      * <li>{@code '\u005CuFF10'} through {@code '\u005CuFF19'},
5512      *     Fullwidth digits
5513      * </ul>
5514      *
5515      * Many other character ranges contain digits as well.
5516      *
5517      * @param   codePoint the character (Unicode code point) to be tested.
5518      * @return  {@code true} if the character is a digit;
5519      *          {@code false} otherwise.
5520      * @see     Character#forDigit(int, int)
5521      * @see     Character#getType(int)
5522      * @since   1.5
5523      */
isDigit(int codePoint)5524     public static boolean isDigit(int codePoint) {
5525         return isDigitImpl(codePoint);
5526     }
5527 
isDigitImpl(int codePoint)5528     static native boolean isDigitImpl(int codePoint);
5529 
5530     /**
5531      * Determines if a character is defined in Unicode.
5532      * <p>
5533      * A character is defined if at least one of the following is true:
5534      * <ul>
5535      * <li>It has an entry in the UnicodeData file.
5536      * <li>It has a value in a range defined by the UnicodeData file.
5537      * </ul>
5538      *
5539      * <p><b>Note:</b> This method cannot handle <a
5540      * href="#supplementary"> supplementary characters</a>. To support
5541      * all Unicode characters, including supplementary characters, use
5542      * the {@link #isDefined(int)} method.
5543      *
5544      * @param   ch   the character to be tested
5545      * @return  {@code true} if the character has a defined meaning
5546      *          in Unicode; {@code false} otherwise.
5547      * @see     Character#isDigit(char)
5548      * @see     Character#isLetter(char)
5549      * @see     Character#isLetterOrDigit(char)
5550      * @see     Character#isLowerCase(char)
5551      * @see     Character#isTitleCase(char)
5552      * @see     Character#isUpperCase(char)
5553      * @since   1.0.2
5554      */
isDefined(char ch)5555     public static boolean isDefined(char ch) {
5556         return isDefined((int)ch);
5557     }
5558 
5559     /**
5560      * Determines if a character (Unicode code point) is defined in Unicode.
5561      * <p>
5562      * A character is defined if at least one of the following is true:
5563      * <ul>
5564      * <li>It has an entry in the UnicodeData file.
5565      * <li>It has a value in a range defined by the UnicodeData file.
5566      * </ul>
5567      *
5568      * @param   codePoint the character (Unicode code point) to be tested.
5569      * @return  {@code true} if the character has a defined meaning
5570      *          in Unicode; {@code false} otherwise.
5571      * @see     Character#isDigit(int)
5572      * @see     Character#isLetter(int)
5573      * @see     Character#isLetterOrDigit(int)
5574      * @see     Character#isLowerCase(int)
5575      * @see     Character#isTitleCase(int)
5576      * @see     Character#isUpperCase(int)
5577      * @since   1.5
5578      */
isDefined(int codePoint)5579     public static boolean isDefined(int codePoint) {
5580         return isDefinedImpl(codePoint);
5581     }
5582 
    /**
     * Native implementation backing {@link #isDefined(int)}, which returns this
     * method's result unchanged.
     *
     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the code point is reported as defined in
     *          Unicode; {@code false} otherwise.
     */
    static native boolean isDefinedImpl(int codePoint);
5584 
5585     /**
5586      * Determines if the specified character is a letter.
5587      * <p>
5588      * A character is considered to be a letter if its general
5589      * category type, provided by {@code Character.getType(ch)},
5590      * is any of the following:
5591      * <ul>
5592      * <li> {@code UPPERCASE_LETTER}
5593      * <li> {@code LOWERCASE_LETTER}
5594      * <li> {@code TITLECASE_LETTER}
5595      * <li> {@code MODIFIER_LETTER}
5596      * <li> {@code OTHER_LETTER}
5597      * </ul>
5598      *
5599      * Not all letters have case. Many characters are
5600      * letters but are neither uppercase nor lowercase nor titlecase.
5601      *
5602      * <p><b>Note:</b> This method cannot handle <a
5603      * href="#supplementary"> supplementary characters</a>. To support
5604      * all Unicode characters, including supplementary characters, use
5605      * the {@link #isLetter(int)} method.
5606      *
5607      * @param   ch   the character to be tested.
5608      * @return  {@code true} if the character is a letter;
5609      *          {@code false} otherwise.
5610      * @see     Character#isDigit(char)
5611      * @see     Character#isJavaIdentifierStart(char)
5612      * @see     Character#isJavaLetter(char)
5613      * @see     Character#isJavaLetterOrDigit(char)
5614      * @see     Character#isLetterOrDigit(char)
5615      * @see     Character#isLowerCase(char)
5616      * @see     Character#isTitleCase(char)
5617      * @see     Character#isUnicodeIdentifierStart(char)
5618      * @see     Character#isUpperCase(char)
5619      */
isLetter(char ch)5620     public static boolean isLetter(char ch) {
5621         return isLetter((int)ch);
5622     }
5623 
5624     /**
5625      * Determines if the specified character (Unicode code point) is a letter.
5626      * <p>
5627      * A character is considered to be a letter if its general
5628      * category type, provided by {@link Character#getType(int) getType(codePoint)},
5629      * is any of the following:
5630      * <ul>
5631      * <li> {@code UPPERCASE_LETTER}
5632      * <li> {@code LOWERCASE_LETTER}
5633      * <li> {@code TITLECASE_LETTER}
5634      * <li> {@code MODIFIER_LETTER}
5635      * <li> {@code OTHER_LETTER}
5636      * </ul>
5637      *
5638      * Not all letters have case. Many characters are
5639      * letters but are neither uppercase nor lowercase nor titlecase.
5640      *
5641      * @param   codePoint the character (Unicode code point) to be tested.
5642      * @return  {@code true} if the character is a letter;
5643      *          {@code false} otherwise.
5644      * @see     Character#isDigit(int)
5645      * @see     Character#isJavaIdentifierStart(int)
5646      * @see     Character#isLetterOrDigit(int)
5647      * @see     Character#isLowerCase(int)
5648      * @see     Character#isTitleCase(int)
5649      * @see     Character#isUnicodeIdentifierStart(int)
5650      * @see     Character#isUpperCase(int)
5651      * @since   1.5
5652      */
isLetter(int codePoint)5653     public static boolean isLetter(int codePoint) {
5654         return isLetterImpl(codePoint);
5655     }
5656 
    /**
     * Native implementation backing {@link #isLetter(int)}, which returns this
     * method's result unchanged.
     *
     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the code point is reported as a letter;
     *          {@code false} otherwise.
     */
    static native boolean isLetterImpl(int codePoint);
5658 
5659     /**
5660      * Determines if the specified character is a letter or digit.
5661      * <p>
5662      * A character is considered to be a letter or digit if either
5663      * {@code Character.isLetter(char ch)} or
5664      * {@code Character.isDigit(char ch)} returns
5665      * {@code true} for the character.
5666      *
5667      * <p><b>Note:</b> This method cannot handle <a
5668      * href="#supplementary"> supplementary characters</a>. To support
5669      * all Unicode characters, including supplementary characters, use
5670      * the {@link #isLetterOrDigit(int)} method.
5671      *
5672      * @param   ch   the character to be tested.
5673      * @return  {@code true} if the character is a letter or digit;
5674      *          {@code false} otherwise.
5675      * @see     Character#isDigit(char)
5676      * @see     Character#isJavaIdentifierPart(char)
5677      * @see     Character#isJavaLetter(char)
5678      * @see     Character#isJavaLetterOrDigit(char)
5679      * @see     Character#isLetter(char)
5680      * @see     Character#isUnicodeIdentifierPart(char)
5681      * @since   1.0.2
5682      */
isLetterOrDigit(char ch)5683     public static boolean isLetterOrDigit(char ch) {
5684         return isLetterOrDigit((int)ch);
5685     }
5686 
5687     /**
5688      * Determines if the specified character (Unicode code point) is a letter or digit.
5689      * <p>
5690      * A character is considered to be a letter or digit if either
5691      * {@link #isLetter(int) isLetter(codePoint)} or
5692      * {@link #isDigit(int) isDigit(codePoint)} returns
5693      * {@code true} for the character.
5694      *
5695      * @param   codePoint the character (Unicode code point) to be tested.
5696      * @return  {@code true} if the character is a letter or digit;
5697      *          {@code false} otherwise.
5698      * @see     Character#isDigit(int)
5699      * @see     Character#isJavaIdentifierPart(int)
5700      * @see     Character#isLetter(int)
5701      * @see     Character#isUnicodeIdentifierPart(int)
5702      * @since   1.5
5703      */
isLetterOrDigit(int codePoint)5704     public static boolean isLetterOrDigit(int codePoint) {
5705         return isLetterOrDigitImpl(codePoint);
5706     }
5707 
    /**
     * Native implementation backing {@link #isLetterOrDigit(int)}, which
     * returns this method's result unchanged.
     *
     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the code point is reported as a letter or
     *          digit; {@code false} otherwise.
     */
    static native boolean isLetterOrDigitImpl(int codePoint);
5709 
5710     /**
5711      * Determines if the specified character is permissible as the first
5712      * character in a Java identifier.
5713      * <p>
5714      * A character may start a Java identifier if and only if
5715      * one of the following is true:
5716      * <ul>
5717      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5718      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5719      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5720      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5721      * </ul>
5722      *
5723      * @param   ch the character to be tested.
5724      * @return  {@code true} if the character may start a Java
5725      *          identifier; {@code false} otherwise.
5726      * @see     Character#isJavaLetterOrDigit(char)
5727      * @see     Character#isJavaIdentifierStart(char)
5728      * @see     Character#isJavaIdentifierPart(char)
5729      * @see     Character#isLetter(char)
5730      * @see     Character#isLetterOrDigit(char)
5731      * @see     Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
5733      * @deprecated Replaced by isJavaIdentifierStart(char).
5734      */
5735     @Deprecated
isJavaLetter(char ch)5736     public static boolean isJavaLetter(char ch) {
5737         return isJavaIdentifierStart(ch);
5738     }
5739 
5740     /**
5741      * Determines if the specified character may be part of a Java
5742      * identifier as other than the first character.
5743      * <p>
5744      * A character may be part of a Java identifier if and only if any
5745      * of the following are true:
5746      * <ul>
5747      * <li>  it is a letter
5748      * <li>  it is a currency symbol (such as {@code '$'})
5749      * <li>  it is a connecting punctuation character (such as {@code '_'})
5750      * <li>  it is a digit
5751      * <li>  it is a numeric letter (such as a Roman numeral character)
5752      * <li>  it is a combining mark
5753      * <li>  it is a non-spacing mark
5754      * <li> {@code isIdentifierIgnorable} returns
5755      * {@code true} for the character.
5756      * </ul>
5757      *
5758      * @param   ch the character to be tested.
5759      * @return  {@code true} if the character may be part of a
5760      *          Java identifier; {@code false} otherwise.
5761      * @see     Character#isJavaLetter(char)
5762      * @see     Character#isJavaIdentifierStart(char)
5763      * @see     Character#isJavaIdentifierPart(char)
5764      * @see     Character#isLetter(char)
5765      * @see     Character#isLetterOrDigit(char)
5766      * @see     Character#isUnicodeIdentifierPart(char)
5767      * @see     Character#isIdentifierIgnorable(char)
     * @since   1.0.2
5769      * @deprecated Replaced by isJavaIdentifierPart(char).
5770      */
5771     @Deprecated
isJavaLetterOrDigit(char ch)5772     public static boolean isJavaLetterOrDigit(char ch) {
5773         return isJavaIdentifierPart(ch);
5774     }
5775 
5776     /**
     * Determines if the specified character (Unicode code point) is alphabetic.
5778      * <p>
5779      * A character is considered to be alphabetic if its general category type,
5780      * provided by {@link Character#getType(int) getType(codePoint)}, is any of
5781      * the following:
5782      * <ul>
5783      * <li> <code>UPPERCASE_LETTER</code>
5784      * <li> <code>LOWERCASE_LETTER</code>
5785      * <li> <code>TITLECASE_LETTER</code>
5786      * <li> <code>MODIFIER_LETTER</code>
5787      * <li> <code>OTHER_LETTER</code>
5788      * <li> <code>LETTER_NUMBER</code>
5789      * </ul>
5790      * or it has contributory property Other_Alphabetic as defined by the
5791      * Unicode Standard.
5792      *
5793      * @param   codePoint the character (Unicode code point) to be tested.
5794      * @return  <code>true</code> if the character is a Unicode alphabet
5795      *          character, <code>false</code> otherwise.
5796      * @since   1.7
5797      */
isAlphabetic(int codePoint)5798     public static boolean isAlphabetic(int codePoint) {
5799         return isAlphabeticImpl(codePoint);
5800     }
5801 
    /**
     * Native implementation backing {@link #isAlphabetic(int)}, which returns
     * this method's result unchanged.
     *
     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the code point is reported as alphabetic;
     *          {@code false} otherwise.
     */
    static native boolean isAlphabeticImpl(int codePoint);
5803 
5804 
5805     /**
5806      * Determines if the specified character (Unicode code point) is a CJKV
5807      * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
5808      * the Unicode Standard.
5809      *
5810      * @param   codePoint the character (Unicode code point) to be tested.
5811      * @return  <code>true</code> if the character is a Unicode ideograph
5812      *          character, <code>false</code> otherwise.
5813      * @since   1.7
5814      */
isIdeographic(int codePoint)5815     public static boolean isIdeographic(int codePoint) {
5816         return isIdeographicImpl(codePoint);
5817     }
5818 
    /**
     * Native implementation backing {@link #isIdeographic(int)}, which returns
     * this method's result unchanged.
     *
     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the code point is reported as an ideograph;
     *          {@code false} otherwise.
     */
    static native boolean isIdeographicImpl(int codePoint);
5820 
5821     /**
5822      * Determines if the specified character is
5823      * permissible as the first character in a Java identifier.
5824      * <p>
5825      * A character may start a Java identifier if and only if
5826      * one of the following conditions is true:
5827      * <ul>
5828      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5829      * <li> {@link #getType(char) getType(ch)} returns {@code LETTER_NUMBER}
5830      * <li> {@code ch} is a currency symbol (such as {@code '$'})
5831      * <li> {@code ch} is a connecting punctuation character (such as {@code '_'}).
5832      * </ul>
5833      *
5834      * <p><b>Note:</b> This method cannot handle <a
5835      * href="#supplementary"> supplementary characters</a>. To support
5836      * all Unicode characters, including supplementary characters, use
5837      * the {@link #isJavaIdentifierStart(int)} method.
5838      *
5839      * @param   ch the character to be tested.
5840      * @return  {@code true} if the character may start a Java identifier;
5841      *          {@code false} otherwise.
5842      * @see     Character#isJavaIdentifierPart(char)
5843      * @see     Character#isLetter(char)
5844      * @see     Character#isUnicodeIdentifierStart(char)
5845      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5846      * @since   1.1
5847      */
isJavaIdentifierStart(char ch)5848     public static boolean isJavaIdentifierStart(char ch) {
5849         return isJavaIdentifierStart((int)ch);
5850     }
5851 
5852     /**
5853      * Determines if the character (Unicode code point) is
5854      * permissible as the first character in a Java identifier.
5855      * <p>
5856      * A character may start a Java identifier if and only if
5857      * one of the following conditions is true:
5858      * <ul>
5859      * <li> {@link #isLetter(int) isLetter(codePoint)}
5860      *      returns {@code true}
5861      * <li> {@link #getType(int) getType(codePoint)}
5862      *      returns {@code LETTER_NUMBER}
5863      * <li> the referenced character is a currency symbol (such as {@code '$'})
5864      * <li> the referenced character is a connecting punctuation character
5865      *      (such as {@code '_'}).
5866      * </ul>
5867      *
5868      * @param   codePoint the character (Unicode code point) to be tested.
5869      * @return  {@code true} if the character may start a Java identifier;
5870      *          {@code false} otherwise.
5871      * @see     Character#isJavaIdentifierPart(int)
5872      * @see     Character#isLetter(int)
5873      * @see     Character#isUnicodeIdentifierStart(int)
5874      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5875      * @since   1.5
5876      */
isJavaIdentifierStart(int codePoint)5877     public static boolean isJavaIdentifierStart(int codePoint) {
5878         // Use precomputed bitmasks to optimize the ASCII range.
5879         if (codePoint < 64) {
5880             return (codePoint == '$'); // There's only one character in this range.
5881         } else if (codePoint < 128) {
5882             return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
5883         }
5884         return ((1 << getType(codePoint))
5885                 & ((1 << UPPERCASE_LETTER)
5886                    | (1 << LOWERCASE_LETTER)
5887                    | (1  << TITLECASE_LETTER)
5888                    | (1  << MODIFIER_LETTER)
5889                    | (1  << OTHER_LETTER)
5890                    | (1  << CURRENCY_SYMBOL)
5891                    | (1  << CONNECTOR_PUNCTUATION)
5892                    | (1  << LETTER_NUMBER))) != 0;
5893     }
5894 
5895     /**
5896      * Determines if the specified character may be part of a Java
5897      * identifier as other than the first character.
5898      * <p>
5899      * A character may be part of a Java identifier if any of the following
5900      * are true:
5901      * <ul>
5902      * <li>  it is a letter
5903      * <li>  it is a currency symbol (such as {@code '$'})
5904      * <li>  it is a connecting punctuation character (such as {@code '_'})
5905      * <li>  it is a digit
5906      * <li>  it is a numeric letter (such as a Roman numeral character)
5907      * <li>  it is a combining mark
5908      * <li>  it is a non-spacing mark
5909      * <li> {@code isIdentifierIgnorable} returns
5910      * {@code true} for the character
5911      * </ul>
5912      *
5913      * <p><b>Note:</b> This method cannot handle <a
5914      * href="#supplementary"> supplementary characters</a>. To support
5915      * all Unicode characters, including supplementary characters, use
5916      * the {@link #isJavaIdentifierPart(int)} method.
5917      *
5918      * @param   ch      the character to be tested.
5919      * @return {@code true} if the character may be part of a
5920      *          Java identifier; {@code false} otherwise.
5921      * @see     Character#isIdentifierIgnorable(char)
5922      * @see     Character#isJavaIdentifierStart(char)
5923      * @see     Character#isLetterOrDigit(char)
5924      * @see     Character#isUnicodeIdentifierPart(char)
5925      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5926      * @since   1.1
5927      */
isJavaIdentifierPart(char ch)5928     public static boolean isJavaIdentifierPart(char ch) {
5929         return isJavaIdentifierPart((int)ch);
5930     }
5931 
5932     /**
5933      * Determines if the character (Unicode code point) may be part of a Java
5934      * identifier as other than the first character.
5935      * <p>
5936      * A character may be part of a Java identifier if any of the following
5937      * are true:
5938      * <ul>
5939      * <li>  it is a letter
5940      * <li>  it is a currency symbol (such as {@code '$'})
5941      * <li>  it is a connecting punctuation character (such as {@code '_'})
5942      * <li>  it is a digit
5943      * <li>  it is a numeric letter (such as a Roman numeral character)
5944      * <li>  it is a combining mark
5945      * <li>  it is a non-spacing mark
5946      * <li> {@link #isIdentifierIgnorable(int)
5947      * isIdentifierIgnorable(codePoint)} returns {@code true} for
5948      * the character
5949      * </ul>
5950      *
5951      * @param   codePoint the character (Unicode code point) to be tested.
5952      * @return {@code true} if the character may be part of a
5953      *          Java identifier; {@code false} otherwise.
5954      * @see     Character#isIdentifierIgnorable(int)
5955      * @see     Character#isJavaIdentifierStart(int)
5956      * @see     Character#isLetterOrDigit(int)
5957      * @see     Character#isUnicodeIdentifierPart(int)
5958      * @see     javax.lang.model.SourceVersion#isIdentifier(CharSequence)
5959      * @since   1.5
5960      */
isJavaIdentifierPart(int codePoint)5961     public static boolean isJavaIdentifierPart(int codePoint) {
5962         // Use precomputed bitmasks to optimize the ASCII range.
5963         if (codePoint < 64) {
5964             return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0;
5965         } else if (codePoint < 128) {
5966             return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0;
5967         }
5968         return ((1 << getType(codePoint))
5969                 & ((1 << UPPERCASE_LETTER)
5970                    | (1 << LOWERCASE_LETTER)
5971                    | (1 << TITLECASE_LETTER)
5972                    | (1 << MODIFIER_LETTER)
5973                    | (1 << OTHER_LETTER)
5974                    | (1 << CURRENCY_SYMBOL)
5975                    | (1 << CONNECTOR_PUNCTUATION)
5976                    | (1 << DECIMAL_DIGIT_NUMBER)
5977                    | (1 << LETTER_NUMBER)
5978                    | (1 << FORMAT)
5979                    | (1 << COMBINING_SPACING_MARK)
5980                    | (1 << NON_SPACING_MARK))) != 0
5981                 || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b)
5982                 || (codePoint >= 0x7f && codePoint <= 0x9f);
5983     }
5984 
5985     /**
5986      * Determines if the specified character is permissible as the
5987      * first character in a Unicode identifier.
5988      * <p>
5989      * A character may start a Unicode identifier if and only if
5990      * one of the following conditions is true:
5991      * <ul>
5992      * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
5993      * <li> {@link #getType(char) getType(ch)} returns
5994      *      {@code LETTER_NUMBER}.
5995      * </ul>
5996      *
5997      * <p><b>Note:</b> This method cannot handle <a
5998      * href="#supplementary"> supplementary characters</a>. To support
5999      * all Unicode characters, including supplementary characters, use
6000      * the {@link #isUnicodeIdentifierStart(int)} method.
6001      *
6002      * @param   ch      the character to be tested.
6003      * @return  {@code true} if the character may start a Unicode
6004      *          identifier; {@code false} otherwise.
6005      * @see     Character#isJavaIdentifierStart(char)
6006      * @see     Character#isLetter(char)
6007      * @see     Character#isUnicodeIdentifierPart(char)
6008      * @since   1.1
6009      */
isUnicodeIdentifierStart(char ch)6010     public static boolean isUnicodeIdentifierStart(char ch) {
6011         return isUnicodeIdentifierStart((int)ch);
6012     }
6013 
6014     /**
6015      * Determines if the specified character (Unicode code point) is permissible as the
6016      * first character in a Unicode identifier.
6017      * <p>
6018      * A character may start a Unicode identifier if and only if
6019      * one of the following conditions is true:
6020      * <ul>
6021      * <li> {@link #isLetter(int) isLetter(codePoint)}
6022      *      returns {@code true}
6023      * <li> {@link #getType(int) getType(codePoint)}
6024      *      returns {@code LETTER_NUMBER}.
6025      * </ul>
6026      * @param   codePoint the character (Unicode code point) to be tested.
6027      * @return  {@code true} if the character may start a Unicode
6028      *          identifier; {@code false} otherwise.
6029      * @see     Character#isJavaIdentifierStart(int)
6030      * @see     Character#isLetter(int)
6031      * @see     Character#isUnicodeIdentifierPart(int)
6032      * @since   1.5
6033      */
isUnicodeIdentifierStart(int codePoint)6034     public static boolean isUnicodeIdentifierStart(int codePoint) {
6035         return isUnicodeIdentifierStartImpl(codePoint);
6036     }
6037 
    /**
     * Native implementation backing {@link #isUnicodeIdentifierStart(int)},
     * which returns this method's result unchanged.
     *
     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the code point is reported as able to start a
     *          Unicode identifier; {@code false} otherwise.
     */
    static native boolean isUnicodeIdentifierStartImpl(int codePoint);
6039 
6040     /**
6041      * Determines if the specified character may be part of a Unicode
6042      * identifier as other than the first character.
6043      * <p>
6044      * A character may be part of a Unicode identifier if and only if
6045      * one of the following statements is true:
6046      * <ul>
6047      * <li>  it is a letter
6048      * <li>  it is a connecting punctuation character (such as {@code '_'})
6049      * <li>  it is a digit
6050      * <li>  it is a numeric letter (such as a Roman numeral character)
6051      * <li>  it is a combining mark
6052      * <li>  it is a non-spacing mark
6053      * <li> {@code isIdentifierIgnorable} returns
6054      * {@code true} for this character.
6055      * </ul>
6056      *
6057      * <p><b>Note:</b> This method cannot handle <a
6058      * href="#supplementary"> supplementary characters</a>. To support
6059      * all Unicode characters, including supplementary characters, use
6060      * the {@link #isUnicodeIdentifierPart(int)} method.
6061      *
6062      * @param   ch      the character to be tested.
6063      * @return  {@code true} if the character may be part of a
6064      *          Unicode identifier; {@code false} otherwise.
6065      * @see     Character#isIdentifierIgnorable(char)
6066      * @see     Character#isJavaIdentifierPart(char)
6067      * @see     Character#isLetterOrDigit(char)
6068      * @see     Character#isUnicodeIdentifierStart(char)
6069      * @since   1.1
6070      */
isUnicodeIdentifierPart(char ch)6071     public static boolean isUnicodeIdentifierPart(char ch) {
6072         return isUnicodeIdentifierPart((int)ch);
6073     }
6074 
6075     /**
6076      * Determines if the specified character (Unicode code point) may be part of a Unicode
6077      * identifier as other than the first character.
6078      * <p>
6079      * A character may be part of a Unicode identifier if and only if
6080      * one of the following statements is true:
6081      * <ul>
6082      * <li>  it is a letter
6083      * <li>  it is a connecting punctuation character (such as {@code '_'})
6084      * <li>  it is a digit
6085      * <li>  it is a numeric letter (such as a Roman numeral character)
6086      * <li>  it is a combining mark
6087      * <li>  it is a non-spacing mark
6088      * <li> {@code isIdentifierIgnorable} returns
6089      * {@code true} for this character.
6090      * </ul>
6091      * @param   codePoint the character (Unicode code point) to be tested.
6092      * @return  {@code true} if the character may be part of a
6093      *          Unicode identifier; {@code false} otherwise.
6094      * @see     Character#isIdentifierIgnorable(int)
6095      * @see     Character#isJavaIdentifierPart(int)
6096      * @see     Character#isLetterOrDigit(int)
6097      * @see     Character#isUnicodeIdentifierStart(int)
6098      * @since   1.5
6099      */
isUnicodeIdentifierPart(int codePoint)6100     public static boolean isUnicodeIdentifierPart(int codePoint) {
6101         return isUnicodeIdentifierPartImpl(codePoint);
6102     }
6103 
    /**
     * Native implementation backing {@link #isUnicodeIdentifierPart(int)},
     * which returns this method's result unchanged.
     *
     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the code point is reported as able to be part
     *          of a Unicode identifier; {@code false} otherwise.
     */
    static native boolean isUnicodeIdentifierPartImpl(int codePoint);
6105 
6106     /**
6107      * Determines if the specified character should be regarded as
6108      * an ignorable character in a Java identifier or a Unicode identifier.
6109      * <p>
6110      * The following Unicode characters are ignorable in a Java identifier
6111      * or a Unicode identifier:
6112      * <ul>
6113      * <li>ISO control characters that are not whitespace
6114      * <ul>
6115      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6116      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6117      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6118      * </ul>
6119      *
6120      * <li>all characters that have the {@code FORMAT} general
6121      * category value
6122      * </ul>
6123      *
6124      * <p><b>Note:</b> This method cannot handle <a
6125      * href="#supplementary"> supplementary characters</a>. To support
6126      * all Unicode characters, including supplementary characters, use
6127      * the {@link #isIdentifierIgnorable(int)} method.
6128      *
6129      * @param   ch      the character to be tested.
6130      * @return  {@code true} if the character is an ignorable control
6131      *          character that may be part of a Java or Unicode identifier;
6132      *           {@code false} otherwise.
6133      * @see     Character#isJavaIdentifierPart(char)
6134      * @see     Character#isUnicodeIdentifierPart(char)
6135      * @since   1.1
6136      */
isIdentifierIgnorable(char ch)6137     public static boolean isIdentifierIgnorable(char ch) {
6138         return isIdentifierIgnorable((int)ch);
6139     }
6140 
6141     /**
6142      * Determines if the specified character (Unicode code point) should be regarded as
6143      * an ignorable character in a Java identifier or a Unicode identifier.
6144      * <p>
6145      * The following Unicode characters are ignorable in a Java identifier
6146      * or a Unicode identifier:
6147      * <ul>
6148      * <li>ISO control characters that are not whitespace
6149      * <ul>
6150      * <li>{@code '\u005Cu0000'} through {@code '\u005Cu0008'}
6151      * <li>{@code '\u005Cu000E'} through {@code '\u005Cu001B'}
6152      * <li>{@code '\u005Cu007F'} through {@code '\u005Cu009F'}
6153      * </ul>
6154      *
6155      * <li>all characters that have the {@code FORMAT} general
6156      * category value
6157      * </ul>
6158      *
6159      * @param   codePoint the character (Unicode code point) to be tested.
6160      * @return  {@code true} if the character is an ignorable control
6161      *          character that may be part of a Java or Unicode identifier;
6162      *          {@code false} otherwise.
6163      * @see     Character#isJavaIdentifierPart(int)
6164      * @see     Character#isUnicodeIdentifierPart(int)
6165      * @since   1.5
6166      */
isIdentifierIgnorable(int codePoint)6167     public static boolean isIdentifierIgnorable(int codePoint) {
6168         return isIdentifierIgnorableImpl(codePoint);
6169     }
6170 
    /**
     * Native implementation backing {@link #isIdentifierIgnorable(int)}, which
     * returns this method's result unchanged.
     *
     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the code point is reported as ignorable in an
     *          identifier; {@code false} otherwise.
     */
    static native boolean isIdentifierIgnorableImpl(int codePoint);
6172 
6173     /**
6174      * Converts the character argument to lowercase using case
6175      * mapping information from the UnicodeData file.
6176      * <p>
6177      * Note that
6178      * {@code Character.isLowerCase(Character.toLowerCase(ch))}
6179      * does not always return {@code true} for some ranges of
6180      * characters, particularly those that are symbols or ideographs.
6181      *
6182      * <p>In general, {@link String#toLowerCase()} should be used to map
6183      * characters to lowercase. {@code String} case mapping methods
6184      * have several benefits over {@code Character} case mapping methods.
6185      * {@code String} case mapping methods can perform locale-sensitive
6186      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6187      * the {@code Character} case mapping methods cannot.
6188      *
6189      * <p><b>Note:</b> This method cannot handle <a
6190      * href="#supplementary"> supplementary characters</a>. To support
6191      * all Unicode characters, including supplementary characters, use
6192      * the {@link #toLowerCase(int)} method.
6193      *
6194      * @param   ch   the character to be converted.
6195      * @return  the lowercase equivalent of the character, if any;
6196      *          otherwise, the character itself.
6197      * @see     Character#isLowerCase(char)
6198      * @see     String#toLowerCase()
6199      */
toLowerCase(char ch)6200     public static char toLowerCase(char ch) {
6201         return (char)toLowerCase((int)ch);
6202     }
6203 
6204     /**
6205      * Converts the character (Unicode code point) argument to
6206      * lowercase using case mapping information from the UnicodeData
6207      * file.
6208      *
6209      * <p> Note that
6210      * {@code Character.isLowerCase(Character.toLowerCase(codePoint))}
6211      * does not always return {@code true} for some ranges of
6212      * characters, particularly those that are symbols or ideographs.
6213      *
6214      * <p>In general, {@link String#toLowerCase()} should be used to map
6215      * characters to lowercase. {@code String} case mapping methods
6216      * have several benefits over {@code Character} case mapping methods.
6217      * {@code String} case mapping methods can perform locale-sensitive
6218      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6219      * the {@code Character} case mapping methods cannot.
6220      *
6221      * @param   codePoint   the character (Unicode code point) to be converted.
6222      * @return  the lowercase equivalent of the character (Unicode code
6223      *          point), if any; otherwise, the character itself.
6224      * @see     Character#isLowerCase(int)
6225      * @see     String#toLowerCase()
6226      *
6227      * @since   1.5
6228      */
toLowerCase(int codePoint)6229     public static int toLowerCase(int codePoint) {
6230         return toLowerCaseImpl(codePoint);
6231     }
6232 
toLowerCaseImpl(int codePoint)6233     static native int toLowerCaseImpl(int codePoint);
6234 
6235     /**
6236      * Converts the character argument to uppercase using case mapping
6237      * information from the UnicodeData file.
6238      * <p>
6239      * Note that
6240      * {@code Character.isUpperCase(Character.toUpperCase(ch))}
6241      * does not always return {@code true} for some ranges of
6242      * characters, particularly those that are symbols or ideographs.
6243      *
6244      * <p>In general, {@link String#toUpperCase()} should be used to map
6245      * characters to uppercase. {@code String} case mapping methods
6246      * have several benefits over {@code Character} case mapping methods.
6247      * {@code String} case mapping methods can perform locale-sensitive
6248      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6249      * the {@code Character} case mapping methods cannot.
6250      *
6251      * <p><b>Note:</b> This method cannot handle <a
6252      * href="#supplementary"> supplementary characters</a>. To support
6253      * all Unicode characters, including supplementary characters, use
6254      * the {@link #toUpperCase(int)} method.
6255      *
6256      * @param   ch   the character to be converted.
6257      * @return  the uppercase equivalent of the character, if any;
6258      *          otherwise, the character itself.
6259      * @see     Character#isUpperCase(char)
6260      * @see     String#toUpperCase()
6261      */
toUpperCase(char ch)6262     public static char toUpperCase(char ch) {
6263         return (char)toUpperCase((int)ch);
6264     }
6265 
6266     /**
6267      * Converts the character (Unicode code point) argument to
6268      * uppercase using case mapping information from the UnicodeData
6269      * file.
6270      *
6271      * <p>Note that
6272      * {@code Character.isUpperCase(Character.toUpperCase(codePoint))}
6273      * does not always return {@code true} for some ranges of
6274      * characters, particularly those that are symbols or ideographs.
6275      *
6276      * <p>In general, {@link String#toUpperCase()} should be used to map
6277      * characters to uppercase. {@code String} case mapping methods
6278      * have several benefits over {@code Character} case mapping methods.
6279      * {@code String} case mapping methods can perform locale-sensitive
6280      * mappings, context-sensitive mappings, and 1:M character mappings, whereas
6281      * the {@code Character} case mapping methods cannot.
6282      *
6283      * @param   codePoint   the character (Unicode code point) to be converted.
6284      * @return  the uppercase equivalent of the character, if any;
6285      *          otherwise, the character itself.
6286      * @see     Character#isUpperCase(int)
6287      * @see     String#toUpperCase()
6288      *
6289      * @since   1.5
6290      */
toUpperCase(int codePoint)6291     public static int toUpperCase(int codePoint) {
6292         return toUpperCaseImpl(codePoint);
6293     }
6294 
toUpperCaseImpl(int codePoint)6295     static native int toUpperCaseImpl(int codePoint);
6296 
6297     /**
6298      * Converts the character argument to titlecase using case mapping
6299      * information from the UnicodeData file. If a character has no
6300      * explicit titlecase mapping and is not itself a titlecase char
6301      * according to UnicodeData, then the uppercase mapping is
6302      * returned as an equivalent titlecase mapping. If the
6303      * {@code char} argument is already a titlecase
6304      * {@code char}, the same {@code char} value will be
6305      * returned.
6306      * <p>
6307      * Note that
6308      * {@code Character.isTitleCase(Character.toTitleCase(ch))}
6309      * does not always return {@code true} for some ranges of
6310      * characters.
6311      *
6312      * <p><b>Note:</b> This method cannot handle <a
6313      * href="#supplementary"> supplementary characters</a>. To support
6314      * all Unicode characters, including supplementary characters, use
6315      * the {@link #toTitleCase(int)} method.
6316      *
6317      * @param   ch   the character to be converted.
6318      * @return  the titlecase equivalent of the character, if any;
6319      *          otherwise, the character itself.
6320      * @see     Character#isTitleCase(char)
6321      * @see     Character#toLowerCase(char)
6322      * @see     Character#toUpperCase(char)
6323      * @since   1.0.2
6324      */
toTitleCase(char ch)6325     public static char toTitleCase(char ch) {
6326         return (char)toTitleCase((int)ch);
6327     }
6328 
6329     /**
6330      * Converts the character (Unicode code point) argument to titlecase using case mapping
6331      * information from the UnicodeData file. If a character has no
6332      * explicit titlecase mapping and is not itself a titlecase char
6333      * according to UnicodeData, then the uppercase mapping is
6334      * returned as an equivalent titlecase mapping. If the
6335      * character argument is already a titlecase
6336      * character, the same character value will be
6337      * returned.
6338      *
6339      * <p>Note that
6340      * {@code Character.isTitleCase(Character.toTitleCase(codePoint))}
6341      * does not always return {@code true} for some ranges of
6342      * characters.
6343      *
6344      * @param   codePoint   the character (Unicode code point) to be converted.
6345      * @return  the titlecase equivalent of the character, if any;
6346      *          otherwise, the character itself.
6347      * @see     Character#isTitleCase(int)
6348      * @see     Character#toLowerCase(int)
6349      * @see     Character#toUpperCase(int)
6350      * @since   1.5
6351      */
toTitleCase(int codePoint)6352     public static int toTitleCase(int codePoint) {
6353         return toTitleCaseImpl(codePoint);
6354     }
6355 
toTitleCaseImpl(int codePoint)6356     static native int toTitleCaseImpl(int codePoint);
6357 
6358     /**
6359      * Returns the numeric value of the character {@code ch} in the
6360      * specified radix.
6361      * <p>
6362      * If the radix is not in the range {@code MIN_RADIX} &le;
6363      * {@code radix} &le; {@code MAX_RADIX} or if the
6364      * value of {@code ch} is not a valid digit in the specified
6365      * radix, {@code -1} is returned. A character is a valid digit
6366      * if at least one of the following is true:
6367      * <ul>
6368      * <li>The method {@code isDigit} is {@code true} of the character
6369      *     and the Unicode decimal digit value of the character (or its
6370      *     single-character decomposition) is less than the specified radix.
6371      *     In this case the decimal digit value is returned.
6372      * <li>The character is one of the uppercase Latin letters
6373      *     {@code 'A'} through {@code 'Z'} and its code is less than
6374      *     {@code radix + 'A' - 10}.
6375      *     In this case, {@code ch - 'A' + 10}
6376      *     is returned.
6377      * <li>The character is one of the lowercase Latin letters
6378      *     {@code 'a'} through {@code 'z'} and its code is less than
6379      *     {@code radix + 'a' - 10}.
6380      *     In this case, {@code ch - 'a' + 10}
6381      *     is returned.
6382      * <li>The character is one of the fullwidth uppercase Latin letters A
6383      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6384      *     and its code is less than
6385      *     {@code radix + '\u005CuFF21' - 10}.
6386      *     In this case, {@code ch - '\u005CuFF21' + 10}
6387      *     is returned.
6388      * <li>The character is one of the fullwidth lowercase Latin letters a
6389      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6390      *     and its code is less than
6391      *     {@code radix + '\u005CuFF41' - 10}.
6392      *     In this case, {@code ch - '\u005CuFF41' + 10}
6393      *     is returned.
6394      * </ul>
6395      *
6396      * <p><b>Note:</b> This method cannot handle <a
6397      * href="#supplementary"> supplementary characters</a>. To support
6398      * all Unicode characters, including supplementary characters, use
6399      * the {@link #digit(int, int)} method.
6400      *
6401      * @param   ch      the character to be converted.
6402      * @param   radix   the radix.
6403      * @return  the numeric value represented by the character in the
6404      *          specified radix.
6405      * @see     Character#forDigit(int, int)
6406      * @see     Character#isDigit(char)
6407      */
digit(char ch, int radix)6408     public static int digit(char ch, int radix) {
6409         return digit((int)ch, radix);
6410     }
6411 
6412     /**
6413      * Returns the numeric value of the specified character (Unicode
6414      * code point) in the specified radix.
6415      *
6416      * <p>If the radix is not in the range {@code MIN_RADIX} &le;
6417      * {@code radix} &le; {@code MAX_RADIX} or if the
6418      * character is not a valid digit in the specified
6419      * radix, {@code -1} is returned. A character is a valid digit
6420      * if at least one of the following is true:
6421      * <ul>
6422      * <li>The method {@link #isDigit(int) isDigit(codePoint)} is {@code true} of the character
6423      *     and the Unicode decimal digit value of the character (or its
6424      *     single-character decomposition) is less than the specified radix.
6425      *     In this case the decimal digit value is returned.
6426      * <li>The character is one of the uppercase Latin letters
6427      *     {@code 'A'} through {@code 'Z'} and its code is less than
6428      *     {@code radix + 'A' - 10}.
6429      *     In this case, {@code codePoint - 'A' + 10}
6430      *     is returned.
6431      * <li>The character is one of the lowercase Latin letters
6432      *     {@code 'a'} through {@code 'z'} and its code is less than
6433      *     {@code radix + 'a' - 10}.
6434      *     In this case, {@code codePoint - 'a' + 10}
6435      *     is returned.
6436      * <li>The character is one of the fullwidth uppercase Latin letters A
6437      *     ({@code '\u005CuFF21'}) through Z ({@code '\u005CuFF3A'})
6438      *     and its code is less than
6439      *     {@code radix + '\u005CuFF21' - 10}.
6440      *     In this case,
6441      *     {@code codePoint - '\u005CuFF21' + 10}
6442      *     is returned.
6443      * <li>The character is one of the fullwidth lowercase Latin letters a
6444      *     ({@code '\u005CuFF41'}) through z ({@code '\u005CuFF5A'})
6445      *     and its code is less than
6446      *     {@code radix + '\u005CuFF41' - 10}.
6447      *     In this case,
6448      *     {@code codePoint - '\u005CuFF41' + 10}
6449      *     is returned.
6450      * </ul>
6451      *
6452      * @param   codePoint the character (Unicode code point) to be converted.
6453      * @param   radix   the radix.
6454      * @return  the numeric value represented by the character in the
6455      *          specified radix.
6456      * @see     Character#forDigit(int, int)
6457      * @see     Character#isDigit(int)
6458      * @since   1.5
6459      */
digit(int codePoint, int radix)6460     public static int digit(int codePoint, int radix) {
6461         if (radix < MIN_RADIX || radix > MAX_RADIX) {
6462             return -1;
6463         }
6464         if (codePoint < 128) {
6465             // Optimized for ASCII
6466             int result = -1;
6467             if ('0' <= codePoint && codePoint <= '9') {
6468                 result = codePoint - '0';
6469             } else if ('a' <= codePoint && codePoint <= 'z') {
6470                 result = 10 + (codePoint - 'a');
6471             } else if ('A' <= codePoint && codePoint <= 'Z') {
6472                 result = 10 + (codePoint - 'A');
6473             }
6474             return result < radix ? result : -1;
6475         }
6476         return digitImpl(codePoint, radix);
6477     }
6478 
digitImpl(int codePoint, int radix)6479     native static int digitImpl(int codePoint, int radix);
6480 
6481     /**
6482      * Returns the {@code int} value that the specified Unicode
6483      * character represents. For example, the character
6484      * {@code '\u005Cu216C'} (the roman numeral fifty) will return
6485      * an int with a value of 50.
6486      * <p>
6487      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6488      * {@code '\u005Cu005A'}), lowercase
6489      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6490      * full width variant ({@code '\u005CuFF21'} through
6491      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6492      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6493      * through 35. This is independent of the Unicode specification,
6494      * which does not assign numeric values to these {@code char}
6495      * values.
6496      * <p>
6497      * If the character does not have a numeric value, then -1 is returned.
6498      * If the character has a numeric value that cannot be represented as a
6499      * nonnegative integer (for example, a fractional value), then -2
6500      * is returned.
6501      *
6502      * <p><b>Note:</b> This method cannot handle <a
6503      * href="#supplementary"> supplementary characters</a>. To support
6504      * all Unicode characters, including supplementary characters, use
6505      * the {@link #getNumericValue(int)} method.
6506      *
6507      * @param   ch      the character to be converted.
6508      * @return  the numeric value of the character, as a nonnegative {@code int}
6509      *           value; -2 if the character has a numeric value that is not a
6510      *          nonnegative integer; -1 if the character has no numeric value.
6511      * @see     Character#forDigit(int, int)
6512      * @see     Character#isDigit(char)
6513      * @since   1.1
6514      */
getNumericValue(char ch)6515     public static int getNumericValue(char ch) {
6516         return getNumericValue((int)ch);
6517     }
6518 
6519     /**
6520      * Returns the {@code int} value that the specified
6521      * character (Unicode code point) represents. For example, the character
6522      * {@code '\u005Cu216C'} (the Roman numeral fifty) will return
6523      * an {@code int} with a value of 50.
6524      * <p>
6525      * The letters A-Z in their uppercase ({@code '\u005Cu0041'} through
6526      * {@code '\u005Cu005A'}), lowercase
6527      * ({@code '\u005Cu0061'} through {@code '\u005Cu007A'}), and
6528      * full width variant ({@code '\u005CuFF21'} through
6529      * {@code '\u005CuFF3A'} and {@code '\u005CuFF41'} through
6530      * {@code '\u005CuFF5A'}) forms have numeric values from 10
6531      * through 35. This is independent of the Unicode specification,
6532      * which does not assign numeric values to these {@code char}
6533      * values.
6534      * <p>
6535      * If the character does not have a numeric value, then -1 is returned.
6536      * If the character has a numeric value that cannot be represented as a
6537      * nonnegative integer (for example, a fractional value), then -2
6538      * is returned.
6539      *
6540      * @param   codePoint the character (Unicode code point) to be converted.
6541      * @return  the numeric value of the character, as a nonnegative {@code int}
6542      *          value; -2 if the character has a numeric value that is not a
6543      *          nonnegative integer; -1 if the character has no numeric value.
6544      * @see     Character#forDigit(int, int)
6545      * @see     Character#isDigit(int)
6546      * @since   1.5
6547      */
getNumericValue(int codePoint)6548     public static int getNumericValue(int codePoint) {
6549         // This is both an optimization and papers over differences between Java and ICU.
6550         if (codePoint < 128) {
6551             if (codePoint >= '0' && codePoint <= '9') {
6552                 return codePoint - '0';
6553             }
6554             if (codePoint >= 'a' && codePoint <= 'z') {
6555                 return codePoint - ('a' - 10);
6556             }
6557             if (codePoint >= 'A' && codePoint <= 'Z') {
6558                 return codePoint - ('A' - 10);
6559             }
6560             return -1;
6561         }
6562         // Full-width uppercase A-Z.
6563         if (codePoint >= 0xff21 && codePoint <= 0xff3a) {
6564             return codePoint - 0xff17;
6565         }
6566         // Full-width lowercase a-z.
6567         if (codePoint >= 0xff41 && codePoint <= 0xff5a) {
6568             return codePoint - 0xff37;
6569         }
6570         return getNumericValueImpl(codePoint);
6571     }
6572 
getNumericValueImpl(int codePoint)6573     native static int getNumericValueImpl(int codePoint);
6574 
6575     /**
6576      * Determines if the specified character is ISO-LATIN-1 white space.
6577      * This method returns {@code true} for the following five
6578      * characters only:
6579      * <table>
6580      * <tr><td>{@code '\t'}</td>            <td>{@code U+0009}</td>
6581      *     <td>{@code HORIZONTAL TABULATION}</td></tr>
6582      * <tr><td>{@code '\n'}</td>            <td>{@code U+000A}</td>
6583      *     <td>{@code NEW LINE}</td></tr>
6584      * <tr><td>{@code '\f'}</td>            <td>{@code U+000C}</td>
6585      *     <td>{@code FORM FEED}</td></tr>
6586      * <tr><td>{@code '\r'}</td>            <td>{@code U+000D}</td>
6587      *     <td>{@code CARRIAGE RETURN}</td></tr>
6588      * <tr><td>{@code '&nbsp;'}</td>  <td>{@code U+0020}</td>
6589      *     <td>{@code SPACE}</td></tr>
6590      * </table>
6591      *
6592      * @param      ch   the character to be tested.
6593      * @return     {@code true} if the character is ISO-LATIN-1 white
6594      *             space; {@code false} otherwise.
6595      * @see        Character#isSpaceChar(char)
6596      * @see        Character#isWhitespace(char)
6597      * @deprecated Replaced by isWhitespace(char).
6598      */
6599     @Deprecated
isSpace(char ch)6600     public static boolean isSpace(char ch) {
6601         return (ch <= 0x0020) &&
6602             (((((1L << 0x0009) |
6603             (1L << 0x000A) |
6604             (1L << 0x000C) |
6605             (1L << 0x000D) |
6606             (1L << 0x0020)) >> ch) & 1L) != 0);
6607     }
6608 
6609 
6610     /**
6611      * Determines if the specified character is a Unicode space character.
6612      * A character is considered to be a space character if and only if
6613      * it is specified to be a space character by the Unicode Standard. This
6614      * method returns true if the character's general category type is any of
6615      * the following:
6616      * <ul>
6617      * <li> {@code SPACE_SEPARATOR}
6618      * <li> {@code LINE_SEPARATOR}
6619      * <li> {@code PARAGRAPH_SEPARATOR}
6620      * </ul>
6621      *
6622      * <p><b>Note:</b> This method cannot handle <a
6623      * href="#supplementary"> supplementary characters</a>. To support
6624      * all Unicode characters, including supplementary characters, use
6625      * the {@link #isSpaceChar(int)} method.
6626      *
6627      * @param   ch      the character to be tested.
6628      * @return  {@code true} if the character is a space character;
6629      *          {@code false} otherwise.
6630      * @see     Character#isWhitespace(char)
6631      * @since   1.1
6632      */
isSpaceChar(char ch)6633     public static boolean isSpaceChar(char ch) {
6634         return isSpaceChar((int)ch);
6635     }
6636 
6637     /**
6638      * Determines if the specified character (Unicode code point) is a
6639      * Unicode space character.  A character is considered to be a
6640      * space character if and only if it is specified to be a space
6641      * character by the Unicode Standard. This method returns true if
6642      * the character's general category type is any of the following:
6643      *
6644      * <ul>
6645      * <li> {@link #SPACE_SEPARATOR}
6646      * <li> {@link #LINE_SEPARATOR}
6647      * <li> {@link #PARAGRAPH_SEPARATOR}
6648      * </ul>
6649      *
6650      * @param   codePoint the character (Unicode code point) to be tested.
6651      * @return  {@code true} if the character is a space character;
6652      *          {@code false} otherwise.
6653      * @see     Character#isWhitespace(int)
6654      * @since   1.5
6655      */
isSpaceChar(int codePoint)6656     public static boolean isSpaceChar(int codePoint) {
6657         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6658         // SPACE or NO-BREAK SPACE?
6659         if (codePoint == 0x20 || codePoint == 0xa0) {
6660             return true;
6661         }
6662         if (codePoint < 0x1000) {
6663             return false;
6664         }
6665         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6666         if (codePoint == 0x1680 || codePoint == 0x180e) {
6667             return true;
6668         }
6669         if (codePoint < 0x2000) {
6670             return false;
6671         }
6672         if (codePoint <= 0xffff) {
6673             // Other whitespace from General Punctuation...
6674             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f ||
6675                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6676         }
6677         // Let icu4c worry about non-BMP code points.
6678         return isSpaceCharImpl(codePoint);
6679     }
6680 
isSpaceCharImpl(int codePoint)6681     static native boolean isSpaceCharImpl(int codePoint);
6682 
6683     /**
6684      * Determines if the specified character is white space according to Java.
6685      * A character is a Java whitespace character if and only if it satisfies
6686      * one of the following criteria:
6687      * <ul>
6688      * <li> It is a Unicode space character ({@code SPACE_SEPARATOR},
6689      *      {@code LINE_SEPARATOR}, or {@code PARAGRAPH_SEPARATOR})
6690      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6691      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6692      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6693      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6694      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6695      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6696      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6697      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6698      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6699      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6700      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6701      * </ul>
6702      *
6703      * <p><b>Note:</b> This method cannot handle <a
6704      * href="#supplementary"> supplementary characters</a>. To support
6705      * all Unicode characters, including supplementary characters, use
6706      * the {@link #isWhitespace(int)} method.
6707      *
6708      * @param   ch the character to be tested.
6709      * @return  {@code true} if the character is a Java whitespace
6710      *          character; {@code false} otherwise.
6711      * @see     Character#isSpaceChar(char)
6712      * @since   1.1
6713      */
isWhitespace(char ch)6714     public static boolean isWhitespace(char ch) {
6715         return isWhitespace((int)ch);
6716     }
6717 
6718     /**
6719      * Determines if the specified character (Unicode code point) is
6720      * white space according to Java.  A character is a Java
6721      * whitespace character if and only if it satisfies one of the
6722      * following criteria:
6723      * <ul>
6724      * <li> It is a Unicode space character ({@link #SPACE_SEPARATOR},
6725      *      {@link #LINE_SEPARATOR}, or {@link #PARAGRAPH_SEPARATOR})
6726      *      but is not also a non-breaking space ({@code '\u005Cu00A0'},
6727      *      {@code '\u005Cu2007'}, {@code '\u005Cu202F'}).
6728      * <li> It is {@code '\u005Ct'}, U+0009 HORIZONTAL TABULATION.
6729      * <li> It is {@code '\u005Cn'}, U+000A LINE FEED.
6730      * <li> It is {@code '\u005Cu000B'}, U+000B VERTICAL TABULATION.
6731      * <li> It is {@code '\u005Cf'}, U+000C FORM FEED.
6732      * <li> It is {@code '\u005Cr'}, U+000D CARRIAGE RETURN.
6733      * <li> It is {@code '\u005Cu001C'}, U+001C FILE SEPARATOR.
6734      * <li> It is {@code '\u005Cu001D'}, U+001D GROUP SEPARATOR.
6735      * <li> It is {@code '\u005Cu001E'}, U+001E RECORD SEPARATOR.
6736      * <li> It is {@code '\u005Cu001F'}, U+001F UNIT SEPARATOR.
6737      * </ul>
6738      * <p>
6739      *
6740      * @param   codePoint the character (Unicode code point) to be tested.
6741      * @return  {@code true} if the character is a Java whitespace
6742      *          character; {@code false} otherwise.
6743      * @see     Character#isSpaceChar(int)
6744      * @since   1.5
6745      */
isWhitespace(int codePoint)6746     public static boolean isWhitespace(int codePoint) {
6747         // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that.
6748         // Any ASCII whitespace character?
6749         if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) {
6750             return true;
6751         }
6752         if (codePoint < 0x1000) {
6753             return false;
6754         }
6755         // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR?
6756         if (codePoint == 0x1680 || codePoint == 0x180e) {
6757             return true;
6758         }
6759         if (codePoint < 0x2000) {
6760             return false;
6761         }
6762         // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE).
6763         if (codePoint == 0x2007 || codePoint == 0x202f) {
6764             return false;
6765         }
6766         if (codePoint <= 0xffff) {
6767             // Other whitespace from General Punctuation...
6768             return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f ||
6769                 codePoint == 0x3000; // ...or CJK Symbols and Punctuation?
6770         }
6771         // Let icu4c worry about non-BMP code points.
6772         return isWhitespaceImpl(codePoint);
6773     }
6774 
isWhitespaceImpl(int codePoint)6775     native static boolean isWhitespaceImpl(int codePoint);
6776 
6777     /**
6778      * Determines if the specified character is an ISO control
6779      * character.  A character is considered to be an ISO control
6780      * character if its code is in the range {@code '\u005Cu0000'}
6781      * through {@code '\u005Cu001F'} or in the range
6782      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6783      *
6784      * <p><b>Note:</b> This method cannot handle <a
6785      * href="#supplementary"> supplementary characters</a>. To support
6786      * all Unicode characters, including supplementary characters, use
6787      * the {@link #isISOControl(int)} method.
6788      *
6789      * @param   ch      the character to be tested.
6790      * @return  {@code true} if the character is an ISO control character;
6791      *          {@code false} otherwise.
6792      *
6793      * @see     Character#isSpaceChar(char)
6794      * @see     Character#isWhitespace(char)
6795      * @since   1.1
6796      */
isISOControl(char ch)6797     public static boolean isISOControl(char ch) {
6798         return isISOControl((int)ch);
6799     }
6800 
6801     /**
6802      * Determines if the referenced character (Unicode code point) is an ISO control
6803      * character.  A character is considered to be an ISO control
6804      * character if its code is in the range {@code '\u005Cu0000'}
6805      * through {@code '\u005Cu001F'} or in the range
6806      * {@code '\u005Cu007F'} through {@code '\u005Cu009F'}.
6807      *
6808      * @param   codePoint the character (Unicode code point) to be tested.
6809      * @return  {@code true} if the character is an ISO control character;
6810      *          {@code false} otherwise.
6811      * @see     Character#isSpaceChar(int)
6812      * @see     Character#isWhitespace(int)
6813      * @since   1.5
6814      */
isISOControl(int codePoint)6815     public static boolean isISOControl(int codePoint) {
6816         // Optimized form of:
6817         //     (codePoint >= 0x00 && codePoint <= 0x1F) ||
6818         //     (codePoint >= 0x7F && codePoint <= 0x9F);
6819         return codePoint <= 0x9F &&
6820             (codePoint >= 0x7F || (codePoint >>> 5 == 0));
6821     }
6822 
6823     /**
6824      * Returns a value indicating a character's general category.
6825      *
6826      * <p><b>Note:</b> This method cannot handle <a
6827      * href="#supplementary"> supplementary characters</a>. To support
6828      * all Unicode characters, including supplementary characters, use
6829      * the {@link #getType(int)} method.
6830      *
6831      * @param   ch      the character to be tested.
6832      * @return  a value of type {@code int} representing the
6833      *          character's general category.
6834      * @see     Character#COMBINING_SPACING_MARK
6835      * @see     Character#CONNECTOR_PUNCTUATION
6836      * @see     Character#CONTROL
6837      * @see     Character#CURRENCY_SYMBOL
6838      * @see     Character#DASH_PUNCTUATION
6839      * @see     Character#DECIMAL_DIGIT_NUMBER
6840      * @see     Character#ENCLOSING_MARK
6841      * @see     Character#END_PUNCTUATION
6842      * @see     Character#FINAL_QUOTE_PUNCTUATION
6843      * @see     Character#FORMAT
6844      * @see     Character#INITIAL_QUOTE_PUNCTUATION
6845      * @see     Character#LETTER_NUMBER
6846      * @see     Character#LINE_SEPARATOR
6847      * @see     Character#LOWERCASE_LETTER
6848      * @see     Character#MATH_SYMBOL
6849      * @see     Character#MODIFIER_LETTER
6850      * @see     Character#MODIFIER_SYMBOL
6851      * @see     Character#NON_SPACING_MARK
6852      * @see     Character#OTHER_LETTER
6853      * @see     Character#OTHER_NUMBER
6854      * @see     Character#OTHER_PUNCTUATION
6855      * @see     Character#OTHER_SYMBOL
6856      * @see     Character#PARAGRAPH_SEPARATOR
6857      * @see     Character#PRIVATE_USE
6858      * @see     Character#SPACE_SEPARATOR
6859      * @see     Character#START_PUNCTUATION
6860      * @see     Character#SURROGATE
6861      * @see     Character#TITLECASE_LETTER
6862      * @see     Character#UNASSIGNED
6863      * @see     Character#UPPERCASE_LETTER
6864      * @since   1.1
6865      */
getType(char ch)6866     public static int getType(char ch) {
6867         return getType((int)ch);
6868     }
6869 
6870     /**
6871      * Returns a value indicating a character's general category.
6872      *
6873      * @param   codePoint the character (Unicode code point) to be tested.
6874      * @return  a value of type {@code int} representing the
6875      *          character's general category.
6876      * @see     Character#COMBINING_SPACING_MARK COMBINING_SPACING_MARK
6877      * @see     Character#CONNECTOR_PUNCTUATION CONNECTOR_PUNCTUATION
6878      * @see     Character#CONTROL CONTROL
6879      * @see     Character#CURRENCY_SYMBOL CURRENCY_SYMBOL
6880      * @see     Character#DASH_PUNCTUATION DASH_PUNCTUATION
6881      * @see     Character#DECIMAL_DIGIT_NUMBER DECIMAL_DIGIT_NUMBER
6882      * @see     Character#ENCLOSING_MARK ENCLOSING_MARK
6883      * @see     Character#END_PUNCTUATION END_PUNCTUATION
6884      * @see     Character#FINAL_QUOTE_PUNCTUATION FINAL_QUOTE_PUNCTUATION
6885      * @see     Character#FORMAT FORMAT
6886      * @see     Character#INITIAL_QUOTE_PUNCTUATION INITIAL_QUOTE_PUNCTUATION
6887      * @see     Character#LETTER_NUMBER LETTER_NUMBER
6888      * @see     Character#LINE_SEPARATOR LINE_SEPARATOR
6889      * @see     Character#LOWERCASE_LETTER LOWERCASE_LETTER
6890      * @see     Character#MATH_SYMBOL MATH_SYMBOL
6891      * @see     Character#MODIFIER_LETTER MODIFIER_LETTER
6892      * @see     Character#MODIFIER_SYMBOL MODIFIER_SYMBOL
6893      * @see     Character#NON_SPACING_MARK NON_SPACING_MARK
6894      * @see     Character#OTHER_LETTER OTHER_LETTER
6895      * @see     Character#OTHER_NUMBER OTHER_NUMBER
6896      * @see     Character#OTHER_PUNCTUATION OTHER_PUNCTUATION
6897      * @see     Character#OTHER_SYMBOL OTHER_SYMBOL
6898      * @see     Character#PARAGRAPH_SEPARATOR PARAGRAPH_SEPARATOR
6899      * @see     Character#PRIVATE_USE PRIVATE_USE
6900      * @see     Character#SPACE_SEPARATOR SPACE_SEPARATOR
6901      * @see     Character#START_PUNCTUATION START_PUNCTUATION
6902      * @see     Character#SURROGATE SURROGATE
6903      * @see     Character#TITLECASE_LETTER TITLECASE_LETTER
6904      * @see     Character#UNASSIGNED UNASSIGNED
6905      * @see     Character#UPPERCASE_LETTER UPPERCASE_LETTER
6906      * @since   1.5
6907      */
getType(int codePoint)6908     public static int getType(int codePoint) {
6909         int type = getTypeImpl(codePoint);
6910         // The type values returned by ICU are not RI-compatible. The RI skips the value 17.
6911         if (type <= Character.FORMAT) {
6912             return type;
6913         }
6914         return (type + 1);
6915     }
6916 
    // Native lookup of the raw general-category value for codePoint. The result
    // is not RI-compatible as-is; getType(int) adjusts it before returning.
    static native int getTypeImpl(int codePoint);
6918 
6919     /**
6920      * Determines the character representation for a specific digit in
6921      * the specified radix. If the value of {@code radix} is not a
6922      * valid radix, or the value of {@code digit} is not a valid
6923      * digit in the specified radix, the null character
6924      * ({@code '\u005Cu0000'}) is returned.
6925      * <p>
6926      * The {@code radix} argument is valid if it is greater than or
6927      * equal to {@code MIN_RADIX} and less than or equal to
6928      * {@code MAX_RADIX}. The {@code digit} argument is valid if
6929      * {@code 0 <= digit < radix}.
6930      * <p>
6931      * If the digit is less than 10, then
6932      * {@code '0' + digit} is returned. Otherwise, the value
6933      * {@code 'a' + digit - 10} is returned.
6934      *
6935      * @param   digit   the number to convert to a character.
6936      * @param   radix   the radix.
6937      * @return  the {@code char} representation of the specified digit
6938      *          in the specified radix.
6939      * @see     Character#MIN_RADIX
6940      * @see     Character#MAX_RADIX
6941      * @see     Character#digit(char, int)
6942      */
forDigit(int digit, int radix)6943     public static char forDigit(int digit, int radix) {
6944         if ((digit >= radix) || (digit < 0)) {
6945             return '\0';
6946         }
6947         if ((radix < Character.MIN_RADIX) || (radix > Character.MAX_RADIX)) {
6948             return '\0';
6949         }
6950         if (digit < 10) {
6951             return (char)('0' + digit);
6952         }
6953         return (char)('a' - 10 + digit);
6954     }
6955 
6956     /**
6957      * Returns the Unicode directionality property for the given
6958      * character.  Character directionality is used to calculate the
6959      * visual ordering of text. The directionality value of undefined
6960      * {@code char} values is {@code DIRECTIONALITY_UNDEFINED}.
6961      *
6962      * <p><b>Note:</b> This method cannot handle <a
6963      * href="#supplementary"> supplementary characters</a>. To support
6964      * all Unicode characters, including supplementary characters, use
6965      * the {@link #getDirectionality(int)} method.
6966      *
6967      * @param  ch {@code char} for which the directionality property
6968      *            is requested.
6969      * @return the directionality property of the {@code char} value.
6970      *
6971      * @see Character#DIRECTIONALITY_UNDEFINED
6972      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT
6973      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT
6974      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
6975      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER
6976      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
6977      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
6978      * @see Character#DIRECTIONALITY_ARABIC_NUMBER
6979      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
6980      * @see Character#DIRECTIONALITY_NONSPACING_MARK
6981      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL
6982      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR
6983      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR
6984      * @see Character#DIRECTIONALITY_WHITESPACE
6985      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS
6986      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
6987      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
6988      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
6989      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
6990      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
6991      * @since 1.4
6992      */
getDirectionality(char ch)6993     public static byte getDirectionality(char ch) {
6994         return getDirectionality((int)ch);
6995     }
6996 
6997     /**
6998      * Returns the Unicode directionality property for the given
6999      * character (Unicode code point).  Character directionality is
7000      * used to calculate the visual ordering of text. The
7001      * directionality value of undefined character is {@link
7002      * #DIRECTIONALITY_UNDEFINED}.
7003      *
7004      * @param   codePoint the character (Unicode code point) for which
7005      *          the directionality property is requested.
7006      * @return the directionality property of the character.
7007      *
7008      * @see Character#DIRECTIONALITY_UNDEFINED DIRECTIONALITY_UNDEFINED
7009      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT DIRECTIONALITY_LEFT_TO_RIGHT
7010      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT DIRECTIONALITY_RIGHT_TO_LEFT
7011      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC
7012      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER DIRECTIONALITY_EUROPEAN_NUMBER
7013      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
7014      * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
7015      * @see Character#DIRECTIONALITY_ARABIC_NUMBER DIRECTIONALITY_ARABIC_NUMBER
7016      * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
7017      * @see Character#DIRECTIONALITY_NONSPACING_MARK DIRECTIONALITY_NONSPACING_MARK
7018      * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL DIRECTIONALITY_BOUNDARY_NEUTRAL
7019      * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR DIRECTIONALITY_PARAGRAPH_SEPARATOR
7020      * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR DIRECTIONALITY_SEGMENT_SEPARATOR
7021      * @see Character#DIRECTIONALITY_WHITESPACE DIRECTIONALITY_WHITESPACE
7022      * @see Character#DIRECTIONALITY_OTHER_NEUTRALS DIRECTIONALITY_OTHER_NEUTRALS
7023      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING
7024      * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE
7025      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING
7026      * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE
7027      * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT DIRECTIONALITY_POP_DIRECTIONAL_FORMAT
7028      * @since    1.5
7029      */
getDirectionality(int codePoint)7030     public static byte getDirectionality(int codePoint) {
7031         if (getType(codePoint) == Character.UNASSIGNED) {
7032             return Character.DIRECTIONALITY_UNDEFINED;
7033         }
7034 
7035         byte directionality = getDirectionalityImpl(codePoint);
7036         if (directionality >= 0 && directionality < DIRECTIONALITY.length) {
7037             return DIRECTIONALITY[directionality];
7038         }
7039         return Character.DIRECTIONALITY_UNDEFINED;
7040     }
7041 
getDirectionalityImpl(int codePoint)7042     native static byte getDirectionalityImpl(int codePoint);
7043     /**
7044      * Determines whether the character is mirrored according to the
7045      * Unicode specification.  Mirrored characters should have their
7046      * glyphs horizontally mirrored when displayed in text that is
7047      * right-to-left.  For example, {@code '\u005Cu0028'} LEFT
7048      * PARENTHESIS is semantically defined to be an <i>opening
7049      * parenthesis</i>.  This will appear as a "(" in text that is
7050      * left-to-right but as a ")" in text that is right-to-left.
7051      *
7052      * <p><b>Note:</b> This method cannot handle <a
7053      * href="#supplementary"> supplementary characters</a>. To support
7054      * all Unicode characters, including supplementary characters, use
7055      * the {@link #isMirrored(int)} method.
7056      *
7057      * @param  ch {@code char} for which the mirrored property is requested
7058      * @return {@code true} if the char is mirrored, {@code false}
7059      *         if the {@code char} is not mirrored or is not defined.
7060      * @since 1.4
7061      */
isMirrored(char ch)7062     public static boolean isMirrored(char ch) {
7063         return isMirrored((int)ch);
7064     }
7065 
7066     /**
7067      * Determines whether the specified character (Unicode code point)
7068      * is mirrored according to the Unicode specification.  Mirrored
7069      * characters should have their glyphs horizontally mirrored when
7070      * displayed in text that is right-to-left.  For example,
7071      * {@code '\u005Cu0028'} LEFT PARENTHESIS is semantically
7072      * defined to be an <i>opening parenthesis</i>.  This will appear
7073      * as a "(" in text that is left-to-right but as a ")" in text
7074      * that is right-to-left.
7075      *
7076      * @param   codePoint the character (Unicode code point) to be tested.
7077      * @return  {@code true} if the character is mirrored, {@code false}
7078      *          if the character is not mirrored or is not defined.
7079      * @since   1.5
7080      */
isMirrored(int codePoint)7081     public static boolean isMirrored(int codePoint) {
7082         return isMirroredImpl(codePoint);
7083     }
7084 
isMirroredImpl(int codePoint)7085     native static boolean isMirroredImpl(int codePoint);
7086     /**
7087      * Compares two {@code Character} objects numerically.
7088      *
7089      * @param   anotherCharacter   the {@code Character} to be compared.
     *
7091      * @return  the value {@code 0} if the argument {@code Character}
7092      *          is equal to this {@code Character}; a value less than
7093      *          {@code 0} if this {@code Character} is numerically less
7094      *          than the {@code Character} argument; and a value greater than
7095      *          {@code 0} if this {@code Character} is numerically greater
7096      *          than the {@code Character} argument (unsigned comparison).
7097      *          Note that this is strictly a numerical comparison; it is not
7098      *          locale-dependent.
7099      * @since   1.2
7100      */
compareTo(Character anotherCharacter)7101     public int compareTo(Character anotherCharacter) {
7102         return compare(this.value, anotherCharacter.value);
7103     }
7104 
7105     /**
7106      * Compares two {@code char} values numerically.
7107      * The value returned is identical to what would be returned by:
7108      * <pre>
7109      *    Character.valueOf(x).compareTo(Character.valueOf(y))
7110      * </pre>
7111      *
7112      * @param  x the first {@code char} to compare
7113      * @param  y the second {@code char} to compare
7114      * @return the value {@code 0} if {@code x == y};
7115      *         a value less than {@code 0} if {@code x < y}; and
7116      *         a value greater than {@code 0} if {@code x > y}
7117      * @since 1.7
7118      */
compare(char x, char y)7119     public static int compare(char x, char y) {
7120         return x - y;
7121     }
7122 
    /**
     * The number of bits used to represent a {@code char} value in unsigned
     * binary form, constant {@code 16}.
     *
     * @since 1.5
     */
    public static final int SIZE = 16;

    /**
     * The number of bytes used to represent a {@code char} value in unsigned
     * binary form, computed as {@link #SIZE} divided by {@link Byte#SIZE}.
     *
     * @since 1.8
     */
    public static final int BYTES = SIZE / Byte.SIZE;
7138 
7139     /**
     * Returns the value obtained by reversing the order of the bytes in the
     * specified {@code char} value.
     *
     * @param ch the {@code char} whose byte order is to be reversed.
     * @return the value obtained by reversing (or, equivalently, swapping)
     *     the bytes in the specified {@code char} value.
7145      * @since 1.5
7146      */
reverseBytes(char ch)7147     public static char reverseBytes(char ch) {
7148         return (char) (((ch & 0xFF00) >> 8) | (ch << 8));
7149     }
7150 
7151     /**
7152      * Returns the Unicode name of the specified character
7153      * {@code codePoint}, or null if the code point is
7154      * {@link #UNASSIGNED unassigned}.
7155      * <p>
7156      * Note: if the specified character is not assigned a name by
7157      * the <i>UnicodeData</i> file (part of the Unicode Character
7158      * Database maintained by the Unicode Consortium), the returned
     * name is the same as the result of the expression:
7160      *
7161      * <blockquote>{@code
7162      *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
7163      *     + " "
7164      *     + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7165      *
7166      * }</blockquote>
7167      *
7168      * @param  codePoint the character (Unicode code point)
7169      *
7170      * @return the Unicode name of the specified character, or null if
7171      *         the code point is unassigned.
7172      *
7173      * @exception IllegalArgumentException if the specified
7174      *            {@code codePoint} is not a valid Unicode
7175      *            code point.
7176      *
7177      * @since 1.7
7178      */
getName(int codePoint)7179     public static String getName(int codePoint) {
7180         if (!isValidCodePoint(codePoint)) {
7181             throw new IllegalArgumentException();
7182         }
7183         String name = getNameImpl(codePoint);
7184         if (name != null)
7185             return name;
7186         if (getType(codePoint) == UNASSIGNED)
7187             return null;
7188         UnicodeBlock block = UnicodeBlock.of(codePoint);
7189         if (block != null)
7190             return block.toString().replace('_', ' ') + " "
7191                    + Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7192         // should never come here
7193         return Integer.toHexString(codePoint).toUpperCase(Locale.ENGLISH);
7194     }
7195 
    // Native lookup of the character name for codePoint. getName(int) treats a
    // null result as "no name available" and falls back to a synthesized name.
    private static native String getNameImpl(int codePoint);
7197 }
7198