• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html
4 /**
5  *******************************************************************************
6  * Copyright (C) 1996-2016, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 
11 package android.icu.lang;
12 
13 import java.lang.ref.SoftReference;
14 import java.util.EnumSet;
15 import java.util.HashMap;
16 import java.util.Iterator;
17 import java.util.Locale;
18 import java.util.Map;
19 
20 import android.icu.impl.CaseMapImpl;
21 import android.icu.impl.EmojiProps;
22 import android.icu.impl.IllegalIcuArgumentException;
23 import android.icu.impl.Trie2;
24 import android.icu.impl.UBiDiProps;
25 import android.icu.impl.UCaseProps;
26 import android.icu.impl.UCharacterName;
27 import android.icu.impl.UCharacterNameChoice;
28 import android.icu.impl.UCharacterProperty;
29 import android.icu.impl.UCharacterUtility;
30 import android.icu.impl.UPropertyAliases;
31 import android.icu.lang.UCharacterEnums.ECharacterCategory;
32 import android.icu.lang.UCharacterEnums.ECharacterDirection;
33 import android.icu.text.BreakIterator;
34 import android.icu.text.Normalizer2;
35 import android.icu.util.RangeValueIterator;
36 import android.icu.util.ULocale;
37 import android.icu.util.ValueIterator;
38 import android.icu.util.VersionInfo;
39 
40 /**
41  * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
42  *
43  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
44  * These extensions provide support for more Unicode properties.
45  * Each ICU release supports the latest version of Unicode available at that time.
46  *
47  * <p>For some time before Java 5 added support for supplementary Unicode code points,
48  * the ICU UCharacter class and many other ICU classes already supported them.
49  * Some UCharacter methods and constants were widened slightly differently than
50  * how the Character class methods and constants were widened later.
51  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
52  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
53  *
54  * <p>Code points are represented in these APIs using ints. While it would be
55  * more convenient in Java to have a separate primitive datatype for them,
56  * ints suffice in the meantime.
57  *
58  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
59  * properties, the main differences between UCharacter and Character are:
60  * <ul>
61  * <li> UCharacter is not designed to be a char wrapper and does not have
62  *      APIs that involve management of that single char.<br>
63  *      These include:
64  *      <ul>
65  *        <li> char charValue(),
66  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
67  *      </ul>
68  * <li> UCharacter does not include Character APIs that are deprecated, nor
69  *      does it include the Java-specific character information, such as
70  *      boolean isJavaIdentifierPart(char ch).
71  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
72  *      values '10' - '35'. UCharacter also does this in digit and
73  *      getNumericValue, to adhere to the java semantics of these
74  *      methods.  New methods unicodeDigit, and
75  *      getUnicodeNumericValue do not treat the above code points
76  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
77  * </ul>
78  * <p>
79  * In addition to Java compatibility functions, which calculate derived properties,
80  * this API provides low-level access to the Unicode Character Database.
81  * <p>
82  * Unicode assigns each code point (not just assigned characters) values for
83  * many properties.
84  * Most of them are simple boolean flags, or constants from a small enumerated list.
85  * For some properties, values are strings or other relatively more complex types.
86  * <p>
87  * For more information see
88  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
89  * (http://www.unicode.org/ucd/)
90  * and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU
91  * User Guide chapter on Properties</a>
92  * (https://unicode-org.github.io/icu/userguide/strings/properties).
93  * <p>
94  * There are also functions that provide easy migration from C/POSIX functions
95  * like isblank(). Their use is generally discouraged because the C/POSIX
96  * standards do not define their semantics beyond the ASCII range, which means
97  * that different implementations exhibit very different behavior.
98  * Instead, Unicode properties should be used directly.
99  * <p>
100  * There are also only a few, broad C/POSIX character classes, and they tend
101  * to be used for conflicting purposes. For example, the "isalpha()" class
102  * is sometimes used to determine word boundaries, while a more sophisticated
103  * approach would at least distinguish initial letters from continuation
104  * characters (the latter including combining marks).
105  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
106  * Another example: There is no "istitle()" class for titlecase characters.
107  * <p>
108  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
109  * ICU implements them according to the Standard Recommendations in
110  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
111  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
112  * <p>
113  * API access for C/POSIX character classes is as follows:
114  * <pre>{@code
115  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
116  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
117  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
118  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
119  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
120  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
121  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
122  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
123  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
124  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
125  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
126  * - cntrl:     getType(c)==CONTROL
127  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
128  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
129  * <p>
130  * The C/POSIX character classes are also available in UnicodeSet patterns,
131  * using patterns like [:graph:] or \p{graph}.
132  *
133  * <p><strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions.
134  * Comparison:<ul>
135  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
136  *       most of general categories "Z" (separators) + most whitespace ISO controls
137  *       (including no-break spaces, but excluding IS1..IS4)
138  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
139  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
140  *
141  * <p>
142  * This class is not subclassable.
143  *
144  * @author Syn Wee Quek
145  * @see android.icu.lang.UCharacterEnums
146  */
147 
148 public final class UCharacter implements ECharacterCategory, ECharacterDirection
149 {
150     /**
151      * Lead surrogate bitmask: 0xFFFFFC00 keeps every bit except the low 10. NOTE(review): presumably a masked value is compared with LEAD_SURROGATE_BITS; no use site is visible here.
152      */
153     private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00;
154 
155     /**
156      * Trail surrogate bitmask: same value as the lead mask (0xFFFFFC00, all bits except the low 10). NOTE(review): presumably paired with TRAIL_SURROGATE_BITS; confirm at use sites.
157      */
158     private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00;
159 
160     /**
161      * Lead surrogate bits: 0xD800, which has all of its low 10 bits clear.
162      */
163     private static final int LEAD_SURROGATE_BITS = 0xD800;
164 
165     /**
166      * Trail surrogate bits: 0xDC00, which has all of its low 10 bits clear.
167      */
168     private static final int TRAIL_SURROGATE_BITS = 0xDC00;
169 
170     private static final int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000); // (0xD800 << 10) + 0xDC00 - 0x10000; presumably subtracted from (lead << 10) + trail to get a supplementary code point - TODO confirm at use sites
171 
172     // public inner classes ----------------------------------------------
173 
174     /**
175      * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
176      *
177      * A family of character subsets representing the character blocks in the
178      * Unicode specification, generated from Unicode Data file Blocks.txt.
179      * Character blocks generally define characters used for a specific script
180      * or purpose. A character is contained by at most one Unicode block.
181      *
182      * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU.
183      */
184     public static final class UnicodeBlock extends Character.Subset
185     {
186         // block id corresponding to icu4c -----------------------------------
187 
188         /**
189          */
190         public static final int INVALID_CODE_ID = -1;
191         /**
192          */
193         public static final int BASIC_LATIN_ID = 1;
194         /**
195          */
196         public static final int LATIN_1_SUPPLEMENT_ID = 2;
197         /**
198          */
199         public static final int LATIN_EXTENDED_A_ID = 3;
200         /**
201          */
202         public static final int LATIN_EXTENDED_B_ID = 4;
203         /**
204          */
205         public static final int IPA_EXTENSIONS_ID = 5;
206         /**
207          */
208         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
209         /**
210          */
211         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
212         /**
213          * Unicode 3.2 renames this block to "Greek and Coptic".
214          */
215         public static final int GREEK_ID = 8;
216         /**
217          */
218         public static final int CYRILLIC_ID = 9;
219         /**
220          */
221         public static final int ARMENIAN_ID = 10;
222         /**
223          */
224         public static final int HEBREW_ID = 11;
225         /**
226          */
227         public static final int ARABIC_ID = 12;
228         /**
229          */
230         public static final int SYRIAC_ID = 13;
231         /**
232          */
233         public static final int THAANA_ID = 14;
234         /**
235          */
236         public static final int DEVANAGARI_ID = 15;
237         /**
238          */
239         public static final int BENGALI_ID = 16;
240         /**
241          */
242         public static final int GURMUKHI_ID = 17;
243         /**
244          */
245         public static final int GUJARATI_ID = 18;
246         /**
247          */
248         public static final int ORIYA_ID = 19;
249         /**
250          */
251         public static final int TAMIL_ID = 20;
252         /**
253          */
254         public static final int TELUGU_ID = 21;
255         /**
256          */
257         public static final int KANNADA_ID = 22;
258         /**
259          */
260         public static final int MALAYALAM_ID = 23;
261         /**
262          */
263         public static final int SINHALA_ID = 24;
264         /**
265          */
266         public static final int THAI_ID = 25;
267         /**
268          */
269         public static final int LAO_ID = 26;
270         /**
271          */
272         public static final int TIBETAN_ID = 27;
273         /**
274          */
275         public static final int MYANMAR_ID = 28;
276         /**
277          */
278         public static final int GEORGIAN_ID = 29;
279         /**
280          */
281         public static final int HANGUL_JAMO_ID = 30;
282         /**
283          */
284         public static final int ETHIOPIC_ID = 31;
285         /**
286          */
287         public static final int CHEROKEE_ID = 32;
288         /**
289          */
290         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
291         /**
292          */
293         public static final int OGHAM_ID = 34;
294         /**
295          */
296         public static final int RUNIC_ID = 35;
297         /**
298          */
299         public static final int KHMER_ID = 36;
300         /**
301          */
302         public static final int MONGOLIAN_ID = 37;
303         /**
304          */
305         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
306         /**
307          */
308         public static final int GREEK_EXTENDED_ID = 39;
309         /**
310          */
311         public static final int GENERAL_PUNCTUATION_ID = 40;
312         /**
313          */
314         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
315         /**
316          */
317         public static final int CURRENCY_SYMBOLS_ID = 42;
318         /**
319          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
320          * Symbols".
321          */
322         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
323         /**
324          */
325         public static final int LETTERLIKE_SYMBOLS_ID = 44;
326         /**
327          */
328         public static final int NUMBER_FORMS_ID = 45;
329         /**
330          */
331         public static final int ARROWS_ID = 46;
332         /**
333          */
334         public static final int MATHEMATICAL_OPERATORS_ID = 47;
335         /**
336          */
337         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
338         /**
339          */
340         public static final int CONTROL_PICTURES_ID = 49;
341         /**
342          */
343         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
344         /**
345          */
346         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
347         /**
348          */
349         public static final int BOX_DRAWING_ID = 52;
350         /**
351          */
352         public static final int BLOCK_ELEMENTS_ID = 53;
353         /**
354          */
355         public static final int GEOMETRIC_SHAPES_ID = 54;
356         /**
357          */
358         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
359         /**
360          */
361         public static final int DINGBATS_ID = 56;
362         /**
363          */
364         public static final int BRAILLE_PATTERNS_ID = 57;
365         /**
366          */
367         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
368         /**
369          */
370         public static final int KANGXI_RADICALS_ID = 59;
371         /**
372          */
373         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
374         /**
375          */
376         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
377         /**
378          */
379         public static final int HIRAGANA_ID = 62;
380         /**
381          */
382         public static final int KATAKANA_ID = 63;
383         /**
384          */
385         public static final int BOPOMOFO_ID = 64;
386         /**
387          */
388         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
389         /**
390          */
391         public static final int KANBUN_ID = 66;
392         /**
393          */
394         public static final int BOPOMOFO_EXTENDED_ID = 67;
395         /**
396          */
397         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
398         /**
399          */
400         public static final int CJK_COMPATIBILITY_ID = 69;
401         /**
402          */
403         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
404         /**
405          */
406         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
407         /**
408          */
409         public static final int YI_SYLLABLES_ID = 72;
410         /**
411          */
412         public static final int YI_RADICALS_ID = 73;
413         /**
414          */
415         public static final int HANGUL_SYLLABLES_ID = 74;
416         /**
417          */
418         public static final int HIGH_SURROGATES_ID = 75;
419         /**
420          */
421         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
422         /**
423          */
424         public static final int LOW_SURROGATES_ID = 77;
425         /**
426          * Same as public static final int PRIVATE_USE.
427          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
428          * and multiple code point ranges had this block.
429          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
430          * and adds separate blocks for the supplementary PUAs.
431          */
432         public static final int PRIVATE_USE_AREA_ID = 78;
433         /**
434          * Same as public static final int PRIVATE_USE_AREA.
435          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
436          * and multiple code point ranges had this block.
437          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
438          * and adds separate blocks for the supplementary PUAs.
439          */
440         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
441         /**
442          */
443         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
444         /**
445          */
446         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
447         /**
448          */
449         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
450         /**
451          */
452         public static final int COMBINING_HALF_MARKS_ID = 82;
453         /**
454          */
455         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
456         /**
457          */
458         public static final int SMALL_FORM_VARIANTS_ID = 84;
459         /**
460          */
461         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
462         /**
463          */
464         public static final int SPECIALS_ID = 86;
465         /**
466          */
467         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
468         /**
469          */
470         public static final int OLD_ITALIC_ID = 88;
471         /**
472          */
473         public static final int GOTHIC_ID = 89;
474         /**
475          */
476         public static final int DESERET_ID = 90;
477         /**
478          */
479         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
480         /**
481          */
482         public static final int MUSICAL_SYMBOLS_ID = 92;
483         /**
484          */
485         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
486         /**
487          */
488         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
489         /**
490          */
491         public static final int
492         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
493         /**
494          */
495         public static final int TAGS_ID = 96;
496 
497         // New blocks in Unicode 3.2
498 
499         /**
500          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
501          */
502         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
503         /**
504          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
505          */
506 
507         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
508         /**
509          */
510         public static final int TAGALOG_ID = 98;
511         /**
512          */
513         public static final int HANUNOO_ID = 99;
514         /**
515          */
516         public static final int BUHID_ID = 100;
517         /**
518          */
519         public static final int TAGBANWA_ID = 101;
520         /**
521          */
522         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
523         /**
524          */
525         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
526         /**
527          */
528         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
529         /**
530          */
531         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
532         /**
533          */
534         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
535         /**
536          */
537         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
538         /**
539          */
540         public static final int VARIATION_SELECTORS_ID = 108;
541         /**
542          */
543         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
544         /**
545          */
546         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
547 
548         /**
549          */
550         public static final int LIMBU_ID = 111; /*[1900]*/
551         /**
552          */
553         public static final int TAI_LE_ID = 112; /*[1950]*/
554         /**
555          */
556         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
557         /**
558          */
559         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
560         /**
561          */
562         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
563         /**
564          */
565         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
566         /**
567          */
568         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
569         /**
570          */
571         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
572         /**
573          */
574         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
575         /**
576          */
577         public static final int UGARITIC_ID = 120; /*[10380]*/
578         /**
579          */
580         public static final int SHAVIAN_ID = 121; /*[10450]*/
581         /**
582          */
583         public static final int OSMANYA_ID = 122; /*[10480]*/
584         /**
585          */
586         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
587         /**
588          */
589         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
590         /**
591          */
592         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
593 
594         /* New blocks in Unicode 4.1 */
595 
596         /**
597          */
598         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
599 
600         /**
601          */
602         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
603 
604         /**
605          */
606         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
607 
608         /**
609          */
610         public static final int BUGINESE_ID = 129; /*[1A00]*/
611 
612         /**
613          */
614         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
615 
616         /**
617          */
618         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
619 
620         /**
621          */
622         public static final int COPTIC_ID = 132; /*[2C80]*/
623 
624         /**
625          */
626         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
627 
628         /**
629          */
630         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
631 
632         /**
633          */
634         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
635 
636         /**
637          */
638         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
639 
640         /**
641          */
642         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
643 
644         /**
645          */
646         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
647 
648         /**
649          */
650         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
651 
652         /**
653          */
654         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
655 
656         /**
657          */
658         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
659 
660         /**
661          */
662         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
663 
664         /**
665          */
666         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
667 
668         /**
669          */
670         public static final int TIFINAGH_ID = 144; /*[2D30]*/
671 
672         /**
673          */
674         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
675 
676         /* New blocks in Unicode 5.0 */
677 
678         /**
679          */
680         public static final int NKO_ID = 146; /*[07C0]*/
681         /**
682          */
683         public static final int BALINESE_ID = 147; /*[1B00]*/
684         /**
685          */
686         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
687         /**
688          */
689         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
690         /**
691          */
692         public static final int PHAGS_PA_ID = 150; /*[A840]*/
693         /**
694          */
695         public static final int PHOENICIAN_ID = 151; /*[10900]*/
696         /**
697          */
698         public static final int CUNEIFORM_ID = 152; /*[12000]*/
699         /**
700          */
701         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
702         /**
703          */
704         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
705 
706         /**
707          */
708         public static final int SUNDANESE_ID = 155; /* [1B80] */
709 
710         /**
711          */
712         public static final int LEPCHA_ID = 156; /* [1C00] */
713 
714         /**
715          */
716         public static final int OL_CHIKI_ID = 157; /* [1C50] */
717 
718         /**
719          */
720         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
721 
722         /**
723          */
724         public static final int VAI_ID = 159; /* [A500] */
725 
726         /**
727          */
728         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
729 
730         /**
731          */
732         public static final int SAURASHTRA_ID = 161; /* [A880] */
733 
734         /**
735          */
736         public static final int KAYAH_LI_ID = 162; /* [A900] */
737 
738         /**
739          */
740         public static final int REJANG_ID = 163; /* [A930] */
741 
742         /**
743          */
744         public static final int CHAM_ID = 164; /* [AA00] */
745 
746         /**
747          */
748         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
749 
750         /**
751          */
752         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
753 
754         /**
755          */
756         public static final int LYCIAN_ID = 167; /* [10280] */
757 
758         /**
759          */
760         public static final int CARIAN_ID = 168; /* [102A0] */
761 
762         /**
763          */
764         public static final int LYDIAN_ID = 169; /* [10920] */
765 
766         /**
767          */
768         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
769 
770         /**
771          */
772         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
773 
774         /* New blocks in Unicode 5.2 */
775 
776         /***/
777         public static final int SAMARITAN_ID = 172; /*[0800]*/
778         /***/
779         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
780         /***/
781         public static final int TAI_THAM_ID = 174; /*[1A20]*/
782         /***/
783         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
784         /***/
785         public static final int LISU_ID = 176; /*[A4D0]*/
786         /***/
787         public static final int BAMUM_ID = 177; /*[A6A0]*/
788         /***/
789         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
790         /***/
791         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
792         /***/
793         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
794         /***/
795         public static final int JAVANESE_ID = 181; /*[A980]*/
796         /***/
797         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
798         /***/
799         public static final int TAI_VIET_ID = 183; /*[AA80]*/
800         /***/
801         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
802         /***/
803         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
804         /***/
805         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
806         /***/
807         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
808         /***/
809         public static final int AVESTAN_ID = 188; /*[10B00]*/
810         /***/
811         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
812         /***/
813         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
814         /***/
815         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
816         /***/
817         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
818         /***/
819         public static final int KAITHI_ID = 193; /*[11080]*/
820         /***/
821         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
822         /***/
823         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
824         /***/
825         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
826         /***/
827         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
828 
829         /* New blocks in Unicode 6.0 */
830 
831         /***/
832         public static final int MANDAIC_ID = 198; /*[0840]*/
833         /***/
834         public static final int BATAK_ID = 199; /*[1BC0]*/
835         /***/
836         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
837         /***/
838         public static final int BRAHMI_ID = 201; /*[11000]*/
839         /***/
840         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
841         /***/
842         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
843         /***/
844         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
845         /***/
846         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
847         /***/
848         public static final int EMOTICONS_ID = 206; /*[1F600]*/
849         /***/
850         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
851         /***/
852         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
853         /***/
854         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
855 
856         /* New blocks in Unicode 6.1 (bracketed hex after each ID = first code point of the block) */
857 
858         /** Block ID for Arabic Extended-A. */
859         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
860         /** Block ID for Arabic Mathematical Alphabetic Symbols. */
861         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
862         /** Block ID for Chakma. */
863         public static final int CHAKMA_ID = 212; /*[11100]*/
864         /** Block ID for Meetei Mayek Extensions. */
865         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
866         /** Block ID for Meroitic Cursive. */
867         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
868         /** Block ID for Meroitic Hieroglyphs. */
869         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
870         /** Block ID for Miao. */
871         public static final int MIAO_ID = 216; /*[16F00]*/
872         /** Block ID for Sharada. */
873         public static final int SHARADA_ID = 217; /*[11180]*/
874         /** Block ID for Sora Sompeng. */
875         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
876         /** Block ID for Sundanese Supplement. */
877         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
878         /** Block ID for Takri. */
879         public static final int TAKRI_ID = 220; /*[11680]*/
880 
881         /* New blocks in Unicode 7.0 (bracketed hex after each ID = first code point of the block) */
882 
883         /** Block ID for Bassa Vah. */
884         public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
885         /** Block ID for Caucasian Albanian. */
886         public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
887         /** Block ID for Coptic Epact Numbers. */
888         public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
889         /** Block ID for Combining Diacritical Marks Extended. */
890         public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
891         /** Block ID for Duployan. */
892         public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
893         /** Block ID for Elbasan. */
894         public static final int ELBASAN_ID = 226; /*[10500]*/
895         /** Block ID for Geometric Shapes Extended. */
896         public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
897         /** Block ID for Grantha. */
898         public static final int GRANTHA_ID = 228; /*[11300]*/
899         /** Block ID for Khojki. */
900         public static final int KHOJKI_ID = 229; /*[11200]*/
901         /** Block ID for Khudawadi. */
902         public static final int KHUDAWADI_ID = 230; /*[112B0]*/
903         /** Block ID for Latin Extended-E. */
904         public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
905         /** Block ID for Linear A. */
906         public static final int LINEAR_A_ID = 232; /*[10600]*/
907         /** Block ID for Mahajani. */
908         public static final int MAHAJANI_ID = 233; /*[11150]*/
909         /** Block ID for Manichaean. */
910         public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
911         /** Block ID for Mende Kikakui. */
912         public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
913         /** Block ID for Modi. */
914         public static final int MODI_ID = 236; /*[11600]*/
915         /** Block ID for Mro. */
916         public static final int MRO_ID = 237; /*[16A40]*/
917         /** Block ID for Myanmar Extended-B. */
918         public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
919         /** Block ID for Nabataean. */
920         public static final int NABATAEAN_ID = 239; /*[10880]*/
921         /** Block ID for Old North Arabian. */
922         public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
923         /** Block ID for Old Permic. */
924         public static final int OLD_PERMIC_ID = 241; /*[10350]*/
925         /** Block ID for Ornamental Dingbats. */
926         public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
927         /** Block ID for Pahawh Hmong. */
928         public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
929         /** Block ID for Palmyrene. */
930         public static final int PALMYRENE_ID = 244; /*[10860]*/
931         /** Block ID for Pau Cin Hau. */
932         public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
933         /** Block ID for Psalter Pahlavi. */
934         public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
935         /** Block ID for Shorthand Format Controls. */
936         public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
937         /** Block ID for Siddham. */
938         public static final int SIDDHAM_ID = 248; /*[11580]*/
939         /** Block ID for Sinhala Archaic Numbers. */
940         public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
941         /** Block ID for Supplemental Arrows-C. */
942         public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
943         /** Block ID for Tirhuta. */
944         public static final int TIRHUTA_ID = 251; /*[11480]*/
945         /** Block ID for Warang Citi. */
946         public static final int WARANG_CITI_ID = 252; /*[118A0]*/
947 
948         /* New blocks in Unicode 8.0 (bracketed hex after each ID = first code point of the block) */
949 
950         /** Block ID for Ahom. */
951         public static final int AHOM_ID = 253; /*[11700]*/
952         /** Block ID for Anatolian Hieroglyphs. */
953         public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
954         /** Block ID for Cherokee Supplement. */
955         public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
956         /** Block ID for CJK Unified Ideographs Extension E. */
957         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
958         /** Block ID for Early Dynastic Cuneiform. */
959         public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
960         /** Block ID for Hatran. */
961         public static final int HATRAN_ID = 258; /*[108E0]*/
962         /** Block ID for Multani. */
963         public static final int MULTANI_ID = 259; /*[11280]*/
964         /** Block ID for Old Hungarian. */
965         public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
966         /** Block ID for Supplemental Symbols and Pictographs. */
967         public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
968         /** Block ID for Sutton SignWriting. */
969         public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
970 
971         /* New blocks in Unicode 9.0 (bracketed hex after each ID = first code point of the block) */
972 
973         /** Block ID for Adlam. */
974         public static final int ADLAM_ID = 263; /*[1E900]*/
975         /** Block ID for Bhaiksuki. */
976         public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
977         /** Block ID for Cyrillic Extended-C. */
978         public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
979         /** Block ID for Glagolitic Supplement. */
980         public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
981         /** Block ID for Ideographic Symbols and Punctuation. */
982         public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
983         /** Block ID for Marchen. */
984         public static final int MARCHEN_ID = 268; /*[11C70]*/
985         /** Block ID for Mongolian Supplement. */
986         public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
987         /** Block ID for Newa. */
988         public static final int NEWA_ID = 270; /*[11400]*/
989         /** Block ID for Osage. */
990         public static final int OSAGE_ID = 271; /*[104B0]*/
991         /** Block ID for Tangut. */
992         public static final int TANGUT_ID = 272; /*[17000]*/
993         /** Block ID for Tangut Components. */
994         public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/
995 
996         // New blocks in Unicode 10.0 (bracketed hex after each ID = first code point of the block)
997 
998         /** Block ID for CJK Unified Ideographs Extension F. */
999         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/
1000         /** Block ID for Kana Extended-A. */
1001         public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/
1002         /** Block ID for Masaram Gondi. */
1003         public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/
1004         /** Block ID for Nushu. */
1005         public static final int NUSHU_ID = 277; /*[1B170]*/
1006         /** Block ID for Soyombo. */
1007         public static final int SOYOMBO_ID = 278; /*[11A50]*/
1008         /** Block ID for Syriac Supplement. */
1009         public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/
1010         /** Block ID for Zanabazar Square. */
1011         public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/
1012 
1013         // New blocks in Unicode 11.0 (bracketed hex after each ID = first code point of the block)
1014 
1015         /** Block ID for Chess Symbols. */
1016         public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/
1017         /** Block ID for Dogra. */
1018         public static final int DOGRA_ID = 282; /*[11800]*/
1019         /** Block ID for Georgian Extended. */
1020         public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/
1021         /** Block ID for Gunjala Gondi. */
1022         public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/
1023         /** Block ID for Hanifi Rohingya. */
1024         public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/
1025         /** Block ID for Indic Siyaq Numbers. */
1026         public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/
1027         /** Block ID for Makasar. */
1028         public static final int MAKASAR_ID = 287; /*[11EE0]*/
1029         /** Block ID for Mayan Numerals. */
1030         public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/
1031         /** Block ID for Medefaidrin. */
1032         public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/
1033         /** Block ID for Old Sogdian. */
1034         public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/
1035         /** Block ID for Sogdian. */
1036         public static final int SOGDIAN_ID = 291; /*[10F30]*/
1037 
1038         // New blocks in Unicode 12.0 (bracketed hex after each ID = first code point of the block)
1039 
1040         /** Block ID for Egyptian Hieroglyph Format Controls. */
1041         public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/
1042         /** Block ID for Elymaic. */
1043         public static final int ELYMAIC_ID = 293; /*[10FE0]*/
1044         /** Block ID for Nandinagari. */
1045         public static final int NANDINAGARI_ID = 294; /*[119A0]*/
1046         /** Block ID for Nyiakeng Puachue Hmong. */
1047         public static final int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/
1048         /** Block ID for Ottoman Siyaq Numbers. */
1049         public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/
1050         /** Block ID for Small Kana Extension. */
1051         public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/
1052         /** Block ID for Symbols and Pictographs Extended-A. */
1053         public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/
1054         /** Block ID for Tamil Supplement. */
1055         public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/
1056         /** Block ID for Wancho. */
1057         public static final int WANCHO_ID = 300; /*[1E2C0]*/
1058 
1059         // New blocks in Unicode 13.0 (bracketed hex after each ID = first code point of the block)
1060 
1061         /** Block ID for Chorasmian. */
1062         public static final int CHORASMIAN_ID = 301; /*[10FB0]*/
1063         /** Block ID for CJK Unified Ideographs Extension G. */
1064         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/
1065         /** Block ID for Dives Akuru. */
1066         public static final int DIVES_AKURU_ID = 303; /*[11900]*/
1067         /** Block ID for Khitan Small Script. */
1068         public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/
1069         /** Block ID for Lisu Supplement. */
1070         public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/
1071         /** Block ID for Symbols for Legacy Computing. */
1072         public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/
1073         /** Block ID for Tangut Supplement. */
1074         public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/
1075         /** Block ID for Yezidi. */
1076         public static final int YEZIDI_ID = 308; /*[10E80]*/
1077 
1078         // New blocks in Unicode 14.0 (bracketed hex after each ID = first code point of the block)
1079 
1080         /** Block ID for Arabic Extended-B. */
1081         public static final int ARABIC_EXTENDED_B_ID = 309; /*[0870]*/
1082         /** Block ID for Cypro-Minoan. */
1083         public static final int CYPRO_MINOAN_ID = 310; /*[12F90]*/
1084         /** Block ID for Ethiopic Extended-B. */
1085         public static final int ETHIOPIC_EXTENDED_B_ID = 311; /*[1E7E0]*/
1086         /** Block ID for Kana Extended-B. */
1087         public static final int KANA_EXTENDED_B_ID = 312; /*[1AFF0]*/
1088         /** Block ID for Latin Extended-F. */
1089         public static final int LATIN_EXTENDED_F_ID = 313; /*[10780]*/
1090         /** Block ID for Latin Extended-G. */
1091         public static final int LATIN_EXTENDED_G_ID = 314; /*[1DF00]*/
1092         /** Block ID for Old Uyghur. */
1093         public static final int OLD_UYGHUR_ID = 315; /*[10F70]*/
1094         /** Block ID for Tangsa. */
1095         public static final int TANGSA_ID = 316; /*[16A70]*/
1096         /** Block ID for Toto. */
1097         public static final int TOTO_ID = 317; /*[1E290]*/
1098         /** Block ID for Unified Canadian Aboriginal Syllabics Extended-A. */
1099         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID = 318; /*[11AB0]*/
1100         /** Block ID for Vithkuqi. */
1101         public static final int VITHKUQI_ID = 319; /*[10570]*/
1102         /** Block ID for Znamenny Musical Notation. */
1103         public static final int ZNAMENNY_MUSICAL_NOTATION_ID = 320; /*[1CF00]*/
1104 
1105         // New blocks in Unicode 15.0 (bracketed hex after each ID = first code point of the block)
1106 
1107         /** Block ID for Arabic Extended-C. */
1108         public static final int ARABIC_EXTENDED_C_ID = 321; /*[10EC0]*/
1109         /** Block ID for CJK Unified Ideographs Extension H. */
1110         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID = 322; /*[31350]*/
1111         /** Block ID for Cyrillic Extended-D. */
1112         public static final int CYRILLIC_EXTENDED_D_ID = 323; /*[1E030]*/
1113         /** Block ID for Devanagari Extended-A. */
1114         public static final int DEVANAGARI_EXTENDED_A_ID = 324; /*[11B00]*/
1115         /** Block ID for Kaktovik Numerals. */
1116         public static final int KAKTOVIK_NUMERALS_ID = 325; /*[1D2C0]*/
1117         /** Block ID for Kawi. */
1118         public static final int KAWI_ID = 326; /*[11F00]*/
1119         /** Block ID for Nag Mundari. */
1120         public static final int NAG_MUNDARI_ID = 327; /*[1E4D0]*/
1121 
1122         // New block in Unicode 15.1 (bracketed hex after the ID = first code point of the block)
1123 
1124         /** Block ID for CJK Unified Ideographs Extension I; currently the highest ID, one below COUNT. */
1125         @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API)
1126         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_ID = 328; /*[2EBF0]*/
1127 
1128         /**
1129          * One more than the highest normal UnicodeBlock value (currently 328 + 1).
1130          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK).
1131          * This constant also sizes the private BLOCKS_ array, so it must grow with every new block ID.
1132          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1133          * @hide unsupported on Android
1134          */
1135         @Deprecated
1136         public static final int COUNT = 329;
1137 
1138         // block objects ----------------------------------------------------
1139 
1140         /**
1141          * Array of UnicodeBlocks, for easy access in getInstance(int)
1142          */
1143         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
1144 
1145         /** Block object named "NO_BLOCK", created with the literal ID 0 rather than a named _ID constant.
1146          */
1147         public static final UnicodeBlock NO_BLOCK
1148         = new UnicodeBlock("NO_BLOCK", 0);
1149 
1150         /** Block object for Basic Latin. Each constant in this run pairs its own name string with the matching _ID constant.
1151          */
1152         public static final UnicodeBlock BASIC_LATIN
1153         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
1154         /** Block object for Latin-1 Supplement.
1155          */
1156         public static final UnicodeBlock LATIN_1_SUPPLEMENT
1157         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1158         /** Block object for Latin Extended-A.
1159          */
1160         public static final UnicodeBlock LATIN_EXTENDED_A
1161         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1162         /** Block object for Latin Extended-B.
1163          */
1164         public static final UnicodeBlock LATIN_EXTENDED_B
1165         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1166         /** Block object for IPA Extensions.
1167          */
1168         public static final UnicodeBlock IPA_EXTENSIONS
1169         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1170         /** Block object for Spacing Modifier Letters.
1171          */
1172         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1173         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1174         /** Block object for Combining Diacritical Marks.
1175          */
1176         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1177         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1178         /** Block object for Greek.
1179          * Unicode 3.2 renames this block to "Greek and Coptic".
1180          */
1181         public static final UnicodeBlock GREEK
1182         = new UnicodeBlock("GREEK", GREEK_ID);
1183         /** Block object for Cyrillic.
1184          */
1185         public static final UnicodeBlock CYRILLIC
1186         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1187         /** Block object for Armenian.
1188          */
1189         public static final UnicodeBlock ARMENIAN
1190         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1191         /** Block object for Hebrew.
1192          */
1193         public static final UnicodeBlock HEBREW
1194         = new UnicodeBlock("HEBREW", HEBREW_ID);
1195         /** Block object for Arabic.
1196          */
1197         public static final UnicodeBlock ARABIC
1198         = new UnicodeBlock("ARABIC", ARABIC_ID);
1199         /** Block object for Syriac.
1200          */
1201         public static final UnicodeBlock SYRIAC
1202         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1203         /** Block object for Thaana.
1204          */
1205         public static final UnicodeBlock THAANA
1206         = new UnicodeBlock("THAANA", THAANA_ID);
1207         /** Block object for Devanagari. Each constant in this run pairs its own name string with the matching _ID constant.
1208          */
1209         public static final UnicodeBlock DEVANAGARI
1210         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1211         /** Block object for Bengali.
1212          */
1213         public static final UnicodeBlock BENGALI
1214         = new UnicodeBlock("BENGALI", BENGALI_ID);
1215         /** Block object for Gurmukhi.
1216          */
1217         public static final UnicodeBlock GURMUKHI
1218         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1219         /** Block object for Gujarati.
1220          */
1221         public static final UnicodeBlock GUJARATI
1222         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1223         /** Block object for Oriya.
1224          */
1225         public static final UnicodeBlock ORIYA
1226         = new UnicodeBlock("ORIYA", ORIYA_ID);
1227         /** Block object for Tamil.
1228          */
1229         public static final UnicodeBlock TAMIL
1230         = new UnicodeBlock("TAMIL", TAMIL_ID);
1231         /** Block object for Telugu.
1232          */
1233         public static final UnicodeBlock TELUGU
1234         = new UnicodeBlock("TELUGU", TELUGU_ID);
1235         /** Block object for Kannada.
1236          */
1237         public static final UnicodeBlock KANNADA
1238         = new UnicodeBlock("KANNADA", KANNADA_ID);
1239         /** Block object for Malayalam.
1240          */
1241         public static final UnicodeBlock MALAYALAM
1242         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1243         /** Block object for Sinhala.
1244          */
1245         public static final UnicodeBlock SINHALA
1246         = new UnicodeBlock("SINHALA", SINHALA_ID);
1247         /** Block object for Thai.
1248          */
1249         public static final UnicodeBlock THAI
1250         = new UnicodeBlock("THAI", THAI_ID);
1251         /** Block object for Lao.
1252          */
1253         public static final UnicodeBlock LAO
1254         = new UnicodeBlock("LAO", LAO_ID);
1255         /** Block object for Tibetan.
1256          */
1257         public static final UnicodeBlock TIBETAN
1258         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1259         /** Block object for Myanmar.
1260          */
1261         public static final UnicodeBlock MYANMAR
1262         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1263         /** Block object for Georgian.
1264          */
1265         public static final UnicodeBlock GEORGIAN
1266         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1267         /** Block object for Hangul Jamo.
1268          */
1269         public static final UnicodeBlock HANGUL_JAMO
1270         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1271         /** Block object for Ethiopic.
1272          */
1273         public static final UnicodeBlock ETHIOPIC
1274         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1275         /** Block object for Cherokee.
1276          */
1277         public static final UnicodeBlock CHEROKEE
1278         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1279         /** Block object for Unified Canadian Aboriginal Syllabics.
1280          */
1281         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1282         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1283                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1284         /** Block object for Ogham.
1285          */
1286         public static final UnicodeBlock OGHAM
1287         = new UnicodeBlock("OGHAM", OGHAM_ID);
1288         /** Block object for Runic.
1289          */
1290         public static final UnicodeBlock RUNIC
1291         = new UnicodeBlock("RUNIC", RUNIC_ID);
1292         /** Block object for Khmer.
1293          */
1294         public static final UnicodeBlock KHMER
1295         = new UnicodeBlock("KHMER", KHMER_ID);
1296         /** Block object for Mongolian.
1297          */
1298         public static final UnicodeBlock MONGOLIAN
1299         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1300         /** Block object for Latin Extended Additional. Each constant in this run pairs its own name string with the matching _ID constant.
1301          */
1302         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1303         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1304         /** Block object for Greek Extended.
1305          */
1306         public static final UnicodeBlock GREEK_EXTENDED
1307         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1308         /** Block object for General Punctuation.
1309          */
1310         public static final UnicodeBlock GENERAL_PUNCTUATION
1311         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1312         /** Block object for Superscripts and Subscripts.
1313          */
1314         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1315         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1316         /** Block object for Currency Symbols.
1317          */
1318         public static final UnicodeBlock CURRENCY_SYMBOLS
1319         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1320         /** Block object for Combining Marks for Symbols.
1321          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1322          * Symbols".
1323          */
1324         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1325         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1326         /** Block object for Letterlike Symbols.
1327          */
1328         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1329         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1330         /** Block object for Number Forms.
1331          */
1332         public static final UnicodeBlock NUMBER_FORMS
1333         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1334         /** Block object for Arrows.
1335          */
1336         public static final UnicodeBlock ARROWS
1337         = new UnicodeBlock("ARROWS", ARROWS_ID);
1338         /** Block object for Mathematical Operators.
1339          */
1340         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1341         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1342         /** Block object for Miscellaneous Technical.
1343          */
1344         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1345         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1346         /** Block object for Control Pictures.
1347          */
1348         public static final UnicodeBlock CONTROL_PICTURES
1349         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1350         /** Block object for Optical Character Recognition.
1351          */
1352         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1353         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1354         /** Block object for Enclosed Alphanumerics.
1355          */
1356         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1357         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1358         /** Block object for Box Drawing.
1359          */
1360         public static final UnicodeBlock BOX_DRAWING
1361         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1362         /** Block object for Block Elements.
1363          */
1364         public static final UnicodeBlock BLOCK_ELEMENTS
1365         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1366         /** Block object for Geometric Shapes.
1367          */
1368         public static final UnicodeBlock GEOMETRIC_SHAPES
1369         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1370         /** Block object for Miscellaneous Symbols.
1371          */
1372         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1373         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1374         /** Block object for Dingbats.
1375          */
1376         public static final UnicodeBlock DINGBATS
1377         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1378         /** Block object for Braille Patterns.
1379          */
1380         public static final UnicodeBlock BRAILLE_PATTERNS
1381         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1382         /** Block object for CJK Radicals Supplement. Each constant in this run pairs its own name string with the matching _ID constant.
1383          */
1384         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1385         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1386         /** Block object for Kangxi Radicals.
1387          */
1388         public static final UnicodeBlock KANGXI_RADICALS
1389         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1390         /** Block object for Ideographic Description Characters.
1391          */
1392         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1393         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1394                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1395         /** Block object for CJK Symbols and Punctuation.
1396          */
1397         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1398         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1399         /** Block object for Hiragana.
1400          */
1401         public static final UnicodeBlock HIRAGANA
1402         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1403         /** Block object for Katakana.
1404          */
1405         public static final UnicodeBlock KATAKANA
1406         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1407         /** Block object for Bopomofo.
1408          */
1409         public static final UnicodeBlock BOPOMOFO
1410         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1411         /** Block object for Hangul Compatibility Jamo.
1412          */
1413         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1414         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1415         /** Block object for Kanbun.
1416          */
1417         public static final UnicodeBlock KANBUN
1418         = new UnicodeBlock("KANBUN", KANBUN_ID);
1419         /** Block object for Bopomofo Extended.
1420          */
1421         public static final UnicodeBlock BOPOMOFO_EXTENDED
1422         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1423         /** Block object for Enclosed CJK Letters and Months.
1424          */
1425         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1426         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1427                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1428         /** Block object for CJK Compatibility.
1429          */
1430         public static final UnicodeBlock CJK_COMPATIBILITY
1431         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1432         /** Block object for CJK Unified Ideographs Extension A.
1433          */
1434         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1435         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1436                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1437         /** Block object for CJK Unified Ideographs.
1438          */
1439         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1440         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1441         /** Block object for Yi Syllables.
1442          */
1443         public static final UnicodeBlock YI_SYLLABLES
1444         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1445         /** Block object for Yi Radicals.
1446          */
1447         public static final UnicodeBlock YI_RADICALS
1448         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1449         /** Block object for Hangul Syllables.
1450          */
1451         public static final UnicodeBlock HANGUL_SYLLABLES
1452         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1453         /** Block object for High Surrogates.
1454          */
1455         public static final UnicodeBlock HIGH_SURROGATES
1456         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1457         /** Block object for High Private Use Surrogates.
1458          */
1459         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1460         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1461         /** Block object for Low Surrogates.
1462          */
1463         public static final UnicodeBlock LOW_SURROGATES
1464         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1465         /**
1466          * Block object for the BMP Private Use Area; PRIVATE_USE refers to this same object.
1467          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1468          * and multiple code point ranges had this block.
1469          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1470          * and adds separate blocks for the supplementary PUAs.
1471          */
1472         public static final UnicodeBlock PRIVATE_USE_AREA
1473         = new UnicodeBlock("PRIVATE_USE_AREA", PRIVATE_USE_AREA_ID);
1474         /**
1475          * Alias for PRIVATE_USE_AREA: this field holds the very same UnicodeBlock object.
1476          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1477          * and multiple code point ranges had this block.
1478          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1479          * and adds separate blocks for the supplementary PUAs.
1480          */
1481         public static final UnicodeBlock PRIVATE_USE
1482         = PRIVATE_USE_AREA;
1483         /** Block object for CJK Compatibility Ideographs. Each constant in this run pairs its own name string with the matching _ID constant.
1484          */
1485         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1486         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1487         /** Block object for Alphabetic Presentation Forms.
1488          */
1489         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1490         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1491         /** Block object for Arabic Presentation Forms-A.
1492          */
1493         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1494         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1495         /** Block object for Combining Half Marks.
1496          */
1497         public static final UnicodeBlock COMBINING_HALF_MARKS
1498         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1499         /** Block object for CJK Compatibility Forms.
1500          */
1501         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1502         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1503         /** Block object for Small Form Variants.
1504          */
1505         public static final UnicodeBlock SMALL_FORM_VARIANTS
1506         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1507         /** Block object for Arabic Presentation Forms-B.
1508          */
1509         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1510         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1511         /** Block object for Specials.
1512          */
1513         public static final UnicodeBlock SPECIALS
1514         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1515         /** Block object for Halfwidth and Fullwidth Forms.
1516          */
1517         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1518         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1519         /** Block object for Old Italic.
1520          */
1521         public static final UnicodeBlock OLD_ITALIC
1522         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1523         /** Block object for Gothic.
1524          */
1525         public static final UnicodeBlock GOTHIC
1526         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1527         /** Block object for Deseret.
1528          */
1529         public static final UnicodeBlock DESERET
1530         = new UnicodeBlock("DESERET", DESERET_ID);
1531         /** Block object for Byzantine Musical Symbols.
1532          */
1533         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1534         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1535         /** Block object for Musical Symbols.
1536          */
1537         public static final UnicodeBlock MUSICAL_SYMBOLS
1538         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1539         /** Block object for Mathematical Alphanumeric Symbols.
1540          */
1541         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1542         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1543                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1544         /** Block object for CJK Unified Ideographs Extension B.
1545          */
1546         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1547         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1548                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1549         /** Block object for CJK Compatibility Ideographs Supplement.
1550          */
1551         public static final UnicodeBlock
1552         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1553         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1554                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1555         /** Block object for Tags.
1556          */
1557         public static final UnicodeBlock TAGS
1558         = new UnicodeBlock("TAGS", TAGS_ID);
1559 
1560         // New blocks in Unicode 3.2 (each object pairs its own name string with the matching _ID constant)
1561 
1562         /** Block object under the older "Cyrillic Supplementary" name.
1563          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1564          */
1565         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1566         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1567         /** Block object under the newer "Cyrillic Supplement" name.
1568          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1569          */
1570         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1571         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1572         /** Block object for Tagalog.
1573          */
1574         public static final UnicodeBlock TAGALOG
1575         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1576         /** Block object for Hanunoo.
1577          */
1578         public static final UnicodeBlock HANUNOO
1579         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1580         /** Block object for Buhid.
1581          */
1582         public static final UnicodeBlock BUHID
1583         = new UnicodeBlock("BUHID", BUHID_ID);
1584         /** Block object for Tagbanwa.
1585          */
1586         public static final UnicodeBlock TAGBANWA
1587         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1588         /** Block object for Miscellaneous Mathematical Symbols-A.
1589          */
1590         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1591         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1592                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1593         /** Block object for Supplemental Arrows-A.
1594          */
1595         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1596         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1597         /** Block object for Supplemental Arrows-B.
1598          */
1599         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1600         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1601         /** Block object for Miscellaneous Mathematical Symbols-B.
1602          */
1603         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1604         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1605                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1606         /** Block object for Supplemental Mathematical Operators.
1607          */
1608         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1609         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1610                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1611         /** Block object for Katakana Phonetic Extensions.
1612          */
1613         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1614         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1615         /** Block object for Variation Selectors.
1616          */
1617         public static final UnicodeBlock VARIATION_SELECTORS
1618         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1619         /** Block object for Supplementary Private Use Area-A.
1620          */
1621         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1622         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1623                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1624         /** Block object for Supplementary Private Use Area-B.
1625          */
1626         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1627         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1628                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1629 
1630         /** Constant for the "Limbu" Unicode block.
1631          */
1632         public static final UnicodeBlock LIMBU
1633         = new UnicodeBlock("LIMBU", LIMBU_ID);
1634         /** Constant for the "Tai Le" Unicode block.
1635          */
1636         public static final UnicodeBlock TAI_LE
1637         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1638         /** Constant for the "Khmer Symbols" Unicode block.
1639          */
1640         public static final UnicodeBlock KHMER_SYMBOLS
1641         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1642 
1643         /** Constant for the "Phonetic Extensions" Unicode block.
1644          */
1645         public static final UnicodeBlock PHONETIC_EXTENSIONS
1646         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1647 
1648         /** Constant for the "Miscellaneous Symbols and Arrows" Unicode block.
1649          */
1650         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1651         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1652                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1653         /** Constant for the "Yijing Hexagram Symbols" Unicode block.
1654          */
1655         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1656         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1657         /** Constant for the "Linear B Syllabary" Unicode block.
1658          */
1659         public static final UnicodeBlock LINEAR_B_SYLLABARY
1660         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1661         /** Constant for the "Linear B Ideograms" Unicode block.
1662          */
1663         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1664         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1665         /** Constant for the "Aegean Numbers" Unicode block.
1666          */
1667         public static final UnicodeBlock AEGEAN_NUMBERS
1668         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1669         /** Constant for the "Ugaritic" Unicode block.
1670          */
1671         public static final UnicodeBlock UGARITIC
1672         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1673         /** Constant for the "Shavian" Unicode block.
1674          */
1675         public static final UnicodeBlock SHAVIAN
1676         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1677         /** Constant for the "Osmanya" Unicode block.
1678          */
1679         public static final UnicodeBlock OSMANYA
1680         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1681         /** Constant for the "Cypriot Syllabary" Unicode block.
1682          */
1683         public static final UnicodeBlock CYPRIOT_SYLLABARY
1684         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1685         /** Constant for the "Tai Xuan Jing Symbols" Unicode block.
1686          */
1687         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1688         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1689 
1690         /** Constant for the "Variation Selectors Supplement" Unicode block.
1691          */
1692         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1693         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1694 
1695         /* New blocks in Unicode 4.1. The bracketed hex value after each declaration is the block's first code point. */
1696 
1697         /** Constant for the "Ancient Greek Musical Notation" block, starting at U+1D200.
1698          */
1699         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
1700                 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
1701                         ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
1702 
1703         /** Constant for the "Ancient Greek Numbers" block, starting at U+10140.
1704          */
1705         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
1706                 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
1707 
1708         /** Constant for the "Arabic Supplement" block, starting at U+0750.
1709          */
1710         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1711                 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
1712 
1713         /** Constant for the "Buginese" block, starting at U+1A00.
1714          */
1715         public static final UnicodeBlock BUGINESE =
1716                 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
1717 
1718         /** Constant for the "CJK Strokes" block, starting at U+31C0.
1719          */
1720         public static final UnicodeBlock CJK_STROKES =
1721                 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
1722 
1723         /** Constant for the "Combining Diacritical Marks Supplement" block, starting at U+1DC0.
1724          */
1725         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1726                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1727                         COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
1728 
1729         /** Constant for the "Coptic" block, starting at U+2C80.
1730          */
1731         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
1732 
1733         /** Constant for the "Ethiopic Extended" block, starting at U+2D80.
1734          */
1735         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1736                 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
1737 
1738         /** Constant for the "Ethiopic Supplement" block, starting at U+1380.
1739          */
1740         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1741                 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
1742 
1743         /** Constant for the "Georgian Supplement" block, starting at U+2D00.
1744          */
1745         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1746                 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
1747 
1748         /** Constant for the "Glagolitic" block, starting at U+2C00.
1749          */
1750         public static final UnicodeBlock GLAGOLITIC =
1751                 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
1752 
1753         /** Constant for the "Kharoshthi" block, starting at U+10A00.
1754          */
1755         public static final UnicodeBlock KHAROSHTHI =
1756                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
1757 
1758         /** Constant for the "Modifier Tone Letters" block, starting at U+A700.
1759          */
1760         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
1761                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
1762 
1763         /** Constant for the "New Tai Lue" block, starting at U+1980.
1764          */
1765         public static final UnicodeBlock NEW_TAI_LUE =
1766                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
1767 
1768         /** Constant for the "Old Persian" block, starting at U+103A0.
1769          */
1770         public static final UnicodeBlock OLD_PERSIAN =
1771                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
1772 
1773         /** Constant for the "Phonetic Extensions Supplement" block, starting at U+1D80.
1774          */
1775         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1776                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1777                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
1778 
1779         /** Constant for the "Supplemental Punctuation" block, starting at U+2E00.
1780          */
1781         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1782                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
1783 
1784         /** Constant for the "Syloti Nagri" block, starting at U+A800.
1785          */
1786         public static final UnicodeBlock SYLOTI_NAGRI =
1787                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
1788 
1789         /** Constant for the "Tifinagh" block, starting at U+2D30.
1790          */
1791         public static final UnicodeBlock TIFINAGH =
1792                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
1793 
1794         /** Constant for the "Vertical Forms" block, starting at U+FE10.
1795          */
1796         public static final UnicodeBlock VERTICAL_FORMS =
1797                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
1798         /* NOTE(review): no section comment here in the original; NKO through COUNTING_ROD_NUMERALS match the blocks added in Unicode 5.0 - confirm against Blocks.txt history. */
1799         /** Constant for the "NKo" block, starting at U+07C0.
1800          */
1801         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
1802         /** Constant for the "Balinese" block, starting at U+1B00.
1803          */
1804         public static final UnicodeBlock BALINESE =
1805                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
1806         /** Constant for the "Latin Extended-C" block, starting at U+2C60.
1807          */
1808         public static final UnicodeBlock LATIN_EXTENDED_C =
1809                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
1810         /** Constant for the "Latin Extended-D" block, starting at U+A720.
1811          */
1812         public static final UnicodeBlock LATIN_EXTENDED_D =
1813                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
1814         /** Constant for the "Phags-pa" block, starting at U+A840.
1815          */
1816         public static final UnicodeBlock PHAGS_PA =
1817                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
1818         /** Constant for the "Phoenician" block, starting at U+10900.
1819          */
1820         public static final UnicodeBlock PHOENICIAN =
1821                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
1822         /** Constant for the "Cuneiform" block, starting at U+12000.
1823          */
1824         public static final UnicodeBlock CUNEIFORM =
1825                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
1826         /** Constant for the "Cuneiform Numbers and Punctuation" block, starting at U+12400.
1827          */
1828         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
1829                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
1830                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
1831         /** Constant for the "Counting Rod Numerals" block, starting at U+1D360.
1832          */
1833         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
1834                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
1835         /* NOTE(review): no section comment here in the original; SUNDANESE through DOMINO_TILES match the blocks added in Unicode 5.1 - confirm against Blocks.txt history. */
1836         /** Constant for the "Sundanese" block, starting at U+1B80.
1837          */
1838         public static final UnicodeBlock SUNDANESE =
1839                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
1840 
1841         /** Constant for the "Lepcha" block, starting at U+1C00.
1842          */
1843         public static final UnicodeBlock LEPCHA =
1844                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
1845 
1846         /** Constant for the "Ol Chiki" block, starting at U+1C50.
1847          */
1848         public static final UnicodeBlock OL_CHIKI =
1849                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
1850 
1851         /** Constant for the "Cyrillic Extended-A" block, starting at U+2DE0.
1852          */
1853         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1854                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
1855 
1856         /** Constant for the "Vai" block, starting at U+A500.
1857          */
1858         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
1859 
1860         /** Constant for the "Cyrillic Extended-B" block, starting at U+A640.
1861          */
1862         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1863                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
1864 
1865         /** Constant for the "Saurashtra" block, starting at U+A880.
1866          */
1867         public static final UnicodeBlock SAURASHTRA =
1868                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
1869 
1870         /** Constant for the "Kayah Li" block, starting at U+A900.
1871          */
1872         public static final UnicodeBlock KAYAH_LI =
1873                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
1874 
1875         /** Constant for the "Rejang" block, starting at U+A930.
1876          */
1877         public static final UnicodeBlock REJANG =
1878                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
1879 
1880         /** Constant for the "Cham" block, starting at U+AA00.
1881          */
1882         public static final UnicodeBlock CHAM =
1883                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
1884 
1885         /** Constant for the "Ancient Symbols" block, starting at U+10190.
1886          */
1887         public static final UnicodeBlock ANCIENT_SYMBOLS =
1888                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
1889 
1890         /** Constant for the "Phaistos Disc" block, starting at U+101D0.
1891          */
1892         public static final UnicodeBlock PHAISTOS_DISC =
1893                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
1894 
1895         /** Constant for the "Lycian" block, starting at U+10280.
1896          */
1897         public static final UnicodeBlock LYCIAN =
1898                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
1899 
1900         /** Constant for the "Carian" block, starting at U+102A0.
1901          */
1902         public static final UnicodeBlock CARIAN =
1903                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
1904 
1905         /** Constant for the "Lydian" block, starting at U+10920.
1906          */
1907         public static final UnicodeBlock LYDIAN =
1908                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
1909 
1910         /** Constant for the "Mahjong Tiles" block, starting at U+1F000.
1911          */
1912         public static final UnicodeBlock MAHJONG_TILES =
1913                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
1914 
1915         /** Constant for the "Domino Tiles" block, starting at U+1F030.
1916          */
1917         public static final UnicodeBlock DOMINO_TILES =
1918                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
1919 
1920         /* New blocks in Unicode 5.2. The bracketed hex value after each declaration is the block's first code point. */
1921 
1922         /** Constant for the "Samaritan" block, starting at U+0800. */
1923         public static final UnicodeBlock SAMARITAN =
1924                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
1925         /** Constant for the "Unified Canadian Aboriginal Syllabics Extended" block, starting at U+18B0. */
1926         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1927                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1928                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
1929         /** Constant for the "Tai Tham" block, starting at U+1A20. */
1930         public static final UnicodeBlock TAI_THAM =
1931                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
1932         /** Constant for the "Vedic Extensions" block, starting at U+1CD0. */
1933         public static final UnicodeBlock VEDIC_EXTENSIONS =
1934                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
1935         /** Constant for the "Lisu" block, starting at U+A4D0. */
1936         public static final UnicodeBlock LISU =
1937                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
1938         /** Constant for the "Bamum" block, starting at U+A6A0. */
1939         public static final UnicodeBlock BAMUM =
1940                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
1941         /** Constant for the "Common Indic Number Forms" block, starting at U+A830. */
1942         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
1943                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
1944         /** Constant for the "Devanagari Extended" block, starting at U+A8E0. */
1945         public static final UnicodeBlock DEVANAGARI_EXTENDED =
1946                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
1947         /** Constant for the "Hangul Jamo Extended-A" block, starting at U+A960. */
1948         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
1949                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
1950         /** Constant for the "Javanese" block, starting at U+A980. */
1951         public static final UnicodeBlock JAVANESE =
1952                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
1953         /** Constant for the "Myanmar Extended-A" block, starting at U+AA60. */
1954         public static final UnicodeBlock MYANMAR_EXTENDED_A =
1955                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
1956         /** Constant for the "Tai Viet" block, starting at U+AA80. */
1957         public static final UnicodeBlock TAI_VIET =
1958                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
1959         /** Constant for the "Meetei Mayek" block, starting at U+ABC0. */
1960         public static final UnicodeBlock MEETEI_MAYEK =
1961                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
1962         /** Constant for the "Hangul Jamo Extended-B" block, starting at U+D7B0. */
1963         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
1964                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
1965         /** Constant for the "Imperial Aramaic" block, starting at U+10840. */
1966         public static final UnicodeBlock IMPERIAL_ARAMAIC =
1967                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
1968         /** Constant for the "Old South Arabian" block, starting at U+10A60. */
1969         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
1970                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
1971         /** Constant for the "Avestan" block, starting at U+10B00. */
1972         public static final UnicodeBlock AVESTAN =
1973                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
1974         /** Constant for the "Inscriptional Parthian" block, starting at U+10B40. */
1975         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
1976                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
1977         /** Constant for the "Inscriptional Pahlavi" block, starting at U+10B60. */
1978         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
1979                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
1980         /** Constant for the "Old Turkic" block, starting at U+10C00. */
1981         public static final UnicodeBlock OLD_TURKIC =
1982                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
1983         /** Constant for the "Rumi Numeral Symbols" block, starting at U+10E60. */
1984         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
1985                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
1986         /** Constant for the "Kaithi" block, starting at U+11080. */
1987         public static final UnicodeBlock KAITHI =
1988                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
1989         /** Constant for the "Egyptian Hieroglyphs" block, starting at U+13000. */
1990         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
1991                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
1992         /** Constant for the "Enclosed Alphanumeric Supplement" block, starting at U+1F100. */
1993         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
1994                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
1995                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
1996         /** Constant for the "Enclosed Ideographic Supplement" block, starting at U+1F200. */
1997         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
1998                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
1999                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
2000         /** Constant for the "CJK Unified Ideographs Extension C" block, starting at U+2A700. */
2001         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2002                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2003                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
2004 
2005         /* New blocks in Unicode 6.0. The bracketed hex value after each declaration is the block's first code point. */
2006 
2007         /** Constant for the "Mandaic" block, starting at U+0840. */
2008         public static final UnicodeBlock MANDAIC =
2009                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
2010         /** Constant for the "Batak" block, starting at U+1BC0. */
2011         public static final UnicodeBlock BATAK =
2012                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
2013         /** Constant for the "Ethiopic Extended-A" block, starting at U+AB00. */
2014         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2015                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
2016         /** Constant for the "Brahmi" block, starting at U+11000. */
2017         public static final UnicodeBlock BRAHMI =
2018                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
2019         /** Constant for the "Bamum Supplement" block, starting at U+16800. */
2020         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2021                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
2022         /** Constant for the "Kana Supplement" block, starting at U+1B000. */
2023         public static final UnicodeBlock KANA_SUPPLEMENT =
2024                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
2025         /** Constant for the "Playing Cards" block, starting at U+1F0A0. */
2026         public static final UnicodeBlock PLAYING_CARDS =
2027                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
2028         /** Constant for the "Miscellaneous Symbols and Pictographs" block, starting at U+1F300. */
2029         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2030                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2031                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
2032         /** Constant for the "Emoticons" block, starting at U+1F600. */
2033         public static final UnicodeBlock EMOTICONS =
2034                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
2035         /** Constant for the "Transport and Map Symbols" block, starting at U+1F680. */
2036         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2037                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
2038         /** Constant for the "Alchemical Symbols" block, starting at U+1F700. */
2039         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2040                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
2041         /** Constant for the "CJK Unified Ideographs Extension D" block, starting at U+2B740. */
2042         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2043                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2044                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
2045 
2046         /* New blocks in Unicode 6.1. The bracketed hex value after each declaration is the block's first code point. */
2047 
2048         /** Constant for the "Arabic Extended-A" block, starting at U+08A0. */
2049         public static final UnicodeBlock ARABIC_EXTENDED_A =
2050                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
2051         /** Constant for the "Arabic Mathematical Alphabetic Symbols" block, starting at U+1EE00. */
2052         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2053                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
2054         /** Constant for the "Chakma" block, starting at U+11100. */
2055         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
2056         /** Constant for the "Meetei Mayek Extensions" block, starting at U+AAE0. */
2057         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2058                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
2059         /** Constant for the "Meroitic Cursive" block, starting at U+109A0. */
2060         public static final UnicodeBlock MEROITIC_CURSIVE =
2061                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
2062         /** Constant for the "Meroitic Hieroglyphs" block, starting at U+10980. */
2063         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2064                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
2065         /** Constant for the "Miao" block, starting at U+16F00. */
2066         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
2067         /** Constant for the "Sharada" block, starting at U+11180. */
2068         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
2069         /** Constant for the "Sora Sompeng" block, starting at U+110D0. */
2070         public static final UnicodeBlock SORA_SOMPENG =
2071                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
2072         /** Constant for the "Sundanese Supplement" block, starting at U+1CC0. */
2073         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2074                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
2075         /** Constant for the "Takri" block, starting at U+11680. */
2076         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
2077 
2078         /* New blocks in Unicode 7.0. The bracketed hex value after each declaration is the block's first code point. */
2079 
2080         /** Constant for the "Bassa Vah" block, starting at U+16AD0. */
2081         public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
2082         /** Constant for the "Caucasian Albanian" block, starting at U+10530. */
2083         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2084                 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
2085         /** Constant for the "Coptic Epact Numbers" block, starting at U+102E0. */
2086         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2087                 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
2088         /** Constant for the "Combining Diacritical Marks Extended" block, starting at U+1AB0. */
2089         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2090                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
2091         /** Constant for the "Duployan" block, starting at U+1BC00. */
2092         public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
2093         /** Constant for the "Elbasan" block, starting at U+10500. */
2094         public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
2095         /** Constant for the "Geometric Shapes Extended" block, starting at U+1F780. */
2096         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2097                 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
2098         /** Constant for the "Grantha" block, starting at U+11300. */
2099         public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
2100         /** Constant for the "Khojki" block, starting at U+11200. */
2101         public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
2102         /** Constant for the "Khudawadi" block, starting at U+112B0. */
2103         public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
2104         /** Constant for the "Latin Extended-E" block, starting at U+AB30. */
2105         public static final UnicodeBlock LATIN_EXTENDED_E =
2106                 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
2107         /** Constant for the "Linear A" block, starting at U+10600. */
2108         public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
2109         /** Constant for the "Mahajani" block, starting at U+11150. */
2110         public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
2111         /** Constant for the "Manichaean" block, starting at U+10AC0. */
2112         public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
2113         /** Constant for the "Mende Kikakui" block, starting at U+1E800. */
2114         public static final UnicodeBlock MENDE_KIKAKUI =
2115                 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
2116         /** Constant for the "Modi" block, starting at U+11600. */
2117         public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
2118         /** Constant for the "Mro" block, starting at U+16A40. */
2119         public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
2120         /** Constant for the "Myanmar Extended-B" block, starting at U+A9E0. */
2121         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2122                 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
2123         /** Constant for the "Nabataean" block, starting at U+10880. */
2124         public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
2125         /** Constant for the "Old North Arabian" block, starting at U+10A80. */
2126         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2127                 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
2128         /** Constant for the "Old Permic" block, starting at U+10350. */
2129         public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
2130         /** Constant for the "Ornamental Dingbats" block, starting at U+1F650. */
2131         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2132                 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
2133         /** Constant for the "Pahawh Hmong" block, starting at U+16B00. */
2134         public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
2135         /** Constant for the "Palmyrene" block, starting at U+10860. */
2136         public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
2137         /** Constant for the "Pau Cin Hau" block, starting at U+11AC0. */
2138         public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
2139         /** Constant for the "Psalter Pahlavi" block, starting at U+10B80. */
2140         public static final UnicodeBlock PSALTER_PAHLAVI =
2141                 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
2142         /** Constant for the "Shorthand Format Controls" block, starting at U+1BCA0. */
2143         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2144                 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
2145         /** Constant for the "Siddham" block, starting at U+11580. */
2146         public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
2147         /** Constant for the "Sinhala Archaic Numbers" block, starting at U+111E0. */
2148         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2149                 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
2150         /** Constant for the "Supplemental Arrows-C" block, starting at U+1F800. */
2151         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2152                 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
2153         /** Constant for the "Tirhuta" block, starting at U+11480. */
2154         public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
2155         /** Constant for the "Warang Citi" block, starting at U+118A0. */
2156         public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
2157 
2158         /* New blocks in Unicode 8.0. The bracketed hex value after each declaration is the block's first code point. */
2159 
2160         /** Constant for the "Ahom" block, starting at U+11700. */
2161         public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
2162         /** Constant for the "Anatolian Hieroglyphs" block, starting at U+14400. */
2163         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2164                 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
2165         /** Constant for the "Cherokee Supplement" block, starting at U+AB70. */
2166         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2167                 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
2168         /** Constant for the "CJK Unified Ideographs Extension E" block, starting at U+2B820. */
2169         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2170                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2171                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
2172         /** Constant for the "Early Dynastic Cuneiform" block, starting at U+12480. */
2173         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2174                 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
2175         /** Constant for the "Hatran" block, starting at U+108E0. */
2176         public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
2177         /** Constant for the "Multani" block, starting at U+11280. */
2178         public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
2179         /** Constant for the "Old Hungarian" block, starting at U+10C80. */
2180         public static final UnicodeBlock OLD_HUNGARIAN =
2181                 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
2182         /** Constant for the "Supplemental Symbols and Pictographs" block, starting at U+1F900. */
2183         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2184                 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2185                         SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
2186         /** Constant for the "Sutton SignWriting" block, starting at U+1D800. */
2187         public static final UnicodeBlock SUTTON_SIGNWRITING =
2188                 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
2189 
2190         /* New blocks in Unicode 9.0. The bracketed hex value after each declaration is the block's first code point. */
2191 
2192         /** Constant for the "Adlam" block, starting at U+1E900. */
2193         public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
2194         /** Constant for the "Bhaiksuki" block, starting at U+11C00. */
2195         public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
2196         /** Constant for the "Cyrillic Extended-C" block, starting at U+1C80. */
2197         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
2198                 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
2199         /** Constant for the "Glagolitic Supplement" block, starting at U+1E000. */
2200         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
2201                 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
2202         /** Constant for the "Ideographic Symbols and Punctuation" block, starting at U+16FE0. */
2203         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
2204                 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
2205         /** Constant for the "Marchen" block, starting at U+11C70. */
2206         public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
2207         /** Constant for the "Mongolian Supplement" block, starting at U+11660. */
2208         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2209                 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
2210         /** Constant for the "Newa" block, starting at U+11400. */
2211         public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
2212         /** Constant for the "Osage" block, starting at U+104B0. */
2213         public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
2214         /** Constant for the "Tangut" block, starting at U+17000. */
2215         public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
2216         /** Constant for the "Tangut Components" block, starting at U+18800. */
2217         public static final UnicodeBlock TANGUT_COMPONENTS =
2218                 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/
2219 
2220         // New blocks in Unicode 10.0. The bracketed hex value after each declaration is the block's first code point.
2221 
2222         /** Constant for the "CJK Unified Ideographs Extension F" block, starting at U+2CEB0. */
2223         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
2224                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/
2225         /** Constant for the "Kana Extended-A" block, starting at U+1B100. */
2226         public static final UnicodeBlock KANA_EXTENDED_A =
2227                 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/
2228         /** Constant for the "Masaram Gondi" block, starting at U+11D00. */
2229         public static final UnicodeBlock MASARAM_GONDI =
2230                 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/
2231         /** Constant for the "Nushu" block, starting at U+1B170. */
2232         public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/
2233         /** Constant for the "Soyombo" block, starting at U+11A50. */
2234         public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/
2235         /** Constant for the "Syriac Supplement" block, starting at U+0860. */
2236         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
2237                 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/
2238         /** Constant for the "Zanabazar Square" block, starting at U+11A00. */
2239         public static final UnicodeBlock ZANABAZAR_SQUARE =
2240                 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/
2241 
2242         // New blocks in Unicode 11.0. The bracketed hex value after each declaration is the block's first code point.
2243 
2244         /** Constant for the "Chess Symbols" block, starting at U+1FA00. */
2245         public static final UnicodeBlock CHESS_SYMBOLS =
2246                 new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/
2247         /** Constant for the "Dogra" block, starting at U+11800. */
2248         public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/
2249         /** Constant for the "Georgian Extended" block, starting at U+1C90. */
2250         public static final UnicodeBlock GEORGIAN_EXTENDED =
2251                 new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/
2252         /** Constant for the "Gunjala Gondi" block, starting at U+11D60. */
2253         public static final UnicodeBlock GUNJALA_GONDI =
2254                 new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/
2255         /** Constant for the "Hanifi Rohingya" block, starting at U+10D00. */
2256         public static final UnicodeBlock HANIFI_ROHINGYA =
2257                 new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/
2258         /** Constant for the "Indic Siyaq Numbers" block, starting at U+1EC70. */
2259         public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
2260                 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/
2261         /** Constant for the "Makasar" block, starting at U+11EE0. */
2262         public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/
2263         /** Constant for the "Mayan Numerals" block, starting at U+1D2E0. */
2264         public static final UnicodeBlock MAYAN_NUMERALS =
2265                 new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/
2266         /** Constant for the "Medefaidrin" block, starting at U+16E40. */
2267         public static final UnicodeBlock MEDEFAIDRIN =
2268                 new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/
2269         /** Constant for the "Old Sogdian" block, starting at U+10F00. */
2270         public static final UnicodeBlock OLD_SOGDIAN =
2271                 new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/
2272         /** Constant for the "Sogdian" block, starting at U+10F30. */
2273         public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/
2274 
2275         // New blocks in Unicode 12.0
2276 
2277         /***/
2278         public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
2279                 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/
2280         /***/
2281         public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/
2282         /***/
2283         public static final UnicodeBlock NANDINAGARI =
2284                 new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/
2285         /***/
2286         public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
2287                 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/
2288         /***/
2289         public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
2290                 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/
2291         /***/
2292         public static final UnicodeBlock SMALL_KANA_EXTENSION =
2293                 new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/
2294         /***/
2295         public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
2296                 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/
2297         /***/
2298         public static final UnicodeBlock TAMIL_SUPPLEMENT =
2299                 new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/
2300         /***/
2301         public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/
2302 
2303         // New blocks in Unicode 13.0
2304 
2305         /***/
2306         public static final UnicodeBlock CHORASMIAN =
2307                 new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/
2308         /***/
2309         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
2310                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
2311                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/
2312         /***/
2313         public static final UnicodeBlock DIVES_AKURU =
2314                 new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/
2315         /***/
2316         public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
2317                 new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/
2318         /***/
2319         public static final UnicodeBlock LISU_SUPPLEMENT =
2320                 new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/
2321         /***/
2322         public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
2323                 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/
2324         /***/
2325         public static final UnicodeBlock TANGUT_SUPPLEMENT =
2326                 new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/
2327         /***/
2328         public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/
2329 
2330         // New blocks in Unicode 14.0
2331 
2332         /***/
2333         public static final UnicodeBlock ARABIC_EXTENDED_B =
2334                 new UnicodeBlock("ARABIC_EXTENDED_B", ARABIC_EXTENDED_B_ID); /*[0870]*/
2335         /***/
2336         public static final UnicodeBlock CYPRO_MINOAN =
2337                 new UnicodeBlock("CYPRO_MINOAN", CYPRO_MINOAN_ID); /*[12F90]*/
2338         /***/
2339         public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
2340                 new UnicodeBlock("ETHIOPIC_EXTENDED_B", ETHIOPIC_EXTENDED_B_ID); /*[1E7E0]*/
2341         /***/
2342         public static final UnicodeBlock KANA_EXTENDED_B =
2343                 new UnicodeBlock("KANA_EXTENDED_B", KANA_EXTENDED_B_ID); /*[1AFF0]*/
2344         /***/
2345         public static final UnicodeBlock LATIN_EXTENDED_F =
2346                 new UnicodeBlock("LATIN_EXTENDED_F", LATIN_EXTENDED_F_ID); /*[10780]*/
2347         /***/
2348         public static final UnicodeBlock LATIN_EXTENDED_G =
2349                 new UnicodeBlock("LATIN_EXTENDED_G", LATIN_EXTENDED_G_ID); /*[1DF00]*/
2350         /***/
2351         public static final UnicodeBlock OLD_UYGHUR =
2352                 new UnicodeBlock("OLD_UYGHUR", OLD_UYGHUR_ID); /*[10F70]*/
2353         /***/
2354         public static final UnicodeBlock TANGSA = new UnicodeBlock("TANGSA", TANGSA_ID); /*[16A70]*/
2355         /***/
2356         public static final UnicodeBlock TOTO = new UnicodeBlock("TOTO", TOTO_ID); /*[1E290]*/
2357         /***/
2358         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
2359                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
2360                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID); /*[11AB0]*/
2361         /***/
2362         public static final UnicodeBlock VITHKUQI =
2363                 new UnicodeBlock("VITHKUQI", VITHKUQI_ID); /*[10570]*/
2364         /***/
2365         public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
2366                 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
2367                         ZNAMENNY_MUSICAL_NOTATION_ID); /*[1CF00]*/
2368 
2369         // New blocks in Unicode 15.0
2370 
2371         /***/
2372         public static final UnicodeBlock ARABIC_EXTENDED_C =
2373                 new UnicodeBlock("ARABIC_EXTENDED_C", ARABIC_EXTENDED_C_ID); /*[10EC0]*/
2374         /***/
2375         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
2376                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
2377                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID); /*[31350]*/
2378         /***/
2379         public static final UnicodeBlock CYRILLIC_EXTENDED_D =
2380                 new UnicodeBlock("CYRILLIC_EXTENDED_D", CYRILLIC_EXTENDED_D_ID); /*[1E030]*/
2381         /***/
2382         public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
2383                 new UnicodeBlock("DEVANAGARI_EXTENDED_A", DEVANAGARI_EXTENDED_A_ID); /*[11B00]*/
2384         /***/
2385         public static final UnicodeBlock KAKTOVIK_NUMERALS =
2386                 new UnicodeBlock("KAKTOVIK_NUMERALS", KAKTOVIK_NUMERALS_ID); /*[1D2C0]*/
2387         /***/
2388         public static final UnicodeBlock KAWI = new UnicodeBlock("KAWI", KAWI_ID); /*[11F00]*/
2389         /***/
2390         public static final UnicodeBlock NAG_MUNDARI =
2391                 new UnicodeBlock("NAG_MUNDARI", NAG_MUNDARI_ID); /*[1E4D0]*/
2392 
2393         // New block in Unicode 15.1
2394 
2395         /***/
2396         @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API)
2397         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I =
2398                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I",
2399                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_ID); /*[2EBF0]*/
2400 
2401         /**
2402          */
2403         public static final UnicodeBlock INVALID_CODE
2404         = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2405 
2406         static {
2407             for (int blockId = 0; blockId < COUNT; ++blockId) {
2408                 if (BLOCKS_[blockId] == null) {
2409                     throw new java.lang.IllegalStateException(
2410                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2411                 }
2412             }
2413         }
2414 
2415         // public methods --------------------------------------------------
2416 
2417         /**
2418          * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID.
2419          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
2420          * @param id UnicodeBlock ID
2421          * @return the only instance of the UnicodeBlock with the argument ID
2422          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
2423          *         returned.
2424          */
getInstance(int id)2425         public static UnicodeBlock getInstance(int id)
2426         {
2427             if (id >= 0 && id < BLOCKS_.length) {
2428                 return BLOCKS_[id];
2429             }
2430             return INVALID_CODE;
2431         }
2432 
2433         /**
2434          * Returns the Unicode allocation block that contains the code point,
2435          * or null if the code point is not a member of a defined block.
2436          * @param ch code point to be tested
2437          * @return the Unicode allocation block that contains the code point
2438          */
of(int ch)2439         public static UnicodeBlock of(int ch)
2440         {
2441             if (ch > MAX_VALUE) {
2442                 return INVALID_CODE;
2443             }
2444 
2445             return UnicodeBlock.getInstance(
2446                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2447         }
2448 
2449         /**
2450          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
2451          * Returns the Unicode block with the given name. <strong>[icu] Note:</strong> Unlike
2452          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
2453          * against the official UCD name and the Java block name
2454          * (ignoring case).
2455          * @param blockName the name of the block to match
2456          * @return the UnicodeBlock with that name
2457          * @throws IllegalArgumentException if the blockName could not be matched
2458          */
forName(String blockName)2459         public static final UnicodeBlock forName(String blockName) {
2460             Map<String, UnicodeBlock> m = null;
2461             if (mref != null) {
2462                 m = mref.get();
2463             }
2464             if (m == null) {
2465                 m = new HashMap<>(BLOCKS_.length);
2466                 for (int i = 0; i < BLOCKS_.length; ++i) {
2467                     UnicodeBlock b = BLOCKS_[i];
2468                     String name = trimBlockName(
2469                             getPropertyValueName(UProperty.BLOCK, b.getID(),
2470                                     UProperty.NameChoice.LONG));
2471                     m.put(name, b);
2472                 }
2473                 mref = new SoftReference<>(m);
2474             }
2475             UnicodeBlock b = m.get(trimBlockName(blockName));
2476             if (b == null) {
2477                 throw new IllegalArgumentException();
2478             }
2479             return b;
2480         }
2481         private static SoftReference<Map<String, UnicodeBlock>> mref;
2482 
trimBlockName(String name)2483         private static String trimBlockName(String name) {
2484             String upper = name.toUpperCase(Locale.ENGLISH);
2485             StringBuilder result = new StringBuilder(upper.length());
2486             for (int i = 0; i < upper.length(); i++) {
2487                 char c = upper.charAt(i);
2488                 if (c != ' ' && c != '_' && c != '-') {
2489                     result.append(c);
2490                 }
2491             }
2492             return result.toString();
2493         }
2494 
2495         /**
2496          * {icu} Returns the type ID of this Unicode block
2497          * @return integer type ID of this Unicode block
2498          */
getID()2499         public int getID()
2500         {
2501             return m_id_;
2502         }
2503 
2504         // private data members ---------------------------------------------
2505 
2506         /**
2507          * Identification code for this UnicodeBlock
2508          */
2509         private int m_id_;
2510 
2511         // private constructor ----------------------------------------------
2512 
2513         /**
2514          * UnicodeBlock constructor
2515          * @param name name of this UnicodeBlock
2516          * @param id unique id of this UnicodeBlock
2517          * @exception NullPointerException if name is <code>null</code>
2518          */
UnicodeBlock(String name, int id)2519         private UnicodeBlock(String name, int id)
2520         {
2521             super(name);
2522             m_id_ = id;
2523             if (id >= 0) {
2524                 BLOCKS_[id] = this;
2525             }
2526         }
2527     }
2528 
/**
 * East Asian Width constants.
 * Fields of an interface are implicitly {@code public static final}, so the modifiers
 * are not repeated on each constant.
 * @see UProperty#EAST_ASIAN_WIDTH
 * @see UCharacter#getIntPropertyValue
 */
public interface EastAsianWidth
{
    int NEUTRAL = 0;
    int AMBIGUOUS = 1;
    int HALFWIDTH = 2;
    int FULLWIDTH = 3;
    int NARROW = 4;
    int WIDE = 5;

    /**
     * One more than the highest normal EastAsianWidth value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 6;
}
2564 
/**
 * Decomposition Type constants.
 * Fields of an interface are implicitly {@code public static final}, so the modifiers
 * are not repeated on each constant.
 * @see UProperty#DECOMPOSITION_TYPE
 */
public interface DecompositionType
{
    int NONE = 0;
    int CANONICAL = 1;
    int COMPAT = 2;
    int CIRCLE = 3;
    int FINAL = 4;
    int FONT = 5;
    int FRACTION = 6;
    int INITIAL = 7;
    int ISOLATED = 8;
    int MEDIAL = 9;
    int NARROW = 10;
    int NOBREAK = 11;
    int SMALL = 12;
    int SQUARE = 13;
    int SUB = 14;
    int SUPER = 15;
    int VERTICAL = 16;
    int WIDE = 17;

    /**
     * One more than the highest normal DecompositionType value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 18;
}
2635 
/**
 * Joining Type constants.
 * Fields of an interface are implicitly {@code public static final}, so the modifiers
 * are not repeated on each constant.
 * @see UProperty#JOINING_TYPE
 */
public interface JoiningType
{
    int NON_JOINING = 0;
    int JOIN_CAUSING = 1;
    int DUAL_JOINING = 2;
    int LEFT_JOINING = 3;
    int RIGHT_JOINING = 4;
    int TRANSPARENT = 5;

    /**
     * One more than the highest normal JoiningType value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 6;
}
2670 
/**
 * Joining Group constants.
 * Fields of an interface are implicitly {@code public static final}, so the modifiers
 * are not repeated on each constant. Blank lines keep the grouping of the original
 * declaration order.
 * @see UProperty#JOINING_GROUP
 */
public interface JoiningGroup
{
    int NO_JOINING_GROUP = 0;
    int AIN = 1;
    int ALAPH = 2;
    int ALEF = 3;
    int BEH = 4;
    int BETH = 5;
    int DAL = 6;
    int DALATH_RISH = 7;
    int E = 8;
    int FEH = 9;
    int FINAL_SEMKATH = 10;
    int GAF = 11;
    int GAMAL = 12;
    int HAH = 13;
    int TEH_MARBUTA_GOAL = 14;
    /** Alias: defined as {@link #TEH_MARBUTA_GOAL}, so both names share one value. */
    int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
    int HE = 15;
    int HEH = 16;
    int HEH_GOAL = 17;
    int HETH = 18;
    int KAF = 19;
    int KAPH = 20;
    int KNOTTED_HEH = 21;
    int LAM = 22;
    int LAMADH = 23;
    int MEEM = 24;
    int MIM = 25;
    int NOON = 26;
    int NUN = 27;
    int PE = 28;
    int QAF = 29;
    int QAPH = 30;
    int REH = 31;
    int REVERSED_PE = 32;
    int SAD = 33;
    int SADHE = 34;
    int SEEN = 35;
    int SEMKATH = 36;
    int SHIN = 37;
    int SWASH_KAF = 38;
    int SYRIAC_WAW = 39;
    int TAH = 40;
    int TAW = 41;
    int TEH_MARBUTA = 42;
    int TETH = 43;
    int WAW = 44;
    int YEH = 45;
    int YEH_BARREE = 46;
    int YEH_WITH_TAIL = 47;
    int YUDH = 48;
    int YUDH_HE = 49;
    int ZAIN = 50;
    int FE = 51;
    int KHAPH = 52;
    int ZHAIN = 53;
    int BURUSHASKI_YEH_BARREE = 54;
    int FARSI_YEH = 55;
    int NYA = 56;
    int ROHINGYA_YEH = 57;

    int MANICHAEAN_ALEPH = 58;
    int MANICHAEAN_AYIN = 59;
    int MANICHAEAN_BETH = 60;
    int MANICHAEAN_DALETH = 61;
    int MANICHAEAN_DHAMEDH = 62;
    int MANICHAEAN_FIVE = 63;
    int MANICHAEAN_GIMEL = 64;
    int MANICHAEAN_HETH = 65;
    int MANICHAEAN_HUNDRED = 66;
    int MANICHAEAN_KAPH = 67;
    int MANICHAEAN_LAMEDH = 68;
    int MANICHAEAN_MEM = 69;
    int MANICHAEAN_NUN = 70;
    int MANICHAEAN_ONE = 71;
    int MANICHAEAN_PE = 72;
    int MANICHAEAN_QOPH = 73;
    int MANICHAEAN_RESH = 74;
    int MANICHAEAN_SADHE = 75;
    int MANICHAEAN_SAMEKH = 76;
    int MANICHAEAN_TAW = 77;
    int MANICHAEAN_TEN = 78;
    int MANICHAEAN_TETH = 79;
    int MANICHAEAN_THAMEDH = 80;
    int MANICHAEAN_TWENTY = 81;
    int MANICHAEAN_WAW = 82;
    int MANICHAEAN_YODH = 83;
    int MANICHAEAN_ZAYIN = 84;
    int STRAIGHT_WAW = 85;

    int AFRICAN_FEH = 86;
    int AFRICAN_NOON = 87;
    int AFRICAN_QAF = 88;

    int MALAYALAM_BHA = 89;
    int MALAYALAM_JA = 90;
    int MALAYALAM_LLA = 91;
    int MALAYALAM_LLLA = 92;
    int MALAYALAM_NGA = 93;
    int MALAYALAM_NNA = 94;
    int MALAYALAM_NNNA = 95;
    int MALAYALAM_NYA = 96;
    int MALAYALAM_RA = 97;
    int MALAYALAM_SSA = 98;
    int MALAYALAM_TTA = 99;

    int HANIFI_ROHINGYA_KINNA_YA = 100;
    int HANIFI_ROHINGYA_PA = 101;

    int THIN_YEH = 102;
    int VERTICAL_TAIL = 103;

    /**
     * One more than the highest normal JoiningGroup value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 104;
}
2958 
/**
 * Grapheme Cluster Break constants.
 * Fields of an interface are implicitly {@code public static final}, so the modifiers
 * are not repeated on each constant.
 * @see UProperty#GRAPHEME_CLUSTER_BREAK
 */
public interface GraphemeClusterBreak {
    int OTHER = 0;
    int CONTROL = 1;
    int CR = 2;
    int EXTEND = 3;
    int L = 4;
    int LF = 5;
    int LV = 6;
    int LVT = 7;
    int T = 8;
    int V = 9;
    int SPACING_MARK = 10;
    int PREPEND = 11;

    /* new in Unicode 6.2/ICU 50 */
    int REGIONAL_INDICATOR = 12;  /*[RI]*/

    /* from here on: new in Unicode 9.0/ICU 58 */
    int E_BASE = 13;              /*[EB]*/
    int E_BASE_GAZ = 14;          /*[EBG]*/
    int E_MODIFIER = 15;          /*[EM]*/
    int GLUE_AFTER_ZWJ = 16;      /*[GAZ]*/
    int ZWJ = 17;                 /*[ZWJ]*/

    /**
     * One more than the highest normal GraphemeClusterBreak value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 18;
}
3023 
/**
 * Word Break constants.
 * Fields of an interface are implicitly {@code public static final}, so the modifiers
 * are not repeated on each constant.
 * @see UProperty#WORD_BREAK
 */
public interface WordBreak {
    int OTHER = 0;
    int ALETTER = 1;
    int FORMAT = 2;
    int KATAKANA = 3;
    int MIDLETTER = 4;
    int MIDNUM = 5;
    int NUMERIC = 6;
    int EXTENDNUMLET = 7;
    int CR = 8;
    int EXTEND = 9;
    int LF = 10;
    int MIDNUMLET = 11;
    int NEWLINE = 12;

    /* new in Unicode 6.2/ICU 50 */
    int REGIONAL_INDICATOR = 13;  /*[RI]*/

    /* from here on: new in Unicode 6.3/ICU 52 */
    int HEBREW_LETTER = 14;       /*[HL]*/
    int SINGLE_QUOTE = 15;        /*[SQ]*/
    int DOUBLE_QUOTE = 16;        /*[DQ]*/

    /* from here on: new in Unicode 9.0/ICU 58 */
    int E_BASE = 17;              /*[EB]*/
    int E_BASE_GAZ = 18;          /*[EBG]*/
    int E_MODIFIER = 19;          /*[EM]*/
    int GLUE_AFTER_ZWJ = 20;      /*[GAZ]*/
    int ZWJ = 21;                 /*[ZWJ]*/
    int WSEGSPACE = 22;           /*[WSEGSPACE]*/

    /**
     * One more than the highest normal WordBreak value.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     * @hide unsupported on Android
     */
    @Deprecated
    int COUNT = 23;
}
3098 
3099     /**
3100      * Sentence Break constants.
3101      * @see UProperty#SENTENCE_BREAK
3102      */
3103     public static interface SentenceBreak {
3104         /**
3105          */
3106         public static final int OTHER = 0;
3107         /**
3108          */
3109         public static final int ATERM = 1;
3110         /**
3111          */
3112         public static final int CLOSE = 2;
3113         /**
3114          */
3115         public static final int FORMAT = 3;
3116         /**
3117          */
3118         public static final int LOWER = 4;
3119         /**
3120          */
3121         public static final int NUMERIC = 5;
3122         /**
3123          */
3124         public static final int OLETTER = 6;
3125         /**
3126          */
3127         public static final int SEP = 7;
3128         /**
3129          */
3130         public static final int SP = 8;
3131         /**
3132          */
3133         public static final int STERM = 9;
3134         /**
3135          */
3136         public static final int UPPER = 10;
3137         /**
3138          */
3139         public static final int CR = 11;
3140         /**
3141          */
3142         public static final int EXTEND = 12;
3143         /**
3144          */
3145         public static final int LF = 13;
3146         /**
3147          */
3148         public static final int SCONTINUE = 14;
3149         /**
3150          * One more than the highest normal SentenceBreak value.
3151          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
3152          *
3153          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3154          * @hide unsupported on Android
3155          */
3156         @Deprecated
3157         public static final int COUNT = 15;
3158     }
3159 
3160     /**
3161      * Line Break constants.
3162      * @see UProperty#LINE_BREAK
3163      */
3164     public static interface LineBreak
3165     {
3166         /**
3167          */
3168         public static final int UNKNOWN = 0;
3169         /**
3170          */
3171         public static final int AMBIGUOUS = 1;
3172         /**
3173          */
3174         public static final int ALPHABETIC = 2;
3175         /**
3176          */
3177         public static final int BREAK_BOTH = 3;
3178         /**
3179          */
3180         public static final int BREAK_AFTER = 4;
3181         /**
3182          */
3183         public static final int BREAK_BEFORE = 5;
3184         /**
3185          */
3186         public static final int MANDATORY_BREAK = 6;
3187         /**
3188          */
3189         public static final int CONTINGENT_BREAK = 7;
3190         /**
3191          */
3192         public static final int CLOSE_PUNCTUATION = 8;
3193         /**
3194          */
3195         public static final int COMBINING_MARK = 9;
3196         /**
3197          */
3198         public static final int CARRIAGE_RETURN = 10;
3199         /**
3200          */
3201         public static final int EXCLAMATION = 11;
3202         /**
3203          */
3204         public static final int GLUE = 12;
3205         /**
3206          */
3207         public static final int HYPHEN = 13;
3208         /**
3209          */
3210         public static final int IDEOGRAPHIC = 14;
3211         /**
3212          * @see #INSEPARABLE
3213          */
3214         public static final int INSEPERABLE = 15;
3215         /**
3216          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
3217          */
3218         public static final int INSEPARABLE = 15;
3219         /**
3220          */
3221         public static final int INFIX_NUMERIC = 16;
3222         /**
3223          */
3224         public static final int LINE_FEED = 17;
3225         /**
3226          */
3227         public static final int NONSTARTER = 18;
3228         /**
3229          */
3230         public static final int NUMERIC = 19;
3231         /**
3232          */
3233         public static final int OPEN_PUNCTUATION = 20;
3234         /**
3235          */
3236         public static final int POSTFIX_NUMERIC = 21;
3237         /**
3238          */
3239         public static final int PREFIX_NUMERIC = 22;
3240         /**
3241          */
3242         public static final int QUOTATION = 23;
3243         /**
3244          */
3245         public static final int COMPLEX_CONTEXT = 24;
3246         /**
3247          */
3248         public static final int SURROGATE = 25;
3249         /**
3250          */
3251         public static final int SPACE = 26;
3252         /**
3253          */
3254         public static final int BREAK_SYMBOLS = 27;
3255         /**
3256          */
3257         public static final int ZWSPACE = 28;
3258         /**
3259          */
3260         public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
3261         /**
3262          */
3263         public static final int WORD_JOINER = 30;      /*[WJ]*/
3264         /**
3265          */
3266         public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
3267         /**
3268          */
3269         public static final int H3 = 32;
3270         /**
3271          */
3272         public static final int JL = 33;
3273         /**
3274          */
3275         public static final int JT = 34;
3276         /**
3277          */
3278         public static final int JV = 35;
3279         /***/
3280         public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
3281         /***/
3282         public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
3283         /***/
3284         public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
3285         /***/
3286         public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3287         /***/
3288         public static final int E_BASE = 40;  /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3289         /***/
3290         public static final int E_MODIFIER = 41;  /*[EM]*/
3291         /***/
3292         public static final int ZWJ = 42;  /*[ZWJ]*/
3293         /***/
3294         @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API)
3295         public static final int AKSARA = 43;  /*[AK]*/ /* from here on: new in Unicode 15.1/ICU 74 */
3296         /***/
3297         @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API)
3298         public static final int AKSARA_PREBASE = 44;  /*[AP]*/
3299         /***/
3300         @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API)
3301         public static final int AKSARA_START = 45;  /*[AS]*/
3302         /***/
3303         @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API)
3304         public static final int VIRAMA_FINAL = 46;  /*[VF]*/
3305         /***/
3306         @android.annotation.FlaggedApi(com.android.icu.Flags.FLAG_ICU_V_API)
3307         public static final int VIRAMA = 47;  /*[VI]*/
3308         /**
3309          * One more than the highest normal LineBreak value.
3310          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
3311          *
3312          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3313          * @hide unsupported on Android
3314          */
3315         @Deprecated
3316         public static final int COUNT = 48;
3317     }
3318 
3319     /**
3320      * Numeric Type constants.
3321      * @see UProperty#NUMERIC_TYPE
3322      */
3323     public static interface NumericType
3324     {
3325         /**
3326          */
3327         public static final int NONE = 0;
3328         /**
3329          */
3330         public static final int DECIMAL = 1;
3331         /**
3332          */
3333         public static final int DIGIT = 2;
3334         /**
3335          */
3336         public static final int NUMERIC = 3;
3337         /**
3338          * One more than the highest normal NumericType value.
3339          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE).
3340          *
3341          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3342          * @hide unsupported on Android
3343          */
3344         @Deprecated
3345         public static final int COUNT = 4;
3346     }
3347 
3348     /**
3349      * Hangul Syllable Type constants.
3350      *
3351      * @see UProperty#HANGUL_SYLLABLE_TYPE
3352      */
3353     public static interface HangulSyllableType
3354     {
3355         /**
3356          */
3357         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
3358         /**
3359          */
3360         public static final int LEADING_JAMO        = 1;   /*[L]*/
3361         /**
3362          */
3363         public static final int VOWEL_JAMO          = 2;   /*[V]*/
3364         /**
3365          */
3366         public static final int TRAILING_JAMO       = 3;   /*[T]*/
3367         /**
3368          */
3369         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
3370         /**
3371          */
3372         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
3373         /**
3374          * One more than the highest normal HangulSyllableType value.
3375          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
3376          *
3377          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3378          * @hide unsupported on Android
3379          */
3380         @Deprecated
3381         public static final int COUNT               = 6;
3382     }
3383 
3384     /**
3385      * Bidi Paired Bracket Type constants.
3386      *
3387      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
3388      */
3389     public static interface BidiPairedBracketType {
3390         /**
3391          * Not a paired bracket.
3392          */
3393         public static final int NONE = 0;
3394         /**
3395          * Open paired bracket.
3396          */
3397         public static final int OPEN = 1;
3398         /**
3399          * Close paired bracket.
3400          */
3401         public static final int CLOSE = 2;
3402         /**
3403          * One more than the highest normal BidiPairedBracketType value.
3404          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
3405          *
3406          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3407          * @hide unsupported on Android
3408          */
3409         @Deprecated
3410         public static final int COUNT = 3;
3411     }
3412 
3413     /**
3414      * Indic Positional Category constants.
3415      *
3416      * @see UProperty#INDIC_POSITIONAL_CATEGORY
3417      */
3418     public static interface IndicPositionalCategory {
3419         /***/
3420         public static final int NA = 0;
3421         /***/
3422         public static final int BOTTOM = 1;
3423         /***/
3424         public static final int BOTTOM_AND_LEFT = 2;
3425         /***/
3426         public static final int BOTTOM_AND_RIGHT = 3;
3427         /***/
3428         public static final int LEFT = 4;
3429         /***/
3430         public static final int LEFT_AND_RIGHT = 5;
3431         /***/
3432         public static final int OVERSTRUCK = 6;
3433         /***/
3434         public static final int RIGHT = 7;
3435         /***/
3436         public static final int TOP = 8;
3437         /***/
3438         public static final int TOP_AND_BOTTOM = 9;
3439         /***/
3440         public static final int TOP_AND_BOTTOM_AND_RIGHT = 10;
3441         /***/
3442         public static final int TOP_AND_LEFT = 11;
3443         /***/
3444         public static final int TOP_AND_LEFT_AND_RIGHT = 12;
3445         /***/
3446         public static final int TOP_AND_RIGHT = 13;
3447         /***/
3448         public static final int VISUAL_ORDER_LEFT = 14;
3449         /***/
3450         public static final int TOP_AND_BOTTOM_AND_LEFT = 15;
3451     }
3452 
3453     /**
3454      * Indic Syllabic Category constants.
3455      *
3456      * @see UProperty#INDIC_SYLLABIC_CATEGORY
3457      */
3458     public static interface IndicSyllabicCategory {
3459         /***/
3460         public static final int OTHER = 0;
3461         /***/
3462         public static final int AVAGRAHA = 1;
3463         /***/
3464         public static final int BINDU = 2;
3465         /***/
3466         public static final int BRAHMI_JOINING_NUMBER = 3;
3467         /***/
3468         public static final int CANTILLATION_MARK = 4;
3469         /***/
3470         public static final int CONSONANT = 5;
3471         /***/
3472         public static final int CONSONANT_DEAD = 6;
3473         /***/
3474         public static final int CONSONANT_FINAL = 7;
3475         /***/
3476         public static final int CONSONANT_HEAD_LETTER = 8;
3477         /***/
3478         public static final int CONSONANT_INITIAL_POSTFIXED = 9;
3479         /***/
3480         public static final int CONSONANT_KILLER = 10;
3481         /***/
3482         public static final int CONSONANT_MEDIAL = 11;
3483         /***/
3484         public static final int CONSONANT_PLACEHOLDER = 12;
3485         /***/
3486         public static final int CONSONANT_PRECEDING_REPHA = 13;
3487         /***/
3488         public static final int CONSONANT_PREFIXED = 14;
3489         /***/
3490         public static final int CONSONANT_SUBJOINED = 15;
3491         /***/
3492         public static final int CONSONANT_SUCCEEDING_REPHA = 16;
3493         /***/
3494         public static final int CONSONANT_WITH_STACKER = 17;
3495         /***/
3496         public static final int GEMINATION_MARK = 18;
3497         /***/
3498         public static final int INVISIBLE_STACKER = 19;
3499         /***/
3500         public static final int JOINER = 20;
3501         /***/
3502         public static final int MODIFYING_LETTER = 21;
3503         /***/
3504         public static final int NON_JOINER = 22;
3505         /***/
3506         public static final int NUKTA = 23;
3507         /***/
3508         public static final int NUMBER = 24;
3509         /***/
3510         public static final int NUMBER_JOINER = 25;
3511         /***/
3512         public static final int PURE_KILLER = 26;
3513         /***/
3514         public static final int REGISTER_SHIFTER = 27;
3515         /***/
3516         public static final int SYLLABLE_MODIFIER = 28;
3517         /***/
3518         public static final int TONE_LETTER = 29;
3519         /***/
3520         public static final int TONE_MARK = 30;
3521         /***/
3522         public static final int VIRAMA = 31;
3523         /***/
3524         public static final int VISARGA = 32;
3525         /***/
3526         public static final int VOWEL = 33;
3527         /***/
3528         public static final int VOWEL_DEPENDENT = 34;
3529         /***/
3530         public static final int VOWEL_INDEPENDENT = 35;
3531     }
3532 
3533     /**
3534      * Vertical Orientation constants.
3535      *
3536      * @see UProperty#VERTICAL_ORIENTATION
3537      */
3538     public static interface VerticalOrientation {
3539         /***/
3540         public static final int ROTATED = 0;
3541         /***/
3542         public static final int TRANSFORMED_ROTATED = 1;
3543         /***/
3544         public static final int TRANSFORMED_UPRIGHT = 2;
3545         /***/
3546         public static final int UPRIGHT = 3;
3547     }
3548 
3549     /**
3550      * Identifier Status constants.
3551      * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
3552      *
3553      * @see UProperty#IDENTIFIER_STATUS
3554      * @hide Only a subset of ICU is exposed in Android
3555      * @hide draft / provisional / internal are hidden on Android
3556      */
3557     public enum IdentifierStatus {
3558         /** @hide draft / provisional / internal are hidden on Android*/
3559         RESTRICTED,
3560         /** @hide draft / provisional / internal are hidden on Android*/
3561         ALLOWED,
3562     }
3563 
3564     /**
3565      * Identifier Type constants.
3566      * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
3567      *
3568      * @see UProperty#IDENTIFIER_TYPE
3569      * @hide Only a subset of ICU is exposed in Android
3570      * @hide draft / provisional / internal are hidden on Android
3571      */
3572     public enum IdentifierType {
3573         /** @hide draft / provisional / internal are hidden on Android*/
3574         NOT_CHARACTER,
3575         /** @hide draft / provisional / internal are hidden on Android*/
3576         DEPRECATED,
3577         /** @hide draft / provisional / internal are hidden on Android*/
3578         DEFAULT_IGNORABLE,
3579         /** @hide draft / provisional / internal are hidden on Android*/
3580         NOT_NFKC,
3581         /** @hide draft / provisional / internal are hidden on Android*/
3582         NOT_XID,
3583         /** @hide draft / provisional / internal are hidden on Android*/
3584         EXCLUSION,
3585         /** @hide draft / provisional / internal are hidden on Android*/
3586         OBSOLETE,
3587         /** @hide draft / provisional / internal are hidden on Android*/
3588         TECHNICAL,
3589         /** @hide draft / provisional / internal are hidden on Android*/
3590         UNCOMMON_USE,
3591         /** @hide draft / provisional / internal are hidden on Android*/
3592         LIMITED_USE,
3593         /** @hide draft / provisional / internal are hidden on Android*/
3594         INCLUSION,
3595         /** @hide draft / provisional / internal are hidden on Android*/
3596         RECOMMENDED,
3597     }
3598 
3599     // public data members -----------------------------------------------
3600 
3601     /**
3602      * The lowest Unicode code point value, constant 0.
3603      * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
3604      */
3605     public static final int MIN_VALUE = Character.MIN_CODE_POINT;
3606 
3607     /**
3608      * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
3609      * Same as {@link Character#MAX_CODE_POINT}.
3610      *
3611      * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
3612      * which is still a char with the value U+FFFF.
3613      */
3614     public static final int MAX_VALUE = Character.MAX_CODE_POINT;
3615 
3616     /**
3617      * The minimum value for Supplementary code points, constant U+10000.
3618      * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
3619      */
3620     public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
3621 
3622     /**
3623      * Unicode value used when translating into Unicode encoding form and there
3624      * is no existing character.
3625      */
3626     public static final int REPLACEMENT_CHAR = '\uFFFD';
3627 
3628     /**
3629      * Special value that is returned by getUnicodeNumericValue(int) when no
3630      * numeric value is defined for a code point.
3631      * @see #getUnicodeNumericValue
3632      */
3633     public static final double NO_NUMERIC_VALUE = -123456789;
3634 
3635     /**
3636      * Compatibility constant for Java Character's MIN_RADIX.
3637      */
3638     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
3639 
3640     /**
3641      * Compatibility constant for Java Character's MAX_RADIX.
3642      */
3643     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
3644 
3645     /**
3646      * Do not lowercase non-initial parts of words when titlecasing.
3647      * Option bit for titlecasing APIs that take an options bit set.
3648      *
3649      * By default, titlecasing will titlecase the first cased character
3650      * of a word and lowercase all other characters.
3651      * With this option, the other characters will not be modified.
3652      *
3653      * @see #toTitleCase
3654      */
3655     public static final int TITLECASE_NO_LOWERCASE = 0x100;
3656 
3657     /**
3658      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
3659      * titlecase exactly the characters at breaks from the iterator.
3660      * Option bit for titlecasing APIs that take an options bit set.
3661      *
3662      * By default, titlecasing will take each break iterator index,
3663      * adjust it by looking for the next cased character, and titlecase that one.
3664      * Other characters are lowercased.
3665      *
3666      * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
3667      *
3668      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
3669      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
3670      * cased character F. If F exists, map F to default_title(F); then map each
3671      * subsequent character C to default_lower(C).
3672      *
3673      * @see #toTitleCase
3674      * @see #TITLECASE_NO_LOWERCASE
3675      */
3676     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
3677 
3678     // public methods ----------------------------------------------------
3679 
3680     /**
3681      * Returnss the numeric value of a decimal digit code point.
3682      * <br>This method observes the semantics of
3683      * <code>java.lang.Character.digit()</code>.  Note that this
3684      * will return positive values for code points for which isDigit
3685      * returns false, just like java.lang.Character.
3686      * <br><em>Semantic Change:</em> In release 1.3.1 and
3687      * prior, this did not treat the European letters as having a
3688      * digit value, and also treated numeric letters and other numbers as
3689      * digits.
3690      * This has been changed to conform to the java semantics.
3691      * <br>A code point is a valid digit if and only if:
3692      * <ul>
3693      *   <li>ch is a decimal digit or one of the european letters, and
3694      *   <li>the value of ch is less than the specified radix.
3695      * </ul>
3696      * @param ch the code point to query
3697      * @param radix the radix
3698      * @return the numeric value represented by the code point in the
3699      * specified radix, or -1 if the code point is not a decimal digit
3700      * or if its value is too large for the radix
3701      */
digit(int ch, int radix)3702     public static int digit(int ch, int radix)
3703     {
3704         if (2 <= radix && radix <= 36) {
3705             int value = digit(ch);
3706             if (value < 0) {
3707                 // ch is not a decimal digit, try latin letters
3708                 value = UCharacterProperty.getEuropeanDigit(ch);
3709             }
3710             return (value < radix) ? value : -1;
3711         } else {
3712             return -1;  // invalid radix
3713         }
3714     }
3715 
3716     /**
3717      * Returnss the numeric value of a decimal digit code point.
3718      * <br>This is a convenience overload of <code>digit(int, int)</code>
3719      * that provides a decimal radix.
3720      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
3721      * treated numeric letters and other numbers as digits.  This has
3722      * been changed to conform to the java semantics.
3723      * @param ch the code point to query
3724      * @return the numeric value represented by the code point,
3725      * or -1 if the code point is not a decimal digit or if its
3726      * value is too large for a decimal radix
3727      */
digit(int ch)3728     public static int digit(int ch)
3729     {
3730         return UCharacterProperty.INSTANCE.digit(ch);
3731     }
3732 
3733     /**
3734      * Returns the numeric value of the code point as a nonnegative
3735      * integer.
3736      * <br>If the code point does not have a numeric value, then -1 is returned.
3737      * <br>
3738      * If the code point has a numeric value that cannot be represented as a
3739      * nonnegative integer (for example, a fractional value), then -2 is
3740      * returned.
3741      * @param ch the code point to query
3742      * @return the numeric value of the code point, or -1 if it has no numeric
3743      * value, or -2 if it has a numeric value that cannot be represented as a
3744      * nonnegative integer
3745      */
getNumericValue(int ch)3746     public static int getNumericValue(int ch)
3747     {
3748         return UCharacterProperty.INSTANCE.getNumericValue(ch);
3749     }
3750 
3751     /**
3752      * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the
3753      * Unicode Character Database.
3754      * <p>A "double" return type is necessary because some numeric values are
3755      * fractions, negative, or too large for int.
3756      * <p>For characters without any numeric values in the Unicode Character
3757      * Database, this function will return NO_NUMERIC_VALUE.
3758      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
3759      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
3760      * return type int and returns -1 when the argument ch does not have a
3761      * corresponding numeric value. This has been changed to synch with ICU4C
3762      *
3763      * This corresponds to the ICU4C function u_getNumericValue.
3764      * @param ch Code point to get the numeric value for.
3765      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
3766      */
getUnicodeNumericValue(int ch)3767     public static double getUnicodeNumericValue(int ch)
3768     {
3769         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
3770     }
3771 
3772     /**
3773      * Compatibility override of Java deprecated method.  This
3774      * method will always remain deprecated.
3775      * Same as java.lang.Character.isSpace().
3776      * @param ch the code point
3777      * @return true if the code point is a space character as
3778      * defined by java.lang.Character.isSpace.
3779      * @deprecated ICU 3.4 (Java)
3780      * @hide original deprecated declaration
3781      */
3782     @Deprecated
isSpace(int ch)3783     public static boolean isSpace(int ch) {
3784         return ch <= 0x20 &&
3785                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
3786     }
3787 
3788     /**
3789      * Returns a value indicating a code point's Unicode category.
3790      * Up-to-date Unicode implementation of java.lang.Character.getType()
3791      * except for the above mentioned code points that had their category
3792      * changed.<br>
3793      * Return results are constants from the interface
3794      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
3795      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3796      * those returned by java.lang.Character.getType.  UCharacterCategory values
3797      * match the ones used in ICU4C, while java.lang.Character type
3798      * values, though similar, skip the value 17.
3799      * @param ch code point whose type is to be determined
3800      * @return category which is a value of UCharacterCategory
3801      */
getType(int ch)3802     public static int getType(int ch)
3803     {
3804         return UCharacterProperty.INSTANCE.getType(ch);
3805     }
3806 
3807     /**
3808      * Determines if a code point has a defined meaning in the up-to-date
3809      * Unicode standard.
3810      * E.g. supplementary code points though allocated space are not defined in
3811      * Unicode yet.<br>
3812      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3813      * @param ch code point to be determined if it is defined in the most
3814      *        current version of Unicode
3815      * @return true if this code point is defined in unicode
3816      */
isDefined(int ch)3817     public static boolean isDefined(int ch)
3818     {
3819         return getType(ch) != 0;
3820     }
3821 
3822     /**
3823      * Determines if a code point is a Java digit.
3824      * <br>This method observes the semantics of
3825      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
3826      * digits only.
3827      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
3828      * numeric letters and other numbers as digits.
3829      * This has been changed to conform to the java semantics.
3830      * @param ch code point to query
3831      * @return true if this code point is a digit
3832      */
isDigit(int ch)3833     public static boolean isDigit(int ch)
3834     {
3835         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3836     }
3837 
3838     /**
3839      * Determines if the specified code point is an ISO control character.
3840      * A code point is considered to be an ISO control character if it is in
3841      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
3842      * &#92;u009F.<br>
3843      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3844      * @param ch code point to determine if it is an ISO control character
3845      * @return true if code point is a ISO control character
3846      */
isISOControl(int ch)3847     public static boolean isISOControl(int ch)
3848     {
3849         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
3850                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3851     }
3852 
3853     /**
3854      * Determines if the specified code point is a letter.
3855      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3856      * @param ch code point to determine if it is a letter
3857      * @return true if code point is a letter
3858      */
isLetter(int ch)3859     public static boolean isLetter(int ch)
3860     {
3861         // if props == 0, it will just fall through and return false
3862         return ((1 << getType(ch))
3863                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3864                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3865                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3866                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3867                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3868     }
3869 
3870     /**
3871      * Determines if the specified code point is a letter or digit.
3872      * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii
3873      * characters 'A' - 'Z' and 'a' - 'z' as digits.
3874      * @param ch code point to determine if it is a letter or a digit
3875      * @return true if code point is a letter or a digit
3876      */
isLetterOrDigit(int ch)3877     public static boolean isLetterOrDigit(int ch)
3878     {
3879         return ((1 << getType(ch))
3880                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3881                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3882                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3883                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3884                         | (1 << UCharacterCategory.OTHER_LETTER)
3885                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3886     }
3887 
3888     /**
3889      * Compatibility override of Java deprecated method.  This
3890      * method will always remain deprecated.  Delegates to
3891      * java.lang.Character.isJavaIdentifierStart.
3892      * @param cp the code point
3893      * @return true if the code point can start a java identifier.
3894      * @deprecated ICU 3.4 (Java)
3895      * @hide original deprecated declaration
3896      */
3897     @Deprecated
isJavaLetter(int cp)3898     public static boolean isJavaLetter(int cp) {
3899         return isJavaIdentifierStart(cp);
3900     }
3901 
3902     /**
3903      * Compatibility override of Java deprecated method.  This
3904      * method will always remain deprecated.  Delegates to
3905      * java.lang.Character.isJavaIdentifierPart.
3906      * @param cp the code point
3907      * @return true if the code point can continue a java identifier.
3908      * @deprecated ICU 3.4 (Java)
3909      * @hide original deprecated declaration
3910      */
3911     @Deprecated
isJavaLetterOrDigit(int cp)3912     public static boolean isJavaLetterOrDigit(int cp) {
3913         return isJavaIdentifierPart(cp);
3914     }
3915 
3916     /**
3917      * Compatibility override of Java method, delegates to
3918      * java.lang.Character.isJavaIdentifierStart.
3919      * @param cp the code point
3920      * @return true if the code point can start a java identifier.
3921      */
isJavaIdentifierStart(int cp)3922     public static boolean isJavaIdentifierStart(int cp) {
3923         // note, downcast to char for jdk 1.4 compatibility
3924         return java.lang.Character.isJavaIdentifierStart((char)cp);
3925     }
3926 
3927     /**
3928      * Compatibility override of Java method, delegates to
3929      * java.lang.Character.isJavaIdentifierPart.
3930      * @param cp the code point
3931      * @return true if the code point can continue a java identifier.
3932      */
isJavaIdentifierPart(int cp)3933     public static boolean isJavaIdentifierPart(int cp) {
3934         // note, downcast to char for jdk 1.4 compatibility
3935         return java.lang.Character.isJavaIdentifierPart((char)cp);
3936     }
3937 
3938     /**
3939      * Determines if the specified code point is a lowercase character.
3940      * UnicodeData only contains case mappings for code points where they are
3941      * one-to-one mappings; it also omits information about context-sensitive
3942      * case mappings.<br> For more information about Unicode case mapping
3943      * please refer to the
3944      * <a href=https://www.unicode.org/reports/tr21/>Technical report
3945      * #21</a>.<br>
3946      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3947      * @param ch code point to determine if it is in lowercase
3948      * @return true if code point is a lowercase character
3949      */
isLowerCase(int ch)3950     public static boolean isLowerCase(int ch)
3951     {
3952         // if props == 0, it will just fall through and return false
3953         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3954     }
3955 
3956     /**
3957      * Determines if the specified code point is a white space character.
3958      * A code point is considered to be an whitespace character if and only
3959      * if it satisfies one of the following criteria:
3960      * <ul>
3961      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
3962      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
3963      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
3964      * <li> It is &#92;u000A, LINE FEED.
3965      * <li> It is &#92;u000B, VERTICAL TABULATION.
3966      * <li> It is &#92;u000C, FORM FEED.
3967      * <li> It is &#92;u000D, CARRIAGE RETURN.
3968      * <li> It is &#92;u001C, FILE SEPARATOR.
3969      * <li> It is &#92;u001D, GROUP SEPARATOR.
3970      * <li> It is &#92;u001E, RECORD SEPARATOR.
3971      * <li> It is &#92;u001F, UNIT SEPARATOR.
3972      * </ul>
3973      *
3974      * This API tries to sync with the semantics of Java's
3975      * java.lang.Character.isWhitespace(), but it may not return
3976      * the exact same results because of the Unicode version
3977      * difference.
3978      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
3979      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
3980      * See http://www.unicode.org/versions/Unicode4.0.1/
3981      * @param ch code point to determine if it is a white space
3982      * @return true if the specified code point is a white space character
3983      */
isWhitespace(int ch)3984     public static boolean isWhitespace(int ch)
3985     {
3986         // exclude no-break spaces
3987         // if props == 0, it will just fall through and return false
3988         return ((1 << getType(ch)) &
3989                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
3990                         | (1 << UCharacterCategory.LINE_SEPARATOR)
3991                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
3992                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
3993                         // TAB VT LF FF CR FS GS RS US NL are all control characters
3994                         // that are white spaces.
3995                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
3996     }
3997 
3998     /**
3999      * Determines if the specified code point is a Unicode specified space
4000      * character, i.e. if code point is in the category Zs, Zl and Zp.
4001      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
4002      * @param ch code point to determine if it is a space
4003      * @return true if the specified code point is a space character
4004      */
isSpaceChar(int ch)4005     public static boolean isSpaceChar(int ch)
4006     {
4007         // if props == 0, it will just fall through and return false
4008         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
4009                 | (1 << UCharacterCategory.LINE_SEPARATOR)
4010                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
4011                 != 0;
4012     }
4013 
4014     /**
4015      * Determines if the specified code point is a titlecase character.
4016      * UnicodeData only contains case mappings for code points where they are
4017      * one-to-one mappings; it also omits information about context-sensitive
4018      * case mappings.<br>
4019      * For more information about Unicode case mapping please refer to the
4020      * <a href=https://www.unicode.org/reports/tr21/>
4021      * Technical report #21</a>.<br>
4022      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
4023      * @param ch code point to determine if it is in title case
4024      * @return true if the specified code point is a titlecase character
4025      */
isTitleCase(int ch)4026     public static boolean isTitleCase(int ch)
4027     {
4028         // if props == 0, it will just fall through and return false
4029         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
4030     }
4031 
4032     /**
4033      * Determines if the specified character is permissible as a
4034      * non-initial character of an identifier
4035      * according to UAX #31 Unicode Identifier and Pattern Syntax.
4036      *
4037      * <p>Same as Unicode ID_Continue ({@link UProperty#ID_CONTINUE}).
4038      *
4039      * <p>Note that this differs from {@link java.lang.Character#isUnicodeIdentifierPart(char)}
4040      * which implements a different identifier profile.
4041      *
4042      * @param ch the code point to be tested
4043      * @return true if the code point may occur as a non-initial character of an identifier
4044      */
isUnicodeIdentifierPart(int ch)4045     public static boolean isUnicodeIdentifierPart(int ch)
4046     {
4047         return hasBinaryProperty(ch, UProperty.ID_CONTINUE);  // single code point
4048     }
4049 
4050     /**
4051      * Determines if the specified character is permissible as the first character in an identifier
4052      * according to UAX #31 Unicode Identifier and Pattern Syntax.
4053      *
4054      * <p>Same as Unicode ID_Start ({@link UProperty#ID_START}).
4055      *
4056      * <p>Note that this differs from {@link java.lang.Character#isUnicodeIdentifierStart(char)}
4057      * which implements a different identifier profile.
4058      *
4059      * @param ch the code point to be tested
4060      * @return true if the code point may start an identifier
4061      */
isUnicodeIdentifierStart(int ch)4062     public static boolean isUnicodeIdentifierStart(int ch)
4063     {
4064         return hasBinaryProperty(ch, UProperty.ID_START);  // single code point
4065     }
4066 
4067     /**
4068      * Does the set of Identifier_Type values code point c contain the given type?
4069      *
4070      * <p>Used for UTS #39 General Security Profile for Identifiers
4071      * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
4072      *
4073      * <p>Each code point maps to a <i>set</i> of UIdentifierType values.
4074      *
4075      * @param c code point
4076      * @param type Identifier_Type to check
4077      * @return true if type is in Identifier_Type(c)
4078      * @hide draft / provisional / internal are hidden on Android
4079      */
hasIdentifierType(int c, IdentifierType type)4080     public static final boolean hasIdentifierType(int c, IdentifierType type) {
4081         return UCharacterProperty.INSTANCE.hasIDType(c, type);
4082     }
4083 
4084     /**
4085      * Writes code point c's Identifier_Type as a set of IdentifierType values and
4086      * returns the number of types.
4087      * The set is cleared before c's types are added.
4088      *
4089      * <p>Used for UTS #39 General Security Profile for Identifiers
4090      * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
4091      *
4092      * <p>Each code point maps to a <i>set</i> of IdentifierType values.
4093      * There is always at least one type.
4094      * Only some of the types can be combined with others,
4095      * and usually only a small number of types occur together.
4096      * Future versions might add additional types.
4097      * See UTS #39 and its data files for details.
4098      *
4099      * @param c code point
4100      * @param types output set
4101      * @return number of values in c's Identifier_Type
4102      * @hide draft / provisional / internal are hidden on Android
4103      */
getIdentifierTypes(int c, EnumSet<IdentifierType> types)4104     public static final int getIdentifierTypes(int c, EnumSet<IdentifierType> types) {
4105         return UCharacterProperty.INSTANCE.getIDTypes(c, types);
4106     }
4107 
4108     /**
4109      * Determines if the specified code point should be regarded as an
4110      * ignorable character in a Java identifier.
4111      * A character is Java-identifier-ignorable if it has the general category
4112      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
4113      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
4114      * Up-to-date Unicode implementation of
4115      * java.lang.Character.isIdentifierIgnorable().<br>
4116      * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>.
4117      * <p>Note that Unicode just recommends to ignore Cf (format controls).
4118      * @param ch code point to be determined if it can be ignored in a Unicode
4119      *        identifier.
4120      * @return true if the code point is ignorable
4121      */
isIdentifierIgnorable(int ch)4122     public static boolean isIdentifierIgnorable(int ch)
4123     {
4124         // see java.lang.Character.isIdentifierIgnorable() on range of
4125         // ignorable characters.
4126         if (ch <= 0x9f) {
4127             return isISOControl(ch)
4128                     && !((ch >= 0x9 && ch <= 0xd)
4129                             || (ch >= 0x1c && ch <= 0x1f));
4130         }
4131         return getType(ch) == UCharacterCategory.FORMAT;
4132     }
4133 
4134     /**
4135      * Determines if the specified code point is an uppercase character.
4136      * UnicodeData only contains case mappings for code point where they are
4137      * one-to-one mappings; it also omits information about context-sensitive
4138      * case mappings.<br>
4139      * For language specific case conversion behavior, use
4140      * toUpperCase(locale, str). <br>
4141      * For example, the case conversion for dot-less i and dotted I in Turkish,
4142      * or for final sigma in Greek.
4143      * For more information about Unicode case mapping please refer to the
4144      * <a href=https://www.unicode.org/reports/tr21/>
4145      * Technical report #21</a>.<br>
4146      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
4147      * @param ch code point to determine if it is in uppercase
4148      * @return true if the code point is an uppercase character
4149      */
isUpperCase(int ch)4150     public static boolean isUpperCase(int ch)
4151     {
4152         // if props == 0, it will just fall through and return false
4153         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
4154     }
4155 
4156     /**
4157      * The given code point is mapped to its lowercase equivalent; if the code
4158      * point has no lowercase equivalent, the code point itself is returned.
4159      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
4160      *
4161      * <p>This function only returns the simple, single-code point case mapping.
4162      * Full case mappings should be used whenever possible because they produce
4163      * better results by working on whole strings.
4164      * They take into account the string context and the language and can map
4165      * to a result string with a different length as appropriate.
4166      * Full case mappings are applied by the case mapping functions
4167      * that take String parameters rather than code points (int).
4168      * See also the User Guide chapter on C/POSIX migration:
4169      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
4170      *
4171      * @param ch code point whose lowercase equivalent is to be retrieved
4172      * @return the lowercase equivalent code point
4173      */
toLowerCase(int ch)4174     public static int toLowerCase(int ch) {
4175         return UCaseProps.INSTANCE.tolower(ch);
4176     }
4177 
4178     /**
4179      * Converts argument code point and returns a String object representing
4180      * the code point's value in UTF-16 format.
4181      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
4182      *
4183      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
4184      *
4185      * @param ch code point
4186      * @return string representation of the code point, null if code point is not
4187      *         defined in unicode
4188      */
toString(int ch)4189     public static String toString(int ch)
4190     {
4191         if (ch < MIN_VALUE || ch > MAX_VALUE) {
4192             return null;
4193         }
4194 
4195         if (ch < SUPPLEMENTARY_MIN_VALUE) {
4196             return String.valueOf((char)ch);
4197         }
4198 
4199         return new String(Character.toChars(ch));
4200     }
4201 
4202     /**
4203      * Converts the code point argument to titlecase.
4204      * If no titlecase is available, the uppercase is returned. If no uppercase
4205      * is available, the code point itself is returned.
4206      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
4207      *
4208      * <p>This function only returns the simple, single-code point case mapping.
4209      * Full case mappings should be used whenever possible because they produce
4210      * better results by working on whole strings.
4211      * They take into account the string context and the language and can map
4212      * to a result string with a different length as appropriate.
4213      * Full case mappings are applied by the case mapping functions
4214      * that take String parameters rather than code points (int).
4215      * See also the User Guide chapter on C/POSIX migration:
4216      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
4217      *
4218      * @param ch code point  whose title case is to be retrieved
4219      * @return titlecase code point
4220      */
toTitleCase(int ch)4221     public static int toTitleCase(int ch) {
4222         return UCaseProps.INSTANCE.totitle(ch);
4223     }
4224 
4225     /**
4226      * Converts the character argument to uppercase.
4227      * If no uppercase is available, the character itself is returned.
4228      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
4229      *
4230      * <p>This function only returns the simple, single-code point case mapping.
4231      * Full case mappings should be used whenever possible because they produce
4232      * better results by working on whole strings.
4233      * They take into account the string context and the language and can map
4234      * to a result string with a different length as appropriate.
4235      * Full case mappings are applied by the case mapping functions
4236      * that take String parameters rather than code points (int).
4237      * See also the User Guide chapter on C/POSIX migration:
4238      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
4239      *
4240      * @param ch code point whose uppercase is to be retrieved
4241      * @return uppercase code point
4242      */
toUpperCase(int ch)4243     public static int toUpperCase(int ch) {
4244         return UCaseProps.INSTANCE.toupper(ch);
4245     }
4246 
4247     // extra methods not in java.lang.Character --------------------------
4248 
4249     /**
4250      * <strong>[icu]</strong> Determines if the code point is a supplementary character.
4251      * A code point is a supplementary character if and only if it is greater
4252      * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
4253      * @param ch code point to be determined if it is in the supplementary
4254      *        plane
4255      * @return true if code point is a supplementary character
4256      */
isSupplementary(int ch)4257     public static boolean isSupplementary(int ch)
4258     {
4259         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
4260                 ch <= UCharacter.MAX_VALUE;
4261     }
4262 
4263     /**
4264      * <strong>[icu]</strong> Determines if the code point is in the BMP plane.
4265      * @param ch code point to be determined if it is not a supplementary
4266      *        character
4267      * @return true if code point is not a supplementary character
4268      */
isBMP(int ch)4269     public static boolean isBMP(int ch)
4270     {
4271         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
4272     }
4273 
4274     /**
4275      * <strong>[icu]</strong> Determines whether the specified code point is a printable character
4276      * according to the Unicode standard.
4277      * @param ch code point to be determined if it is printable
4278      * @return true if the code point is a printable character
4279      */
isPrintable(int ch)4280     public static boolean isPrintable(int ch)
4281     {
4282         int cat = getType(ch);
4283         // if props == 0, it will just fall through and return false
4284         return (cat != UCharacterCategory.UNASSIGNED &&
4285                 cat != UCharacterCategory.CONTROL &&
4286                 cat != UCharacterCategory.FORMAT &&
4287                 cat != UCharacterCategory.PRIVATE_USE &&
4288                 cat != UCharacterCategory.SURROGATE &&
4289                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
4290     }
4291 
4292     /**
4293      * <strong>[icu]</strong> Determines whether the specified code point is of base form.
4294      * A code point of base form does not graphically combine with preceding
4295      * characters, and is neither a control nor a format character.
4296      * @param ch code point to be determined if it is of base form
4297      * @return true if the code point is of base form
4298      */
isBaseForm(int ch)4299     public static boolean isBaseForm(int ch)
4300     {
4301         int cat = getType(ch);
4302         // if props == 0, it will just fall through and return false
4303         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
4304                 cat == UCharacterCategory.OTHER_NUMBER ||
4305                 cat == UCharacterCategory.LETTER_NUMBER ||
4306                 cat == UCharacterCategory.UPPERCASE_LETTER ||
4307                 cat == UCharacterCategory.LOWERCASE_LETTER ||
4308                 cat == UCharacterCategory.TITLECASE_LETTER ||
4309                 cat == UCharacterCategory.MODIFIER_LETTER ||
4310                 cat == UCharacterCategory.OTHER_LETTER ||
4311                 cat == UCharacterCategory.NON_SPACING_MARK ||
4312                 cat == UCharacterCategory.ENCLOSING_MARK ||
4313                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
4314     }
4315 
4316     /**
4317      * <strong>[icu]</strong> Returns the Bidirection property of a code point.
4318      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
4319      * property.<br>
4320      * Result returned belongs to the interface
4321      * <a href=UCharacterDirection.html>UCharacterDirection</a>
4322      * @param ch the code point to be determined its direction
4323      * @return direction constant from UCharacterDirection.
4324      */
getDirection(int ch)4325     public static int getDirection(int ch)
4326     {
4327         return UBiDiProps.INSTANCE.getClass(ch);
4328     }
4329 
4330     /**
4331      * Determines whether the code point has the "mirrored" property.
4332      * This property is set for characters that are commonly used in
4333      * Right-To-Left contexts and need to be displayed with a "mirrored"
4334      * glyph.
4335      * @param ch code point whose mirror is to be determined
4336      * @return true if the code point has the "mirrored" property
4337      */
isMirrored(int ch)4338     public static boolean isMirrored(int ch)
4339     {
4340         return UBiDiProps.INSTANCE.isMirrored(ch);
4341     }
4342 
4343     /**
4344      * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point.
4345      * For code points with the "mirrored" property, implementations sometimes
4346      * need a "poor man's" mapping to another code point such that the default
4347      * glyph may serve as the mirror-image of the default glyph of the
4348      * specified code point.<br>
4349      * This is useful for text conversion to and from codepages with visual
4350      * order, and for displays without glyph selection capabilities.
4351      * @param ch code point whose mirror is to be retrieved
4352      * @return another code point that may serve as a mirror-image substitute,
4353      *         or ch itself if there is no such mapping or ch does not have the
4354      *         "mirrored" property
4355      */
getMirror(int ch)4356     public static int getMirror(int ch)
4357     {
4358         return UBiDiProps.INSTANCE.getMirror(ch);
4359     }
4360 
4361     /**
4362      * <strong>[icu]</strong> Maps the specified character to its paired bracket character.
4363      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
4364      * Otherwise c itself is returned.
4365      * See http://www.unicode.org/reports/tr9/
4366      *
4367      * @param c the code point to be mapped
4368      * @return the paired bracket code point,
4369      *         or c itself if there is no such mapping
4370      *         (Bidi_Paired_Bracket_Type=None)
4371      *
4372      * @see UProperty#BIDI_PAIRED_BRACKET
4373      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
4374      * @see #getMirror(int)
4375      */
getBidiPairedBracket(int c)4376     public static int getBidiPairedBracket(int c) {
4377         return UBiDiProps.INSTANCE.getPairedBracket(c);
4378     }
4379 
4380     /**
4381      * <strong>[icu]</strong> Returns the combining class of the argument codepoint
4382      * @param ch code point whose combining is to be retrieved
4383      * @return the combining class of the codepoint
4384      */
getCombiningClass(int ch)4385     public static int getCombiningClass(int ch)
4386     {
4387         return Normalizer2.getNFDInstance().getCombiningClass(ch);
4388     }
4389 
4390     /**
4391      * <strong>[icu]</strong> A code point is illegal if and only if
4392      * <ul>
4393      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4394      * <li> A surrogate value, 0xD800 to 0xDFFF
4395      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4396      * </ul>
4397      * Note: legal does not mean that it is assigned in this version of Unicode.
4398      * @param ch code point to determine if it is a legal code point by itself
4399      * @return true if and only if legal.
4400      */
isLegal(int ch)4401     public static boolean isLegal(int ch)
4402     {
4403         if (ch < MIN_VALUE) {
4404             return false;
4405         }
4406         if (ch < Character.MIN_SURROGATE) {
4407             return true;
4408         }
4409         if (ch <= Character.MAX_SURROGATE) {
4410             return false;
4411         }
4412         if (UCharacterUtility.isNonCharacter(ch)) {
4413             return false;
4414         }
4415         return (ch <= MAX_VALUE);
4416     }
4417 
4418     /**
4419      * <strong>[icu]</strong> A string is legal iff all its code points are legal.
4420      * A code point is illegal if and only if
4421      * <ul>
4422      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4423      * <li> A surrogate value, 0xD800 to 0xDFFF
4424      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4425      * </ul>
4426      * Note: legal does not mean that it is assigned in this version of Unicode.
4427      * @param str containing code points to examin
4428      * @return true if and only if legal.
4429      */
isLegal(String str)4430     public static boolean isLegal(String str)
4431     {
4432         int size = str.length();
4433         int codepoint;
4434         for (int i = 0; i < size; i += Character.charCount(codepoint))
4435         {
4436             codepoint = str.codePointAt(i);
4437             if (!isLegal(codepoint)) {
4438                 return false;
4439             }
4440         }
4441         return true;
4442     }
4443 
4444     /**
4445      * <strong>[icu]</strong> Returns the version of Unicode data used.
4446      * @return the unicode version number used
4447      */
getUnicodeVersion()4448     public static VersionInfo getUnicodeVersion()
4449     {
4450         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
4451     }
4452 
4453     /**
4454      * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or
4455      * null if the character is unassigned or outside the range
4456      * {@code UCharacter.MIN_VALUE} and {@code UCharacter.MAX_VALUE} or does not
4457      * have a name.
4458      * <br>
4459      * Note calling any methods related to code point names, e.g. {@code getName()}
4460      * incurs a one-time initialization cost to construct the name tables.
4461      * @param ch the code point for which to get the name
4462      * @return most current Unicode name
4463      */
getName(int ch)4464     public static String getName(int ch)
4465     {
4466         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
4467     }
4468 
4469     /**
4470      * <strong>[icu]</strong> Returns the names for each of the characters in a string
4471      * @param s string to format
4472      * @param separator string to go between names
4473      * @return string of names
4474      */
getName(String s, String separator)4475     public static String getName(String s, String separator) {
4476         if (s.length() == 1) { // handle common case
4477             return getName(s.charAt(0));
4478         }
4479         int cp;
4480         StringBuilder sb = new StringBuilder();
4481         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
4482             cp = s.codePointAt(i);
4483             if (i != 0) sb.append(separator);
4484             sb.append(UCharacter.getName(cp));
4485         }
4486         return sb.toString();
4487     }
4488 
4489     /**
4490      * <strong>[icu]</strong> Returns null.
4491      * Used to return the Unicode_1_Name property value which was of little practical value.
4492      * @param ch the code point for which to get the name
4493      * @return null
4494      * @deprecated ICU 49
4495      * @hide original deprecated declaration
4496      */
4497     @Deprecated
getName1_0(int ch)4498     public static String getName1_0(int ch)
4499     {
4500         return null;
4501     }
4502 
4503     /**
4504      * <strong>[icu]</strong> Returns a name for a valid codepoint. Unlike, getName(int) and
4505      * getName1_0(int), this method will return a name even for codepoints that
4506      * are not assigned a name in UnicodeData.txt.
4507      *
4508      * <p>The names are returned in the following order.
4509      * <ul>
4510      * <li> Most current Unicode name if there is any
4511      * <li> Unicode 1.0 name if there is any
4512      * <li> Extended name in the form of
4513      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
4514      * </ul>
4515      * Note calling any methods related to code point names, e.g. {@code getName()}
4516      * incurs a one-time initialization cost to construct the name tables.
4517      * @param ch the code point for which to get the name
4518      * @return a name for the argument codepoint
4519      */
getExtendedName(int ch)4520     public static String getExtendedName(int ch) {
4521         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
4522     }
4523 
4524     /**
4525      * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one.
4526      * Returns null if the character is unassigned or outside the range
4527      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4528      * <br>
4529      * Note calling any methods related to code point names, e.g. {@code getName()}
4530      * incurs a one-time initialization cost to construct the name tables.
4531      * @param ch the code point for which to get the name alias
4532      * @return Unicode name alias, or null
4533      */
getNameAlias(int ch)4534     public static String getNameAlias(int ch)
4535     {
4536         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
4537     }
4538 
4539     /**
4540      * <strong>[icu]</strong> Returns null.
4541      * Used to return the ISO 10646 comment for a character.
4542      * The Unicode ISO_Comment property is deprecated and has no values.
4543      *
4544      * @param ch The code point for which to get the ISO comment.
4545      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
4546      * @return null
4547      * @deprecated ICU 49
4548      * @hide original deprecated declaration
4549      */
4550     @Deprecated
getISOComment(int ch)4551     public static String getISOComment(int ch)
4552     {
4553         return null;
4554     }
4555 
4556     /**
4557      * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and
4558      * return its code point value. All Unicode names are in uppercase.
4559      * Note calling any methods related to code point names, e.g. {@code getName()}
4560      * incurs a one-time initialization cost to construct the name tables.
4561      * @param name most current Unicode character name whose code point is to
4562      *        be returned
4563      * @return code point or -1 if name is not found
4564      */
getCharFromName(String name)4565     public static int getCharFromName(String name){
4566         return UCharacterName.INSTANCE.getCharFromName(
4567                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
4568     }
4569 
4570     /**
4571      * <strong>[icu]</strong> Returns -1.
4572      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
4573      * its code point value.
4574      * @param name Unicode 1.0 code point name whose code point is to be
4575      *             returned
4576      * @return -1
4577      * @deprecated ICU 49
4578      * @see #getName1_0(int)
4579      * @hide original deprecated declaration
4580      */
4581     @Deprecated
getCharFromName1_0(String name)4582     public static int getCharFromName1_0(String name){
4583         return -1;
4584     }
4585 
4586     /**
4587      * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code
4588      * point value. All Unicode names are in uppercase.
4589      * Extended names are all lowercase except for numbers and are contained
4590      * within angle brackets.
4591      * The names are searched in the following order
4592      * <ul>
4593      * <li> Most current Unicode name if there is any
4594      * <li> Unicode 1.0 name if there is any
4595      * <li> Extended name in the form of
4596      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
4597      * </ul>
4598      * Note calling any methods related to code point names, e.g. {@code getName()}
4599      * incurs a one-time initialization cost to construct the name tables.
4600      * @param name codepoint name
4601      * @return code point associated with the name or -1 if the name is not
4602      *         found.
4603      */
getCharFromExtendedName(String name)4604     public static int getCharFromExtendedName(String name){
4605         return UCharacterName.INSTANCE.getCharFromName(
4606                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
4607     }
4608 
4609     /**
4610      * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return
4611      * its code point value. All Unicode names are in uppercase.
4612      * Note calling any methods related to code point names, e.g. {@code getName()}
4613      * incurs a one-time initialization cost to construct the name tables.
4614      * @param name Unicode name alias whose code point is to be returned
4615      * @return code point or -1 if name is not found
4616      */
getCharFromNameAlias(String name)4617     public static int getCharFromNameAlias(String name){
4618         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
4619     }
4620 
4621     /**
4622      * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the
4623      * Unicode database file PropertyAliases.txt.  Most properties
4624      * have more than one name.  The nameChoice determines which one
4625      * is returned.
4626      *
4627      * In addition, this function maps the property
4628      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
4629      * "General_Category_Mask".  These names are not in
4630      * PropertyAliases.txt.
4631      *
4632      * @param property UProperty selector.
4633      *
4634      * @param nameChoice UProperty.NameChoice selector for which name
4635      * to get.  All properties have a long name.  Most have a short
4636      * name, but some do not.  Unicode allows for additional names; if
4637      * present these will be returned by UProperty.NameChoice.LONG + i,
4638      * where i=1, 2,...
4639      *
4640      * @return a name, or null if Unicode explicitly defines no name
4641      * ("n/a") for a given property/nameChoice.  If a given nameChoice
4642      * throws an exception, then all larger values of nameChoice will
4643      * throw an exception.  If null is returned for a given
4644      * nameChoice, then other nameChoice values may return non-null
4645      * results.
4646      *
4647      * @exception IllegalArgumentException thrown if property or
4648      * nameChoice are invalid.
4649      *
4650      * @see UProperty
4651      * @see UProperty.NameChoice
4652      */
getPropertyName(int property, int nameChoice)4653     public static String getPropertyName(int property,
4654             int nameChoice) {
4655         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
4656     }
4657 
4658     /**
4659      * <strong>[icu]</strong> Return the UProperty selector for a given property name, as
4660      * specified in the Unicode database file PropertyAliases.txt.
4661      * Short, long, and any other variants are recognized.
4662      *
4663      * In addition, this function maps the synthetic names "gcm" /
4664      * "General_Category_Mask" to the property
4665      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
4666      * PropertyAliases.txt.
4667      *
4668      * @param propertyAlias the property name to be matched.  The name
4669      * is compared using "loose matching" as described in
4670      * PropertyAliases.txt.
4671      *
4672      * @return a UProperty enum.
4673      *
4674      * @exception IllegalArgumentException thrown if propertyAlias
4675      * is not recognized.
4676      *
4677      * @see UProperty
4678      */
getPropertyEnum(CharSequence propertyAlias)4679     public static int getPropertyEnum(CharSequence propertyAlias) {
4680         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
4681         if (propEnum == UProperty.UNDEFINED) {
4682             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
4683         }
4684         return propEnum;
4685     }
4686 
4687     /**
4688      * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in
4689      * the Unicode database file PropertyValueAliases.txt.  Most
4690      * values have more than one name.  The nameChoice determines
4691      * which one is returned.
4692      *
4693      * Note: Some of the names in PropertyValueAliases.txt can only be
4694      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
4695      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4696      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4697      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4698      *
4699      * @param property UProperty selector constant.
4700      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4701      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4702      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4703      * If out of range, null is returned.
4704      *
4705      * @param value selector for a value for the given property.  In
4706      * general, valid values range from 0 up to some maximum.  There
4707      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
4708      * non-zero value BASIC_LATIN.getID().  (2.)
4709      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
4710      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
4711      * are mask values produced by left-shifting 1 by
4712      * UCharacter.getType().  This allows grouped categories such as
4713      * [:L:] to be represented.  Mask values are non-contiguous.
4714      *
4715      * @param nameChoice UProperty.NameChoice selector for which name
4716      * to get.  All values have a long name.  Most have a short name,
4717      * but some do not.  Unicode allows for additional names; if
4718      * present these will be returned by UProperty.NameChoice.LONG + i,
4719      * where i=1, 2,...
4720      *
4721      * @return a name, or null if Unicode explicitly defines no name
4722      * ("n/a") for a given property/value/nameChoice.  If a given
4723      * nameChoice throws an exception, then all larger values of
4724      * nameChoice will throw an exception.  If null is returned for a
4725      * given nameChoice, then other nameChoice values may return
4726      * non-null results.
4727      *
4728      * @exception IllegalArgumentException thrown if property, value,
4729      * or nameChoice are invalid.
4730      *
4731      * @see UProperty
4732      * @see UProperty.NameChoice
4733      */
getPropertyValueName(int property, int value, int nameChoice)4734     public static String getPropertyValueName(int property,
4735             int value,
4736             int nameChoice)
4737     {
4738         if ((property == UProperty.CANONICAL_COMBINING_CLASS
4739                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
4740                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
4741                 && value >= UCharacter.getIntPropertyMinValue(
4742                         UProperty.CANONICAL_COMBINING_CLASS)
4743                         && value <= UCharacter.getIntPropertyMaxValue(
4744                                 UProperty.CANONICAL_COMBINING_CLASS)
4745                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
4746             // this is hard coded for the valid cc
4747             // because PropertyValueAliases.txt does not contain all of them
4748             try {
4749                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
4750                         nameChoice);
4751             }
4752             catch (IllegalArgumentException e) {
4753                 return null;
4754             }
4755         }
4756         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
4757     }
4758 
4759     /**
4760      * <strong>[icu]</strong> Return the property value integer for a given value name, as
4761      * specified in the Unicode database file PropertyValueAliases.txt.
4762      * Short, long, and any other variants are recognized.
4763      *
4764      * Note: Some of the names in PropertyValueAliases.txt will only be
4765      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4766      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4767      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4768      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4769      *
4770      * @param property UProperty selector constant.
4771      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4772      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4773      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4774      * Only these properties can be enumerated.
4775      *
4776      * @param valueAlias the value name to be matched.  The name is
4777      * compared using "loose matching" as described in
4778      * PropertyValueAliases.txt.
4779      *
4780      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
4781      * values are mask values produced by left-shifting 1 by
4782      * UCharacter.getType().  This allows grouped categories such as
4783      * [:L:] to be represented.
4784      *
4785      * @see UProperty
4786      * @throws IllegalArgumentException if property is not a valid UProperty
4787      *         selector or valueAlias is not a value of this property
4788      */
getPropertyValueEnum(int property, CharSequence valueAlias)4789     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
4790         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
4791         if (propEnum == UProperty.UNDEFINED) {
4792             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
4793         }
4794         return propEnum;
4795     }
4796 
4797     /**
4798      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
4799      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
4800      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
4801      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
4802      * @deprecated This API is ICU internal only.
4803      * @hide original deprecated declaration
4804      * @hide draft / provisional / internal are hidden on Android
4805      */
4806     @Deprecated
getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4807     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
4808         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
4809     }
4810 
4811 
4812     /**
4813      * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units.
4814      *
4815      * @param lead the lead unit
4816      *        (In ICU 2.1-69 the type of both parameters was <code>char</code>.)
4817      * @param trail the trail unit
4818      * @return code point if lead and trail form a valid surrogate pair.
4819      * @exception IllegalArgumentException thrown when the code units do
4820      *            not form a valid surrogate pair
4821      * @see #toCodePoint(int, int)
4822      */
getCodePoint(int lead, int trail)4823     public static int getCodePoint(int lead, int trail)
4824     {
4825         if (isHighSurrogate(lead) && isLowSurrogate(trail)) {
4826             return toCodePoint(lead, trail);
4827         }
4828         throw new IllegalArgumentException("Not a valid surrogate pair");
4829     }
4830 
4831     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
4832     /**
4833      * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units.
4834      *
4835      * @param lead the lead char
4836      * @param trail the trail char
4837      * @return code point if surrogate characters are valid.
4838      * @exception IllegalArgumentException thrown when the code units do
4839      *            not form a valid code point
4840      */
getCodePoint(char lead, char trail)4841     public static int getCodePoint(char lead, char trail)
4842     {
4843         return getCodePoint((int) lead, (int) trail);
4844     }
4845     // END Android patch: Keep the `char` version on Android. See ICU-21655
4846 
4847     /**
4848      * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point.
4849      *
4850      * @param char16 the BMP code point
4851      * @return code point if argument is a valid character.
4852      * @exception IllegalArgumentException thrown when char16 is not a valid
4853      *            code point
4854      */
getCodePoint(char char16)4855     public static int getCodePoint(char char16)
4856     {
4857         if (UCharacter.isLegal(char16)) {
4858             return char16;
4859         }
4860         throw new IllegalArgumentException("Illegal codepoint");
4861     }
4862 
4863     /**
4864      * Returns the uppercase version of the argument string.
4865      * Casing is dependent on the default locale and context-sensitive.
4866      * @param str source string to be performed on
4867      * @return uppercase version of the argument string
4868      */
toUpperCase(String str)4869     public static String toUpperCase(String str)
4870     {
4871         return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str);
4872     }
4873 
4874     /**
4875      * Returns the lowercase version of the argument string.
4876      * Casing is dependent on the default locale and context-sensitive
4877      * @param str source string to be performed on
4878      * @return lowercase version of the argument string
4879      */
toLowerCase(String str)4880     public static String toLowerCase(String str)
4881     {
4882         return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str);
4883     }
4884 
4885     /**
4886      * <p>Returns the titlecase version of the argument string.
4887      * <p>Position for titlecasing is determined by the argument break
4888      * iterator, hence the user can customize his break iterator for
4889      * a specialized titlecasing. In this case only the forward iteration
4890      * needs to be implemented.
4891      * If the break iterator passed in is null, the default Unicode algorithm
4892      * will be used to determine the titlecase positions.
4893      *
4894      * <p>Only positions returned by the break iterator will be title cased,
4895      * character in between the positions will all be in lower case.
4896      * <p>Casing is dependent on the default locale and context-sensitive
4897      * @param str source string to be performed on
4898      * @param breakiter break iterator to determine the positions in which
4899      *        the character should be title cased.
4900      * @return titlecase version of the argument string
4901      */
toTitleCase(String str, BreakIterator breakiter)4902     public static String toTitleCase(String str, BreakIterator breakiter)
4903     {
4904         return toTitleCase(Locale.getDefault(), str, breakiter, 0);
4905     }
4906 
getDefaultCaseLocale()4907     private static int getDefaultCaseLocale() {
4908         return UCaseProps.getCaseLocale(Locale.getDefault());
4909     }
4910 
getCaseLocale(Locale locale)4911     private static int getCaseLocale(Locale locale) {
4912         if (locale == null) {
4913             locale = Locale.getDefault();
4914         }
4915         return UCaseProps.getCaseLocale(locale);
4916     }
4917 
getCaseLocale(ULocale locale)4918     private static int getCaseLocale(ULocale locale) {
4919         if (locale == null) {
4920             locale = ULocale.getDefault();
4921         }
4922         return UCaseProps.getCaseLocale(locale);
4923     }
4924 
4925     /**
4926      * Returns the uppercase version of the argument string.
4927      * Casing is dependent on the argument locale and context-sensitive.
4928      * @param locale which string is to be converted in
4929      * @param str source string to be performed on
4930      * @return uppercase version of the argument string
4931      */
toUpperCase(Locale locale, String str)4932     public static String toUpperCase(Locale locale, String str)
4933     {
4934         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
4935     }
4936 
4937     /**
4938      * Returns the uppercase version of the argument string.
4939      * Casing is dependent on the argument locale and context-sensitive.
4940      * @param locale which string is to be converted in
4941      * @param str source string to be performed on
4942      * @return uppercase version of the argument string
4943      */
toUpperCase(ULocale locale, String str)4944     public static String toUpperCase(ULocale locale, String str) {
4945         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
4946     }
4947 
4948     /**
4949      * Returns the lowercase version of the argument string.
4950      * Casing is dependent on the argument locale and context-sensitive
4951      * @param locale which string is to be converted in
4952      * @param str source string to be performed on
4953      * @return lowercase version of the argument string
4954      */
toLowerCase(Locale locale, String str)4955     public static String toLowerCase(Locale locale, String str)
4956     {
4957         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
4958     }
4959 
4960     /**
4961      * Returns the lowercase version of the argument string.
4962      * Casing is dependent on the argument locale and context-sensitive
4963      * @param locale which string is to be converted in
4964      * @param str source string to be performed on
4965      * @return lowercase version of the argument string
4966      */
toLowerCase(ULocale locale, String str)4967     public static String toLowerCase(ULocale locale, String str) {
4968         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
4969     }
4970 
4971     /**
4972      * <p>Returns the titlecase version of the argument string.
4973      * <p>Position for titlecasing is determined by the argument break
4974      * iterator, hence the user can customize his break iterator for
4975      * a specialized titlecasing. In this case only the forward iteration
4976      * needs to be implemented.
4977      * If the break iterator passed in is null, the default Unicode algorithm
4978      * will be used to determine the titlecase positions.
4979      *
4980      * <p>Only positions returned by the break iterator will be title cased,
4981      * character in between the positions will all be in lower case.
4982      * <p>Casing is dependent on the argument locale and context-sensitive
4983      * @param locale which string is to be converted in
4984      * @param str source string to be performed on
4985      * @param breakiter break iterator to determine the positions in which
4986      *        the character should be title cased.
4987      * @return titlecase version of the argument string
4988      */
toTitleCase(Locale locale, String str, BreakIterator breakiter)4989     public static String toTitleCase(Locale locale, String str,
4990             BreakIterator breakiter)
4991     {
4992         return toTitleCase(locale, str, breakiter, 0);
4993     }
4994 
4995     /**
4996      * <p>Returns the titlecase version of the argument string.
4997      * <p>Position for titlecasing is determined by the argument break
4998      * iterator, hence the user can customize his break iterator for
4999      * a specialized titlecasing. In this case only the forward iteration
5000      * needs to be implemented.
5001      * If the break iterator passed in is null, the default Unicode algorithm
5002      * will be used to determine the titlecase positions.
5003      *
5004      * <p>Only positions returned by the break iterator will be title cased,
5005      * character in between the positions will all be in lower case.
5006      * <p>Casing is dependent on the argument locale and context-sensitive
5007      * @param locale which string is to be converted in
5008      * @param str source string to be performed on
5009      * @param titleIter break iterator to determine the positions in which
5010      *        the character should be title cased.
5011      * @return titlecase version of the argument string
5012      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter)5013     public static String toTitleCase(ULocale locale, String str,
5014             BreakIterator titleIter) {
5015         return toTitleCase(locale, str, titleIter, 0);
5016     }
5017 
5018     /**
5019      * <p>Returns the titlecase version of the argument string.
5020      * <p>Position for titlecasing is determined by the argument break
5021      * iterator, hence the user can customize his break iterator for
5022      * a specialized titlecasing. In this case only the forward iteration
5023      * needs to be implemented.
5024      * If the break iterator passed in is null, the default Unicode algorithm
5025      * will be used to determine the titlecase positions.
5026      *
5027      * <p>Only positions returned by the break iterator will be title cased,
5028      * character in between the positions will all be in lower case.
5029      * <p>Casing is dependent on the argument locale and context-sensitive
5030      * @param locale which string is to be converted in
5031      * @param str source string to be performed on
5032      * @param titleIter break iterator to determine the positions in which
5033      *        the character should be title cased.
5034      * @param options bit set to modify the titlecasing operation
5035      * @return titlecase version of the argument string
5036      * @see #TITLECASE_NO_LOWERCASE
5037      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5038      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5039     public static String toTitleCase(ULocale locale, String str,
5040             BreakIterator titleIter, int options) {
5041         if (titleIter == null && locale == null) {
5042             locale = ULocale.getDefault();
5043         }
5044         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
5045         titleIter.setText(str);
5046         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
5047     }
5048 
5049     /**
5050      * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string.
5051      * <p>Position for titlecasing is determined by the argument break
5052      * iterator, hence the user can customize his break iterator for
5053      * a specialized titlecasing. In this case only the forward iteration
5054      * needs to be implemented.
5055      * If the break iterator passed in is null, the default Unicode algorithm
5056      * will be used to determine the titlecase positions.
5057      *
5058      * <p>Only positions returned by the break iterator will be title cased,
5059      * character in between the positions will all be in lower case.
5060      * <p>Casing is dependent on the argument locale and context-sensitive
5061      * @param locale which string is to be converted in
5062      * @param str source string to be performed on
5063      * @param titleIter break iterator to determine the positions in which
5064      *        the character should be title cased.
5065      * @param options bit set to modify the titlecasing operation
5066      * @return titlecase version of the argument string
5067      * @see #TITLECASE_NO_LOWERCASE
5068      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5069      */
toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5070     public static String toTitleCase(Locale locale, String str,
5071             BreakIterator titleIter,
5072             int options) {
5073         if (titleIter == null && locale == null) {
5074             locale = Locale.getDefault();
5075         }
5076         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
5077         titleIter.setText(str);
5078         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
5079     }
5080 
5081     /**
5082      * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
5083      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5084      * folding equivalent, the character itself is returned.
5085      *
5086      * <p>This function only returns the simple, single-code point case mapping.
5087      * Full case mappings should be used whenever possible because they produce
5088      * better results by working on whole strings.
5089      * They can map to a result string with a different length as appropriate.
5090      * Full case mappings are applied by the case mapping functions
5091      * that take String parameters rather than code points (int).
5092      * See also the User Guide chapter on C/POSIX migration:
5093      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
5094      *
5095      * @param ch             the character to be converted
5096      * @param defaultmapping Indicates whether the default mappings defined in
5097      *                       CaseFolding.txt are to be used, otherwise the
5098      *                       mappings for dotted I and dotless i marked with
5099      *                       'T' in CaseFolding.txt are included.
5100      * @return               the case folding equivalent of the character, if
5101      *                       any; otherwise the character itself.
5102      * @see                  #foldCase(String, boolean)
5103      */
foldCase(int ch, boolean defaultmapping)5104     public static int foldCase(int ch, boolean defaultmapping) {
5105         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5106     }
5107 
5108     /**
5109      * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
5110      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5111      * folding equivalent, the character itself is returned.
5112      * "Full", multiple-code point case folding mappings are returned here.
5113      * For "simple" single-code point mappings use the API
5114      * foldCase(int ch, boolean defaultmapping).
5115      * @param str            the String to be converted
5116      * @param defaultmapping Indicates whether the default mappings defined in
5117      *                       CaseFolding.txt are to be used, otherwise the
5118      *                       mappings for dotted I and dotless i marked with
5119      *                       'T' in CaseFolding.txt are included.
5120      * @return               the case folding equivalent of the character, if
5121      *                       any; otherwise the character itself.
5122      * @see                  #foldCase(int, boolean)
5123      */
foldCase(String str, boolean defaultmapping)5124     public static String foldCase(String str, boolean defaultmapping) {
5125         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5126     }
5127 
5128     /**
5129      * <strong>[icu]</strong> Option value for case folding: use default mappings defined in
5130      * CaseFolding.txt.
5131      */
5132     public static final int FOLD_CASE_DEFAULT    =      0x0000;
5133     /**
5134      * <strong>[icu]</strong> Option value for case folding:
5135      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
5136      * and dotless i appropriately for Turkic languages (tr, az).
5137      *
5138      * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
5139      * are to be included for default mappings and
5140      * excluded for the Turkic-specific mappings.
5141      *
5142      * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
5143      * are to be excluded for default mappings and
5144      * included for the Turkic-specific mappings.
5145      */
5146     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
5147 
5148     /**
5149      * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
5150      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5151      * folding equivalent, the character itself is returned.
5152      *
5153      * <p>This function only returns the simple, single-code point case mapping.
5154      * Full case mappings should be used whenever possible because they produce
5155      * better results by working on whole strings.
5156      * They can map to a result string with a different length as appropriate.
5157      * Full case mappings are applied by the case mapping functions
5158      * that take String parameters rather than code points (int).
5159      * See also the User Guide chapter on C/POSIX migration:
5160      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
5161      *
5162      * @param ch the character to be converted
5163      * @param options A bit set for special processing. Currently the recognised options
5164      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5165      * @return the case folding equivalent of the character, if any; otherwise the
5166      * character itself.
5167      * @see #foldCase(String, boolean)
5168      */
foldCase(int ch, int options)5169     public static int foldCase(int ch, int options) {
5170         return UCaseProps.INSTANCE.fold(ch, options);
5171     }
5172 
5173     /**
5174      * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
5175      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5176      * folding equivalent, the character itself is returned.
5177      * "Full", multiple-code point case folding mappings are returned here.
5178      * For "simple" single-code point mappings use the API
5179      * foldCase(int ch, boolean defaultmapping).
5180      * @param str the String to be converted
5181      * @param options A bit set for special processing. Currently the recognised options
5182      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5183      * @return the case folding equivalent of the character, if any; otherwise the
5184      *         character itself.
5185      * @see #foldCase(int, boolean)
5186      */
foldCase(String str, int options)5187     public static final String foldCase(String str, int options) {
5188         return CaseMapImpl.fold(options, str);
5189     }
5190 
5191     /**
5192      * <strong>[icu]</strong> Returns the numeric value of a Han character.
5193      *
5194      * <p>This returns the value of Han 'numeric' code points,
5195      * including those for zero, ten, hundred, thousand, ten thousand,
5196      * and hundred million.
5197      * This includes both the standard and 'checkwriting'
5198      * characters, the 'big circle' zero character, and the standard
5199      * zero character.
5200      *
5201      * <p>Note: The Unicode Standard has numeric values for more
5202      * Han characters recognized by this method
5203      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
5204      * and a {@link android.icu.text.NumberFormat} can be used with
5205      * a Chinese {@link android.icu.text.NumberingSystem}.
5206      *
5207      * @param ch code point to query
5208      * @return value if it is a Han 'numeric character,' otherwise return -1.
5209      */
getHanNumericValue(int ch)5210     public static int getHanNumericValue(int ch)
5211     {
5212         switch(ch)
5213         {
5214         case IDEOGRAPHIC_NUMBER_ZERO_ :
5215         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
5216             return 0; // Han Zero
5217         case CJK_IDEOGRAPH_FIRST_ :
5218         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
5219             return 1; // Han One
5220         case CJK_IDEOGRAPH_SECOND_ :
5221         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
5222             return 2; // Han Two
5223         case CJK_IDEOGRAPH_THIRD_ :
5224         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
5225             return 3; // Han Three
5226         case CJK_IDEOGRAPH_FOURTH_ :
5227         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
5228             return 4; // Han Four
5229         case CJK_IDEOGRAPH_FIFTH_ :
5230         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
5231             return 5; // Han Five
5232         case CJK_IDEOGRAPH_SIXTH_ :
5233         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
5234             return 6; // Han Six
5235         case CJK_IDEOGRAPH_SEVENTH_ :
5236         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
5237             return 7; // Han Seven
5238         case CJK_IDEOGRAPH_EIGHTH_ :
5239         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
5240             return 8; // Han Eight
5241         case CJK_IDEOGRAPH_NINETH_ :
5242         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
5243             return 9; // Han Nine
5244         case CJK_IDEOGRAPH_TEN_ :
5245         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
5246             return 10;
5247         case CJK_IDEOGRAPH_HUNDRED_ :
5248         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
5249             return 100;
5250         case CJK_IDEOGRAPH_THOUSAND_ :
5251         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
5252             return 1000;
5253         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
5254             return 10000;
5255         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
5256             return 100000000;
5257         }
5258         return -1; // no value
5259     }
5260 
5261     /**
5262      * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints.
5263      * <p>Example of use:<br>
5264      * <pre>
5265      * RangeValueIterator iterator = UCharacter.getTypeIterator();
5266      * RangeValueIterator.Element element = new RangeValueIterator.Element();
5267      * while (iterator.next(element)) {
5268      *     System.out.println("Codepoint \\u" +
5269      *                        Integer.toHexString(element.start) +
5270      *                        " to codepoint \\u" +
5271      *                        Integer.toHexString(element.limit - 1) +
5272      *                        " has the character type " +
5273      *                        element.value);
5274      * }
5275      * </pre>
5276      * @return an iterator
5277      */
getTypeIterator()5278     public static RangeValueIterator getTypeIterator()
5279     {
5280         return new UCharacterTypeIterator();
5281     }
5282 
5283     private static final class UCharacterTypeIterator implements RangeValueIterator {
UCharacterTypeIterator()5284         UCharacterTypeIterator() {
5285             reset();
5286         }
5287 
5288         // implements RangeValueIterator
5289         @Override
next(Element element)5290         public boolean next(Element element) {
5291             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5292                 element.start=range.startCodePoint;
5293                 element.limit=range.endCodePoint+1;
5294                 element.value=range.value;
5295                 return true;
5296             } else {
5297                 return false;
5298             }
5299         }
5300 
5301         // implements RangeValueIterator
5302         @Override
reset()5303         public void reset() {
5304             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
5305         }
5306 
5307         private Iterator<Trie2.Range> trieIterator;
5308         private Trie2.Range range;
5309 
5310         private static final class MaskType implements Trie2.ValueMapper {
5311             // Extracts the general category ("character type") from the trie value.
5312             @Override
map(int value)5313             public int map(int value) {
5314                 return value & UCharacterProperty.TYPE_MASK;
5315             }
5316         }
5317         private static final MaskType MASK_TYPE=new MaskType();
5318     }
5319 
5320     /**
5321      * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
5322      * <p>This API only gets the iterator for the modern, most up-to-date
5323      * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
5324      * for extended names use getExtendedNameIterator().
5325      * <p>Example of use:<br>
5326      * <pre>
5327      * ValueIterator iterator = UCharacter.getNameIterator();
5328      * ValueIterator.Element element = new ValueIterator.Element();
5329      * while (iterator.next(element)) {
5330      *     System.out.println("Codepoint \\u" +
5331      *                        Integer.toHexString(element.codepoint) +
5332      *                        " has the name " + (String)element.value);
5333      * }
5334      * </pre>
5335      * <p>The maximal range which the name iterator iterates is from
5336      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
5337      * @return an iterator
5338      */
getNameIterator()5339     public static ValueIterator getNameIterator(){
5340         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5341                 UCharacterNameChoice.UNICODE_CHAR_NAME);
5342     }
5343 
5344     /**
5345      * <strong>[icu]</strong> Returns an empty iterator.
5346      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
5347      * @return an empty iterator
5348      * @deprecated ICU 49
5349      * @see #getName1_0(int)
5350      * @hide original deprecated declaration
5351      */
5352     @Deprecated
getName1_0Iterator()5353     public static ValueIterator getName1_0Iterator(){
5354         return new DummyValueIterator();
5355     }
5356 
5357     private static final class DummyValueIterator implements ValueIterator {
5358         @Override
next(Element element)5359         public boolean next(Element element) { return false; }
5360         @Override
reset()5361         public void reset() {}
5362         @Override
setRange(int start, int limit)5363         public void setRange(int start, int limit) {}
5364     }
5365 
5366     /**
5367      * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
5368      * <p>This API only gets the iterator for the extended names.
5369      * For modern, most up-to-date Unicode names use getNameIterator() or
5370      * for older 1.0 Unicode names use get1_0NameIterator().
5371      * <p>Example of use:<br>
5372      * <pre>
5373      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
5374      * ValueIterator.Element element = new ValueIterator.Element();
5375      * while (iterator.next(element)) {
5376      *     System.out.println("Codepoint \\u" +
5377      *                        Integer.toHexString(element.codepoint) +
5378      *                        " has the name " + (String)element.value);
5379      * }
5380      * </pre>
5381      * <p>The maximal range which the name iterator iterates is from
5382      * @return an iterator
5383      */
getExtendedNameIterator()5384     public static ValueIterator getExtendedNameIterator(){
5385         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5386                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
5387     }
5388 
5389     /**
5390      * <strong>[icu]</strong> Returns the "age" of the code point.
5391      * <p>The "age" is the Unicode version when the code point was first
5392      * designated (as a non-character or for Private Use) or assigned a
5393      * character.
5394      * <p>This can be useful to avoid emitting code points to receiving
5395      * processes that do not accept newer characters.
5396      * <p>The data is from the UCD file DerivedAge.txt.
5397      * @param ch The code point.
5398      * @return the Unicode version number
5399      */
getAge(int ch)5400     public static VersionInfo getAge(int ch)
5401     {
5402         if (ch < MIN_VALUE || ch > MAX_VALUE) {
5403             throw new IllegalArgumentException("Codepoint out of bounds");
5404         }
5405         return UCharacterProperty.INSTANCE.getAge(ch);
5406     }
5407 
5408     /**
5409      * <strong>[icu]</strong> Check a binary Unicode property for a code point.
5410      * <p>Unicode, especially in version 3.2, defines many more properties
5411      * than the original set in UnicodeData.txt.
5412      * <p>This API is intended to reflect Unicode properties as defined in
5413      * the Unicode Character Database (UCD) and Unicode Technical Reports
5414      * (UTR).
5415      * <p>For details about the properties see
5416      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
5417      * <p>For names of Unicode properties see the UCD file
5418      * PropertyAliases.txt.
5419      * <p>This API does not check the validity of the codepoint.
5420      * <p>Important: If ICU is built with UCD files from Unicode versions
5421      * below 3.2, then properties marked with "new" are not or
5422      * not fully available.
5423      * @param ch code point to test.
5424      * @param property selector constant from android.icu.lang.UProperty,
5425      *        identifies which binary property to check.
5426      * @return true or false according to the binary Unicode property value
5427      *         for ch. Also false if property is out of bounds or if the
5428      *         Unicode version does not have data for the property at all, or
5429      *         not for this code point.
5430      * @see android.icu.lang.UProperty
5431      */
hasBinaryProperty(int ch, int property)5432     public static boolean hasBinaryProperty(int ch, int property)
5433     {
5434         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5435     }
5436 
5437     /**
5438      * <strong>[icu]</strong> Returns true if the property is true for the string.
5439      * Same as {@link #hasBinaryProperty(int, int)}
5440      * if the string contains exactly one code point.
5441      *
5442      * <p>Most properties apply only to single code points.
5443      * <a href="https://www.unicode.org/reports/tr51/#Emoji_Sets">UTS #51 Unicode Emoji</a>
5444      * defines several properties of strings.
5445      *
5446      * @param s String to test.
5447      * @param property UProperty selector constant, identifies which binary property to check.
5448      *        Must be BINARY_START&lt;=which&lt;BINARY_LIMIT.
5449      * @return true or false according to the binary Unicode property value for the string.
5450      *         Also false if <code>property</code> is out of bounds or if the Unicode version
5451      *         does not have data for the property at all.
5452      *
5453      * @see android.icu.lang.UProperty
5454      */
hasBinaryProperty(CharSequence s, int property)5455     public static boolean hasBinaryProperty(CharSequence s, int property) {
5456         int length = s.length();
5457         if (length == 1) {
5458             return hasBinaryProperty(s.charAt(0), property);  // single code point
5459         } else if (length == 2) {
5460             // first code point
5461             int c = Character.codePointAt(s, 0);
5462             if (Character.charCount(c) == length) {
5463                 return hasBinaryProperty(c, property);  // single code point
5464             }
5465         }
5466         // Only call into EmojiProps for a relevant property,
5467         // so that we not unnecessarily try to load its data file.
5468         return UProperty.BASIC_EMOJI <= property && property <= UProperty.RGI_EMOJI &&
5469             EmojiProps.INSTANCE.hasBinaryProperty(s, property);
5470     }
5471 
5472     /**
5473      * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property.
5474      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
5475      * <p>Different from UCharacter.isLetter(ch)!
5476      * @param ch codepoint to be tested
5477      */
isUAlphabetic(int ch)5478     public static boolean isUAlphabetic(int ch)
5479     {
5480         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5481     }
5482 
5483     /**
5484      * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property.
5485      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
5486      * <p>This is different from UCharacter.isLowerCase(ch)!
5487      * @param ch codepoint to be tested
5488      */
isULowercase(int ch)5489     public static boolean isULowercase(int ch)
5490     {
5491         return hasBinaryProperty(ch, UProperty.LOWERCASE);
5492     }
5493 
5494     /**
5495      * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property.
5496      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
5497      * <p>This is different from UCharacter.isUpperCase(ch)!
5498      * @param ch codepoint to be tested
5499      */
isUUppercase(int ch)5500     public static boolean isUUppercase(int ch)
5501     {
5502         return hasBinaryProperty(ch, UProperty.UPPERCASE);
5503     }
5504 
5505     /**
5506      * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property.
5507      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
5508      * <p>This is different from both UCharacter.isSpace(ch) and
5509      * UCharacter.isWhitespace(ch)!
5510      * @param ch codepoint to be tested
5511      */
isUWhiteSpace(int ch)5512     public static boolean isUWhiteSpace(int ch)
5513     {
5514         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5515     }
5516 
5517     /**
5518      * <strong>[icu]</strong> Returns the property value for a Unicode property type of a code point.
5519      * Also returns binary and mask property values.
5520      * <p>Unicode, especially in version 3.2, defines many more properties than
5521      * the original set in UnicodeData.txt.
5522      * <p>The properties APIs are intended to reflect Unicode properties as
5523      * defined in the Unicode Character Database (UCD) and Unicode Technical
5524      * Reports (UTR). For details about the properties see
5525      * http://www.unicode.org/.
5526      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5527      *
5528      * <pre>
5529      * Sample usage:
5530      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5531      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5532      * boolean b = (ideo == 1) ? true : false;
5533      * </pre>
5534      * @param ch code point to test.
5535      * @param type UProperty selector constant, identifies which binary
5536      *        property to check. Must be
5537      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5538      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5539      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5540      * @return numeric value that is directly the property value or,
5541      *         for enumerated properties, corresponds to the numeric value of
5542      *         the enumerated constant of the respective property value type
5543      *         ({@link ECharacterCategory}, {@link ECharacterDirection},
5544      *         {@link DecompositionType}, etc.).
5545      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5546      *         Returns a bit-mask for mask properties.
5547      *         Returns 0 if 'type' is out of bounds or if the Unicode version
5548      *         does not have data for the property at all, or not for this code
5549      *         point.
5550      * @see UProperty
5551      * @see #hasBinaryProperty
5552      * @see #getIntPropertyMinValue
5553      * @see #getIntPropertyMaxValue
5554      * @see #getUnicodeVersion
5555      */
getIntPropertyValue(int ch, int type)5556     public static int getIntPropertyValue(int ch, int type)
5557     {
5558         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
5559     }
5560     /**
5561      * <strong>[icu]</strong> Returns a string version of the property value.
5562      * @param propertyEnum The property enum value.
5563      * @param codepoint The codepoint value.
5564      * @param nameChoice The choice of the name.
5565      * @return value as string
5566      * @deprecated This API is ICU internal only.
5567      * @hide original deprecated declaration
5568      * @hide draft / provisional / internal are hidden on Android
5569      */
5570     @Deprecated
5571     ///CLOVER:OFF
getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5572     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5573         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5574                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5575             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
5576                     nameChoice);
5577         }
5578         if (propertyEnum == UProperty.NUMERIC_VALUE) {
5579             return String.valueOf(getUnicodeNumericValue(codepoint));
5580         }
5581         // otherwise must be string property
5582         switch (propertyEnum) {
5583         case UProperty.AGE: return getAge(codepoint).toString();
5584         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5585         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
5586         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
5587         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5588         case UProperty.NAME: return getName(codepoint);
5589         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
5590         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5591         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5592         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5593         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5594         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5595         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5596         }
5597         throw new IllegalArgumentException("Illegal Property Enum");
5598     }
5599     ///CLOVER:ON
5600 
5601     /**
5602      * <strong>[icu]</strong> Returns the minimum value for an integer/binary Unicode property type.
5603      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5604      * to allocate arrays of android.icu.text.UnicodeSet or similar.
5605      * @param type UProperty selector constant, identifies which binary
5606      *        property to check. Must be
5607      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5608      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5609      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5610      *         for a Unicode property. 0 if the property
5611      *         selector 'type' is out of range.
5612      * @see UProperty
5613      * @see #hasBinaryProperty
5614      * @see #getUnicodeVersion
5615      * @see #getIntPropertyMaxValue
5616      * @see #getIntPropertyValue
5617      */
getIntPropertyMinValue(int type)5618     public static int getIntPropertyMinValue(int type){
5619 
5620         return 0; // undefined; and: all other properties have a minimum value of 0
5621     }
5622 
5623 
5624     /**
5625      * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property.
5626      * Can be used together with UCharacter.getIntPropertyMinValue(int)
5627      * to allocate arrays of android.icu.text.UnicodeSet or similar.
5628      * Examples for min/max values (for Unicode 3.2):
5629      * <ul>
5630      * <li> UProperty.BIDI_CLASS:    0/18
5631      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5632      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5633      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5634      * </ul>
5635      * For undefined UProperty constant values, min/max values will be 0/-1.
5636      * @param type UProperty selector constant, identifies which binary
5637      *        property to check. Must be
5638      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5639      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5640      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
5641      *         property. &lt;= 0 if the property selector 'type' is out of range.
5642      * @see UProperty
5643      * @see #hasBinaryProperty
5644      * @see #getUnicodeVersion
5645      * @see #getIntPropertyMaxValue
5646      * @see #getIntPropertyValue
5647      */
getIntPropertyMaxValue(int type)5648     public static int getIntPropertyMaxValue(int type)
5649     {
5650         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
5651     }
5652 
5653     /**
5654      * Provide the java.lang.Character forDigit API, for convenience.
5655      */
forDigit(int digit, int radix)5656     public static char forDigit(int digit, int radix) {
5657         return java.lang.Character.forDigit(digit, radix);
5658     }
5659 
// JDK 1.5 API coverage
5661 
5662     /**
5663      * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}.
5664      */
5665     public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;
5666 
5667     /**
5668      * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}.
5669      */
5670     public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;
5671 
5672     /**
5673      * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}.
5674      */
5675     public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;
5676 
5677     /**
5678      * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}.
5679      */
5680     public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;
5681 
5682     /**
5683      * Constant U+D800, same as {@link Character#MIN_SURROGATE}.
5684      */
5685     public static final char MIN_SURROGATE = Character.MIN_SURROGATE;
5686 
5687     /**
5688      * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}.
5689      */
5690     public static final char MAX_SURROGATE = Character.MAX_SURROGATE;
5691 
5692     /**
5693      * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
5694      */
5695     public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;
5696 
5697     /**
5698      * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}.
5699      */
5700     public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;
5701 
5702     /**
5703      * Constant U+0000, same as {@link Character#MIN_CODE_POINT}.
5704      */
5705     public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
5706 
5707     /**
5708      * Equivalent to {@link Character#isValidCodePoint}.
5709      *
5710      * @param cp the code point to check
5711      * @return true if cp is a valid code point
5712      */
isValidCodePoint(int cp)5713     public static final boolean isValidCodePoint(int cp) {
5714         return cp >= 0 && cp <= MAX_CODE_POINT;
5715     }
5716 
5717     /**
5718      * Same as {@link Character#isSupplementaryCodePoint}.
5719      *
5720      * @param cp the code point to check
5721      * @return true if cp is a supplementary code point
5722      */
isSupplementaryCodePoint(int cp)5723     public static final boolean isSupplementaryCodePoint(int cp) {
5724         return Character.isSupplementaryCodePoint(cp);
5725     }
5726 
5727     /**
5728      * Same as {@link Character#isHighSurrogate},
5729      * except that the ICU version accepts <code>int</code> for code points.
5730      *
5731      * @param codePoint the code point to check
5732      *        (In ICU 3.0-69 the type of this parameter was <code>char</code>.)
5733      * @return true if codePoint is a high (lead) surrogate
5734      */
isHighSurrogate(int codePoint)5735     public static boolean isHighSurrogate(int codePoint) {
5736         return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
5737     }
5738 
5739     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
5740     /**
5741      * Same as {@link Character#isHighSurrogate},
5742      *
5743      * @param ch the char to check
5744      * @return true if ch is a high (lead) surrogate
5745      */
isHighSurrogate(char ch)5746     public static boolean isHighSurrogate(char ch) {
5747         return isHighSurrogate((int) ch);
5748     }
5749     // END Android patch: Keep the `char` version on Android. See ICU-21655
5750 
5751     /**
5752      * Same as {@link Character#isLowSurrogate},
5753      * except that the ICU version accepts <code>int</code> for code points.
5754      *
5755      * @param codePoint the code point to check
5756      *        (In ICU 3.0-69 the type of this parameter was <code>char</code>.)
5757      * @return true if codePoint is a low (trail) surrogate
5758      */
isLowSurrogate(int codePoint)5759     public static boolean isLowSurrogate(int codePoint) {
5760         return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
5761     }
5762 
5763     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
5764     /**
5765      * Same as {@link Character#isLowSurrogate},
5766      *
5767      * @param ch the char to check
5768      * @return true if ch is a low (trail) surrogate
5769      */
isLowSurrogate(char ch)5770     public static boolean isLowSurrogate(char ch) {
5771         return isLowSurrogate((int) ch);
5772     }
5773     // END Android patch: Keep the `char` version on Android. See ICU-21655
5774 
5775     /**
5776      * Same as {@link Character#isSurrogatePair},
5777      * except that the ICU version accepts <code>int</code> for code points.
5778      *
5779      * @param high the high (lead) unit
5780      *        (In ICU 3.0-69 the type of both parameters was <code>char</code>.)
5781      * @param low the low (trail) unit
5782      * @return true if high, low form a surrogate pair
5783      */
isSurrogatePair(int high, int low)5784     public static final boolean isSurrogatePair(int high, int low) {
5785         return isHighSurrogate(high) && isLowSurrogate(low);
5786     }
5787 
5788     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
5789     /**
5790      * Same as {@link Character#isSurrogatePair}.
5791      *
5792      * @param high the high (lead) char
5793      * @param low the low (trail) char
5794      * @return true if high, low form a surrogate pair
5795      */
isSurrogatePair(char high, char low)5796     public static final boolean isSurrogatePair(char high, char low) {
5797         return isSurrogatePair((int) high, (int) low);
5798     }
5799     // END Android patch: Keep the `char` version on Android. See ICU-21655
5800 
5801     /**
5802      * Same as {@link Character#charCount}.
5803      * Returns the number of chars needed to represent the code point (1 or 2).
5804      * This does not check the code point for validity.
5805      *
5806      * @param cp the code point to check
5807      * @return the number of chars needed to represent the code point
5808      */
charCount(int cp)5809     public static int charCount(int cp) {
5810         return Character.charCount(cp);
5811     }
5812 
5813     /**
5814      * Same as {@link Character#toCodePoint},
5815      * except that the ICU version accepts <code>int</code> for code points.
5816      * Returns the code point represented by the two surrogate code units.
5817      * This does not check the surrogate pair for validity.
5818      *
5819      * @param high the high (lead) surrogate
5820      *        (In ICU 3.0-69 the type of both parameters was <code>char</code>.)
5821      * @param low the low (trail) surrogate
5822      * @return the code point formed by the surrogate pair
5823      * @see #getCodePoint(int, int)
5824      */
toCodePoint(int high, int low)5825     public static final int toCodePoint(int high, int low) {
5826         // see ICU4C U16_GET_SUPPLEMENTARY()
5827         return (high << 10) + low - U16_SURROGATE_OFFSET;
5828     }
5829 
5830     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
5831     /**
5832      * Same as {@link Character#toCodePoint}.
5833      * Returns the code point represented by the two surrogate code units.
5834      * This does not check the surrogate pair for validity.
5835      *
5836      * @param high the high (lead) surrogate
5837      * @param low the low (trail) surrogate
5838      * @return the code point formed by the surrogate pair
5839      */
toCodePoint(char high, char low)5840     public static final int toCodePoint(char high, char low) {
5841         return toCodePoint((int) high, (int) low);
5842     }
5843     // END Android patch: Keep the `char` version on Android. See ICU-21655
5844 
5845     /**
5846      * Same as {@link Character#codePointAt(CharSequence, int)}.
5847      * Returns the code point at index.
5848      * This examines only the characters at index and index+1.
5849      *
5850      * @param seq the characters to check
5851      * @param index the index of the first or only char forming the code point
5852      * @return the code point at the index
5853      */
codePointAt(CharSequence seq, int index)5854     public static final int codePointAt(CharSequence seq, int index) {
5855         char c1 = seq.charAt(index++);
5856         if (isHighSurrogate(c1)) {
5857             if (index < seq.length()) {
5858                 char c2 = seq.charAt(index);
5859                 if (isLowSurrogate(c2)) {
5860                     return toCodePoint(c1, c2);
5861                 }
5862             }
5863         }
5864         return c1;
5865     }
5866 
5867     /**
5868      * Same as {@link Character#codePointAt(char[], int)}.
5869      * Returns the code point at index.
5870      * This examines only the characters at index and index+1.
5871      *
5872      * @param text the characters to check
5873      * @param index the index of the first or only char forming the code point
5874      * @return the code point at the index
5875      */
codePointAt(char[] text, int index)5876     public static final int codePointAt(char[] text, int index) {
5877         char c1 = text[index++];
5878         if (isHighSurrogate(c1)) {
5879             if (index < text.length) {
5880                 char c2 = text[index];
5881                 if (isLowSurrogate(c2)) {
5882                     return toCodePoint(c1, c2);
5883                 }
5884             }
5885         }
5886         return c1;
5887     }
5888 
5889     /**
5890      * Same as {@link Character#codePointAt(char[], int, int)}.
5891      * Returns the code point at index.
5892      * This examines only the characters at index and index+1.
5893      *
5894      * @param text the characters to check
5895      * @param index the index of the first or only char forming the code point
5896      * @param limit the limit of the valid text
5897      * @return the code point at the index
5898      */
codePointAt(char[] text, int index, int limit)5899     public static final int codePointAt(char[] text, int index, int limit) {
5900         if (index >= limit || limit > text.length) {
5901             throw new IndexOutOfBoundsException();
5902         }
5903         char c1 = text[index++];
5904         if (isHighSurrogate(c1)) {
5905             if (index < limit) {
5906                 char c2 = text[index];
5907                 if (isLowSurrogate(c2)) {
5908                     return toCodePoint(c1, c2);
5909                 }
5910             }
5911         }
5912         return c1;
5913     }
5914 
5915     /**
5916      * Same as {@link Character#codePointBefore(CharSequence, int)}.
5917      * Return the code point before index.
5918      * This examines only the characters at index-1 and index-2.
5919      *
5920      * @param seq the characters to check
5921      * @param index the index after the last or only char forming the code point
5922      * @return the code point before the index
5923      */
codePointBefore(CharSequence seq, int index)5924     public static final int codePointBefore(CharSequence seq, int index) {
5925         char c2 = seq.charAt(--index);
5926         if (isLowSurrogate(c2)) {
5927             if (index > 0) {
5928                 char c1 = seq.charAt(--index);
5929                 if (isHighSurrogate(c1)) {
5930                     return toCodePoint(c1, c2);
5931                 }
5932             }
5933         }
5934         return c2;
5935     }
5936 
5937     /**
5938      * Same as {@link Character#codePointBefore(char[], int)}.
5939      * Returns the code point before index.
5940      * This examines only the characters at index-1 and index-2.
5941      *
5942      * @param text the characters to check
5943      * @param index the index after the last or only char forming the code point
5944      * @return the code point before the index
5945      */
codePointBefore(char[] text, int index)5946     public static final int codePointBefore(char[] text, int index) {
5947         char c2 = text[--index];
5948         if (isLowSurrogate(c2)) {
5949             if (index > 0) {
5950                 char c1 = text[--index];
5951                 if (isHighSurrogate(c1)) {
5952                     return toCodePoint(c1, c2);
5953                 }
5954             }
5955         }
5956         return c2;
5957     }
5958 
5959     /**
5960      * Same as {@link Character#codePointBefore(char[], int, int)}.
5961      * Return the code point before index.
5962      * This examines only the characters at index-1 and index-2.
5963      *
5964      * @param text the characters to check
5965      * @param index the index after the last or only char forming the code point
5966      * @param limit the start of the valid text
5967      * @return the code point before the index
5968      */
codePointBefore(char[] text, int index, int limit)5969     public static final int codePointBefore(char[] text, int index, int limit) {
5970         if (index <= limit || limit < 0) {
5971             throw new IndexOutOfBoundsException();
5972         }
5973         char c2 = text[--index];
5974         if (isLowSurrogate(c2)) {
5975             if (index > limit) {
5976                 char c1 = text[--index];
5977                 if (isHighSurrogate(c1)) {
5978                     return toCodePoint(c1, c2);
5979                 }
5980             }
5981         }
5982         return c2;
5983     }
5984 
5985     /**
5986      * Same as {@link Character#toChars(int, char[], int)}.
5987      * Writes the chars representing the
5988      * code point into the destination at the given index.
5989      *
5990      * @param cp the code point to convert
5991      * @param dst the destination array into which to put the char(s) representing the code point
5992      * @param dstIndex the index at which to put the first (or only) char
5993      * @return the count of the number of chars written (1 or 2)
5994      * @throws IllegalArgumentException if cp is not a valid code point
5995      */
toChars(int cp, char[] dst, int dstIndex)5996     public static final int toChars(int cp, char[] dst, int dstIndex) {
5997         return Character.toChars(cp, dst, dstIndex);
5998     }
5999 
6000     /**
6001      * Same as {@link Character#toChars(int)}.
6002      * Returns a char array representing the code point.
6003      *
6004      * @param cp the code point to convert
6005      * @return an array containing the char(s) representing the code point
6006      * @throws IllegalArgumentException if cp is not a valid code point
6007      */
toChars(int cp)6008     public static final char[] toChars(int cp) {
6009         return Character.toChars(cp);
6010     }
6011 
6012     /**
6013      * Equivalent to the {@link Character#getDirectionality(char)} method, for
6014      * convenience. Returns a byte representing the directionality of the
6015      * character.
6016      *
6017      * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns
6018      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
6019      *
6020      * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link
6021      * UCharacterDirection} and its interface {@link
6022      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
6023      * defined by <code>java.lang.Character</code>.
6024      * @param cp the code point to check
6025      * @return the directionality of the code point
6026      * @see #getDirection
6027      */
getDirectionality(int cp)6028     public static byte getDirectionality(int cp)
6029     {
6030         return (byte)getDirection(cp);
6031     }
6032 
6033     /**
6034      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
6035      * method, for convenience.  Counts the number of code points in the range
6036      * of text.
6037      * @param text the characters to check
6038      * @param start the start of the range
6039      * @param limit the limit of the range
6040      * @return the number of code points in the range
6041      */
codePointCount(CharSequence text, int start, int limit)6042     public static int codePointCount(CharSequence text, int start, int limit) {
6043         if (start < 0 || limit < start || limit > text.length()) {
6044             throw new IndexOutOfBoundsException("start (" + start +
6045                     ") or limit (" + limit +
6046                     ") invalid or out of range 0, " + text.length());
6047         }
6048 
6049         int len = limit - start;
6050         while (limit > start) {
6051             char ch = text.charAt(--limit);
6052             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6053                 ch = text.charAt(--limit);
6054                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6055                     --len;
6056                     break;
6057                 }
6058             }
6059         }
6060         return len;
6061     }
6062 
6063     /**
6064      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
6065      * convenience. Counts the number of code points in the range of text.
6066      * @param text the characters to check
6067      * @param start the start of the range
6068      * @param limit the limit of the range
6069      * @return the number of code points in the range
6070      */
codePointCount(char[] text, int start, int limit)6071     public static int codePointCount(char[] text, int start, int limit) {
6072         if (start < 0 || limit < start || limit > text.length) {
6073             throw new IndexOutOfBoundsException("start (" + start +
6074                     ") or limit (" + limit +
6075                     ") invalid or out of range 0, " + text.length);
6076         }
6077 
6078         int len = limit - start;
6079         while (limit > start) {
6080             char ch = text[--limit];
6081             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6082                 ch = text[--limit];
6083                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6084                     --len;
6085                     break;
6086                 }
6087             }
6088         }
6089         return len;
6090     }
6091 
6092     /**
6093      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
6094      * method, for convenience.  Adjusts the char index by a code point offset.
6095      * @param text the characters to check
6096      * @param index the index to adjust
6097      * @param codePointOffset the number of code points by which to offset the index
6098      * @return the adjusted index
6099      */
offsetByCodePoints(CharSequence text, int index, int codePointOffset)6100     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
6101         if (index < 0 || index > text.length()) {
6102             throw new IndexOutOfBoundsException("index ( " + index +
6103                     ") out of range 0, " + text.length());
6104         }
6105 
6106         if (codePointOffset < 0) {
6107             while (++codePointOffset <= 0) {
6108                 char ch = text.charAt(--index);
6109                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
6110                     ch = text.charAt(--index);
6111                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6112                         if (++codePointOffset > 0) {
6113                             return index+1;
6114                         }
6115                     }
6116                 }
6117             }
6118         } else {
6119             int limit = text.length();
6120             while (--codePointOffset >= 0) {
6121                 char ch = text.charAt(index++);
6122                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6123                     ch = text.charAt(index++);
6124                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6125                         if (--codePointOffset < 0) {
6126                             return index-1;
6127                         }
6128                     }
6129                 }
6130             }
6131         }
6132 
6133         return index;
6134     }
6135 
6136     /**
6137      * Equivalent to the
6138      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
6139      * method, for convenience.  Adjusts the char index by a code point offset.
6140      * @param text the characters to check
6141      * @param start the start of the range to check
6142      * @param count the length of the range to check
6143      * @param index the index to adjust
6144      * @param codePointOffset the number of code points by which to offset the index
6145      * @return the adjusted index
6146      */
offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6147     public static int offsetByCodePoints(char[] text, int start, int count, int index,
6148             int codePointOffset) {
6149         int limit = start + count;
6150         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
6151             throw new IndexOutOfBoundsException("index ( " + index +
6152                     ") out of range " + start +
6153                     ", " + limit +
6154                     " in array 0, " + text.length);
6155         }
6156 
6157         if (codePointOffset < 0) {
6158             while (++codePointOffset <= 0) {
6159                 char ch = text[--index];
6160                 if (index < start) {
6161                     throw new IndexOutOfBoundsException("index ( " + index +
6162                             ") < start (" + start +
6163                             ")");
6164                 }
6165                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
6166                     ch = text[--index];
6167                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6168                         if (++codePointOffset > 0) {
6169                             return index+1;
6170                         }
6171                     }
6172                 }
6173             }
6174         } else {
6175             while (--codePointOffset >= 0) {
6176                 char ch = text[index++];
6177                 if (index > limit) {
6178                     throw new IndexOutOfBoundsException("index ( " + index +
6179                             ") > limit (" + limit +
6180                             ")");
6181                 }
6182                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6183                     ch = text[index++];
6184                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6185                         if (--codePointOffset < 0) {
6186                             return index-1;
6187                         }
6188                     }
6189                 }
6190             }
6191         }
6192 
6193         return index;
6194     }
6195 
6196     // private variables -------------------------------------------------
6197 
    /**
     * Mask that keeps only the low 16 bits of a value, i.e. its last char
     */
    private static final int LAST_CHAR_MASK_ = 0xFFFF;
6202 
6203     //    /**
6204     //     * To get the last byte out from a data type
6205     //     */
6206     //    private static final int LAST_BYTE_MASK_ = 0xFF;
6207     //
6208     //    /**
6209     //     * Shift 16 bits
6210     //     */
6211     //    private static final int SHIFT_16_ = 16;
6212     //
6213     //    /**
6214     //     * Shift 24 bits
6215     //     */
6216     //    private static final int SHIFT_24_ = 24;
6217     //
6218     //    /**
6219     //     * Decimal radix
6220     //     */
6221     //    private static final int DECIMAL_RADIX_ = 10;
6222 
    /**
     * No-break space code point (U+00A0)
     */
    private static final int NO_BREAK_SPACE_ = 0xA0;

    /**
     * Figure space code point (U+2007)
     */
    private static final int FIGURE_SPACE_ = 0x2007;

    /**
     * Narrow no-break space code point (U+202F)
     */
    private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;

    /**
     * Ideographic number zero code point (U+3007)
     */
    private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;

    /**
     * CJK ideograph for the number one (U+4E00)
     */
    private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;

    /**
     * CJK ideograph for the number two (U+4E8C)
     */
    private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;

    /**
     * CJK ideograph for the number three (U+4E09)
     */
    private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;

    /**
     * CJK ideograph for the number four (U+56DB)
     */
    private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;

    /**
     * CJK ideograph for the number five (U+4E94)
     */
    private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;

    /**
     * CJK ideograph for the number six (U+516D)
     */
    private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;

    /**
     * CJK ideograph for the number seven (U+4E03)
     */
    private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;

    /**
     * CJK ideograph for the number eight (U+516B)
     */
    private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;

    /**
     * CJK ideograph for the number nine (U+4E5D).
     * The identifier keeps its historical spelling ("NINETH").
     */
    private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;

    /**
     * Application Program Command code point (U+009F)
     */
    private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;

    /**
     * Unit separator code point (U+001F)
     */
    private static final int UNIT_SEPARATOR_ = 0x001F;

    /**
     * Delete code point (U+007F)
     */
    private static final int DELETE_ = 0x007F;
6302 
    /**
     * Han digit characters: the "complex" forms of zero to ten, hundred and
     * thousand, together with the plain ideographs for ten, hundred,
     * thousand, ten thousand and hundred million.
     */
    private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
    private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
    private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
    private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
    private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
    private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
    private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
    private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
    private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
    private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
    private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
    private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
    private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
    private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
    private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
    private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
    // NOTE(review): the ideograph for ten thousand is presumably U+842C; 0x824c
    // looks like a digit transposition. Confirm against the Unicode charts and
    // the usages of this constant before changing the value.
    private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
    private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6324 
6325     // private constructor -----------------------------------------------
6326     ///CLOVER:OFF
6327     /**
6328      * Private constructor to prevent instantiation
6329      */
UCharacter()6330     private UCharacter()
6331     {
6332     }
6333     ///CLOVER:ON
6334 }
6335