• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5  *******************************************************************************
6  * Copyright (C) 1996-2016, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  *******************************************************************************
9  */
10 
11 package ohos.global.icu.lang;
12 
13 import java.lang.ref.SoftReference;
14 import java.util.HashMap;
15 import java.util.Iterator;
16 import java.util.Locale;
17 import java.util.Map;
18 
19 import ohos.global.icu.impl.CaseMapImpl;
20 import ohos.global.icu.impl.IllegalIcuArgumentException;
21 import ohos.global.icu.impl.Trie2;
22 import ohos.global.icu.impl.UBiDiProps;
23 import ohos.global.icu.impl.UCaseProps;
24 import ohos.global.icu.impl.UCharacterName;
25 import ohos.global.icu.impl.UCharacterNameChoice;
26 import ohos.global.icu.impl.UCharacterProperty;
27 import ohos.global.icu.impl.UCharacterUtility;
28 import ohos.global.icu.impl.UPropertyAliases;
29 import ohos.global.icu.lang.UCharacterEnums.ECharacterCategory;
30 import ohos.global.icu.lang.UCharacterEnums.ECharacterDirection;
31 import ohos.global.icu.text.BreakIterator;
32 import ohos.global.icu.text.Normalizer2;
33 import ohos.global.icu.util.RangeValueIterator;
34 import ohos.global.icu.util.ULocale;
35 import ohos.global.icu.util.ValueIterator;
36 import ohos.global.icu.util.VersionInfo;
37 
38 /**
39  * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
40  *
41  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
42  * These extensions provide support for more Unicode properties.
43  * Each ICU release supports the latest version of Unicode available at that time.
44  *
45  * <p>For some time before Java 5 added support for supplementary Unicode code points,
46  * the ICU UCharacter class and many other ICU classes already supported them.
47  * Some UCharacter methods and constants were widened slightly differently than
48  * how the Character class methods and constants were widened later.
49  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
50  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
51  *
52  * <p>Code points are represented in this API using ints. While it would be
53  * more convenient in Java to have a separate primitive datatype for them,
54  * ints suffice in the meantime.
55  *
56  * <p>To use this class please add the jar file name icu4j.jar to the
57  * class path, since it contains data files which supply the information used
58  * by this file.<br>
59  * E.g. In Windows <br>
60  * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/ucharacter.jar</code>.<br>
61  * Otherwise, another method would be to copy the files uprops.dat and
62  * unames.icu from the icu4j source subdirectory
63  * <i>$ICU4J_SRC/src/ohos.global.icu.impl.data</i> to your class directory
64  * <i>$ICU4J_CLASS/ohos.global.icu.impl.data</i>.
65  *
66  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
67  * properties, the main differences between UCharacter and Character are:
68  * <ul>
69  * <li> UCharacter is not designed to be a char wrapper and does not have
70  *      APIs that involve management of that single char.<br>
71  *      These include:
72  *      <ul>
73  *        <li> char charValue(),
74  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
75  *      </ul>
76  * <li> UCharacter does not include Character APIs that are deprecated, nor
77  *      does it include the Java-specific character information, such as
78  *      boolean isJavaIdentifierPart(char ch).
79  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
80  *      values '10' - '35'. UCharacter also does this in digit and
81  *      getNumericValue, to adhere to the java semantics of these
82  *      methods.  New methods unicodeDigit, and
83  *      getUnicodeNumericValue do not treat the above code points
84  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
85  * </ul>
86  * <p>
87  * Further detail on differences can be determined using the program
88  *        <a href=
89  * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
90  *        ohos.global.icu.dev.test.lang.UCharacterCompare</a>
91  * <p>
92  * In addition to Java compatibility functions, which calculate derived properties,
93  * this API provides low-level access to the Unicode Character Database.
94  * <p>
95  * Unicode assigns each code point (not just assigned character) values for
96  * many properties.
97  * Most of them are simple boolean flags, or constants from a small enumerated list.
98  * For some properties, values are strings or other relatively more complex types.
99  * <p>
100  * For more information see
101  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
102  * (http://www.unicode.org/ucd/)
103  * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
104  * User Guide chapter on Properties</a>
105  * (http://www.icu-project.org/userguide/properties.html).
106  * <p>
107  * There are also functions that provide easy migration from C/POSIX functions
108  * like isblank(). Their use is generally discouraged because the C/POSIX
109  * standards do not define their semantics beyond the ASCII range, which means
110  * that different implementations exhibit very different behavior.
111  * Instead, Unicode properties should be used directly.
112  * <p>
113  * There are also only a few, broad C/POSIX character classes, and they tend
114  * to be used for conflicting purposes. For example, the "isalpha()" class
115  * is sometimes used to determine word boundaries, while a more sophisticated
116  * approach would at least distinguish initial letters from continuation
117  * characters (the latter including combining marks).
118  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
119  * Another example: There is no "istitle()" class for titlecase characters.
120  * <p>
121  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
122  * ICU implements them according to the Standard Recommendations in
123  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
124  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
125  * <p>
126  * API access for C/POSIX character classes is as follows:
127  * <pre>{@code
128  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
129  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
130  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
131  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
132  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
133  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
134  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
135  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
136  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
137  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
138  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
139  * - cntrl:     getType(c)==CONTROL
140  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
141  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
142  * <p>
143  * The C/POSIX character classes are also available in UnicodeSet patterns,
144  * using patterns like [:graph:] or \p{graph}.
145  *
146  * <p><strong>[icu] Note:</strong> There are several ICU (and Java) whitespace functions.
147  * Comparison:<ul>
148  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
149  *       most of general categories "Z" (separators) + most whitespace ISO controls
150  *       (including no-break spaces, but excluding IS1..IS4)
151  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
152  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
153  *
154  * <p>
155  * This class is not subclassable.
156  *
157  * @author Syn Wee Quek
158  * @see ohos.global.icu.lang.UCharacterEnums
159  */
160 
161 public final class UCharacter implements ECharacterCategory, ECharacterDirection
162 {
163     // public inner classes ----------------------------------------------
164 
165     /**
166      * <strong>[icu enhancement]</strong> ICU's replacement for {@link java.lang.Character.UnicodeBlock}.&nbsp;Methods, fields, and other functionality specific to ICU are labeled '<strong>[icu]</strong>'.
167      *
168      * A family of character subsets representing the character blocks in the
169      * Unicode specification, generated from Unicode Data file Blocks.txt.
170      * Character blocks generally define characters used for a specific script
171      * or purpose. A character is contained by at most one Unicode block.
172      *
173      * <strong>[icu] Note:</strong> All fields named XXX_ID are specific to ICU.
174      */
175     public static final class UnicodeBlock extends Character.Subset
176     {
177         // block id corresponding to icu4c -----------------------------------
178 
179         /**
180          */
181         public static final int INVALID_CODE_ID = -1;
182         /**
183          */
184         public static final int BASIC_LATIN_ID = 1;
185         /**
186          */
187         public static final int LATIN_1_SUPPLEMENT_ID = 2;
188         /**
189          */
190         public static final int LATIN_EXTENDED_A_ID = 3;
191         /**
192          */
193         public static final int LATIN_EXTENDED_B_ID = 4;
194         /**
195          */
196         public static final int IPA_EXTENSIONS_ID = 5;
197         /**
198          */
199         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
200         /**
201          */
202         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
203         /**
204          * Unicode 3.2 renames this block to "Greek and Coptic".
205          */
206         public static final int GREEK_ID = 8;
207         /**
208          */
209         public static final int CYRILLIC_ID = 9;
210         /**
211          */
212         public static final int ARMENIAN_ID = 10;
213         /**
214          */
215         public static final int HEBREW_ID = 11;
216         /**
217          */
218         public static final int ARABIC_ID = 12;
219         /**
220          */
221         public static final int SYRIAC_ID = 13;
222         /**
223          */
224         public static final int THAANA_ID = 14;
225         /**
226          */
227         public static final int DEVANAGARI_ID = 15;
228         /**
229          */
230         public static final int BENGALI_ID = 16;
231         /**
232          */
233         public static final int GURMUKHI_ID = 17;
234         /**
235          */
236         public static final int GUJARATI_ID = 18;
237         /**
238          */
239         public static final int ORIYA_ID = 19;
240         /**
241          */
242         public static final int TAMIL_ID = 20;
243         /**
244          */
245         public static final int TELUGU_ID = 21;
246         /**
247          */
248         public static final int KANNADA_ID = 22;
249         /**
250          */
251         public static final int MALAYALAM_ID = 23;
252         /**
253          */
254         public static final int SINHALA_ID = 24;
255         /**
256          */
257         public static final int THAI_ID = 25;
258         /**
259          */
260         public static final int LAO_ID = 26;
261         /**
262          */
263         public static final int TIBETAN_ID = 27;
264         /**
265          */
266         public static final int MYANMAR_ID = 28;
267         /**
268          */
269         public static final int GEORGIAN_ID = 29;
270         /**
271          */
272         public static final int HANGUL_JAMO_ID = 30;
273         /**
274          */
275         public static final int ETHIOPIC_ID = 31;
276         /**
277          */
278         public static final int CHEROKEE_ID = 32;
279         /**
280          */
281         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
282         /**
283          */
284         public static final int OGHAM_ID = 34;
285         /**
286          */
287         public static final int RUNIC_ID = 35;
288         /**
289          */
290         public static final int KHMER_ID = 36;
291         /**
292          */
293         public static final int MONGOLIAN_ID = 37;
294         /**
295          */
296         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
297         /**
298          */
299         public static final int GREEK_EXTENDED_ID = 39;
300         /**
301          */
302         public static final int GENERAL_PUNCTUATION_ID = 40;
303         /**
304          */
305         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
306         /**
307          */
308         public static final int CURRENCY_SYMBOLS_ID = 42;
309         /**
310          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
311          * Symbols".
312          */
313         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
314         /**
315          */
316         public static final int LETTERLIKE_SYMBOLS_ID = 44;
317         /**
318          */
319         public static final int NUMBER_FORMS_ID = 45;
320         /**
321          */
322         public static final int ARROWS_ID = 46;
323         /**
324          */
325         public static final int MATHEMATICAL_OPERATORS_ID = 47;
326         /**
327          */
328         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
329         /**
330          */
331         public static final int CONTROL_PICTURES_ID = 49;
332         /**
333          */
334         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
335         /**
336          */
337         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
338         /**
339          */
340         public static final int BOX_DRAWING_ID = 52;
341         /**
342          */
343         public static final int BLOCK_ELEMENTS_ID = 53;
344         /**
345          */
346         public static final int GEOMETRIC_SHAPES_ID = 54;
347         /**
348          */
349         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
350         /**
351          */
352         public static final int DINGBATS_ID = 56;
353         /**
354          */
355         public static final int BRAILLE_PATTERNS_ID = 57;
356         /**
357          */
358         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
359         /**
360          */
361         public static final int KANGXI_RADICALS_ID = 59;
362         /**
363          */
364         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
365         /**
366          */
367         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
368         /**
369          */
370         public static final int HIRAGANA_ID = 62;
371         /**
372          */
373         public static final int KATAKANA_ID = 63;
374         /**
375          */
376         public static final int BOPOMOFO_ID = 64;
377         /**
378          */
379         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
380         /**
381          */
382         public static final int KANBUN_ID = 66;
383         /**
384          */
385         public static final int BOPOMOFO_EXTENDED_ID = 67;
386         /**
387          */
388         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
389         /**
390          */
391         public static final int CJK_COMPATIBILITY_ID = 69;
392         /**
393          */
394         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
395         /**
396          */
397         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
398         /**
399          */
400         public static final int YI_SYLLABLES_ID = 72;
401         /**
402          */
403         public static final int YI_RADICALS_ID = 73;
404         /**
405          */
406         public static final int HANGUL_SYLLABLES_ID = 74;
407         /**
408          */
409         public static final int HIGH_SURROGATES_ID = 75;
410         /**
411          */
412         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
413         /**
414          */
415         public static final int LOW_SURROGATES_ID = 77;
416         /**
417          * Same as public static final int PRIVATE_USE.
418          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
419          * and multiple code point ranges had this block.
420          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
421          * and adds separate blocks for the supplementary PUAs.
422          */
423         public static final int PRIVATE_USE_AREA_ID = 78;
424         /**
425          * Same as public static final int PRIVATE_USE_AREA.
426          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
427          * and multiple code point ranges had this block.
428          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
429          * and adds separate blocks for the supplementary PUAs.
430          */
431         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
432         /**
433          */
434         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
435         /**
436          */
437         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
438         /**
439          */
440         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
441         /**
442          */
443         public static final int COMBINING_HALF_MARKS_ID = 82;
444         /**
445          */
446         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
447         /**
448          */
449         public static final int SMALL_FORM_VARIANTS_ID = 84;
450         /**
451          */
452         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
453         /**
454          */
455         public static final int SPECIALS_ID = 86;
456         /**
457          */
458         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
459         /**
460          */
461         public static final int OLD_ITALIC_ID = 88;
462         /**
463          */
464         public static final int GOTHIC_ID = 89;
465         /**
466          */
467         public static final int DESERET_ID = 90;
468         /**
469          */
470         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
471         /**
472          */
473         public static final int MUSICAL_SYMBOLS_ID = 92;
474         /**
475          */
476         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
477         /**
478          */
479         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
480         /**
481          */
482         public static final int
483         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
484         /**
485          */
486         public static final int TAGS_ID = 96;
487 
488         // New blocks in Unicode 3.2
489 
490         /**
491          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
492          */
493         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
494         /**
495          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
496          */
497 
498         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
499         /**
500          */
501         public static final int TAGALOG_ID = 98;
502         /**
503          */
504         public static final int HANUNOO_ID = 99;
505         /**
506          */
507         public static final int BUHID_ID = 100;
508         /**
509          */
510         public static final int TAGBANWA_ID = 101;
511         /**
512          */
513         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
514         /**
515          */
516         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
517         /**
518          */
519         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
520         /**
521          */
522         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
523         /**
524          */
525         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
526         /**
527          */
528         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
529         /**
530          */
531         public static final int VARIATION_SELECTORS_ID = 108;
532         /**
533          */
534         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
535         /**
536          */
537         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
538 
539         /**
540          */
541         public static final int LIMBU_ID = 111; /*[1900]*/
542         /**
543          */
544         public static final int TAI_LE_ID = 112; /*[1950]*/
545         /**
546          */
547         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
548         /**
549          */
550         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
551         /**
552          */
553         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
554         /**
555          */
556         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
557         /**
558          */
559         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
560         /**
561          */
562         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
563         /**
564          */
565         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
566         /**
567          */
568         public static final int UGARITIC_ID = 120; /*[10380]*/
569         /**
570          */
571         public static final int SHAVIAN_ID = 121; /*[10450]*/
572         /**
573          */
574         public static final int OSMANYA_ID = 122; /*[10480]*/
575         /**
576          */
577         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
578         /**
579          */
580         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
581         /**
582          */
583         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
584 
585         /* New blocks in Unicode 4.1 */
586 
587         /**
588          */
589         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
590 
591         /**
592          */
593         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
594 
595         /**
596          */
597         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
598 
599         /**
600          */
601         public static final int BUGINESE_ID = 129; /*[1A00]*/
602 
603         /**
604          */
605         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
606 
607         /**
608          */
609         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
610 
611         /**
612          */
613         public static final int COPTIC_ID = 132; /*[2C80]*/
614 
615         /**
616          */
617         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
618 
619         /**
620          */
621         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
622 
623         /**
624          */
625         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
626 
627         /**
628          */
629         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
630 
631         /**
632          */
633         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
634 
635         /**
636          */
637         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
638 
639         /**
640          */
641         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
642 
643         /**
644          */
645         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
646 
647         /**
648          */
649         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
650 
651         /**
652          */
653         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
654 
655         /**
656          */
657         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
658 
659         /**
660          */
661         public static final int TIFINAGH_ID = 144; /*[2D30]*/
662 
663         /**
664          */
665         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
666 
667         /* New blocks in Unicode 5.0 */
668 
669         /**
670          */
671         public static final int NKO_ID = 146; /*[07C0]*/
672         /**
673          */
674         public static final int BALINESE_ID = 147; /*[1B00]*/
675         /**
676          */
677         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
678         /**
679          */
680         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
681         /**
682          */
683         public static final int PHAGS_PA_ID = 150; /*[A840]*/
684         /**
685          */
686         public static final int PHOENICIAN_ID = 151; /*[10900]*/
687         /**
688          */
689         public static final int CUNEIFORM_ID = 152; /*[12000]*/
690         /**
691          */
692         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
693         /**
694          */
695         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
696 
697         /**
698          */
699         public static final int SUNDANESE_ID = 155; /* [1B80] */
700 
701         /**
702          */
703         public static final int LEPCHA_ID = 156; /* [1C00] */
704 
705         /**
706          */
707         public static final int OL_CHIKI_ID = 157; /* [1C50] */
708 
709         /**
710          */
711         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
712 
713         /**
714          */
715         public static final int VAI_ID = 159; /* [A500] */
716 
717         /**
718          */
719         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
720 
721         /**
722          */
723         public static final int SAURASHTRA_ID = 161; /* [A880] */
724 
725         /**
726          */
727         public static final int KAYAH_LI_ID = 162; /* [A900] */
728 
729         /**
730          */
731         public static final int REJANG_ID = 163; /* [A930] */
732 
733         /**
734          */
735         public static final int CHAM_ID = 164; /* [AA00] */
736 
737         /**
738          */
739         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
740 
741         /**
742          */
743         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
744 
745         /**
746          */
747         public static final int LYCIAN_ID = 167; /* [10280] */
748 
749         /**
750          */
751         public static final int CARIAN_ID = 168; /* [102A0] */
752 
753         /**
754          */
755         public static final int LYDIAN_ID = 169; /* [10920] */
756 
757         /**
758          */
759         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
760 
761         /**
762          */
763         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
764 
765         /* New blocks in Unicode 5.2 */
766 
767         /***/
768         public static final int SAMARITAN_ID = 172; /*[0800]*/
769         /***/
770         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
771         /***/
772         public static final int TAI_THAM_ID = 174; /*[1A20]*/
773         /***/
774         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
775         /***/
776         public static final int LISU_ID = 176; /*[A4D0]*/
777         /***/
778         public static final int BAMUM_ID = 177; /*[A6A0]*/
779         /***/
780         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
781         /***/
782         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
783         /***/
784         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
785         /***/
786         public static final int JAVANESE_ID = 181; /*[A980]*/
787         /***/
788         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
789         /***/
790         public static final int TAI_VIET_ID = 183; /*[AA80]*/
791         /***/
792         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
793         /***/
794         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
795         /***/
796         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
797         /***/
798         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
799         /***/
800         public static final int AVESTAN_ID = 188; /*[10B00]*/
801         /***/
802         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
803         /***/
804         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
805         /***/
806         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
807         /***/
808         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
809         /***/
810         public static final int KAITHI_ID = 193; /*[11080]*/
811         /***/
812         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
813         /***/
814         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
815         /***/
816         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
817         /***/
818         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
819 
820         /* New blocks in Unicode 6.0 */
821 
822         /***/
823         public static final int MANDAIC_ID = 198; /*[0840]*/
824         /***/
825         public static final int BATAK_ID = 199; /*[1BC0]*/
826         /***/
827         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
828         /***/
829         public static final int BRAHMI_ID = 201; /*[11000]*/
830         /***/
831         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
832         /***/
833         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
834         /***/
835         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
836         /***/
837         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
838         /***/
839         public static final int EMOTICONS_ID = 206; /*[1F600]*/
840         /***/
841         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
842         /***/
843         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
844         /***/
845         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
846 
847         /* New blocks in Unicode 6.1 */
848 
849         /***/
850         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
851         /***/
852         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
853         /***/
854         public static final int CHAKMA_ID = 212; /*[11100]*/
855         /***/
856         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
857         /***/
858         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
859         /***/
860         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
861         /***/
862         public static final int MIAO_ID = 216; /*[16F00]*/
863         /***/
864         public static final int SHARADA_ID = 217; /*[11180]*/
865         /***/
866         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
867         /***/
868         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
869         /***/
870         public static final int TAKRI_ID = 220; /*[11680]*/
871 
872         /* New blocks in Unicode 7.0 */
873 
874         /***/
875         public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
876         /***/
877         public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
878         /***/
879         public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
880         /***/
881         public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
882         /***/
883         public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
884         /***/
885         public static final int ELBASAN_ID = 226; /*[10500]*/
886         /***/
887         public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
888         /***/
889         public static final int GRANTHA_ID = 228; /*[11300]*/
890         /***/
891         public static final int KHOJKI_ID = 229; /*[11200]*/
892         /***/
893         public static final int KHUDAWADI_ID = 230; /*[112B0]*/
894         /***/
895         public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
896         /***/
897         public static final int LINEAR_A_ID = 232; /*[10600]*/
898         /***/
899         public static final int MAHAJANI_ID = 233; /*[11150]*/
900         /***/
901         public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
902         /***/
903         public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
904         /***/
905         public static final int MODI_ID = 236; /*[11600]*/
906         /***/
907         public static final int MRO_ID = 237; /*[16A40]*/
908         /***/
909         public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
910         /***/
911         public static final int NABATAEAN_ID = 239; /*[10880]*/
912         /***/
913         public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
914         /***/
915         public static final int OLD_PERMIC_ID = 241; /*[10350]*/
916         /***/
917         public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
918         /***/
919         public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
920         /***/
921         public static final int PALMYRENE_ID = 244; /*[10860]*/
922         /***/
923         public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
924         /***/
925         public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
926         /***/
927         public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
928         /***/
929         public static final int SIDDHAM_ID = 248; /*[11580]*/
930         /***/
931         public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
932         /***/
933         public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
934         /***/
935         public static final int TIRHUTA_ID = 251; /*[11480]*/
936         /***/
937         public static final int WARANG_CITI_ID = 252; /*[118A0]*/
938 
939         /* New blocks in Unicode 8.0 */
940 
941         /***/
942         public static final int AHOM_ID = 253; /*[11700]*/
943         /***/
944         public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
945         /***/
946         public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
947         /***/
948         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
949         /***/
950         public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
951         /***/
952         public static final int HATRAN_ID = 258; /*[108E0]*/
953         /***/
954         public static final int MULTANI_ID = 259; /*[11280]*/
955         /***/
956         public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
957         /***/
958         public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
959         /***/
960         public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
961 
962         /* New blocks in Unicode 9.0 */
963 
964         /***/
965         public static final int ADLAM_ID = 263; /*[1E900]*/
966         /***/
967         public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
968         /***/
969         public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
970         /***/
971         public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
972         /***/
973         public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
974         /***/
975         public static final int MARCHEN_ID = 268; /*[11C70]*/
976         /***/
977         public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
978         /***/
979         public static final int NEWA_ID = 270; /*[11400]*/
980         /***/
981         public static final int OSAGE_ID = 271; /*[104B0]*/
982         /***/
983         public static final int TANGUT_ID = 272; /*[17000]*/
984         /***/
985         public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/
986 
987         // New blocks in Unicode 10.0
988 
989         /***/
990         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/
991         /***/
992         public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/
993         /***/
994         public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/
995         /***/
996         public static final int NUSHU_ID = 277; /*[1B170]*/
997         /***/
998         public static final int SOYOMBO_ID = 278; /*[11A50]*/
999         /***/
1000         public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/
1001         /***/
1002         public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/
1003 
1004         // New blocks in Unicode 11.0
1005 
1006         /***/
1007         public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/
1008         /***/
1009         public static final int DOGRA_ID = 282; /*[11800]*/
1010         /***/
1011         public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/
1012         /***/
1013         public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/
1014         /***/
1015         public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/
1016         /***/
1017         public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/
1018         /***/
1019         public static final int MAKASAR_ID = 287; /*[11EE0]*/
1020         /***/
1021         public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/
1022         /***/
1023         public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/
1024         /***/
1025         public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/
1026         /***/
1027         public static final int SOGDIAN_ID = 291; /*[10F30]*/
1028 
1029         // New blocks in Unicode 12.0
1030 
1031         /***/
1032         public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/
1033         /***/
1034         public static final int ELYMAIC_ID = 293; /*[10FE0]*/
1035         /***/
1036         public static final int NANDINAGARI_ID = 294; /*[119A0]*/
1037         /***/
1038         public static final int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/
1039         /***/
1040         public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/
1041         /***/
1042         public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/
1043         /***/
1044         public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/
1045         /***/
1046         public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/
1047         /***/
1048         public static final int WANCHO_ID = 300; /*[1E2C0]*/
1049 
1050         // New blocks in Unicode 13.0
1051 
1052         /***/
1053         public static final int CHORASMIAN_ID = 301; /*[10FB0]*/
1054         /***/
1055         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/
1056         /***/
1057         public static final int DIVES_AKURU_ID = 303; /*[11900]*/
1058         /***/
1059         public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/
1060         /***/
1061         public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/
1062         /***/
1063         public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/
1064         /***/
1065         public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/
1066         /***/
1067         public static final int YEZIDI_ID = 308; /*[10E80]*/
1068 
1069         /**
1070          * One more than the highest normal UnicodeBlock ID value; it is also used as the
1071          * length of the internal BLOCKS_ array declared in this class. The highest value
1072          * is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK).
1073          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1074          * @hide unsupported on OHOS
1075          */
1076         @Deprecated
1077         public static final int COUNT = 309;
1078 
1079         // blocks objects ---------------------------------------------------
1080 
1081         /**
1082          * Array of UnicodeBlocks, for easy access in getInstance(int)
1083          */
1084         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
1085 
1086         /** Block object created with the name "NO_BLOCK" and the literal ID 0.
1087          */
1088         public static final UnicodeBlock NO_BLOCK
1089         = new UnicodeBlock("NO_BLOCK", 0);
1090 
1091         /**
1092          */
1093         public static final UnicodeBlock BASIC_LATIN
1094         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
1095         /**
1096          */
1097         public static final UnicodeBlock LATIN_1_SUPPLEMENT
1098         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1099         /**
1100          */
1101         public static final UnicodeBlock LATIN_EXTENDED_A
1102         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1103         /**
1104          */
1105         public static final UnicodeBlock LATIN_EXTENDED_B
1106         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1107         /**
1108          */
1109         public static final UnicodeBlock IPA_EXTENSIONS
1110         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1111         /**
1112          */
1113         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1114         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1115         /**
1116          */
1117         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1118         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1119         /**
1120          * Unicode 3.2 renames this block to "Greek and Coptic".
1121          */
1122         public static final UnicodeBlock GREEK
1123         = new UnicodeBlock("GREEK", GREEK_ID);
1124         /**
1125          */
1126         public static final UnicodeBlock CYRILLIC
1127         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1128         /**
1129          */
1130         public static final UnicodeBlock ARMENIAN
1131         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1132         /**
1133          */
1134         public static final UnicodeBlock HEBREW
1135         = new UnicodeBlock("HEBREW", HEBREW_ID);
1136         /**
1137          */
1138         public static final UnicodeBlock ARABIC
1139         = new UnicodeBlock("ARABIC", ARABIC_ID);
1140         /**
1141          */
1142         public static final UnicodeBlock SYRIAC
1143         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1144         /**
1145          */
1146         public static final UnicodeBlock THAANA
1147         = new UnicodeBlock("THAANA", THAANA_ID);
1148         /**
1149          */
1150         public static final UnicodeBlock DEVANAGARI
1151         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1152         /**
1153          */
1154         public static final UnicodeBlock BENGALI
1155         = new UnicodeBlock("BENGALI", BENGALI_ID);
1156         /**
1157          */
1158         public static final UnicodeBlock GURMUKHI
1159         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1160         /**
1161          */
1162         public static final UnicodeBlock GUJARATI
1163         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1164         /**
1165          */
1166         public static final UnicodeBlock ORIYA
1167         = new UnicodeBlock("ORIYA", ORIYA_ID);
1168         /**
1169          */
1170         public static final UnicodeBlock TAMIL
1171         = new UnicodeBlock("TAMIL", TAMIL_ID);
1172         /**
1173          */
1174         public static final UnicodeBlock TELUGU
1175         = new UnicodeBlock("TELUGU", TELUGU_ID);
1176         /**
1177          */
1178         public static final UnicodeBlock KANNADA
1179         = new UnicodeBlock("KANNADA", KANNADA_ID);
1180         /**
1181          */
1182         public static final UnicodeBlock MALAYALAM
1183         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1184         /**
1185          */
1186         public static final UnicodeBlock SINHALA
1187         = new UnicodeBlock("SINHALA", SINHALA_ID);
1188         /**
1189          */
1190         public static final UnicodeBlock THAI
1191         = new UnicodeBlock("THAI", THAI_ID);
1192         /**
1193          */
1194         public static final UnicodeBlock LAO
1195         = new UnicodeBlock("LAO", LAO_ID);
1196         /**
1197          */
1198         public static final UnicodeBlock TIBETAN
1199         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1200         /**
1201          */
1202         public static final UnicodeBlock MYANMAR
1203         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1204         /**
1205          */
1206         public static final UnicodeBlock GEORGIAN
1207         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1208         /**
1209          */
1210         public static final UnicodeBlock HANGUL_JAMO
1211         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1212         /**
1213          */
1214         public static final UnicodeBlock ETHIOPIC
1215         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1216         /**
1217          */
1218         public static final UnicodeBlock CHEROKEE
1219         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1220         /**
1221          */
1222         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1223         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1224                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1225         /**
1226          */
1227         public static final UnicodeBlock OGHAM
1228         = new UnicodeBlock("OGHAM", OGHAM_ID);
1229         /**
1230          */
1231         public static final UnicodeBlock RUNIC
1232         = new UnicodeBlock("RUNIC", RUNIC_ID);
1233         /**
1234          */
1235         public static final UnicodeBlock KHMER
1236         = new UnicodeBlock("KHMER", KHMER_ID);
1237         /**
1238          */
1239         public static final UnicodeBlock MONGOLIAN
1240         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1241         /**
1242          */
1243         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1244         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1245         /**
1246          */
1247         public static final UnicodeBlock GREEK_EXTENDED
1248         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1249         /**
1250          */
1251         public static final UnicodeBlock GENERAL_PUNCTUATION
1252         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1253         /**
1254          */
1255         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1256         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1257         /**
1258          */
1259         public static final UnicodeBlock CURRENCY_SYMBOLS
1260         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1261         /**
1262          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1263          * Symbols".
1264          */
1265         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1266         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1267         /**
1268          */
1269         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1270         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1271         /**
1272          */
1273         public static final UnicodeBlock NUMBER_FORMS
1274         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1275         /**
1276          */
1277         public static final UnicodeBlock ARROWS
1278         = new UnicodeBlock("ARROWS", ARROWS_ID);
1279         /**
1280          */
1281         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1282         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1283         /**
1284          */
1285         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1286         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1287         /**
1288          */
1289         public static final UnicodeBlock CONTROL_PICTURES
1290         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1291         /**
1292          */
1293         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1294         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1295         /**
1296          */
1297         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1298         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1299         /**
1300          */
1301         public static final UnicodeBlock BOX_DRAWING
1302         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1303         /**
1304          */
1305         public static final UnicodeBlock BLOCK_ELEMENTS
1306         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1307         /**
1308          */
1309         public static final UnicodeBlock GEOMETRIC_SHAPES
1310         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1311         /**
1312          */
1313         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1314         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1315         /**
1316          */
1317         public static final UnicodeBlock DINGBATS
1318         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1319         /**
1320          */
1321         public static final UnicodeBlock BRAILLE_PATTERNS
1322         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1323         /**
1324          */
1325         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1326         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1327         /**
1328          */
1329         public static final UnicodeBlock KANGXI_RADICALS
1330         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1331         /**
1332          */
1333         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1334         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1335                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1336         /**
1337          */
1338         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1339         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1340         /**
1341          */
1342         public static final UnicodeBlock HIRAGANA
1343         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1344         /**
1345          */
1346         public static final UnicodeBlock KATAKANA
1347         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1348         /**
1349          */
1350         public static final UnicodeBlock BOPOMOFO
1351         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1352         /**
1353          */
1354         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1355         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1356         /**
1357          */
1358         public static final UnicodeBlock KANBUN
1359         = new UnicodeBlock("KANBUN", KANBUN_ID);
1360         /**
1361          */
1362         public static final UnicodeBlock BOPOMOFO_EXTENDED
1363         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1364         /**
1365          */
1366         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1367         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1368                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1369         /**
1370          */
1371         public static final UnicodeBlock CJK_COMPATIBILITY
1372         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1373         /**
1374          */
1375         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1376         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1377                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1378         /**
1379          */
1380         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1381         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1382         /**
1383          */
1384         public static final UnicodeBlock YI_SYLLABLES
1385         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1386         /**
1387          */
1388         public static final UnicodeBlock YI_RADICALS
1389         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1390         /**
1391          */
1392         public static final UnicodeBlock HANGUL_SYLLABLES
1393         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1394         /**
1395          */
1396         public static final UnicodeBlock HIGH_SURROGATES
1397         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1398         /**
1399          */
1400         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1401         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1402         /**
1403          */
1404         public static final UnicodeBlock LOW_SURROGATES
1405         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1406         /**
1407          * Same object as {@link #PRIVATE_USE}, which is declared as an alias of this field.
1408          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1409          * and multiple code point ranges had this block.
1410          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1411          * and adds separate blocks for the supplementary PUAs.
1412          */
1413         public static final UnicodeBlock PRIVATE_USE_AREA
1414         = new UnicodeBlock("PRIVATE_USE_AREA",  78); // NOTE(review): literal ID instead of a named *_ID constant - confirm it matches
1415         /**
1416          * Alias of {@link #PRIVATE_USE_AREA}: this field refers to the very same UnicodeBlock object.
1417          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1418          * and multiple code point ranges had this block.
1419          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1420          * and adds separate blocks for the supplementary PUAs.
1421          */
1422         public static final UnicodeBlock PRIVATE_USE
1423         = PRIVATE_USE_AREA;
1424         /**
1425          */
1426         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1427         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1428         /**
1429          */
1430         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1431         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1432         /**
1433          */
1434         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1435         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1436         /**
1437          */
1438         public static final UnicodeBlock COMBINING_HALF_MARKS
1439         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1440         /**
1441          */
1442         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1443         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1444         /**
1445          */
1446         public static final UnicodeBlock SMALL_FORM_VARIANTS
1447         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1448         /**
1449          */
1450         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1451         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1452         /**
1453          */
1454         public static final UnicodeBlock SPECIALS
1455         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1456         /**
1457          */
1458         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1459         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1460         /**
1461          */
1462         public static final UnicodeBlock OLD_ITALIC
1463         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1464         /**
1465          */
1466         public static final UnicodeBlock GOTHIC
1467         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1468         /**
1469          */
1470         public static final UnicodeBlock DESERET
1471         = new UnicodeBlock("DESERET", DESERET_ID);
1472         /**
1473          */
1474         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1475         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1476         /**
1477          */
1478         public static final UnicodeBlock MUSICAL_SYMBOLS
1479         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1480         /**
1481          */
1482         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1483         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1484                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1485         /**
1486          */
1487         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1488         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1489                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1490         /**
1491          */
1492         public static final UnicodeBlock
1493         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1494         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1495                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1496         /**
1497          */
1498         public static final UnicodeBlock TAGS
1499         = new UnicodeBlock("TAGS", TAGS_ID);
1500 
1501         // New blocks in Unicode 3.2
1502 
1503         /** Block object registered under the older "Cyrillic Supplementary" name.
1504          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1505          * It is constructed separately from {@link #CYRILLIC_SUPPLEMENT}; it is not an alias of that field. */
1506         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1507         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1508         /** Block object registered under the newer "Cyrillic Supplement" name.
1509          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1510          * It is constructed separately from {@link #CYRILLIC_SUPPLEMENTARY}; it is not an alias of that field. */
1511         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1512         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1513         /**
1514          */
1515         public static final UnicodeBlock TAGALOG
1516         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1517         /**
1518          */
1519         public static final UnicodeBlock HANUNOO
1520         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1521         /**
1522          */
1523         public static final UnicodeBlock BUHID
1524         = new UnicodeBlock("BUHID", BUHID_ID);
1525         /**
1526          */
1527         public static final UnicodeBlock TAGBANWA
1528         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1529         /**
1530          */
1531         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1532         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1533                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1534         /**
1535          */
1536         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1537         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1538         /**
1539          */
1540         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1541         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1542         /**
1543          */
1544         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1545         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1546                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1547         /**
1548          */
1549         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1550         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1551                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1552         /**
1553          */
1554         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1555         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1556         /**
1557          */
1558         public static final UnicodeBlock VARIATION_SELECTORS
1559         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1560         /**
1561          */
1562         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1563         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1564                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1565         /**
1566          */
1567         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1568         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1569                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1570 
1571         /**
1572          */
1573         public static final UnicodeBlock LIMBU
1574         = new UnicodeBlock("LIMBU", LIMBU_ID);
1575         /**
1576          */
1577         public static final UnicodeBlock TAI_LE
1578         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1579         /**
1580          */
1581         public static final UnicodeBlock KHMER_SYMBOLS
1582         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1583 
1584         /**
1585          */
1586         public static final UnicodeBlock PHONETIC_EXTENSIONS
1587         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1588 
1589         /**
1590          */
1591         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1592         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1593                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1594         /**
1595          */
1596         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1597         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1598         /**
1599          */
1600         public static final UnicodeBlock LINEAR_B_SYLLABARY
1601         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1602         /**
1603          */
1604         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1605         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1606         /**
1607          */
1608         public static final UnicodeBlock AEGEAN_NUMBERS
1609         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1610         /**
1611          */
1612         public static final UnicodeBlock UGARITIC
1613         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1614         /**
1615          */
1616         public static final UnicodeBlock SHAVIAN
1617         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1618         /**
1619          */
1620         public static final UnicodeBlock OSMANYA
1621         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1622         /**
1623          */
1624         public static final UnicodeBlock CYPRIOT_SYLLABARY
1625         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1626         /**
1627          */
1628         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1629         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1630 
1631         /**
1632          */
1633         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1634         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1635 
1636         /* New blocks in Unicode 4.1 */
1637 
1638         /**
1639          */
1640         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
1641                 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
1642                         ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
1643 
1644         /**
1645          */
1646         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
1647                 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
1648 
1649         /**
1650          */
1651         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1652                 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
1653 
1654         /**
1655          */
1656         public static final UnicodeBlock BUGINESE =
1657                 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
1658 
1659         /**
1660          */
1661         public static final UnicodeBlock CJK_STROKES =
1662                 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
1663 
1664         /**
1665          */
1666         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1667                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1668                         COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
1669 
1670         /**
1671          */
1672         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
1673 
1674         /**
1675          */
1676         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1677                 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
1678 
1679         /**
1680          */
1681         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1682                 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
1683 
1684         /**
1685          */
1686         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1687                 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
1688 
1689         /**
1690          */
1691         public static final UnicodeBlock GLAGOLITIC =
1692                 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
1693 
1694         /**
1695          */
1696         public static final UnicodeBlock KHAROSHTHI =
1697                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
1698 
1699         /**
1700          */
1701         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
1702                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
1703 
1704         /**
1705          */
1706         public static final UnicodeBlock NEW_TAI_LUE =
1707                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
1708 
1709         /**
1710          */
1711         public static final UnicodeBlock OLD_PERSIAN =
1712                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
1713 
1714         /**
1715          */
1716         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1717                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1718                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
1719 
1720         /**
1721          */
1722         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
1723                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
1724 
1725         /**
1726          */
1727         public static final UnicodeBlock SYLOTI_NAGRI =
1728                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
1729 
1730         /**
1731          */
1732         public static final UnicodeBlock TIFINAGH =
1733                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
1734 
1735         /**
1736          */
1737         public static final UnicodeBlock VERTICAL_FORMS =
1738                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
1739 
1740         /**
1741          */
1742         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
1743         /**
1744          */
1745         public static final UnicodeBlock BALINESE =
1746                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
1747         /**
1748          */
1749         public static final UnicodeBlock LATIN_EXTENDED_C =
1750                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
1751         /**
1752          */
1753         public static final UnicodeBlock LATIN_EXTENDED_D =
1754                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
1755         /**
1756          */
1757         public static final UnicodeBlock PHAGS_PA =
1758                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
1759         /**
1760          */
1761         public static final UnicodeBlock PHOENICIAN =
1762                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
1763         /**
1764          */
1765         public static final UnicodeBlock CUNEIFORM =
1766                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
1767         /**
1768          */
1769         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
1770                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
1771                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
1772         /**
1773          */
1774         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
1775                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
1776 
1777         /**
1778          */
1779         public static final UnicodeBlock SUNDANESE =
1780                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
1781 
1782         /**
1783          */
1784         public static final UnicodeBlock LEPCHA =
1785                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
1786 
1787         /**
1788          */
1789         public static final UnicodeBlock OL_CHIKI =
1790                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
1791 
1792         /**
1793          */
1794         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
1795                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
1796 
1797         /**
1798          */
1799         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
1800 
1801         /**
1802          */
1803         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
1804                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
1805 
1806         /**
1807          */
1808         public static final UnicodeBlock SAURASHTRA =
1809                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
1810 
1811         /**
1812          */
1813         public static final UnicodeBlock KAYAH_LI =
1814                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
1815 
1816         /**
1817          */
1818         public static final UnicodeBlock REJANG =
1819                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
1820 
1821         /**
1822          */
1823         public static final UnicodeBlock CHAM =
1824                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
1825 
1826         /**
1827          */
1828         public static final UnicodeBlock ANCIENT_SYMBOLS =
1829                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
1830 
1831         /**
1832          */
1833         public static final UnicodeBlock PHAISTOS_DISC =
1834                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
1835 
1836         /**
1837          */
1838         public static final UnicodeBlock LYCIAN =
1839                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
1840 
1841         /**
1842          */
1843         public static final UnicodeBlock CARIAN =
1844                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
1845 
1846         /**
1847          */
1848         public static final UnicodeBlock LYDIAN =
1849                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
1850 
1851         /**
1852          */
1853         public static final UnicodeBlock MAHJONG_TILES =
1854                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
1855 
1856         /**
1857          */
1858         public static final UnicodeBlock DOMINO_TILES =
1859                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
1860 
1861         /* New blocks in Unicode 5.2 */
1862 
1863         /***/
1864         public static final UnicodeBlock SAMARITAN =
1865                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
1866         /***/
1867         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
1868                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
1869                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
1870         /***/
1871         public static final UnicodeBlock TAI_THAM =
1872                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
1873         /***/
1874         public static final UnicodeBlock VEDIC_EXTENSIONS =
1875                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
1876         /***/
1877         public static final UnicodeBlock LISU =
1878                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
1879         /***/
1880         public static final UnicodeBlock BAMUM =
1881                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
1882         /***/
1883         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
1884                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
1885         /***/
1886         public static final UnicodeBlock DEVANAGARI_EXTENDED =
1887                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
1888         /***/
1889         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
1890                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
1891         /***/
1892         public static final UnicodeBlock JAVANESE =
1893                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
1894         /***/
1895         public static final UnicodeBlock MYANMAR_EXTENDED_A =
1896                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
1897         /***/
1898         public static final UnicodeBlock TAI_VIET =
1899                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
1900         /***/
1901         public static final UnicodeBlock MEETEI_MAYEK =
1902                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
1903         /***/
1904         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
1905                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
1906         /***/
1907         public static final UnicodeBlock IMPERIAL_ARAMAIC =
1908                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
1909         /***/
1910         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
1911                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
1912         /***/
1913         public static final UnicodeBlock AVESTAN =
1914                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
1915         /***/
1916         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
1917                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
1918         /***/
1919         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
1920                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
1921         /***/
1922         public static final UnicodeBlock OLD_TURKIC =
1923                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
1924         /***/
1925         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
1926                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
1927         /***/
1928         public static final UnicodeBlock KAITHI =
1929                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
1930         /***/
1931         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
1932                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
1933         /***/
1934         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
1935                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
1936                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
1937         /***/
1938         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
1939                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
1940                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
1941         /***/
1942         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
1943                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
1944                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
1945 
1946         /* New blocks in Unicode 6.0 */
1947 
1948         /***/
1949         public static final UnicodeBlock MANDAIC =
1950                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
1951         /***/
1952         public static final UnicodeBlock BATAK =
1953                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
1954         /***/
1955         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
1956                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
1957         /***/
1958         public static final UnicodeBlock BRAHMI =
1959                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
1960         /***/
1961         public static final UnicodeBlock BAMUM_SUPPLEMENT =
1962                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
1963         /***/
1964         public static final UnicodeBlock KANA_SUPPLEMENT =
1965                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
1966         /***/
1967         public static final UnicodeBlock PLAYING_CARDS =
1968                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
1969         /***/
1970         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
1971                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
1972                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
1973         /***/
1974         public static final UnicodeBlock EMOTICONS =
1975                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
1976         /***/
1977         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
1978                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
1979         /***/
1980         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
1981                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
1982         /***/
1983         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
1984                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
1985                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
1986 
1987         /* New blocks in Unicode 6.1 */
1988 
1989         /***/
1990         public static final UnicodeBlock ARABIC_EXTENDED_A =
1991                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
1992         /***/
1993         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
1994                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
1995         /***/
1996         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
1997         /***/
1998         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
1999                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
2000         /***/
2001         public static final UnicodeBlock MEROITIC_CURSIVE =
2002                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
2003         /***/
2004         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2005                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
2006         /***/
2007         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
2008         /***/
2009         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
2010         /***/
2011         public static final UnicodeBlock SORA_SOMPENG =
2012                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
2013         /***/
2014         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2015                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
2016         /***/
2017         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
2018 
2019         /* New blocks in Unicode 7.0 */
2020 
2021         /***/
2022         public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
2023         /***/
2024         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2025                 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
2026         /***/
2027         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2028                 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
2029         /***/
2030         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2031                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
2032         /***/
2033         public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
2034         /***/
2035         public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
2036         /***/
2037         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2038                 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
2039         /***/
2040         public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
2041         /***/
2042         public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
2043         /***/
2044         public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
2045         /***/
2046         public static final UnicodeBlock LATIN_EXTENDED_E =
2047                 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
2048         /***/
2049         public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
2050         /***/
2051         public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
2052         /***/
2053         public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
2054         /***/
2055         public static final UnicodeBlock MENDE_KIKAKUI =
2056                 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
2057         /***/
2058         public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
2059         /***/
2060         public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
2061         /***/
2062         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2063                 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
2064         /***/
2065         public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
2066         /***/
2067         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2068                 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
2069         /***/
2070         public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
2071         /***/
2072         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2073                 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
2074         /***/
2075         public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
2076         /***/
2077         public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
2078         /***/
2079         public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
2080         /***/
2081         public static final UnicodeBlock PSALTER_PAHLAVI =
2082                 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
2083         /***/
2084         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2085                 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
2086         /***/
2087         public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
2088         /***/
2089         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2090                 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
2091         /***/
2092         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2093                 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
2094         /***/
2095         public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
2096         /***/
2097         public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
2098 
2099         /* New blocks in Unicode 8.0 */
2100 
2101         /***/
2102         public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
2103         /***/
2104         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2105                 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
2106         /***/
2107         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2108                 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
2109         /***/
2110         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2111                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2112                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
2113         /***/
2114         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2115                 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
2116         /***/
2117         public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
2118         /***/
2119         public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
2120         /***/
2121         public static final UnicodeBlock OLD_HUNGARIAN =
2122                 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
2123         /***/
2124         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2125                 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2126                         SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
2127         /***/
2128         public static final UnicodeBlock SUTTON_SIGNWRITING =
2129                 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
2130 
2131         /* New blocks in Unicode 9.0 */
2132 
2133         /***/
2134         public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
2135         /***/
2136         public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
2137         /***/
2138         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
2139                 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
2140         /***/
2141         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
2142                 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
2143         /***/
2144         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
2145                 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
2146         /***/
2147         public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
2148         /***/
2149         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2150                 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
2151         /***/
2152         public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
2153         /***/
2154         public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
2155         /***/
2156         public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
2157         /***/
2158         public static final UnicodeBlock TANGUT_COMPONENTS =
2159                 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/
2160 
2161         // New blocks in Unicode 10.0
2162 
2163         /***/
2164         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
2165                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/
2166         /***/
2167         public static final UnicodeBlock KANA_EXTENDED_A =
2168                 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/
2169         /***/
2170         public static final UnicodeBlock MASARAM_GONDI =
2171                 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/
2172         /***/
2173         public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/
2174         /***/
2175         public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/
2176         /***/
2177         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
2178                 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/
2179         /***/
2180         public static final UnicodeBlock ZANABAZAR_SQUARE =
2181                 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/
2182 
2183         // New blocks in Unicode 11.0
2184 
2185         /***/
2186         public static final UnicodeBlock CHESS_SYMBOLS =
2187                 new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/
2188         /***/
2189         public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/
2190         /***/
2191         public static final UnicodeBlock GEORGIAN_EXTENDED =
2192                 new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/
2193         /***/
2194         public static final UnicodeBlock GUNJALA_GONDI =
2195                 new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/
2196         /***/
2197         public static final UnicodeBlock HANIFI_ROHINGYA =
2198                 new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/
2199         /***/
2200         public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
2201                 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/
2202         /***/
2203         public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/
2204         /***/
2205         public static final UnicodeBlock MAYAN_NUMERALS =
2206                 new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/
2207         /***/
2208         public static final UnicodeBlock MEDEFAIDRIN =
2209                 new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/
2210         /***/
2211         public static final UnicodeBlock OLD_SOGDIAN =
2212                 new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/
2213         /***/
2214         public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/
2215 
2216         // New blocks in Unicode 12.0
2217 
2218         /***/
2219         public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
2220                 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/
2221         /***/
2222         public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/
2223         /***/
2224         public static final UnicodeBlock NANDINAGARI =
2225                 new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/
2226         /***/
2227         public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
2228                 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/
2229         /***/
2230         public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
2231                 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/
2232         /***/
2233         public static final UnicodeBlock SMALL_KANA_EXTENSION =
2234                 new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/
2235         /***/
2236         public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
2237                 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/
2238         /***/
2239         public static final UnicodeBlock TAMIL_SUPPLEMENT =
2240                 new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/
2241         /***/
2242         public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/
2243 
2244         // New blocks in Unicode 13.0
2245 
2246         /***/
2247         public static final UnicodeBlock CHORASMIAN =
2248                 new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/
2249         /***/
2250         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
2251                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
2252                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/
2253         /***/
2254         public static final UnicodeBlock DIVES_AKURU =
2255                 new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/
2256         /***/
2257         public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
2258                 new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/
2259         /***/
2260         public static final UnicodeBlock LISU_SUPPLEMENT =
2261                 new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/
2262         /***/
2263         public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
2264                 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/
2265         /***/
2266         public static final UnicodeBlock TANGUT_SUPPLEMENT =
2267                 new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/
2268         /***/
2269         public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/
2270 
2271         /**
2272          */
2273         public static final UnicodeBlock INVALID_CODE
2274         = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2275 
2276         static {
2277             for (int blockId = 0; blockId < COUNT; ++blockId) {
2278                 if (BLOCKS_[blockId] == null) {
2279                     throw new java.lang.IllegalStateException(
2280                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2281                 }
2282             }
2283         }
2284 
2285         // public methods --------------------------------------------------
2286 
2287         /**
2288          * <strong>[icu]</strong> Returns the only instance of the UnicodeBlock with the argument ID.
2289          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
2290          * @param id UnicodeBlock ID
2291          * @return the only instance of the UnicodeBlock with the argument ID
2292          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
2293          *         returned.
2294          */
getInstance(int id)2295         public static UnicodeBlock getInstance(int id)
2296         {
2297             if (id >= 0 && id < BLOCKS_.length) {
2298                 return BLOCKS_[id];
2299             }
2300             return INVALID_CODE;
2301         }
2302 
2303         /**
2304          * Returns the Unicode allocation block that contains the code point,
2305          * or null if the code point is not a member of a defined block.
2306          * @param ch code point to be tested
2307          * @return the Unicode allocation block that contains the code point
2308          */
of(int ch)2309         public static UnicodeBlock of(int ch)
2310         {
2311             if (ch > MAX_VALUE) {
2312                 return INVALID_CODE;
2313             }
2314 
2315             return UnicodeBlock.getInstance(
2316                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2317         }
2318 
2319         /**
2320          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
2321          * Returns the Unicode block with the given name. <strong>[icu] Note:</strong> Unlike
2322          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
2323          * against the official UCD name and the Java block name
2324          * (ignoring case).
2325          * @param blockName the name of the block to match
2326          * @return the UnicodeBlock with that name
2327          * @throws IllegalArgumentException if the blockName could not be matched
2328          */
forName(String blockName)2329         public static final UnicodeBlock forName(String blockName) {
2330             Map<String, UnicodeBlock> m = null;
2331             if (mref != null) {
2332                 m = mref.get();
2333             }
2334             if (m == null) {
2335                 m = new HashMap<>(BLOCKS_.length);
2336                 for (int i = 0; i < BLOCKS_.length; ++i) {
2337                     UnicodeBlock b = BLOCKS_[i];
2338                     String name = trimBlockName(
2339                             getPropertyValueName(UProperty.BLOCK, b.getID(),
2340                                     UProperty.NameChoice.LONG));
2341                     m.put(name, b);
2342                 }
2343                 mref = new SoftReference<>(m);
2344             }
2345             UnicodeBlock b = m.get(trimBlockName(blockName));
2346             if (b == null) {
2347                 throw new IllegalArgumentException();
2348             }
2349             return b;
2350         }
2351         private static SoftReference<Map<String, UnicodeBlock>> mref;
2352 
trimBlockName(String name)2353         private static String trimBlockName(String name) {
2354             String upper = name.toUpperCase(Locale.ENGLISH);
2355             StringBuilder result = new StringBuilder(upper.length());
2356             for (int i = 0; i < upper.length(); i++) {
2357                 char c = upper.charAt(i);
2358                 if (c != ' ' && c != '_' && c != '-') {
2359                     result.append(c);
2360                 }
2361             }
2362             return result.toString();
2363         }
2364 
2365         /**
2366          * {icu} Returns the type ID of this Unicode block
2367          * @return integer type ID of this Unicode block
2368          */
getID()2369         public int getID()
2370         {
2371             return m_id_;
2372         }
2373 
2374         // private data members ---------------------------------------------
2375 
2376         /**
2377          * Identification code for this UnicodeBlock
2378          */
2379         private int m_id_;
2380 
2381         // private constructor ----------------------------------------------
2382 
2383         /**
2384          * UnicodeBlock constructor
2385          * @param name name of this UnicodeBlock
2386          * @param id unique id of this UnicodeBlock
2387          * @exception NullPointerException if name is <code>null</code>
2388          */
UnicodeBlock(String name, int id)2389         private UnicodeBlock(String name, int id)
2390         {
2391             super(name);
2392             m_id_ = id;
2393             if (id >= 0) {
2394                 BLOCKS_[id] = this;
2395             }
2396         }
2397     }
2398 
2399     /**
2400      * East Asian Width constants.
2401      * @see UProperty#EAST_ASIAN_WIDTH
2402      * @see UCharacter#getIntPropertyValue
2403      */
2404     public static interface EastAsianWidth
2405     {
2406         /**
2407          */
2408         public static final int NEUTRAL = 0;
2409         /**
2410          */
2411         public static final int AMBIGUOUS = 1;
2412         /**
2413          */
2414         public static final int HALFWIDTH = 2;
2415         /**
2416          */
2417         public static final int FULLWIDTH = 3;
2418         /**
2419          */
2420         public static final int NARROW = 4;
2421         /**
2422          */
2423         public static final int WIDE = 5;
2424         /**
2425          * One more than the highest normal EastAsianWidth value.
2426          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
2427          *
2428          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2429          * @hide unsupported on OHOS
2430          */
2431         @Deprecated
2432         public static final int COUNT = 6;
2433     }
2434 
2435     /**
2436      * Decomposition Type constants.
2437      * @see UProperty#DECOMPOSITION_TYPE
2438      */
2439     public static interface DecompositionType
2440     {
2441         /**
2442          */
2443         public static final int NONE = 0;
2444         /**
2445          */
2446         public static final int CANONICAL = 1;
2447         /**
2448          */
2449         public static final int COMPAT = 2;
2450         /**
2451          */
2452         public static final int CIRCLE = 3;
2453         /**
2454          */
2455         public static final int FINAL = 4;
2456         /**
2457          */
2458         public static final int FONT = 5;
2459         /**
2460          */
2461         public static final int FRACTION = 6;
2462         /**
2463          */
2464         public static final int INITIAL = 7;
2465         /**
2466          */
2467         public static final int ISOLATED = 8;
2468         /**
2469          */
2470         public static final int MEDIAL = 9;
2471         /**
2472          */
2473         public static final int NARROW = 10;
2474         /**
2475          */
2476         public static final int NOBREAK = 11;
2477         /**
2478          */
2479         public static final int SMALL = 12;
2480         /**
2481          */
2482         public static final int SQUARE = 13;
2483         /**
2484          */
2485         public static final int SUB = 14;
2486         /**
2487          */
2488         public static final int SUPER = 15;
2489         /**
2490          */
2491         public static final int VERTICAL = 16;
2492         /**
2493          */
2494         public static final int WIDE = 17;
2495         /**
2496          * One more than the highest normal DecompositionType value.
2497          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
2498          *
2499          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2500          * @hide unsupported on OHOS
2501          */
2502         @Deprecated
2503         public static final int COUNT = 18;
2504     }
2505 
2506     /**
2507      * Joining Type constants.
2508      * @see UProperty#JOINING_TYPE
2509      */
2510     public static interface JoiningType
2511     {
2512         /**
2513          */
2514         public static final int NON_JOINING = 0;
2515         /**
2516          */
2517         public static final int JOIN_CAUSING = 1;
2518         /**
2519          */
2520         public static final int DUAL_JOINING = 2;
2521         /**
2522          */
2523         public static final int LEFT_JOINING = 3;
2524         /**
2525          */
2526         public static final int RIGHT_JOINING = 4;
2527         /**
2528          */
2529         public static final int TRANSPARENT = 5;
2530         /**
2531          * One more than the highest normal JoiningType value.
2532          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
2533          *
2534          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2535          * @hide unsupported on OHOS
2536          */
2537         @Deprecated
2538         public static final int COUNT = 6;
2539     }
2540 
2541     /**
2542      * Joining Group constants.
2543      * @see UProperty#JOINING_GROUP
2544      */
2545     public static interface JoiningGroup
2546     {
2547         /**
2548          */
2549         public static final int NO_JOINING_GROUP = 0;
2550         /**
2551          */
2552         public static final int AIN = 1;
2553         /**
2554          */
2555         public static final int ALAPH = 2;
2556         /**
2557          */
2558         public static final int ALEF = 3;
2559         /**
2560          */
2561         public static final int BEH = 4;
2562         /**
2563          */
2564         public static final int BETH = 5;
2565         /**
2566          */
2567         public static final int DAL = 6;
2568         /**
2569          */
2570         public static final int DALATH_RISH = 7;
2571         /**
2572          */
2573         public static final int E = 8;
2574         /**
2575          */
2576         public static final int FEH = 9;
2577         /**
2578          */
2579         public static final int FINAL_SEMKATH = 10;
2580         /**
2581          */
2582         public static final int GAF = 11;
2583         /**
2584          */
2585         public static final int GAMAL = 12;
2586         /**
2587          */
2588         public static final int HAH = 13;
2589         /***/
2590         public static final int TEH_MARBUTA_GOAL = 14;
2591         /**
2592          */
2593         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
2594         /**
2595          */
2596         public static final int HE = 15;
2597         /**
2598          */
2599         public static final int HEH = 16;
2600         /**
2601          */
2602         public static final int HEH_GOAL = 17;
2603         /**
2604          */
2605         public static final int HETH = 18;
2606         /**
2607          */
2608         public static final int KAF = 19;
2609         /**
2610          */
2611         public static final int KAPH = 20;
2612         /**
2613          */
2614         public static final int KNOTTED_HEH = 21;
2615         /**
2616          */
2617         public static final int LAM = 22;
2618         /**
2619          */
2620         public static final int LAMADH = 23;
2621         /**
2622          */
2623         public static final int MEEM = 24;
2624         /**
2625          */
2626         public static final int MIM = 25;
2627         /**
2628          */
2629         public static final int NOON = 26;
2630         /**
2631          */
2632         public static final int NUN = 27;
2633         /**
2634          */
2635         public static final int PE = 28;
2636         /**
2637          */
2638         public static final int QAF = 29;
2639         /**
2640          */
2641         public static final int QAPH = 30;
2642         /**
2643          */
2644         public static final int REH = 31;
2645         /**
2646          */
2647         public static final int REVERSED_PE = 32;
2648         /**
2649          */
2650         public static final int SAD = 33;
2651         /**
2652          */
2653         public static final int SADHE = 34;
2654         /**
2655          */
2656         public static final int SEEN = 35;
2657         /**
2658          */
2659         public static final int SEMKATH = 36;
2660         /**
2661          */
2662         public static final int SHIN = 37;
2663         /**
2664          */
2665         public static final int SWASH_KAF = 38;
2666         /**
2667          */
2668         public static final int SYRIAC_WAW = 39;
2669         /**
2670          */
2671         public static final int TAH = 40;
2672         /**
2673          */
2674         public static final int TAW = 41;
2675         /**
2676          */
2677         public static final int TEH_MARBUTA = 42;
2678         /**
2679          */
2680         public static final int TETH = 43;
2681         /**
2682          */
2683         public static final int WAW = 44;
2684         /**
2685          */
2686         public static final int YEH = 45;
2687         /**
2688          */
2689         public static final int YEH_BARREE = 46;
2690         /**
2691          */
2692         public static final int YEH_WITH_TAIL = 47;
2693         /**
2694          */
2695         public static final int YUDH = 48;
2696         /**
2697          */
2698         public static final int YUDH_HE = 49;
2699         /**
2700          */
2701         public static final int ZAIN = 50;
2702         /**
2703          */
2704         public static final int FE = 51;
2705         /**
2706          */
2707         public static final int KHAPH = 52;
2708         /**
2709          */
2710         public static final int ZHAIN = 53;
2711         /**
2712          */
2713         public static final int BURUSHASKI_YEH_BARREE = 54;
2714         /***/
2715         public static final int FARSI_YEH = 55;
2716         /***/
2717         public static final int NYA = 56;
2718         /***/
2719         public static final int ROHINGYA_YEH = 57;
2720 
2721         /***/
2722         public static final int MANICHAEAN_ALEPH = 58;
2723         /***/
2724         public static final int MANICHAEAN_AYIN = 59;
2725         /***/
2726         public static final int MANICHAEAN_BETH = 60;
2727         /***/
2728         public static final int MANICHAEAN_DALETH = 61;
2729         /***/
2730         public static final int MANICHAEAN_DHAMEDH = 62;
2731         /***/
2732         public static final int MANICHAEAN_FIVE = 63;
2733         /***/
2734         public static final int MANICHAEAN_GIMEL = 64;
2735         /***/
2736         public static final int MANICHAEAN_HETH = 65;
2737         /***/
2738         public static final int MANICHAEAN_HUNDRED = 66;
2739         /***/
2740         public static final int MANICHAEAN_KAPH = 67;
2741         /***/
2742         public static final int MANICHAEAN_LAMEDH = 68;
2743         /***/
2744         public static final int MANICHAEAN_MEM = 69;
2745         /***/
2746         public static final int MANICHAEAN_NUN = 70;
2747         /***/
2748         public static final int MANICHAEAN_ONE = 71;
2749         /***/
2750         public static final int MANICHAEAN_PE = 72;
2751         /***/
2752         public static final int MANICHAEAN_QOPH = 73;
2753         /***/
2754         public static final int MANICHAEAN_RESH = 74;
2755         /***/
2756         public static final int MANICHAEAN_SADHE = 75;
2757         /***/
2758         public static final int MANICHAEAN_SAMEKH = 76;
2759         /***/
2760         public static final int MANICHAEAN_TAW = 77;
2761         /***/
2762         public static final int MANICHAEAN_TEN = 78;
2763         /***/
2764         public static final int MANICHAEAN_TETH = 79;
2765         /***/
2766         public static final int MANICHAEAN_THAMEDH = 80;
2767         /***/
2768         public static final int MANICHAEAN_TWENTY = 81;
2769         /***/
2770         public static final int MANICHAEAN_WAW = 82;
2771         /***/
2772         public static final int MANICHAEAN_YODH = 83;
2773         /***/
2774         public static final int MANICHAEAN_ZAYIN = 84;
2775         /***/
2776         public static final int STRAIGHT_WAW = 85;
2777 
2778         /***/
2779         public static final int AFRICAN_FEH = 86;
2780         /***/
2781         public static final int AFRICAN_NOON = 87;
2782         /***/
2783         public static final int AFRICAN_QAF = 88;
2784 
2785         /***/
2786         public static final int MALAYALAM_BHA = 89;
2787         /***/
2788         public static final int MALAYALAM_JA = 90;
2789         /***/
2790         public static final int MALAYALAM_LLA = 91;
2791         /***/
2792         public static final int MALAYALAM_LLLA = 92;
2793         /***/
2794         public static final int MALAYALAM_NGA = 93;
2795         /***/
2796         public static final int MALAYALAM_NNA = 94;
2797         /***/
2798         public static final int MALAYALAM_NNNA = 95;
2799         /***/
2800         public static final int MALAYALAM_NYA = 96;
2801         /***/
2802         public static final int MALAYALAM_RA = 97;
2803         /***/
2804         public static final int MALAYALAM_SSA = 98;
2805         /***/
2806         public static final int MALAYALAM_TTA = 99;
2807 
2808         /***/
2809         public static final int HANIFI_ROHINGYA_KINNA_YA = 100;
2810         /***/
2811         public static final int HANIFI_ROHINGYA_PA = 101;
2812 
2813         /**
2814          * One more than the highest normal JoiningGroup value.
2815          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup).
2816          *
2817          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2818          * @hide unsupported on OHOS
2819          */
2820         @Deprecated
2821         public static final int COUNT = 102;
2822     }
2823 
2824     /**
2825      * Grapheme Cluster Break constants.
2826      * @see UProperty#GRAPHEME_CLUSTER_BREAK
2827      */
2828     public static interface GraphemeClusterBreak {
2829         /**
2830          */
2831         public static final int OTHER = 0;
2832         /**
2833          */
2834         public static final int CONTROL = 1;
2835         /**
2836          */
2837         public static final int CR = 2;
2838         /**
2839          */
2840         public static final int EXTEND = 3;
2841         /**
2842          */
2843         public static final int L = 4;
2844         /**
2845          */
2846         public static final int LF = 5;
2847         /**
2848          */
2849         public static final int LV = 6;
2850         /**
2851          */
2852         public static final int LVT = 7;
2853         /**
2854          */
2855         public static final int T = 8;
2856         /**
2857          */
2858         public static final int V = 9;
2859         /**
2860          */
2861         public static final int SPACING_MARK = 10;
2862         /**
2863          */
2864         public static final int PREPEND = 11;
2865         /***/
2866         public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2867         /***/
2868         public static final int E_BASE = 13;          /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
2869         /***/
2870         public static final int E_BASE_GAZ = 14;      /*[EBG]*/
2871         /***/
2872         public static final int E_MODIFIER = 15;      /*[EM]*/
2873         /***/
2874         public static final int GLUE_AFTER_ZWJ = 16;  /*[GAZ]*/
2875         /***/
2876         public static final int ZWJ = 17;             /*[ZWJ]*/
2877 
2878         /**
2879          * One more than the highest normal GraphemeClusterBreak value.
2880          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
2881          *
2882          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2883          * @hide unsupported on OHOS
2884          */
2885         @Deprecated
2886         public static final int COUNT = 18;
2887     }
2888 
2889     /**
2890      * Word Break constants.
2891      * @see UProperty#WORD_BREAK
2892      */
2893     public static interface WordBreak {
2894         /**
2895          */
2896         public static final int OTHER = 0;
2897         /**
2898          */
2899         public static final int ALETTER = 1;
2900         /**
2901          */
2902         public static final int FORMAT = 2;
2903         /**
2904          */
2905         public static final int KATAKANA = 3;
2906         /**
2907          */
2908         public static final int MIDLETTER = 4;
2909         /**
2910          */
2911         public static final int MIDNUM = 5;
2912         /**
2913          */
2914         public static final int NUMERIC = 6;
2915         /**
2916          */
2917         public static final int EXTENDNUMLET = 7;
2918         /**
2919          */
2920         public static final int CR = 8;
2921         /**
2922          */
2923         public static final int EXTEND = 9;
2924         /**
2925          */
2926         public static final int LF = 10;
2927         /**
2928          */
2929         public static final int MIDNUMLET = 11;
2930         /**
2931          */
2932         public static final int NEWLINE = 12;
2933         /***/
2934         public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
2935         /***/
2936         public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
2937         /***/
2938         public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
2939         /***/
2940         public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
2941         /***/
2942         public static final int E_BASE = 17;           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
2943         /***/
2944         public static final int E_BASE_GAZ = 18;       /*[EBG]*/
2945         /***/
2946         public static final int E_MODIFIER = 19;       /*[EM]*/
2947         /***/
2948         public static final int GLUE_AFTER_ZWJ = 20;   /*[GAZ]*/
2949         /***/
2950         public static final int ZWJ = 21;              /*[ZWJ]*/
2951         /***/
2952         public static final int WSEGSPACE = 22;        /*[WSEGSPACE]*/
2953         /**
2954          * One more than the highest normal WordBreak value.
2955          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
2956          *
2957          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2958          * @hide unsupported on OHOS
2959          */
2960         @Deprecated
2961         public static final int COUNT = 23;
2962     }
2963 
2964     /**
2965      * Sentence Break constants.
2966      * @see UProperty#SENTENCE_BREAK
2967      */
2968     public static interface SentenceBreak {
2969         /**
2970          */
2971         public static final int OTHER = 0;
2972         /**
2973          */
2974         public static final int ATERM = 1;
2975         /**
2976          */
2977         public static final int CLOSE = 2;
2978         /**
2979          */
2980         public static final int FORMAT = 3;
2981         /**
2982          */
2983         public static final int LOWER = 4;
2984         /**
2985          */
2986         public static final int NUMERIC = 5;
2987         /**
2988          */
2989         public static final int OLETTER = 6;
2990         /**
2991          */
2992         public static final int SEP = 7;
2993         /**
2994          */
2995         public static final int SP = 8;
2996         /**
2997          */
2998         public static final int STERM = 9;
2999         /**
3000          */
3001         public static final int UPPER = 10;
3002         /**
3003          */
3004         public static final int CR = 11;
3005         /**
3006          */
3007         public static final int EXTEND = 12;
3008         /**
3009          */
3010         public static final int LF = 13;
3011         /**
3012          */
3013         public static final int SCONTINUE = 14;
3014         /**
3015          * One more than the highest normal SentenceBreak value.
3016          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
3017          *
3018          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3019          * @hide unsupported on OHOS
3020          */
3021         @Deprecated
3022         public static final int COUNT = 15;
3023     }
3024 
3025     /**
3026      * Line Break constants.
3027      * @see UProperty#LINE_BREAK
3028      */
3029     public static interface LineBreak
3030     {
3031         /**
3032          */
3033         public static final int UNKNOWN = 0;
3034         /**
3035          */
3036         public static final int AMBIGUOUS = 1;
3037         /**
3038          */
3039         public static final int ALPHABETIC = 2;
3040         /**
3041          */
3042         public static final int BREAK_BOTH = 3;
3043         /**
3044          */
3045         public static final int BREAK_AFTER = 4;
3046         /**
3047          */
3048         public static final int BREAK_BEFORE = 5;
3049         /**
3050          */
3051         public static final int MANDATORY_BREAK = 6;
3052         /**
3053          */
3054         public static final int CONTINGENT_BREAK = 7;
3055         /**
3056          */
3057         public static final int CLOSE_PUNCTUATION = 8;
3058         /**
3059          */
3060         public static final int COMBINING_MARK = 9;
3061         /**
3062          */
3063         public static final int CARRIAGE_RETURN = 10;
3064         /**
3065          */
3066         public static final int EXCLAMATION = 11;
3067         /**
3068          */
3069         public static final int GLUE = 12;
3070         /**
3071          */
3072         public static final int HYPHEN = 13;
3073         /**
3074          */
3075         public static final int IDEOGRAPHIC = 14;
3076         /**
3077          * @see #INSEPARABLE
3078          */
3079         public static final int INSEPERABLE = 15;
3080         /**
3081          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
3082          */
3083         public static final int INSEPARABLE = 15;
3084         /**
3085          */
3086         public static final int INFIX_NUMERIC = 16;
3087         /**
3088          */
3089         public static final int LINE_FEED = 17;
3090         /**
3091          */
3092         public static final int NONSTARTER = 18;
3093         /**
3094          */
3095         public static final int NUMERIC = 19;
3096         /**
3097          */
3098         public static final int OPEN_PUNCTUATION = 20;
3099         /**
3100          */
3101         public static final int POSTFIX_NUMERIC = 21;
3102         /**
3103          */
3104         public static final int PREFIX_NUMERIC = 22;
3105         /**
3106          */
3107         public static final int QUOTATION = 23;
3108         /**
3109          */
3110         public static final int COMPLEX_CONTEXT = 24;
3111         /**
3112          */
3113         public static final int SURROGATE = 25;
3114         /**
3115          */
3116         public static final int SPACE = 26;
3117         /**
3118          */
3119         public static final int BREAK_SYMBOLS = 27;
3120         /**
3121          */
3122         public static final int ZWSPACE = 28;
3123         /**
3124          */
3125         public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
3126         /**
3127          */
3128         public static final int WORD_JOINER = 30;      /*[WJ]*/
3129         /**
3130          */
3131         public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
3132         /**
3133          */
3134         public static final int H3 = 32;
3135         /**
3136          */
3137         public static final int JL = 33;
3138         /**
3139          */
3140         public static final int JT = 34;
3141         /**
3142          */
3143         public static final int JV = 35;
3144         /***/
3145         public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
3146         /***/
3147         public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
3148         /***/
3149         public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
3150         /***/
3151         public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3152         /***/
3153         public static final int E_BASE = 40;  /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3154         /***/
3155         public static final int E_MODIFIER = 41;  /*[EM]*/
3156         /***/
3157         public static final int ZWJ = 42;  /*[ZWJ]*/
3158         /**
3159          * One more than the highest normal LineBreak value.
3160          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
3161          *
3162          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3163          * @hide unsupported on OHOS
3164          */
3165         @Deprecated
3166         public static final int COUNT = 43;
3167     }
3168 
3169     /**
3170      * Numeric Type constants.
3171      * @see UProperty#NUMERIC_TYPE
3172      */
3173     public static interface NumericType
3174     {
3175         /**
3176          */
3177         public static final int NONE = 0;
3178         /**
3179          */
3180         public static final int DECIMAL = 1;
3181         /**
3182          */
3183         public static final int DIGIT = 2;
3184         /**
3185          */
3186         public static final int NUMERIC = 3;
3187         /**
3188          * One more than the highest normal NumericType value.
3189          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE).
3190          *
3191          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3192          * @hide unsupported on OHOS
3193          */
3194         @Deprecated
3195         public static final int COUNT = 4;
3196     }
3197 
3198     /**
3199      * Hangul Syllable Type constants.
3200      *
3201      * @see UProperty#HANGUL_SYLLABLE_TYPE
3202      */
3203     public static interface HangulSyllableType
3204     {
3205         /**
3206          */
3207         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
3208         /**
3209          */
3210         public static final int LEADING_JAMO        = 1;   /*[L]*/
3211         /**
3212          */
3213         public static final int VOWEL_JAMO          = 2;   /*[V]*/
3214         /**
3215          */
3216         public static final int TRAILING_JAMO       = 3;   /*[T]*/
3217         /**
3218          */
3219         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
3220         /**
3221          */
3222         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
3223         /**
3224          * One more than the highest normal HangulSyllableType value.
3225          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
3226          *
3227          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3228          * @hide unsupported on OHOS
3229          */
3230         @Deprecated
3231         public static final int COUNT               = 6;
3232     }
3233 
3234     /**
3235      * Bidi Paired Bracket Type constants.
3236      *
3237      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
3238      */
3239     public static interface BidiPairedBracketType {
3240         /**
3241          * Not a paired bracket.
3242          */
3243         public static final int NONE = 0;
3244         /**
3245          * Open paired bracket.
3246          */
3247         public static final int OPEN = 1;
3248         /**
3249          * Close paired bracket.
3250          */
3251         public static final int CLOSE = 2;
3252         /**
3253          * One more than the highest normal BidiPairedBracketType value.
3254          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
3255          *
3256          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3257          * @hide unsupported on OHOS
3258          */
3259         @Deprecated
3260         public static final int COUNT = 3;
3261     }
3262 
3263     /**
3264      * Indic Positional Category constants.
3265      *
3266      * @see UProperty#INDIC_POSITIONAL_CATEGORY
3267      */
3268     public static interface IndicPositionalCategory {
3269         /***/
3270         public static final int NA = 0;
3271         /***/
3272         public static final int BOTTOM = 1;
3273         /***/
3274         public static final int BOTTOM_AND_LEFT = 2;
3275         /***/
3276         public static final int BOTTOM_AND_RIGHT = 3;
3277         /***/
3278         public static final int LEFT = 4;
3279         /***/
3280         public static final int LEFT_AND_RIGHT = 5;
3281         /***/
3282         public static final int OVERSTRUCK = 6;
3283         /***/
3284         public static final int RIGHT = 7;
3285         /***/
3286         public static final int TOP = 8;
3287         /***/
3288         public static final int TOP_AND_BOTTOM = 9;
3289         /***/
3290         public static final int TOP_AND_BOTTOM_AND_RIGHT = 10;
3291         /***/
3292         public static final int TOP_AND_LEFT = 11;
3293         /***/
3294         public static final int TOP_AND_LEFT_AND_RIGHT = 12;
3295         /***/
3296         public static final int TOP_AND_RIGHT = 13;
3297         /***/
3298         public static final int VISUAL_ORDER_LEFT = 14;
3299         /***/
3300         public static final int TOP_AND_BOTTOM_AND_LEFT = 15;
3301     }
3302 
3303     /**
3304      * Indic Syllabic Category constants.
3305      *
3306      * @see UProperty#INDIC_SYLLABIC_CATEGORY
3307      */
3308     public static interface IndicSyllabicCategory {
3309         /***/
3310         public static final int OTHER = 0;
3311         /***/
3312         public static final int AVAGRAHA = 1;
3313         /***/
3314         public static final int BINDU = 2;
3315         /***/
3316         public static final int BRAHMI_JOINING_NUMBER = 3;
3317         /***/
3318         public static final int CANTILLATION_MARK = 4;
3319         /***/
3320         public static final int CONSONANT = 5;
3321         /***/
3322         public static final int CONSONANT_DEAD = 6;
3323         /***/
3324         public static final int CONSONANT_FINAL = 7;
3325         /***/
3326         public static final int CONSONANT_HEAD_LETTER = 8;
3327         /***/
3328         public static final int CONSONANT_INITIAL_POSTFIXED = 9;
3329         /***/
3330         public static final int CONSONANT_KILLER = 10;
3331         /***/
3332         public static final int CONSONANT_MEDIAL = 11;
3333         /***/
3334         public static final int CONSONANT_PLACEHOLDER = 12;
3335         /***/
3336         public static final int CONSONANT_PRECEDING_REPHA = 13;
3337         /***/
3338         public static final int CONSONANT_PREFIXED = 14;
3339         /***/
3340         public static final int CONSONANT_SUBJOINED = 15;
3341         /***/
3342         public static final int CONSONANT_SUCCEEDING_REPHA = 16;
3343         /***/
3344         public static final int CONSONANT_WITH_STACKER = 17;
3345         /***/
3346         public static final int GEMINATION_MARK = 18;
3347         /***/
3348         public static final int INVISIBLE_STACKER = 19;
3349         /***/
3350         public static final int JOINER = 20;
3351         /***/
3352         public static final int MODIFYING_LETTER = 21;
3353         /***/
3354         public static final int NON_JOINER = 22;
3355         /***/
3356         public static final int NUKTA = 23;
3357         /***/
3358         public static final int NUMBER = 24;
3359         /***/
3360         public static final int NUMBER_JOINER = 25;
3361         /***/
3362         public static final int PURE_KILLER = 26;
3363         /***/
3364         public static final int REGISTER_SHIFTER = 27;
3365         /***/
3366         public static final int SYLLABLE_MODIFIER = 28;
3367         /***/
3368         public static final int TONE_LETTER = 29;
3369         /***/
3370         public static final int TONE_MARK = 30;
3371         /***/
3372         public static final int VIRAMA = 31;
3373         /***/
3374         public static final int VISARGA = 32;
3375         /***/
3376         public static final int VOWEL = 33;
3377         /***/
3378         public static final int VOWEL_DEPENDENT = 34;
3379         /***/
3380         public static final int VOWEL_INDEPENDENT = 35;
3381     }
3382 
3383     /**
3384      * Vertical Orientation constants.
3385      *
3386      * @see UProperty#VERTICAL_ORIENTATION
3387      */
3388     public static interface VerticalOrientation {
3389         /***/
3390         public static final int ROTATED = 0;
3391         /***/
3392         public static final int TRANSFORMED_ROTATED = 1;
3393         /***/
3394         public static final int TRANSFORMED_UPRIGHT = 2;
3395         /***/
3396         public static final int UPRIGHT = 3;
3397     }
3398 
3399     // public data members -----------------------------------------------
3400 
3401     /**
3402      * The lowest Unicode code point value, constant 0.
3403      * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
3404      */
3405     public static final int MIN_VALUE = Character.MIN_CODE_POINT;
3406 
3407     /**
3408      * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
3409      * Same as {@link Character#MAX_CODE_POINT}.
3410      *
3411      * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
3412      * which is still a char with the value U+FFFF.
3413      */
3414     public static final int MAX_VALUE = Character.MAX_CODE_POINT;
3415 
3416     /**
3417      * The minimum value for Supplementary code points, constant U+10000.
3418      * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
3419      */
3420     public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
3421 
3422     /**
3423      * Unicode value used when translating into Unicode encoding form and there
3424      * is no existing character.
3425      */
3426     public static final int REPLACEMENT_CHAR = '\uFFFD';
3427 
3428     /**
3429      * Special value that is returned by getUnicodeNumericValue(int) when no
3430      * numeric value is defined for a code point.
3431      * @see #getUnicodeNumericValue
3432      */
3433     public static final double NO_NUMERIC_VALUE = -123456789;
3434 
3435     /**
3436      * Compatibility constant for Java Character's MIN_RADIX.
3437      */
3438     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
3439 
3440     /**
3441      * Compatibility constant for Java Character's MAX_RADIX.
3442      */
3443     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
3444 
3445     /**
3446      * Do not lowercase non-initial parts of words when titlecasing.
3447      * Option bit for titlecasing APIs that take an options bit set.
3448      *
3449      * By default, titlecasing will titlecase the first cased character
3450      * of a word and lowercase all other characters.
3451      * With this option, the other characters will not be modified.
3452      *
3453      * @see #toTitleCase
3454      */
3455     public static final int TITLECASE_NO_LOWERCASE = 0x100;
3456 
3457     /**
3458      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
3459      * titlecase exactly the characters at breaks from the iterator.
3460      * Option bit for titlecasing APIs that take an options bit set.
3461      *
3462      * By default, titlecasing will take each break iterator index,
3463      * adjust it by looking for the next cased character, and titlecase that one.
3464      * Other characters are lowercased.
3465      *
3466      * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
3467      *
3468      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
3469      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
3470      * cased character F. If F exists, map F to default_title(F); then map each
3471      * subsequent character C to default_lower(C).
3472      *
3473      * @see #toTitleCase
3474      * @see #TITLECASE_NO_LOWERCASE
3475      */
3476     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
3477 
3478     // public methods ----------------------------------------------------
3479 
3480     /**
3481      * Returnss the numeric value of a decimal digit code point.
3482      * <br>This method observes the semantics of
3483      * <code>java.lang.Character.digit()</code>.  Note that this
3484      * will return positive values for code points for which isDigit
3485      * returns false, just like java.lang.Character.
3486      * <br><em>Semantic Change:</em> In release 1.3.1 and
3487      * prior, this did not treat the European letters as having a
3488      * digit value, and also treated numeric letters and other numbers as
3489      * digits.
3490      * This has been changed to conform to the java semantics.
3491      * <br>A code point is a valid digit if and only if:
3492      * <ul>
3493      *   <li>ch is a decimal digit or one of the european letters, and
3494      *   <li>the value of ch is less than the specified radix.
3495      * </ul>
3496      * @param ch the code point to query
3497      * @param radix the radix
3498      * @return the numeric value represented by the code point in the
3499      * specified radix, or -1 if the code point is not a decimal digit
3500      * or if its value is too large for the radix
3501      */
digit(int ch, int radix)3502     public static int digit(int ch, int radix)
3503     {
3504         if (2 <= radix && radix <= 36) {
3505             int value = digit(ch);
3506             if (value < 0) {
3507                 // ch is not a decimal digit, try latin letters
3508                 value = UCharacterProperty.getEuropeanDigit(ch);
3509             }
3510             return (value < radix) ? value : -1;
3511         } else {
3512             return -1;  // invalid radix
3513         }
3514     }
3515 
3516     /**
3517      * Returnss the numeric value of a decimal digit code point.
3518      * <br>This is a convenience overload of <code>digit(int, int)</code>
3519      * that provides a decimal radix.
3520      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
3521      * treated numeric letters and other numbers as digits.  This has
3522      * been changed to conform to the java semantics.
3523      * @param ch the code point to query
3524      * @return the numeric value represented by the code point,
3525      * or -1 if the code point is not a decimal digit or if its
3526      * value is too large for a decimal radix
3527      */
digit(int ch)3528     public static int digit(int ch)
3529     {
3530         return UCharacterProperty.INSTANCE.digit(ch);
3531     }
3532 
3533     /**
3534      * Returns the numeric value of the code point as a nonnegative
3535      * integer.
3536      * <br>If the code point does not have a numeric value, then -1 is returned.
3537      * <br>
3538      * If the code point has a numeric value that cannot be represented as a
3539      * nonnegative integer (for example, a fractional value), then -2 is
3540      * returned.
3541      * @param ch the code point to query
3542      * @return the numeric value of the code point, or -1 if it has no numeric
3543      * value, or -2 if it has a numeric value that cannot be represented as a
3544      * nonnegative integer
3545      */
getNumericValue(int ch)3546     public static int getNumericValue(int ch)
3547     {
3548         return UCharacterProperty.INSTANCE.getNumericValue(ch);
3549     }
3550 
3551     /**
3552      * <strong>[icu]</strong> Returns the numeric value for a Unicode code point as defined in the
3553      * Unicode Character Database.
3554      * <p>A "double" return type is necessary because some numeric values are
3555      * fractions, negative, or too large for int.
3556      * <p>For characters without any numeric values in the Unicode Character
3557      * Database, this function will return NO_NUMERIC_VALUE.
3558      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
3559      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
3560      * return type int and returns -1 when the argument ch does not have a
3561      * corresponding numeric value. This has been changed to synch with ICU4C
3562      *
3563      * This corresponds to the ICU4C function u_getNumericValue.
3564      * @param ch Code point to get the numeric value for.
3565      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
3566      */
getUnicodeNumericValue(int ch)3567     public static double getUnicodeNumericValue(int ch)
3568     {
3569         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
3570     }
3571 
3572     /**
3573      * Compatibility override of Java deprecated method.  This
3574      * method will always remain deprecated.
3575      * Same as java.lang.Character.isSpace().
3576      * @param ch the code point
3577      * @return true if the code point is a space character as
3578      * defined by java.lang.Character.isSpace.
3579      * @deprecated ICU 3.4 (Java)
3580      * @hide deprecated on icu4j-org
3581      */
3582     @Deprecated
isSpace(int ch)3583     public static boolean isSpace(int ch) {
3584         return ch <= 0x20 &&
3585                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
3586     }
3587 
3588     /**
3589      * Returns a value indicating a code point's Unicode category.
3590      * Up-to-date Unicode implementation of java.lang.Character.getType()
3591      * except for the above mentioned code points that had their category
3592      * changed.<br>
3593      * Return results are constants from the interface
3594      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
3595      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
3596      * those returned by java.lang.Character.getType.  UCharacterCategory values
3597      * match the ones used in ICU4C, while java.lang.Character type
3598      * values, though similar, skip the value 17.
3599      * @param ch code point whose type is to be determined
3600      * @return category which is a value of UCharacterCategory
3601      */
getType(int ch)3602     public static int getType(int ch)
3603     {
3604         return UCharacterProperty.INSTANCE.getType(ch);
3605     }
3606 
3607     /**
3608      * Determines if a code point has a defined meaning in the up-to-date
3609      * Unicode standard.
3610      * E.g. supplementary code points though allocated space are not defined in
3611      * Unicode yet.<br>
3612      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
3613      * @param ch code point to be determined if it is defined in the most
3614      *        current version of Unicode
3615      * @return true if this code point is defined in unicode
3616      */
isDefined(int ch)3617     public static boolean isDefined(int ch)
3618     {
3619         return getType(ch) != 0;
3620     }
3621 
3622     /**
3623      * Determines if a code point is a Java digit.
3624      * <br>This method observes the semantics of
3625      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
3626      * digits only.
3627      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
3628      * numeric letters and other numbers as digits.
3629      * This has been changed to conform to the java semantics.
3630      * @param ch code point to query
3631      * @return true if this code point is a digit
3632      */
isDigit(int ch)3633     public static boolean isDigit(int ch)
3634     {
3635         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
3636     }
3637 
3638     /**
3639      * Determines if the specified code point is an ISO control character.
3640      * A code point is considered to be an ISO control character if it is in
3641      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
3642      * &#92;u009F.<br>
3643      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
3644      * @param ch code point to determine if it is an ISO control character
3645      * @return true if code point is a ISO control character
3646      */
isISOControl(int ch)3647     public static boolean isISOControl(int ch)
3648     {
3649         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
3650                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
3651     }
3652 
3653     /**
3654      * Determines if the specified code point is a letter.
3655      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
3656      * @param ch code point to determine if it is a letter
3657      * @return true if code point is a letter
3658      */
isLetter(int ch)3659     public static boolean isLetter(int ch)
3660     {
3661         // if props == 0, it will just fall through and return false
3662         return ((1 << getType(ch))
3663                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3664                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3665                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3666                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3667                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
3668     }
3669 
3670     /**
3671      * Determines if the specified code point is a letter or digit.
3672      * <strong>[icu] Note:</strong> This method, unlike java.lang.Character does not regard the ascii
3673      * characters 'A' - 'Z' and 'a' - 'z' as digits.
3674      * @param ch code point to determine if it is a letter or a digit
3675      * @return true if code point is a letter or a digit
3676      */
isLetterOrDigit(int ch)3677     public static boolean isLetterOrDigit(int ch)
3678     {
3679         return ((1 << getType(ch))
3680                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3681                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3682                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3683                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3684                         | (1 << UCharacterCategory.OTHER_LETTER)
3685                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
3686     }
3687 
3688     /**
3689      * Compatibility override of Java deprecated method.  This
3690      * method will always remain deprecated.  Delegates to
3691      * java.lang.Character.isJavaIdentifierStart.
3692      * @param cp the code point
3693      * @return true if the code point can start a java identifier.
3694      * @deprecated ICU 3.4 (Java)
3695      * @hide deprecated on icu4j-org
3696      */
3697     @Deprecated
isJavaLetter(int cp)3698     public static boolean isJavaLetter(int cp) {
3699         return isJavaIdentifierStart(cp);
3700     }
3701 
3702     /**
3703      * Compatibility override of Java deprecated method.  This
3704      * method will always remain deprecated.  Delegates to
3705      * java.lang.Character.isJavaIdentifierPart.
3706      * @param cp the code point
3707      * @return true if the code point can continue a java identifier.
3708      * @deprecated ICU 3.4 (Java)
3709      * @hide deprecated on icu4j-org
3710      */
3711     @Deprecated
isJavaLetterOrDigit(int cp)3712     public static boolean isJavaLetterOrDigit(int cp) {
3713         return isJavaIdentifierPart(cp);
3714     }
3715 
3716     /**
3717      * Compatibility override of Java method, delegates to
3718      * java.lang.Character.isJavaIdentifierStart.
3719      * @param cp the code point
3720      * @return true if the code point can start a java identifier.
3721      */
isJavaIdentifierStart(int cp)3722     public static boolean isJavaIdentifierStart(int cp) {
3723         // note, downcast to char for jdk 1.4 compatibility
3724         return java.lang.Character.isJavaIdentifierStart((char)cp);
3725     }
3726 
3727     /**
3728      * Compatibility override of Java method, delegates to
3729      * java.lang.Character.isJavaIdentifierPart.
3730      * @param cp the code point
3731      * @return true if the code point can continue a java identifier.
3732      */
isJavaIdentifierPart(int cp)3733     public static boolean isJavaIdentifierPart(int cp) {
3734         // note, downcast to char for jdk 1.4 compatibility
3735         return java.lang.Character.isJavaIdentifierPart((char)cp);
3736     }
3737 
3738     /**
3739      * Determines if the specified code point is a lowercase character.
3740      * UnicodeData only contains case mappings for code points where they are
3741      * one-to-one mappings; it also omits information about context-sensitive
3742      * case mappings.<br> For more information about Unicode case mapping
3743      * please refer to the
3744      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
3745      * #21</a>.<br>
3746      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
3747      * @param ch code point to determine if it is in lowercase
3748      * @return true if code point is a lowercase character
3749      */
isLowerCase(int ch)3750     public static boolean isLowerCase(int ch)
3751     {
3752         // if props == 0, it will just fall through and return false
3753         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
3754     }
3755 
3756     /**
3757      * Determines if the specified code point is a white space character.
3758      * A code point is considered to be an whitespace character if and only
3759      * if it satisfies one of the following criteria:
3760      * <ul>
3761      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
3762      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
3763      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
3764      * <li> It is &#92;u000A, LINE FEED.
3765      * <li> It is &#92;u000B, VERTICAL TABULATION.
3766      * <li> It is &#92;u000C, FORM FEED.
3767      * <li> It is &#92;u000D, CARRIAGE RETURN.
3768      * <li> It is &#92;u001C, FILE SEPARATOR.
3769      * <li> It is &#92;u001D, GROUP SEPARATOR.
3770      * <li> It is &#92;u001E, RECORD SEPARATOR.
3771      * <li> It is &#92;u001F, UNIT SEPARATOR.
3772      * </ul>
3773      *
3774      * This API tries to sync with the semantics of Java's
3775      * java.lang.Character.isWhitespace(), but it may not return
3776      * the exact same results because of the Unicode version
3777      * difference.
3778      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
3779      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
3780      * See http://www.unicode.org/versions/Unicode4.0.1/
3781      * @param ch code point to determine if it is a white space
3782      * @return true if the specified code point is a white space character
3783      */
isWhitespace(int ch)3784     public static boolean isWhitespace(int ch)
3785     {
3786         // exclude no-break spaces
3787         // if props == 0, it will just fall through and return false
3788         return ((1 << getType(ch)) &
3789                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
3790                         | (1 << UCharacterCategory.LINE_SEPARATOR)
3791                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
3792                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
3793                         // TAB VT LF FF CR FS GS RS US NL are all control characters
3794                         // that are white spaces.
3795                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
3796     }
3797 
3798     /**
3799      * Determines if the specified code point is a Unicode specified space
3800      * character, i.e. if code point is in the category Zs, Zl and Zp.
3801      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
3802      * @param ch code point to determine if it is a space
3803      * @return true if the specified code point is a space character
3804      */
isSpaceChar(int ch)3805     public static boolean isSpaceChar(int ch)
3806     {
3807         // if props == 0, it will just fall through and return false
3808         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
3809                 | (1 << UCharacterCategory.LINE_SEPARATOR)
3810                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
3811                 != 0;
3812     }
3813 
3814     /**
3815      * Determines if the specified code point is a titlecase character.
3816      * UnicodeData only contains case mappings for code points where they are
3817      * one-to-one mappings; it also omits information about context-sensitive
3818      * case mappings.<br>
3819      * For more information about Unicode case mapping please refer to the
3820      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3821      * Technical report #21</a>.<br>
3822      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
3823      * @param ch code point to determine if it is in title case
3824      * @return true if the specified code point is a titlecase character
3825      */
isTitleCase(int ch)3826     public static boolean isTitleCase(int ch)
3827     {
3828         // if props == 0, it will just fall through and return false
3829         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
3830     }
3831 
3832     /**
3833      * Determines if the specified code point may be any part of a Unicode
3834      * identifier other than the starting character.
3835      * A code point may be part of a Unicode identifier if and only if it is
3836      * one of the following:
3837      * <ul>
3838      * <li> Lu Uppercase letter
3839      * <li> Ll Lowercase letter
3840      * <li> Lt Titlecase letter
3841      * <li> Lm Modifier letter
3842      * <li> Lo Other letter
3843      * <li> Nl Letter number
3844      * <li> Pc Connecting punctuation character
3845      * <li> Nd decimal number
3846      * <li> Mc Spacing combining mark
3847      * <li> Mn Non-spacing mark
3848      * <li> Cf formatting code
3849      * </ul>
3850      * Up-to-date Unicode implementation of
3851      * java.lang.Character.isUnicodeIdentifierPart().<br>
3852      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3853      * @param ch code point to determine if is can be part of a Unicode
3854      *        identifier
3855      * @return true if code point is any character belonging a unicode
3856      *         identifier suffix after the first character
3857      */
isUnicodeIdentifierPart(int ch)3858     public static boolean isUnicodeIdentifierPart(int ch)
3859     {
3860         // if props == 0, it will just fall through and return false
3861         // cat == format
3862         return ((1 << getType(ch))
3863                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3864                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3865                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3866                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3867                         | (1 << UCharacterCategory.OTHER_LETTER)
3868                         | (1 << UCharacterCategory.LETTER_NUMBER)
3869                         | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
3870                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
3871                         | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
3872                         | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
3873                         || isIdentifierIgnorable(ch);
3874     }
3875 
3876     /**
3877      * Determines if the specified code point is permissible as the first
3878      * character in a Unicode identifier.
3879      * A code point may start a Unicode identifier if it is of type either
3880      * <ul>
3881      * <li> Lu Uppercase letter
3882      * <li> Ll Lowercase letter
3883      * <li> Lt Titlecase letter
3884      * <li> Lm Modifier letter
3885      * <li> Lo Other letter
3886      * <li> Nl Letter number
3887      * </ul>
3888      * Up-to-date Unicode implementation of
3889      * java.lang.Character.isUnicodeIdentifierStart().<br>
3890      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3891      * @param ch code point to determine if it can start a Unicode identifier
3892      * @return true if code point is the first character belonging a unicode
3893      *              identifier
3894      */
isUnicodeIdentifierStart(int ch)3895     public static boolean isUnicodeIdentifierStart(int ch)
3896     {
3897         /*int cat = getType(ch);*/
3898         // if props == 0, it will just fall through and return false
3899         return ((1 << getType(ch))
3900                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
3901                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
3902                         | (1 << UCharacterCategory.TITLECASE_LETTER)
3903                         | (1 << UCharacterCategory.MODIFIER_LETTER)
3904                         | (1 << UCharacterCategory.OTHER_LETTER)
3905                         | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
3906     }
3907 
3908     /**
3909      * Determines if the specified code point should be regarded as an
3910      * ignorable character in a Java identifier.
3911      * A character is Java-identifier-ignorable if it has the general category
3912      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
3913      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
3914      * Up-to-date Unicode implementation of
3915      * java.lang.Character.isIdentifierIgnorable().<br>
3916      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
3917      * <p>Note that Unicode just recommends to ignore Cf (format controls).
3918      * @param ch code point to be determined if it can be ignored in a Unicode
3919      *        identifier.
3920      * @return true if the code point is ignorable
3921      */
isIdentifierIgnorable(int ch)3922     public static boolean isIdentifierIgnorable(int ch)
3923     {
3924         // see java.lang.Character.isIdentifierIgnorable() on range of
3925         // ignorable characters.
3926         if (ch <= 0x9f) {
3927             return isISOControl(ch)
3928                     && !((ch >= 0x9 && ch <= 0xd)
3929                             || (ch >= 0x1c && ch <= 0x1f));
3930         }
3931         return getType(ch) == UCharacterCategory.FORMAT;
3932     }
3933 
3934     /**
3935      * Determines if the specified code point is an uppercase character.
3936      * UnicodeData only contains case mappings for code point where they are
3937      * one-to-one mappings; it also omits information about context-sensitive
3938      * case mappings.<br>
3939      * For language specific case conversion behavior, use
3940      * toUpperCase(locale, str). <br>
3941      * For example, the case conversion for dot-less i and dotted I in Turkish,
3942      * or for final sigma in Greek.
3943      * For more information about Unicode case mapping please refer to the
3944      * <a href=http://www.unicode.org/unicode/reports/tr21/>
3945      * Technical report #21</a>.<br>
3946      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
3947      * @param ch code point to determine if it is in uppercase
3948      * @return true if the code point is an uppercase character
3949      */
isUpperCase(int ch)3950     public static boolean isUpperCase(int ch)
3951     {
3952         // if props == 0, it will just fall through and return false
3953         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
3954     }
3955 
3956     /**
3957      * The given code point is mapped to its lowercase equivalent; if the code
3958      * point has no lowercase equivalent, the code point itself is returned.
3959      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
3960      *
3961      * <p>This function only returns the simple, single-code point case mapping.
3962      * Full case mappings should be used whenever possible because they produce
3963      * better results by working on whole strings.
3964      * They take into account the string context and the language and can map
3965      * to a result string with a different length as appropriate.
3966      * Full case mappings are applied by the case mapping functions
3967      * that take String parameters rather than code points (int).
3968      * See also the User Guide chapter on C/POSIX migration:
3969      * http://www.icu-project.org/userguide/posix.html#case_mappings
3970      *
3971      * @param ch code point whose lowercase equivalent is to be retrieved
3972      * @return the lowercase equivalent code point
3973      */
toLowerCase(int ch)3974     public static int toLowerCase(int ch) {
3975         return UCaseProps.INSTANCE.tolower(ch);
3976     }
3977 
3978     /**
3979      * Converts argument code point and returns a String object representing
3980      * the code point's value in UTF-16 format.
3981      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
3982      *
3983      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
3984      *
3985      * @param ch code point
3986      * @return string representation of the code point, null if code point is not
3987      *         defined in unicode
3988      */
toString(int ch)3989     public static String toString(int ch)
3990     {
3991         if (ch < MIN_VALUE || ch > MAX_VALUE) {
3992             return null;
3993         }
3994 
3995         if (ch < SUPPLEMENTARY_MIN_VALUE) {
3996             return String.valueOf((char)ch);
3997         }
3998 
3999         return new String(Character.toChars(ch));
4000     }
4001 
4002     /**
4003      * Converts the code point argument to titlecase.
4004      * If no titlecase is available, the uppercase is returned. If no uppercase
4005      * is available, the code point itself is returned.
4006      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
4007      *
4008      * <p>This function only returns the simple, single-code point case mapping.
4009      * Full case mappings should be used whenever possible because they produce
4010      * better results by working on whole strings.
4011      * They take into account the string context and the language and can map
4012      * to a result string with a different length as appropriate.
4013      * Full case mappings are applied by the case mapping functions
4014      * that take String parameters rather than code points (int).
4015      * See also the User Guide chapter on C/POSIX migration:
4016      * http://www.icu-project.org/userguide/posix.html#case_mappings
4017      *
4018      * @param ch code point  whose title case is to be retrieved
4019      * @return titlecase code point
4020      */
toTitleCase(int ch)4021     public static int toTitleCase(int ch) {
4022         return UCaseProps.INSTANCE.totitle(ch);
4023     }
4024 
4025     /**
4026      * Converts the character argument to uppercase.
4027      * If no uppercase is available, the character itself is returned.
4028      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
4029      *
4030      * <p>This function only returns the simple, single-code point case mapping.
4031      * Full case mappings should be used whenever possible because they produce
4032      * better results by working on whole strings.
4033      * They take into account the string context and the language and can map
4034      * to a result string with a different length as appropriate.
4035      * Full case mappings are applied by the case mapping functions
4036      * that take String parameters rather than code points (int).
4037      * See also the User Guide chapter on C/POSIX migration:
4038      * http://www.icu-project.org/userguide/posix.html#case_mappings
4039      *
4040      * @param ch code point whose uppercase is to be retrieved
4041      * @return uppercase code point
4042      */
toUpperCase(int ch)4043     public static int toUpperCase(int ch) {
4044         return UCaseProps.INSTANCE.toupper(ch);
4045     }
4046 
4047     // extra methods not in java.lang.Character --------------------------
4048 
4049     /**
4050      * <strong>[icu]</strong> Determines if the code point is a supplementary character.
4051      * A code point is a supplementary character if and only if it is greater
4052      * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
4053      * @param ch code point to be determined if it is in the supplementary
4054      *        plane
4055      * @return true if code point is a supplementary character
4056      */
isSupplementary(int ch)4057     public static boolean isSupplementary(int ch)
4058     {
4059         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
4060                 ch <= UCharacter.MAX_VALUE;
4061     }
4062 
4063     /**
4064      * <strong>[icu]</strong> Determines if the code point is in the BMP plane.
4065      * @param ch code point to be determined if it is not a supplementary
4066      *        character
4067      * @return true if code point is not a supplementary character
4068      */
isBMP(int ch)4069     public static boolean isBMP(int ch)
4070     {
4071         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
4072     }
4073 
4074     /**
4075      * <strong>[icu]</strong> Determines whether the specified code point is a printable character
4076      * according to the Unicode standard.
4077      * @param ch code point to be determined if it is printable
4078      * @return true if the code point is a printable character
4079      */
isPrintable(int ch)4080     public static boolean isPrintable(int ch)
4081     {
4082         int cat = getType(ch);
4083         // if props == 0, it will just fall through and return false
4084         return (cat != UCharacterCategory.UNASSIGNED &&
4085                 cat != UCharacterCategory.CONTROL &&
4086                 cat != UCharacterCategory.FORMAT &&
4087                 cat != UCharacterCategory.PRIVATE_USE &&
4088                 cat != UCharacterCategory.SURROGATE &&
4089                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
4090     }
4091 
4092     /**
4093      * <strong>[icu]</strong> Determines whether the specified code point is of base form.
4094      * A code point of base form does not graphically combine with preceding
4095      * characters, and is neither a control nor a format character.
4096      * @param ch code point to be determined if it is of base form
4097      * @return true if the code point is of base form
4098      */
isBaseForm(int ch)4099     public static boolean isBaseForm(int ch)
4100     {
4101         int cat = getType(ch);
4102         // if props == 0, it will just fall through and return false
4103         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
4104                 cat == UCharacterCategory.OTHER_NUMBER ||
4105                 cat == UCharacterCategory.LETTER_NUMBER ||
4106                 cat == UCharacterCategory.UPPERCASE_LETTER ||
4107                 cat == UCharacterCategory.LOWERCASE_LETTER ||
4108                 cat == UCharacterCategory.TITLECASE_LETTER ||
4109                 cat == UCharacterCategory.MODIFIER_LETTER ||
4110                 cat == UCharacterCategory.OTHER_LETTER ||
4111                 cat == UCharacterCategory.NON_SPACING_MARK ||
4112                 cat == UCharacterCategory.ENCLOSING_MARK ||
4113                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
4114     }
4115 
4116     /**
4117      * <strong>[icu]</strong> Returns the Bidirection property of a code point.
4118      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
4119      * property.<br>
4120      * Result returned belongs to the interface
4121      * <a href=UCharacterDirection.html>UCharacterDirection</a>
4122      * @param ch the code point to be determined its direction
4123      * @return direction constant from UCharacterDirection.
4124      */
getDirection(int ch)4125     public static int getDirection(int ch)
4126     {
4127         return UBiDiProps.INSTANCE.getClass(ch);
4128     }
4129 
4130     /**
4131      * Determines whether the code point has the "mirrored" property.
4132      * This property is set for characters that are commonly used in
4133      * Right-To-Left contexts and need to be displayed with a "mirrored"
4134      * glyph.
4135      * @param ch code point whose mirror is to be determined
4136      * @return true if the code point has the "mirrored" property
4137      */
isMirrored(int ch)4138     public static boolean isMirrored(int ch)
4139     {
4140         return UBiDiProps.INSTANCE.isMirrored(ch);
4141     }
4142 
4143     /**
4144      * <strong>[icu]</strong> Maps the specified code point to a "mirror-image" code point.
4145      * For code points with the "mirrored" property, implementations sometimes
4146      * need a "poor man's" mapping to another code point such that the default
4147      * glyph may serve as the mirror-image of the default glyph of the
4148      * specified code point.<br>
4149      * This is useful for text conversion to and from codepages with visual
4150      * order, and for displays without glyph selection capabilities.
4151      * @param ch code point whose mirror is to be retrieved
4152      * @return another code point that may serve as a mirror-image substitute,
4153      *         or ch itself if there is no such mapping or ch does not have the
4154      *         "mirrored" property
4155      */
getMirror(int ch)4156     public static int getMirror(int ch)
4157     {
4158         return UBiDiProps.INSTANCE.getMirror(ch);
4159     }
4160 
4161     /**
4162      * <strong>[icu]</strong> Maps the specified character to its paired bracket character.
4163      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
4164      * Otherwise c itself is returned.
4165      * See http://www.unicode.org/reports/tr9/
4166      *
4167      * @param c the code point to be mapped
4168      * @return the paired bracket code point,
4169      *         or c itself if there is no such mapping
4170      *         (Bidi_Paired_Bracket_Type=None)
4171      *
4172      * @see UProperty#BIDI_PAIRED_BRACKET
4173      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
4174      * @see #getMirror(int)
4175      */
getBidiPairedBracket(int c)4176     public static int getBidiPairedBracket(int c) {
4177         return UBiDiProps.INSTANCE.getPairedBracket(c);
4178     }
4179 
4180     /**
4181      * <strong>[icu]</strong> Returns the combining class of the argument codepoint
4182      * @param ch code point whose combining is to be retrieved
4183      * @return the combining class of the codepoint
4184      */
getCombiningClass(int ch)4185     public static int getCombiningClass(int ch)
4186     {
4187         return Normalizer2.getNFDInstance().getCombiningClass(ch);
4188     }
4189 
4190     /**
4191      * <strong>[icu]</strong> A code point is illegal if and only if
4192      * <ul>
4193      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4194      * <li> A surrogate value, 0xD800 to 0xDFFF
4195      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4196      * </ul>
4197      * Note: legal does not mean that it is assigned in this version of Unicode.
4198      * @param ch code point to determine if it is a legal code point by itself
4199      * @return true if and only if legal.
4200      */
isLegal(int ch)4201     public static boolean isLegal(int ch)
4202     {
4203         if (ch < MIN_VALUE) {
4204             return false;
4205         }
4206         if (ch < Character.MIN_SURROGATE) {
4207             return true;
4208         }
4209         if (ch <= Character.MAX_SURROGATE) {
4210             return false;
4211         }
4212         if (UCharacterUtility.isNonCharacter(ch)) {
4213             return false;
4214         }
4215         return (ch <= MAX_VALUE);
4216     }
4217 
4218     /**
4219      * <strong>[icu]</strong> A string is legal iff all its code points are legal.
4220      * A code point is illegal if and only if
4221      * <ul>
4222      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4223      * <li> A surrogate value, 0xD800 to 0xDFFF
4224      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4225      * </ul>
4226      * Note: legal does not mean that it is assigned in this version of Unicode.
4227      * @param str containing code points to examin
4228      * @return true if and only if legal.
4229      */
isLegal(String str)4230     public static boolean isLegal(String str)
4231     {
4232         int size = str.length();
4233         int codepoint;
4234         for (int i = 0; i < size; i += Character.charCount(codepoint))
4235         {
4236             codepoint = str.codePointAt(i);
4237             if (!isLegal(codepoint)) {
4238                 return false;
4239             }
4240         }
4241         return true;
4242     }
4243 
4244     /**
4245      * <strong>[icu]</strong> Returns the version of Unicode data used.
4246      * @return the unicode version number used
4247      */
getUnicodeVersion()4248     public static VersionInfo getUnicodeVersion()
4249     {
4250         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
4251     }
4252 
4253     /**
4254      * <strong>[icu]</strong> Returns the most current Unicode name of the argument code point, or
4255      * null if the character is unassigned or outside the range
4256      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4257      * <br>
4258      * Note calling any methods related to code point names, e.g. get*Name*()
4259      * incurs a one-time initialisation cost to construct the name tables.
4260      * @param ch the code point for which to get the name
4261      * @return most current Unicode name
4262      */
getName(int ch)4263     public static String getName(int ch)
4264     {
4265         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
4266     }
4267 
4268     /**
4269      * <strong>[icu]</strong> Returns the names for each of the characters in a string
4270      * @param s string to format
4271      * @param separator string to go between names
4272      * @return string of names
4273      */
getName(String s, String separator)4274     public static String getName(String s, String separator) {
4275         if (s.length() == 1) { // handle common case
4276             return getName(s.charAt(0));
4277         }
4278         int cp;
4279         StringBuilder sb = new StringBuilder();
4280         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
4281             cp = s.codePointAt(i);
4282             if (i != 0) sb.append(separator);
4283             sb.append(UCharacter.getName(cp));
4284         }
4285         return sb.toString();
4286     }
4287 
4288     /**
4289      * <strong>[icu]</strong> Returns null.
4290      * Used to return the Unicode_1_Name property value which was of little practical value.
4291      * @param ch the code point for which to get the name
4292      * @return null
4293      * @deprecated ICU 49
4294      * @hide deprecated on icu4j-org
4295      */
4296     @Deprecated
getName1_0(int ch)4297     public static String getName1_0(int ch)
4298     {
4299         return null;
4300     }
4301 
4302     /**
4303      * <strong>[icu]</strong> Returns a name for a valid codepoint. Unlike, getName(int) and
4304      * getName1_0(int), this method will return a name even for codepoints that
4305      * are not assigned a name in UnicodeData.txt.
4306      *
4307      * <p>The names are returned in the following order.
4308      * <ul>
4309      * <li> Most current Unicode name if there is any
4310      * <li> Unicode 1.0 name if there is any
4311      * <li> Extended name in the form of
4312      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
4313      * </ul>
4314      * Note calling any methods related to code point names, e.g. get*Name*()
4315      * incurs a one-time initialisation cost to construct the name tables.
4316      * @param ch the code point for which to get the name
4317      * @return a name for the argument codepoint
4318      */
getExtendedName(int ch)4319     public static String getExtendedName(int ch) {
4320         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
4321     }
4322 
4323     /**
4324      * <strong>[icu]</strong> Returns the corrected name from NameAliases.txt if there is one.
4325      * Returns null if the character is unassigned or outside the range
4326      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4327      * <br>
4328      * Note calling any methods related to code point names, e.g. get*Name*()
4329      * incurs a one-time initialisation cost to construct the name tables.
4330      * @param ch the code point for which to get the name alias
4331      * @return Unicode name alias, or null
4332      */
getNameAlias(int ch)4333     public static String getNameAlias(int ch)
4334     {
4335         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
4336     }
4337 
4338     /**
4339      * <strong>[icu]</strong> Returns null.
4340      * Used to return the ISO 10646 comment for a character.
4341      * The Unicode ISO_Comment property is deprecated and has no values.
4342      *
4343      * @param ch The code point for which to get the ISO comment.
4344      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
4345      * @return null
4346      * @deprecated ICU 49
4347      * @hide deprecated on icu4j-org
4348      */
4349     @Deprecated
getISOComment(int ch)4350     public static String getISOComment(int ch)
4351     {
4352         return null;
4353     }
4354 
4355     /**
4356      * <strong>[icu]</strong> <p>Finds a Unicode code point by its most current Unicode name and
4357      * return its code point value. All Unicode names are in uppercase.
4358      * Note calling any methods related to code point names, e.g. get*Name*()
4359      * incurs a one-time initialisation cost to construct the name tables.
4360      * @param name most current Unicode character name whose code point is to
4361      *        be returned
4362      * @return code point or -1 if name is not found
4363      */
getCharFromName(String name)4364     public static int getCharFromName(String name){
4365         return UCharacterName.INSTANCE.getCharFromName(
4366                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
4367     }
4368 
4369     /**
4370      * <strong>[icu]</strong> Returns -1.
4371      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
4372      * its code point value.
4373      * @param name Unicode 1.0 code point name whose code point is to be
4374      *             returned
4375      * @return -1
4376      * @deprecated ICU 49
4377      * @see #getName1_0(int)
4378      * @hide deprecated on icu4j-org
4379      */
4380     @Deprecated
getCharFromName1_0(String name)4381     public static int getCharFromName1_0(String name){
4382         return -1;
4383     }
4384 
4385     /**
4386      * <strong>[icu]</strong> <p>Find a Unicode character by either its name and return its code
4387      * point value. All Unicode names are in uppercase.
4388      * Extended names are all lowercase except for numbers and are contained
4389      * within angle brackets.
4390      * The names are searched in the following order
4391      * <ul>
4392      * <li> Most current Unicode name if there is any
4393      * <li> Unicode 1.0 name if there is any
4394      * <li> Extended name in the form of
4395      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
4396      * </ul>
4397      * Note calling any methods related to code point names, e.g. get*Name*()
4398      * incurs a one-time initialisation cost to construct the name tables.
4399      * @param name codepoint name
4400      * @return code point associated with the name or -1 if the name is not
4401      *         found.
4402      */
getCharFromExtendedName(String name)4403     public static int getCharFromExtendedName(String name){
4404         return UCharacterName.INSTANCE.getCharFromName(
4405                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
4406     }
4407 
4408     /**
4409      * <strong>[icu]</strong> <p>Find a Unicode character by its corrected name alias and return
4410      * its code point value. All Unicode names are in uppercase.
4411      * Note calling any methods related to code point names, e.g. get*Name*()
4412      * incurs a one-time initialisation cost to construct the name tables.
4413      * @param name Unicode name alias whose code point is to be returned
4414      * @return code point or -1 if name is not found
4415      */
getCharFromNameAlias(String name)4416     public static int getCharFromNameAlias(String name){
4417         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
4418     }
4419 
4420     /**
4421      * <strong>[icu]</strong> Return the Unicode name for a given property, as given in the
4422      * Unicode database file PropertyAliases.txt.  Most properties
4423      * have more than one name.  The nameChoice determines which one
4424      * is returned.
4425      *
4426      * In addition, this function maps the property
4427      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
4428      * "General_Category_Mask".  These names are not in
4429      * PropertyAliases.txt.
4430      *
4431      * @param property UProperty selector.
4432      *
4433      * @param nameChoice UProperty.NameChoice selector for which name
4434      * to get.  All properties have a long name.  Most have a short
4435      * name, but some do not.  Unicode allows for additional names; if
4436      * present these will be returned by UProperty.NameChoice.LONG + i,
4437      * where i=1, 2,...
4438      *
4439      * @return a name, or null if Unicode explicitly defines no name
4440      * ("n/a") for a given property/nameChoice.  If a given nameChoice
4441      * throws an exception, then all larger values of nameChoice will
4442      * throw an exception.  If null is returned for a given
4443      * nameChoice, then other nameChoice values may return non-null
4444      * results.
4445      *
4446      * @exception IllegalArgumentException thrown if property or
4447      * nameChoice are invalid.
4448      *
4449      * @see UProperty
4450      * @see UProperty.NameChoice
4451      */
getPropertyName(int property, int nameChoice)4452     public static String getPropertyName(int property,
4453             int nameChoice) {
4454         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
4455     }
4456 
4457     /**
4458      * <strong>[icu]</strong> Return the UProperty selector for a given property name, as
4459      * specified in the Unicode database file PropertyAliases.txt.
4460      * Short, long, and any other variants are recognized.
4461      *
4462      * In addition, this function maps the synthetic names "gcm" /
4463      * "General_Category_Mask" to the property
4464      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
4465      * PropertyAliases.txt.
4466      *
4467      * @param propertyAlias the property name to be matched.  The name
4468      * is compared using "loose matching" as described in
4469      * PropertyAliases.txt.
4470      *
4471      * @return a UProperty enum.
4472      *
4473      * @exception IllegalArgumentException thrown if propertyAlias
4474      * is not recognized.
4475      *
4476      * @see UProperty
4477      */
getPropertyEnum(CharSequence propertyAlias)4478     public static int getPropertyEnum(CharSequence propertyAlias) {
4479         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
4480         if (propEnum == UProperty.UNDEFINED) {
4481             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
4482         }
4483         return propEnum;
4484     }
4485 
4486     /**
4487      * <strong>[icu]</strong> Return the Unicode name for a given property value, as given in
4488      * the Unicode database file PropertyValueAliases.txt.  Most
4489      * values have more than one name.  The nameChoice determines
4490      * which one is returned.
4491      *
4492      * Note: Some of the names in PropertyValueAliases.txt can only be
4493      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
4494      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4495      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4496      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4497      *
4498      * @param property UProperty selector constant.
4499      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4500      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4501      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4502      * If out of range, null is returned.
4503      *
4504      * @param value selector for a value for the given property.  In
4505      * general, valid values range from 0 up to some maximum.  There
4506      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
4507      * non-zero value BASIC_LATIN.getID().  (2.)
4508      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
4509      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
4510      * are mask values produced by left-shifting 1 by
4511      * UCharacter.getType().  This allows grouped categories such as
4512      * [:L:] to be represented.  Mask values are non-contiguous.
4513      *
4514      * @param nameChoice UProperty.NameChoice selector for which name
4515      * to get.  All values have a long name.  Most have a short name,
4516      * but some do not.  Unicode allows for additional names; if
4517      * present these will be returned by UProperty.NameChoice.LONG + i,
4518      * where i=1, 2,...
4519      *
4520      * @return a name, or null if Unicode explicitly defines no name
4521      * ("n/a") for a given property/value/nameChoice.  If a given
4522      * nameChoice throws an exception, then all larger values of
4523      * nameChoice will throw an exception.  If null is returned for a
4524      * given nameChoice, then other nameChoice values may return
4525      * non-null results.
4526      *
4527      * @exception IllegalArgumentException thrown if property, value,
4528      * or nameChoice are invalid.
4529      *
4530      * @see UProperty
4531      * @see UProperty.NameChoice
4532      */
getPropertyValueName(int property, int value, int nameChoice)4533     public static String getPropertyValueName(int property,
4534             int value,
4535             int nameChoice)
4536     {
4537         if ((property == UProperty.CANONICAL_COMBINING_CLASS
4538                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
4539                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
4540                 && value >= UCharacter.getIntPropertyMinValue(
4541                         UProperty.CANONICAL_COMBINING_CLASS)
4542                         && value <= UCharacter.getIntPropertyMaxValue(
4543                                 UProperty.CANONICAL_COMBINING_CLASS)
4544                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
4545             // this is hard coded for the valid cc
4546             // because PropertyValueAliases.txt does not contain all of them
4547             try {
4548                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
4549                         nameChoice);
4550             }
4551             catch (IllegalArgumentException e) {
4552                 return null;
4553             }
4554         }
4555         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
4556     }
4557 
4558     /**
4559      * <strong>[icu]</strong> Return the property value integer for a given value name, as
4560      * specified in the Unicode database file PropertyValueAliases.txt.
4561      * Short, long, and any other variants are recognized.
4562      *
4563      * Note: Some of the names in PropertyValueAliases.txt will only be
4564      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
4565      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4566      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4567      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4568      *
4569      * @param property UProperty selector constant.
4570      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4571      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
     * UProperty.MASK_START &lt;= property &lt; UProperty.MASK_LIMIT.
4573      * Only these properties can be enumerated.
4574      *
4575      * @param valueAlias the value name to be matched.  The name is
4576      * compared using "loose matching" as described in
4577      * PropertyValueAliases.txt.
4578      *
     * @return a value integer.  Note: UProperty.GENERAL_CATEGORY_MASK
     * values are mask values produced by left-shifting 1 by
     * UCharacter.getType().  This allows grouped categories such as
     * [:L:] to be represented.
4583      *
4584      * @see UProperty
4585      * @throws IllegalArgumentException if property is not a valid UProperty
4586      *         selector or valueAlias is not a value of this property
4587      */
getPropertyValueEnum(int property, CharSequence valueAlias)4588     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
4589         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
4590         if (propEnum == UProperty.UNDEFINED) {
4591             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
4592         }
4593         return propEnum;
4594     }
4595 
4596     /**
4597      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
4598      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
4599      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
4600      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
4601      * @deprecated This API is ICU internal only.
4602      * @hide deprecated on icu4j-org
4603      * @hide draft / provisional / internal are hidden on OHOS
4604      */
4605     @Deprecated
getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)4606     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
4607         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
4608     }
4609 
4610 
4611     /**
4612      * <strong>[icu]</strong> Returns a code point corresponding to the two surrogate code units.
4613      *
4614      * @param lead the lead char
4615      * @param trail the trail char
4616      * @return code point if surrogate characters are valid.
4617      * @exception IllegalArgumentException thrown when the code units do
4618      *            not form a valid code point
4619      */
getCodePoint(char lead, char trail)4620     public static int getCodePoint(char lead, char trail)
4621     {
4622         if (Character.isSurrogatePair(lead, trail)) {
4623             return Character.toCodePoint(lead, trail);
4624         }
4625         throw new IllegalArgumentException("Illegal surrogate characters");
4626     }
4627 
4628     /**
4629      * <strong>[icu]</strong> Returns the code point corresponding to the BMP code point.
4630      *
4631      * @param char16 the BMP code point
4632      * @return code point if argument is a valid character.
4633      * @exception IllegalArgumentException thrown when char16 is not a valid
4634      *            code point
4635      */
getCodePoint(char char16)4636     public static int getCodePoint(char char16)
4637     {
4638         if (UCharacter.isLegal(char16)) {
4639             return char16;
4640         }
4641         throw new IllegalArgumentException("Illegal codepoint");
4642     }
4643 
4644     /**
4645      * Returns the uppercase version of the argument string.
4646      * Casing is dependent on the default locale and context-sensitive.
4647      * @param str source string to be performed on
4648      * @return uppercase version of the argument string
4649      */
toUpperCase(String str)4650     public static String toUpperCase(String str)
4651     {
4652         return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str);
4653     }
4654 
4655     /**
4656      * Returns the lowercase version of the argument string.
4657      * Casing is dependent on the default locale and context-sensitive
4658      * @param str source string to be performed on
4659      * @return lowercase version of the argument string
4660      */
toLowerCase(String str)4661     public static String toLowerCase(String str)
4662     {
4663         return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str);
4664     }
4665 
4666     /**
4667      * <p>Returns the titlecase version of the argument string.
4668      * <p>Position for titlecasing is determined by the argument break
4669      * iterator, hence the user can customize his break iterator for
4670      * a specialized titlecasing. In this case only the forward iteration
4671      * needs to be implemented.
4672      * If the break iterator passed in is null, the default Unicode algorithm
4673      * will be used to determine the titlecase positions.
4674      *
4675      * <p>Only positions returned by the break iterator will be title cased,
4676      * character in between the positions will all be in lower case.
4677      * <p>Casing is dependent on the default locale and context-sensitive
4678      * @param str source string to be performed on
4679      * @param breakiter break iterator to determine the positions in which
4680      *        the character should be title cased.
4681      * @return titlecase version of the argument string
4682      */
toTitleCase(String str, BreakIterator breakiter)4683     public static String toTitleCase(String str, BreakIterator breakiter)
4684     {
4685         return toTitleCase(Locale.getDefault(), str, breakiter, 0);
4686     }
4687 
getDefaultCaseLocale()4688     private static int getDefaultCaseLocale() {
4689         return UCaseProps.getCaseLocale(Locale.getDefault());
4690     }
4691 
getCaseLocale(Locale locale)4692     private static int getCaseLocale(Locale locale) {
4693         if (locale == null) {
4694             locale = Locale.getDefault();
4695         }
4696         return UCaseProps.getCaseLocale(locale);
4697     }
4698 
getCaseLocale(ULocale locale)4699     private static int getCaseLocale(ULocale locale) {
4700         if (locale == null) {
4701             locale = ULocale.getDefault();
4702         }
4703         return UCaseProps.getCaseLocale(locale);
4704     }
4705 
4706     /**
4707      * Returns the uppercase version of the argument string.
4708      * Casing is dependent on the argument locale and context-sensitive.
4709      * @param locale which string is to be converted in
4710      * @param str source string to be performed on
4711      * @return uppercase version of the argument string
4712      */
toUpperCase(Locale locale, String str)4713     public static String toUpperCase(Locale locale, String str)
4714     {
4715         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
4716     }
4717 
4718     /**
4719      * Returns the uppercase version of the argument string.
4720      * Casing is dependent on the argument locale and context-sensitive.
4721      * @param locale which string is to be converted in
4722      * @param str source string to be performed on
4723      * @return uppercase version of the argument string
4724      */
toUpperCase(ULocale locale, String str)4725     public static String toUpperCase(ULocale locale, String str) {
4726         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
4727     }
4728 
4729     /**
4730      * Returns the lowercase version of the argument string.
4731      * Casing is dependent on the argument locale and context-sensitive
4732      * @param locale which string is to be converted in
4733      * @param str source string to be performed on
4734      * @return lowercase version of the argument string
4735      */
toLowerCase(Locale locale, String str)4736     public static String toLowerCase(Locale locale, String str)
4737     {
4738         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
4739     }
4740 
4741     /**
4742      * Returns the lowercase version of the argument string.
4743      * Casing is dependent on the argument locale and context-sensitive
4744      * @param locale which string is to be converted in
4745      * @param str source string to be performed on
4746      * @return lowercase version of the argument string
4747      */
toLowerCase(ULocale locale, String str)4748     public static String toLowerCase(ULocale locale, String str) {
4749         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
4750     }
4751 
4752     /**
4753      * <p>Returns the titlecase version of the argument string.
4754      * <p>Position for titlecasing is determined by the argument break
4755      * iterator, hence the user can customize his break iterator for
4756      * a specialized titlecasing. In this case only the forward iteration
4757      * needs to be implemented.
4758      * If the break iterator passed in is null, the default Unicode algorithm
4759      * will be used to determine the titlecase positions.
4760      *
4761      * <p>Only positions returned by the break iterator will be title cased,
4762      * character in between the positions will all be in lower case.
4763      * <p>Casing is dependent on the argument locale and context-sensitive
4764      * @param locale which string is to be converted in
4765      * @param str source string to be performed on
4766      * @param breakiter break iterator to determine the positions in which
4767      *        the character should be title cased.
4768      * @return titlecase version of the argument string
4769      */
toTitleCase(Locale locale, String str, BreakIterator breakiter)4770     public static String toTitleCase(Locale locale, String str,
4771             BreakIterator breakiter)
4772     {
4773         return toTitleCase(locale, str, breakiter, 0);
4774     }
4775 
4776     /**
4777      * <p>Returns the titlecase version of the argument string.
4778      * <p>Position for titlecasing is determined by the argument break
4779      * iterator, hence the user can customize his break iterator for
4780      * a specialized titlecasing. In this case only the forward iteration
4781      * needs to be implemented.
4782      * If the break iterator passed in is null, the default Unicode algorithm
4783      * will be used to determine the titlecase positions.
4784      *
4785      * <p>Only positions returned by the break iterator will be title cased,
4786      * character in between the positions will all be in lower case.
4787      * <p>Casing is dependent on the argument locale and context-sensitive
4788      * @param locale which string is to be converted in
4789      * @param str source string to be performed on
4790      * @param titleIter break iterator to determine the positions in which
4791      *        the character should be title cased.
4792      * @return titlecase version of the argument string
4793      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter)4794     public static String toTitleCase(ULocale locale, String str,
4795             BreakIterator titleIter) {
4796         return toTitleCase(locale, str, titleIter, 0);
4797     }
4798 
4799     /**
4800      * <p>Returns the titlecase version of the argument string.
4801      * <p>Position for titlecasing is determined by the argument break
4802      * iterator, hence the user can customize his break iterator for
4803      * a specialized titlecasing. In this case only the forward iteration
4804      * needs to be implemented.
4805      * If the break iterator passed in is null, the default Unicode algorithm
4806      * will be used to determine the titlecase positions.
4807      *
4808      * <p>Only positions returned by the break iterator will be title cased,
4809      * character in between the positions will all be in lower case.
4810      * <p>Casing is dependent on the argument locale and context-sensitive
4811      * @param locale which string is to be converted in
4812      * @param str source string to be performed on
4813      * @param titleIter break iterator to determine the positions in which
4814      *        the character should be title cased.
4815      * @param options bit set to modify the titlecasing operation
4816      * @return titlecase version of the argument string
4817      * @see #TITLECASE_NO_LOWERCASE
4818      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
4819      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)4820     public static String toTitleCase(ULocale locale, String str,
4821             BreakIterator titleIter, int options) {
4822         if (titleIter == null && locale == null) {
4823             locale = ULocale.getDefault();
4824         }
4825         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
4826         titleIter.setText(str);
4827         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
4828     }
4829 
4830     /**
4831      * <strong>[icu]</strong> <p>Returns the titlecase version of the argument string.
4832      * <p>Position for titlecasing is determined by the argument break
4833      * iterator, hence the user can customize his break iterator for
4834      * a specialized titlecasing. In this case only the forward iteration
4835      * needs to be implemented.
4836      * If the break iterator passed in is null, the default Unicode algorithm
4837      * will be used to determine the titlecase positions.
4838      *
4839      * <p>Only positions returned by the break iterator will be title cased,
4840      * character in between the positions will all be in lower case.
4841      * <p>Casing is dependent on the argument locale and context-sensitive
4842      * @param locale which string is to be converted in
4843      * @param str source string to be performed on
4844      * @param titleIter break iterator to determine the positions in which
4845      *        the character should be title cased.
4846      * @param options bit set to modify the titlecasing operation
4847      * @return titlecase version of the argument string
4848      * @see #TITLECASE_NO_LOWERCASE
4849      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
4850      */
toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)4851     public static String toTitleCase(Locale locale, String str,
4852             BreakIterator titleIter,
4853             int options) {
4854         if (titleIter == null && locale == null) {
4855             locale = Locale.getDefault();
4856         }
4857         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
4858         titleIter.setText(str);
4859         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
4860     }
4861 
4862     /**
4863      * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
4864      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4865      * folding equivalent, the character itself is returned.
4866      *
4867      * <p>This function only returns the simple, single-code point case mapping.
4868      * Full case mappings should be used whenever possible because they produce
4869      * better results by working on whole strings.
4870      * They can map to a result string with a different length as appropriate.
4871      * Full case mappings are applied by the case mapping functions
4872      * that take String parameters rather than code points (int).
4873      * See also the User Guide chapter on C/POSIX migration:
4874      * http://www.icu-project.org/userguide/posix.html#case_mappings
4875      *
4876      * @param ch             the character to be converted
4877      * @param defaultmapping Indicates whether the default mappings defined in
4878      *                       CaseFolding.txt are to be used, otherwise the
4879      *                       mappings for dotted I and dotless i marked with
4880      *                       'T' in CaseFolding.txt are included.
4881      * @return               the case folding equivalent of the character, if
4882      *                       any; otherwise the character itself.
4883      * @see                  #foldCase(String, boolean)
4884      */
foldCase(int ch, boolean defaultmapping)4885     public static int foldCase(int ch, boolean defaultmapping) {
4886         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4887     }
4888 
4889     /**
4890      * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
4891      * UnicodeData.txt and CaseFolding.txt; if any character has no case
4892      * folding equivalent, the character itself is returned.
4893      * "Full", multiple-code point case folding mappings are returned here.
4894      * For "simple" single-code point mappings use the API
4895      * foldCase(int ch, boolean defaultmapping).
4896      * @param str            the String to be converted
4897      * @param defaultmapping Indicates whether the default mappings defined in
4898      *                       CaseFolding.txt are to be used, otherwise the
4899      *                       mappings for dotted I and dotless i marked with
4900      *                       'T' in CaseFolding.txt are included.
4901      * @return               the case folding equivalent of the character, if
4902      *                       any; otherwise the character itself.
4903      * @see                  #foldCase(int, boolean)
4904      */
foldCase(String str, boolean defaultmapping)4905     public static String foldCase(String str, boolean defaultmapping) {
4906         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
4907     }
4908 
4909     /**
4910      * <strong>[icu]</strong> Option value for case folding: use default mappings defined in
4911      * CaseFolding.txt.
4912      */
4913     public static final int FOLD_CASE_DEFAULT    =      0x0000;
4914     /**
4915      * <strong>[icu]</strong> Option value for case folding:
4916      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
4917      * and dotless i appropriately for Turkic languages (tr, az).
4918      *
4919      * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
4920      * are to be included for default mappings and
4921      * excluded for the Turkic-specific mappings.
4922      *
4923      * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
4924      * are to be excluded for default mappings and
4925      * included for the Turkic-specific mappings.
4926      */
4927     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
4928 
4929     /**
4930      * <strong>[icu]</strong> The given character is mapped to its case folding equivalent according
4931      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
4932      * folding equivalent, the character itself is returned.
4933      *
4934      * <p>This function only returns the simple, single-code point case mapping.
4935      * Full case mappings should be used whenever possible because they produce
4936      * better results by working on whole strings.
4937      * They can map to a result string with a different length as appropriate.
4938      * Full case mappings are applied by the case mapping functions
4939      * that take String parameters rather than code points (int).
4940      * See also the User Guide chapter on C/POSIX migration:
4941      * http://www.icu-project.org/userguide/posix.html#case_mappings
4942      *
4943      * @param ch the character to be converted
4944      * @param options A bit set for special processing. Currently the recognised options
4945      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4946      * @return the case folding equivalent of the character, if any; otherwise the
4947      * character itself.
4948      * @see #foldCase(String, boolean)
4949      */
foldCase(int ch, int options)4950     public static int foldCase(int ch, int options) {
4951         return UCaseProps.INSTANCE.fold(ch, options);
4952     }
4953 
4954     /**
4955      * <strong>[icu]</strong> The given string is mapped to its case folding equivalent according to
4956      * UnicodeData.txt and CaseFolding.txt; if any character has no case
4957      * folding equivalent, the character itself is returned.
4958      * "Full", multiple-code point case folding mappings are returned here.
4959      * For "simple" single-code point mappings use the API
4960      * foldCase(int ch, boolean defaultmapping).
4961      * @param str the String to be converted
4962      * @param options A bit set for special processing. Currently the recognised options
4963      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
4964      * @return the case folding equivalent of the character, if any; otherwise the
4965      *         character itself.
4966      * @see #foldCase(int, boolean)
4967      */
foldCase(String str, int options)4968     public static final String foldCase(String str, int options) {
4969         return CaseMapImpl.fold(options, str);
4970     }
4971 
4972     /**
4973      * <strong>[icu]</strong> Returns the numeric value of a Han character.
4974      *
4975      * <p>This returns the value of Han 'numeric' code points,
4976      * including those for zero, ten, hundred, thousand, ten thousand,
4977      * and hundred million.
4978      * This includes both the standard and 'checkwriting'
4979      * characters, the 'big circle' zero character, and the standard
4980      * zero character.
4981      *
     * <p>Note: The Unicode Standard has numeric values for more
     * Han characters than are recognized by this method
4984      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
4985      * and a {@link ohos.global.icu.text.NumberFormat} can be used with
4986      * a Chinese {@link ohos.global.icu.text.NumberingSystem}.
4987      *
4988      * @param ch code point to query
4989      * @return value if it is a Han 'numeric character,' otherwise return -1.
4990      */
getHanNumericValue(int ch)4991     public static int getHanNumericValue(int ch)
4992     {
4993         switch(ch)
4994         {
4995         case IDEOGRAPHIC_NUMBER_ZERO_ :
4996         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
4997             return 0; // Han Zero
4998         case CJK_IDEOGRAPH_FIRST_ :
4999         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
5000             return 1; // Han One
5001         case CJK_IDEOGRAPH_SECOND_ :
5002         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
5003             return 2; // Han Two
5004         case CJK_IDEOGRAPH_THIRD_ :
5005         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
5006             return 3; // Han Three
5007         case CJK_IDEOGRAPH_FOURTH_ :
5008         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
5009             return 4; // Han Four
5010         case CJK_IDEOGRAPH_FIFTH_ :
5011         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
5012             return 5; // Han Five
5013         case CJK_IDEOGRAPH_SIXTH_ :
5014         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
5015             return 6; // Han Six
5016         case CJK_IDEOGRAPH_SEVENTH_ :
5017         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
5018             return 7; // Han Seven
5019         case CJK_IDEOGRAPH_EIGHTH_ :
5020         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
5021             return 8; // Han Eight
5022         case CJK_IDEOGRAPH_NINETH_ :
5023         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
5024             return 9; // Han Nine
5025         case CJK_IDEOGRAPH_TEN_ :
5026         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
5027             return 10;
5028         case CJK_IDEOGRAPH_HUNDRED_ :
5029         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
5030             return 100;
5031         case CJK_IDEOGRAPH_THOUSAND_ :
5032         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
5033             return 1000;
5034         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
5035             return 10000;
5036         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
5037             return 100000000;
5038         }
5039         return -1; // no value
5040     }
5041 
5042     /**
5043      * <strong>[icu]</strong> <p>Returns an iterator for character types, iterating over codepoints.
5044      * <p>Example of use:<br>
5045      * <pre>
5046      * RangeValueIterator iterator = UCharacter.getTypeIterator();
5047      * RangeValueIterator.Element element = new RangeValueIterator.Element();
5048      * while (iterator.next(element)) {
5049      *     System.out.println("Codepoint \\u" +
5050      *                        Integer.toHexString(element.start) +
5051      *                        " to codepoint \\u" +
5052      *                        Integer.toHexString(element.limit - 1) +
5053      *                        " has the character type " +
5054      *                        element.value);
5055      * }
5056      * </pre>
5057      * @return an iterator
5058      */
getTypeIterator()5059     public static RangeValueIterator getTypeIterator()
5060     {
5061         return new UCharacterTypeIterator();
5062     }
5063 
5064     private static final class UCharacterTypeIterator implements RangeValueIterator {
UCharacterTypeIterator()5065         UCharacterTypeIterator() {
5066             reset();
5067         }
5068 
5069         // implements RangeValueIterator
5070         @Override
next(Element element)5071         public boolean next(Element element) {
5072             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5073                 element.start=range.startCodePoint;
5074                 element.limit=range.endCodePoint+1;
5075                 element.value=range.value;
5076                 return true;
5077             } else {
5078                 return false;
5079             }
5080         }
5081 
5082         // implements RangeValueIterator
5083         @Override
reset()5084         public void reset() {
5085             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
5086         }
5087 
5088         private Iterator<Trie2.Range> trieIterator;
5089         private Trie2.Range range;
5090 
5091         private static final class MaskType implements Trie2.ValueMapper {
5092             // Extracts the general category ("character type") from the trie value.
5093             @Override
map(int value)5094             public int map(int value) {
5095                 return value & UCharacterProperty.TYPE_MASK;
5096             }
5097         }
5098         private static final MaskType MASK_TYPE=new MaskType();
5099     }
5100 
5101     /**
5102      * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
5103      * <p>This API only gets the iterator for the modern, most up-to-date
     * Unicode names. For older 1.0 Unicode names use getName1_0Iterator() or
5105      * for extended names use getExtendedNameIterator().
5106      * <p>Example of use:<br>
5107      * <pre>
5108      * ValueIterator iterator = UCharacter.getNameIterator();
5109      * ValueIterator.Element element = new ValueIterator.Element();
5110      * while (iterator.next(element)) {
5111      *     System.out.println("Codepoint \\u" +
5112      *                        Integer.toHexString(element.codepoint) +
5113      *                        " has the name " + (String)element.value);
5114      * }
5115      * </pre>
5116      * <p>The maximal range which the name iterator iterates is from
5117      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
5118      * @return an iterator
5119      */
getNameIterator()5120     public static ValueIterator getNameIterator(){
5121         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5122                 UCharacterNameChoice.UNICODE_CHAR_NAME);
5123     }
5124 
5125     /**
5126      * <strong>[icu]</strong> Returns an empty iterator.
5127      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
5128      * @return an empty iterator
5129      * @deprecated ICU 49
5130      * @see #getName1_0(int)
5131      * @hide deprecated on icu4j-org
5132      */
5133     @Deprecated
getName1_0Iterator()5134     public static ValueIterator getName1_0Iterator(){
5135         return new DummyValueIterator();
5136     }
5137 
5138     private static final class DummyValueIterator implements ValueIterator {
5139         @Override
next(Element element)5140         public boolean next(Element element) { return false; }
5141         @Override
reset()5142         public void reset() {}
5143         @Override
setRange(int start, int limit)5144         public void setRange(int start, int limit) {}
5145     }
5146 
5147     /**
5148      * <strong>[icu]</strong> <p>Returns an iterator for character names, iterating over codepoints.
5149      * <p>This API only gets the iterator for the extended names.
5150      * For modern, most up-to-date Unicode names use getNameIterator() or
     * for older 1.0 Unicode names use getName1_0Iterator().
5152      * <p>Example of use:<br>
5153      * <pre>
5154      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
5155      * ValueIterator.Element element = new ValueIterator.Element();
5156      * while (iterator.next(element)) {
5157      *     System.out.println("Codepoint \\u" +
5158      *                        Integer.toHexString(element.codepoint) +
5159      *                        " has the name " + (String)element.value);
5160      * }
5161      * </pre>
     * <p>The maximal range which the name iterator iterates is from
     * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
5163      * @return an iterator
5164      */
getExtendedNameIterator()5165     public static ValueIterator getExtendedNameIterator(){
5166         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5167                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
5168     }
5169 
5170     /**
5171      * <strong>[icu]</strong> Returns the "age" of the code point.
5172      * <p>The "age" is the Unicode version when the code point was first
5173      * designated (as a non-character or for Private Use) or assigned a
5174      * character.
5175      * <p>This can be useful to avoid emitting code points to receiving
5176      * processes that do not accept newer characters.
5177      * <p>The data is from the UCD file DerivedAge.txt.
5178      * @param ch The code point.
5179      * @return the Unicode version number
5180      */
getAge(int ch)5181     public static VersionInfo getAge(int ch)
5182     {
5183         if (ch < MIN_VALUE || ch > MAX_VALUE) {
5184             throw new IllegalArgumentException("Codepoint out of bounds");
5185         }
5186         return UCharacterProperty.INSTANCE.getAge(ch);
5187     }
5188 
5189     /**
5190      * <strong>[icu]</strong> Check a binary Unicode property for a code point.
5191      * <p>Unicode, especially in version 3.2, defines many more properties
5192      * than the original set in UnicodeData.txt.
5193      * <p>This API is intended to reflect Unicode properties as defined in
5194      * the Unicode Character Database (UCD) and Unicode Technical Reports
5195      * (UTR).
5196      * <p>For details about the properties see
5197      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
5198      * <p>For names of Unicode properties see the UCD file
5199      * PropertyAliases.txt.
5200      * <p>This API does not check the validity of the codepoint.
5201      * <p>Important: If ICU is built with UCD files from Unicode versions
5202      * below 3.2, then properties marked with "new" are not or
5203      * not fully available.
5204      * @param ch code point to test.
5205      * @param property selector constant from ohos.global.icu.lang.UProperty,
5206      *        identifies which binary property to check.
5207      * @return true or false according to the binary Unicode property value
5208      *         for ch. Also false if property is out of bounds or if the
5209      *         Unicode version does not have data for the property at all, or
5210      *         not for this code point.
5211      * @see ohos.global.icu.lang.UProperty
5212      * @see CharacterProperties#getBinaryPropertySet(int)
5213      */
hasBinaryProperty(int ch, int property)5214     public static boolean hasBinaryProperty(int ch, int property)
5215     {
5216         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5217     }
5218 
5219     /**
5220      * <strong>[icu]</strong> <p>Check if a code point has the Alphabetic Unicode property.
5221      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
5222      * <p>Different from UCharacter.isLetter(ch)!
5223      * @param ch codepoint to be tested
5224      */
isUAlphabetic(int ch)5225     public static boolean isUAlphabetic(int ch)
5226     {
5227         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5228     }
5229 
5230     /**
5231      * <strong>[icu]</strong> <p>Check if a code point has the Lowercase Unicode property.
5232      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
5233      * <p>This is different from UCharacter.isLowerCase(ch)!
5234      * @param ch codepoint to be tested
5235      */
isULowercase(int ch)5236     public static boolean isULowercase(int ch)
5237     {
5238         return hasBinaryProperty(ch, UProperty.LOWERCASE);
5239     }
5240 
5241     /**
5242      * <strong>[icu]</strong> <p>Check if a code point has the Uppercase Unicode property.
5243      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
5244      * <p>This is different from UCharacter.isUpperCase(ch)!
5245      * @param ch codepoint to be tested
5246      */
isUUppercase(int ch)5247     public static boolean isUUppercase(int ch)
5248     {
5249         return hasBinaryProperty(ch, UProperty.UPPERCASE);
5250     }
5251 
5252     /**
5253      * <strong>[icu]</strong> <p>Check if a code point has the White_Space Unicode property.
5254      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
5255      * <p>This is different from both UCharacter.isSpace(ch) and
5256      * UCharacter.isWhitespace(ch)!
5257      * @param ch codepoint to be tested
5258      */
isUWhiteSpace(int ch)5259     public static boolean isUWhiteSpace(int ch)
5260     {
5261         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5262     }
5263 
5264     /**
5265      * <strong>[icu]</strong> Returns the property value for a Unicode property type of a code point.
5266      * Also returns binary and mask property values.
5267      * <p>Unicode, especially in version 3.2, defines many more properties than
5268      * the original set in UnicodeData.txt.
5269      * <p>The properties APIs are intended to reflect Unicode properties as
5270      * defined in the Unicode Character Database (UCD) and Unicode Technical
5271      * Reports (UTR). For details about the properties see
5272      * http://www.unicode.org/.
5273      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5274      *
5275      * <pre>
5276      * Sample usage:
5277      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5278      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5279      * boolean b = (ideo == 1) ? true : false;
5280      * </pre>
5281      * @param ch code point to test.
5282      * @param type UProperty selector constant, identifies which binary
5283      *        property to check. Must be
5284      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5285      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5286      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5287      * @return numeric value that is directly the property value or,
5288      *         for enumerated properties, corresponds to the numeric value of
5289      *         the enumerated constant of the respective property value type
5290      *         ({@link ECharacterCategory}, {@link ECharacterDirection},
5291      *         {@link DecompositionType}, etc.).
5292      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5293      *         Returns a bit-mask for mask properties.
5294      *         Returns 0 if 'type' is out of bounds or if the Unicode version
5295      *         does not have data for the property at all, or not for this code
5296      *         point.
5297      * @see UProperty
5298      * @see #hasBinaryProperty
5299      * @see #getIntPropertyMinValue
5300      * @see #getIntPropertyMaxValue
5301      * @see CharacterProperties#getIntPropertyMap(int)
5302      * @see #getUnicodeVersion
5303      */
getIntPropertyValue(int ch, int type)5304     public static int getIntPropertyValue(int ch, int type)
5305     {
5306         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
5307     }
5308     /**
5309      * <strong>[icu]</strong> Returns a string version of the property value.
5310      * @param propertyEnum The property enum value.
5311      * @param codepoint The codepoint value.
5312      * @param nameChoice The choice of the name.
5313      * @return value as string
5314      * @deprecated This API is ICU internal only.
5315      * @hide deprecated on icu4j-org
5316      * @hide draft / provisional / internal are hidden on OHOS
5317      */
5318     @Deprecated
5319     ///CLOVER:OFF
getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5320     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5321         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5322                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5323             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
5324                     nameChoice);
5325         }
5326         if (propertyEnum == UProperty.NUMERIC_VALUE) {
5327             return String.valueOf(getUnicodeNumericValue(codepoint));
5328         }
5329         // otherwise must be string property
5330         switch (propertyEnum) {
5331         case UProperty.AGE: return getAge(codepoint).toString();
5332         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5333         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
5334         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
5335         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5336         case UProperty.NAME: return getName(codepoint);
5337         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
5338         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5339         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5340         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5341         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5342         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5343         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5344         }
5345         throw new IllegalArgumentException("Illegal Property Enum");
5346     }
5347     ///CLOVER:ON
5348 
5349     /**
5350      * <strong>[icu]</strong> Returns the minimum value for an integer/binary Unicode property type.
5351      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5352      * to allocate arrays of ohos.global.icu.text.UnicodeSet or similar.
5353      * @param type UProperty selector constant, identifies which binary
5354      *        property to check. Must be
5355      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5356      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5357      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5358      *         for a Unicode property. 0 if the property
5359      *         selector 'type' is out of range.
5360      * @see UProperty
5361      * @see #hasBinaryProperty
5362      * @see #getUnicodeVersion
5363      * @see #getIntPropertyMaxValue
5364      * @see #getIntPropertyValue
5365      */
getIntPropertyMinValue(int type)5366     public static int getIntPropertyMinValue(int type){
5367 
5368         return 0; // undefined; and: all other properties have a minimum value of 0
5369     }
5370 
5371 
5372     /**
5373      * <strong>[icu]</strong> Returns the maximum value for an integer/binary Unicode property.
5374      * Can be used together with UCharacter.getIntPropertyMinValue(int)
5375      * to allocate arrays of ohos.global.icu.text.UnicodeSet or similar.
5376      * Examples for min/max values (for Unicode 3.2):
5377      * <ul>
5378      * <li> UProperty.BIDI_CLASS:    0/18
5379      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5380      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5381      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5382      * </ul>
5383      * For undefined UProperty constant values, min/max values will be 0/-1.
5384      * @param type UProperty selector constant, identifies which binary
5385      *        property to check. Must be
5386      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5387      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5388      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
5389      *         property. &lt;= 0 if the property selector 'type' is out of range.
5390      * @see UProperty
5391      * @see #hasBinaryProperty
5392      * @see #getUnicodeVersion
5393      * @see #getIntPropertyMaxValue
5394      * @see #getIntPropertyValue
5395      */
getIntPropertyMaxValue(int type)5396     public static int getIntPropertyMaxValue(int type)
5397     {
5398         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
5399     }
5400 
5401     /**
5402      * Provide the java.lang.Character forDigit API, for convenience.
5403      */
forDigit(int digit, int radix)5404     public static char forDigit(int digit, int radix) {
5405         return java.lang.Character.forDigit(digit, radix);
5406     }
5407 
5408     // JDK 1.5 API coverage
5409 
5410     /**
5411      * Lowest high (lead) surrogate, U+D800; same as {@link Character#MIN_HIGH_SURROGATE}.
5412      */
5413     public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;
5414 
5415     /**
5416      * Highest high (lead) surrogate, U+DBFF; same as {@link Character#MAX_HIGH_SURROGATE}.
5417      */
5418     public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;
5419 
5420     /**
5421      * Lowest low (trail) surrogate, U+DC00; same as {@link Character#MIN_LOW_SURROGATE}.
5422      */
5423     public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;
5424 
5425     /**
5426      * Highest low (trail) surrogate, U+DFFF; same as {@link Character#MAX_LOW_SURROGATE}.
5427      */
5428     public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;
5429 
5430     /**
5431      * Lowest surrogate of either kind, U+D800; same as {@link Character#MIN_SURROGATE}.
5432      */
5433     public static final char MIN_SURROGATE = Character.MIN_SURROGATE;
5434 
5435     /**
5436      * Highest surrogate of either kind, U+DFFF; same as {@link Character#MAX_SURROGATE}.
5437      */
5438     public static final char MAX_SURROGATE = Character.MAX_SURROGATE;
5439 
5440     /**
5441      * First supplementary code point, U+10000; same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
5442      */
5443     public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;
5444 
5445     /**
5446      * Highest code point, U+10FFFF; same as {@link Character#MAX_CODE_POINT}.
5447      */
5448     public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;
5449 
5450     /**
5451      * Lowest code point, U+0000; same as {@link Character#MIN_CODE_POINT}.
5452      */
5453     public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
5454 
5455     /**
5456      * Equivalent to {@link Character#isValidCodePoint}.
5457      *
5458      * @param cp the code point to check
5459      * @return true if cp is a valid code point
5460      */
isValidCodePoint(int cp)5461     public static final boolean isValidCodePoint(int cp) {
5462         return cp >= 0 && cp <= MAX_CODE_POINT;
5463     }
5464 
5465     /**
5466      * Same as {@link Character#isSupplementaryCodePoint}.
5467      *
5468      * @param cp the code point to check
5469      * @return true if cp is a supplementary code point
5470      */
isSupplementaryCodePoint(int cp)5471     public static final boolean isSupplementaryCodePoint(int cp) {
5472         return Character.isSupplementaryCodePoint(cp);
5473     }
5474 
5475     /**
5476      * Same as {@link Character#isHighSurrogate}.
5477      *
5478      * @param ch the char to check
5479      * @return true if ch is a high (lead) surrogate
5480      */
isHighSurrogate(char ch)5481     public static boolean isHighSurrogate(char ch) {
5482         return Character.isHighSurrogate(ch);
5483     }
5484 
5485     /**
5486      * Same as {@link Character#isLowSurrogate}.
5487      *
5488      * @param ch the char to check
5489      * @return true if ch is a low (trail) surrogate
5490      */
isLowSurrogate(char ch)5491     public static boolean isLowSurrogate(char ch) {
5492         return Character.isLowSurrogate(ch);
5493     }
5494 
5495     /**
5496      * Same as {@link Character#isSurrogatePair}.
5497      *
5498      * @param high the high (lead) char
5499      * @param low the low (trail) char
5500      * @return true if high, low form a surrogate pair
5501      */
isSurrogatePair(char high, char low)5502     public static final boolean isSurrogatePair(char high, char low) {
5503         return Character.isSurrogatePair(high, low);
5504     }
5505 
5506     /**
5507      * Same as {@link Character#charCount}.
5508      * Returns the number of chars needed to represent the code point (1 or 2).
5509      * This does not check the code point for validity.
5510      *
5511      * @param cp the code point to check
5512      * @return the number of chars needed to represent the code point
5513      */
charCount(int cp)5514     public static int charCount(int cp) {
5515         return Character.charCount(cp);
5516     }
5517 
5518     /**
5519      * Same as {@link Character#toCodePoint}.
5520      * Returns the code point represented by the two surrogate code units.
5521      * This does not check the surrogate pair for validity.
5522      *
5523      * @param high the high (lead) surrogate
5524      * @param low the low (trail) surrogate
5525      * @return the code point formed by the surrogate pair
5526      */
toCodePoint(char high, char low)5527     public static final int toCodePoint(char high, char low) {
5528         return Character.toCodePoint(high, low);
5529     }
5530 
5531     /**
5532      * Same as {@link Character#codePointAt(CharSequence, int)}.
5533      * Returns the code point at index.
5534      * This examines only the characters at index and index+1.
5535      *
5536      * @param seq the characters to check
5537      * @param index the index of the first or only char forming the code point
5538      * @return the code point at the index
5539      */
codePointAt(CharSequence seq, int index)5540     public static final int codePointAt(CharSequence seq, int index) {
5541         char c1 = seq.charAt(index++);
5542         if (isHighSurrogate(c1)) {
5543             if (index < seq.length()) {
5544                 char c2 = seq.charAt(index);
5545                 if (isLowSurrogate(c2)) {
5546                     return toCodePoint(c1, c2);
5547                 }
5548             }
5549         }
5550         return c1;
5551     }
5552 
5553     /**
5554      * Same as {@link Character#codePointAt(char[], int)}.
5555      * Returns the code point at index.
5556      * This examines only the characters at index and index+1.
5557      *
5558      * @param text the characters to check
5559      * @param index the index of the first or only char forming the code point
5560      * @return the code point at the index
5561      */
codePointAt(char[] text, int index)5562     public static final int codePointAt(char[] text, int index) {
5563         char c1 = text[index++];
5564         if (isHighSurrogate(c1)) {
5565             if (index < text.length) {
5566                 char c2 = text[index];
5567                 if (isLowSurrogate(c2)) {
5568                     return toCodePoint(c1, c2);
5569                 }
5570             }
5571         }
5572         return c1;
5573     }
5574 
5575     /**
5576      * Same as {@link Character#codePointAt(char[], int, int)}.
5577      * Returns the code point at index.
5578      * This examines only the characters at index and index+1.
5579      *
5580      * @param text the characters to check
5581      * @param index the index of the first or only char forming the code point
5582      * @param limit the limit of the valid text
5583      * @return the code point at the index
5584      */
codePointAt(char[] text, int index, int limit)5585     public static final int codePointAt(char[] text, int index, int limit) {
5586         if (index >= limit || limit > text.length) {
5587             throw new IndexOutOfBoundsException();
5588         }
5589         char c1 = text[index++];
5590         if (isHighSurrogate(c1)) {
5591             if (index < limit) {
5592                 char c2 = text[index];
5593                 if (isLowSurrogate(c2)) {
5594                     return toCodePoint(c1, c2);
5595                 }
5596             }
5597         }
5598         return c1;
5599     }
5600 
5601     /**
5602      * Same as {@link Character#codePointBefore(CharSequence, int)}.
5603      * Return the code point before index.
5604      * This examines only the characters at index-1 and index-2.
5605      *
5606      * @param seq the characters to check
5607      * @param index the index after the last or only char forming the code point
5608      * @return the code point before the index
5609      */
codePointBefore(CharSequence seq, int index)5610     public static final int codePointBefore(CharSequence seq, int index) {
5611         char c2 = seq.charAt(--index);
5612         if (isLowSurrogate(c2)) {
5613             if (index > 0) {
5614                 char c1 = seq.charAt(--index);
5615                 if (isHighSurrogate(c1)) {
5616                     return toCodePoint(c1, c2);
5617                 }
5618             }
5619         }
5620         return c2;
5621     }
5622 
5623     /**
5624      * Same as {@link Character#codePointBefore(char[], int)}.
5625      * Returns the code point before index.
5626      * This examines only the characters at index-1 and index-2.
5627      *
5628      * @param text the characters to check
5629      * @param index the index after the last or only char forming the code point
5630      * @return the code point before the index
5631      */
codePointBefore(char[] text, int index)5632     public static final int codePointBefore(char[] text, int index) {
5633         char c2 = text[--index];
5634         if (isLowSurrogate(c2)) {
5635             if (index > 0) {
5636                 char c1 = text[--index];
5637                 if (isHighSurrogate(c1)) {
5638                     return toCodePoint(c1, c2);
5639                 }
5640             }
5641         }
5642         return c2;
5643     }
5644 
5645     /**
5646      * Same as {@link Character#codePointBefore(char[], int, int)}.
5647      * Return the code point before index.
5648      * This examines only the characters at index-1 and index-2.
5649      *
5650      * @param text the characters to check
5651      * @param index the index after the last or only char forming the code point
5652      * @param limit the start of the valid text
5653      * @return the code point before the index
5654      */
codePointBefore(char[] text, int index, int limit)5655     public static final int codePointBefore(char[] text, int index, int limit) {
5656         if (index <= limit || limit < 0) {
5657             throw new IndexOutOfBoundsException();
5658         }
5659         char c2 = text[--index];
5660         if (isLowSurrogate(c2)) {
5661             if (index > limit) {
5662                 char c1 = text[--index];
5663                 if (isHighSurrogate(c1)) {
5664                     return toCodePoint(c1, c2);
5665                 }
5666             }
5667         }
5668         return c2;
5669     }
5670 
5671     /**
5672      * Same as {@link Character#toChars(int, char[], int)}.
5673      * Writes the chars representing the
5674      * code point into the destination at the given index.
5675      *
5676      * @param cp the code point to convert
5677      * @param dst the destination array into which to put the char(s) representing the code point
5678      * @param dstIndex the index at which to put the first (or only) char
5679      * @return the count of the number of chars written (1 or 2)
5680      * @throws IllegalArgumentException if cp is not a valid code point
5681      */
toChars(int cp, char[] dst, int dstIndex)5682     public static final int toChars(int cp, char[] dst, int dstIndex) {
5683         return Character.toChars(cp, dst, dstIndex);
5684     }
5685 
5686     /**
5687      * Same as {@link Character#toChars(int)}.
5688      * Returns a char array representing the code point.
5689      *
5690      * @param cp the code point to convert
5691      * @return an array containing the char(s) representing the code point
5692      * @throws IllegalArgumentException if cp is not a valid code point
5693      */
toChars(int cp)5694     public static final char[] toChars(int cp) {
5695         return Character.toChars(cp);
5696     }
5697 
5698     /**
5699      * Equivalent to the {@link Character#getDirectionality(char)} method, for
5700      * convenience. Returns a byte representing the directionality of the
5701      * character.
5702      *
5703      * <strong>[icu] Note:</strong> Unlike {@link Character#getDirectionality(char)}, this returns
5704      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
5705      *
5706      * <strong>[icu] Note:</strong> The return value must be tested using the constants defined in {@link
5707      * UCharacterDirection} and its interface {@link
5708      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
5709      * defined by <code>java.lang.Character</code>.
5710      * @param cp the code point to check
5711      * @return the directionality of the code point
5712      * @see #getDirection
5713      */
getDirectionality(int cp)5714     public static byte getDirectionality(int cp)
5715     {
5716         return (byte)getDirection(cp);
5717     }
5718 
5719     /**
5720      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
5721      * method, for convenience.  Counts the number of code points in the range
5722      * of text.
5723      * @param text the characters to check
5724      * @param start the start of the range
5725      * @param limit the limit of the range
5726      * @return the number of code points in the range
5727      */
codePointCount(CharSequence text, int start, int limit)5728     public static int codePointCount(CharSequence text, int start, int limit) {
5729         if (start < 0 || limit < start || limit > text.length()) {
5730             throw new IndexOutOfBoundsException("start (" + start +
5731                     ") or limit (" + limit +
5732                     ") invalid or out of range 0, " + text.length());
5733         }
5734 
5735         int len = limit - start;
5736         while (limit > start) {
5737             char ch = text.charAt(--limit);
5738             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5739                 ch = text.charAt(--limit);
5740                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5741                     --len;
5742                     break;
5743                 }
5744             }
5745         }
5746         return len;
5747     }
5748 
5749     /**
5750      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
5751      * convenience. Counts the number of code points in the range of text.
5752      * @param text the characters to check
5753      * @param start the start of the range
5754      * @param limit the limit of the range
5755      * @return the number of code points in the range
5756      */
codePointCount(char[] text, int start, int limit)5757     public static int codePointCount(char[] text, int start, int limit) {
5758         if (start < 0 || limit < start || limit > text.length) {
5759             throw new IndexOutOfBoundsException("start (" + start +
5760                     ") or limit (" + limit +
5761                     ") invalid or out of range 0, " + text.length);
5762         }
5763 
5764         int len = limit - start;
5765         while (limit > start) {
5766             char ch = text[--limit];
5767             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
5768                 ch = text[--limit];
5769                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
5770                     --len;
5771                     break;
5772                 }
5773             }
5774         }
5775         return len;
5776     }
5777 
5778     /**
5779      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
5780      * method, for convenience.  Adjusts the char index by a code point offset.
5781      * @param text the characters to check
5782      * @param index the index to adjust
5783      * @param codePointOffset the number of code points by which to offset the index
5784      * @return the adjusted index
5785      */
offsetByCodePoints(CharSequence text, int index, int codePointOffset)5786     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
5787         if (index < 0 || index > text.length()) {
5788             throw new IndexOutOfBoundsException("index ( " + index +
5789                     ") out of range 0, " + text.length());
5790         }
5791 
5792         if (codePointOffset < 0) {
5793             while (++codePointOffset <= 0) {
5794                 char ch = text.charAt(--index);
5795                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
5796                     ch = text.charAt(--index);
5797                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5798                         if (++codePointOffset > 0) {
5799                             return index+1;
5800                         }
5801                     }
5802                 }
5803             }
5804         } else {
5805             int limit = text.length();
5806             while (--codePointOffset >= 0) {
5807                 char ch = text.charAt(index++);
5808                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5809                     ch = text.charAt(index++);
5810                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5811                         if (--codePointOffset < 0) {
5812                             return index-1;
5813                         }
5814                     }
5815                 }
5816             }
5817         }
5818 
5819         return index;
5820     }
5821 
5822     /**
5823      * Equivalent to the
5824      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
5825      * method, for convenience.  Adjusts the char index by a code point offset.
5826      * @param text the characters to check
5827      * @param start the start of the range to check
5828      * @param count the length of the range to check
5829      * @param index the index to adjust
5830      * @param codePointOffset the number of code points by which to offset the index
5831      * @return the adjusted index
5832      */
offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)5833     public static int offsetByCodePoints(char[] text, int start, int count, int index,
5834             int codePointOffset) {
5835         int limit = start + count;
5836         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
5837             throw new IndexOutOfBoundsException("index ( " + index +
5838                     ") out of range " + start +
5839                     ", " + limit +
5840                     " in array 0, " + text.length);
5841         }
5842 
5843         if (codePointOffset < 0) {
5844             while (++codePointOffset <= 0) {
5845                 char ch = text[--index];
5846                 if (index < start) {
5847                     throw new IndexOutOfBoundsException("index ( " + index +
5848                             ") < start (" + start +
5849                             ")");
5850                 }
5851                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
5852                     ch = text[--index];
5853                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
5854                         if (++codePointOffset > 0) {
5855                             return index+1;
5856                         }
5857                     }
5858                 }
5859             }
5860         } else {
5861             while (--codePointOffset >= 0) {
5862                 char ch = text[index++];
5863                 if (index > limit) {
5864                     throw new IndexOutOfBoundsException("index ( " + index +
5865                             ") > limit (" + limit +
5866                             ")");
5867                 }
5868                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
5869                     ch = text[index++];
5870                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
5871                         if (--codePointOffset < 0) {
5872                             return index-1;
5873                         }
5874                     }
5875                 }
5876             }
5877         }
5878 
5879         return index;
5880     }
5881 
5882     // private variables -------------------------------------------------
5883 
5884     /**
5885      * To get the last character out from a data type
5886      */
5887     private static final int LAST_CHAR_MASK_ = 0xFFFF;
5888 
5889     //    /**
5890     //     * To get the last byte out from a data type
5891     //     */
5892     //    private static final int LAST_BYTE_MASK_ = 0xFF;
5893     //
5894     //    /**
5895     //     * Shift 16 bits
5896     //     */
5897     //    private static final int SHIFT_16_ = 16;
5898     //
5899     //    /**
5900     //     * Shift 24 bits
5901     //     */
5902     //    private static final int SHIFT_24_ = 24;
5903     //
5904     //    /**
5905     //     * Decimal radix
5906     //     */
5907     //    private static final int DECIMAL_RADIX_ = 10;
5908 
5909     /**
5910      * No break space code point
5911      */
5912     private static final int NO_BREAK_SPACE_ = 0xA0;
5913 
5914     /**
5915      * Figure space code point
5916      */
5917     private static final int FIGURE_SPACE_ = 0x2007;
5918 
5919     /**
5920      * Narrow no break space code point
5921      */
5922     private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
5923 
5924     /**
5925      * Ideographic number zero code point
5926      */
5927     private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
5928 
5929     /**
5930      * CJK Ideograph, First code point
5931      */
5932     private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
5933 
5934     /**
5935      * CJK Ideograph, Second code point
5936      */
5937     private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
5938 
5939     /**
5940      * CJK Ideograph, Third code point
5941      */
5942     private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
5943 
5944     /**
5945      * CJK Ideograph, Fourth code point
5946      */
5947     private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;
5948 
5949     /**
5950      * CJK Ideograph, FIFTH code point
5951      */
5952     private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
5953 
5954     /**
5955      * CJK Ideograph, Sixth code point
5956      */
5957     private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
5958 
5959     /**
5960      * CJK Ideograph, Seventh code point
5961      */
5962     private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
5963 
5964     /**
5965      * CJK Ideograph, Eighth code point
5966      */
5967     private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
5968 
5969     /**
5970      * CJK Ideograph, Nineth code point
5971      */
5972     private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
5973 
5974     /**
5975      * Application Program command code point
5976      */
5977     private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;
5978 
5979     /**
5980      * Unit separator code point
5981      */
5982     private static final int UNIT_SEPARATOR_ = 0x001F;
5983 
5984     /**
5985      * Delete code point
5986      */
5987     private static final int DELETE_ = 0x007F;
5988 
5989     /**
5990      * Han digit characters
5991      */
5992     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
5993     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
5994     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
5995     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
5996     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
5997     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
5998     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
5999     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
6000     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
6001     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
6002     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
6003     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
6004     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
6005     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
6006     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
6007     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
6008     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
6009     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6010 
6011     // private constructor -----------------------------------------------
6012     ///CLOVER:OFF
6013     /**
6014      * Private constructor to prevent instantiation
6015      */
UCharacter()6016     private UCharacter()
6017     {
6018     }
6019     ///CLOVER:ON
6020 }
6021