• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4  *******************************************************************************
5  * Copyright (C) 1996-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.lang;
11 
12 import java.lang.ref.SoftReference;
13 import java.util.HashMap;
14 import java.util.Iterator;
15 import java.util.Locale;
16 import java.util.Map;
17 
18 import com.ibm.icu.impl.CaseMapImpl;
19 import com.ibm.icu.impl.IllegalIcuArgumentException;
20 import com.ibm.icu.impl.Trie2;
21 import com.ibm.icu.impl.UBiDiProps;
22 import com.ibm.icu.impl.UCaseProps;
23 import com.ibm.icu.impl.UCharacterName;
24 import com.ibm.icu.impl.UCharacterNameChoice;
25 import com.ibm.icu.impl.UCharacterProperty;
26 import com.ibm.icu.impl.UCharacterUtility;
27 import com.ibm.icu.impl.UPropertyAliases;
28 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
29 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
30 import com.ibm.icu.text.BreakIterator;
31 import com.ibm.icu.text.Normalizer2;
32 import com.ibm.icu.util.RangeValueIterator;
33 import com.ibm.icu.util.ULocale;
34 import com.ibm.icu.util.ValueIterator;
35 import com.ibm.icu.util.VersionInfo;
36 
37 /**
38  * {@icuenhanced java.lang.Character}.{@icu _usage_}
39  *
40  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
41  * These extensions provide support for more Unicode properties.
42  * Each ICU release supports the latest version of Unicode available at that time.
43  *
44  * <p>For some time before Java 5 added support for supplementary Unicode code points,
45  * the ICU UCharacter class and many other ICU classes already supported them.
46  * Some UCharacter methods and constants were widened slightly differently than
47  * how the Character class methods and constants were widened later.
48  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
49  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
50  *
51  * <p>Code points are represented in these API using ints. While it would be
52  * more convenient in Java to have a separate primitive datatype for them,
53  * ints suffice in the meantime.
54  *
55  * <p>To use this class please add the jar file icu4j.jar to the
56  * class path, since it contains data files which supply the information used
57  * by this file.<br>
58  * E.g. In Windows <br>
59  * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
60  * Otherwise, another method would be to copy the files uprops.dat and
61  * unames.icu from the icu4j source subdirectory
62  * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
63  * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
64  *
65  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
66  * properties, the main differences between UCharacter and Character are:
67  * <ul>
68  * <li> UCharacter is not designed to be a char wrapper and does not have
69  *      APIs that involve management of that single char.<br>
70  *      These include:
71  *      <ul>
72  *        <li> char charValue(),
73  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
74  *      </ul>
75  * <li> UCharacter does not include Character APIs that are deprecated, nor
76  *      does it include the Java-specific character information, such as
77  *      boolean isJavaIdentifierPart(char ch).
78  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
79  *      values '10' - '35'. UCharacter also does this in digit and
80  *      getNumericValue, to adhere to the java semantics of these
81  *      methods.  New methods unicodeDigit, and
82  *      getUnicodeNumericValue do not treat the above code points
83  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
84  * </ul>
85  * <p>
86  * Further detail on differences can be determined using the program
87  *        <a href=
88  * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
89  *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
90  * <p>
91  * In addition to Java compatibility functions, which calculate derived properties,
92  * this API provides low-level access to the Unicode Character Database.
93  * <p>
94  * Unicode assigns each code point (not just assigned characters) values for
95  * many properties.
96  * Most of them are simple boolean flags, or constants from a small enumerated list.
97  * For some properties, values are strings or other relatively more complex types.
98  * <p>
99  * For more information see
100  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
101  * (http://www.unicode.org/ucd/)
102  * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
103  * User Guide chapter on Properties</a>
104  * (http://www.icu-project.org/userguide/properties.html).
105  * <p>
106  * There are also functions that provide easy migration from C/POSIX functions
107  * like isblank(). Their use is generally discouraged because the C/POSIX
108  * standards do not define their semantics beyond the ASCII range, which means
109  * that different implementations exhibit very different behavior.
110  * Instead, Unicode properties should be used directly.
111  * <p>
112  * There are also only a few, broad C/POSIX character classes, and they tend
113  * to be used for conflicting purposes. For example, the "isalpha()" class
114  * is sometimes used to determine word boundaries, while a more sophisticated
115  * approach would at least distinguish initial letters from continuation
116  * characters (the latter including combining marks).
117  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
118  * Another example: There is no "istitle()" class for titlecase characters.
119  * <p>
120  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
121  * ICU implements them according to the Standard Recommendations in
122  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
123  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
124  * <p>
125  * API access for C/POSIX character classes is as follows:
126  * <pre>{@code
127  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
128  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
129  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
130  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
131  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
132  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
133  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
134  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
135  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
136  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
137  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
138  * - cntrl:     getType(c)==CONTROL
139  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
140  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
141  * <p>
142  * The C/POSIX character classes are also available in UnicodeSet patterns,
143  * using patterns like [:graph:] or \p{graph}.
144  *
145  * <p>{@icunote} There are several ICU (and Java) whitespace functions.
146  * Comparison:<ul>
147  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
148  *       most of general categories "Z" (separators) + most whitespace ISO controls
149  *       (including no-break spaces, but excluding IS1..IS4)
150  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
151  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
152  *
153  * <p>
154  * This class is not subclassable.
155  *
156  * @author Syn Wee Quek
157  * @stable ICU 2.1
158  * @see com.ibm.icu.lang.UCharacterEnums
159  */
160 
161 public final class UCharacter implements ECharacterCategory, ECharacterDirection
162 {
163     // public inner classes ----------------------------------------------
164 
165     /**
166      * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_}
167      *
168      * A family of character subsets representing the character blocks in the
169      * Unicode specification, generated from Unicode Data file Blocks.txt.
170      * Character blocks generally define characters used for a specific script
171      * or purpose. A character is contained by at most one Unicode block.
172      *
173      * {@icunote} All fields named XXX_ID are specific to ICU.
174      *
175      * @stable ICU 2.4
176      */
177     public static final class UnicodeBlock extends Character.Subset
178     {
179         // block id corresponding to icu4c -----------------------------------
180 
181         /**
182          * @stable ICU 2.4
183          */
184         public static final int INVALID_CODE_ID = -1;
185         /**
186          * @stable ICU 2.4
187          */
188         public static final int BASIC_LATIN_ID = 1;
189         /**
190          * @stable ICU 2.4
191          */
192         public static final int LATIN_1_SUPPLEMENT_ID = 2;
193         /**
194          * @stable ICU 2.4
195          */
196         public static final int LATIN_EXTENDED_A_ID = 3;
197         /**
198          * @stable ICU 2.4
199          */
200         public static final int LATIN_EXTENDED_B_ID = 4;
201         /**
202          * @stable ICU 2.4
203          */
204         public static final int IPA_EXTENSIONS_ID = 5;
205         /**
206          * @stable ICU 2.4
207          */
208         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
209         /**
210          * @stable ICU 2.4
211          */
212         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
213         /**
214          * Unicode 3.2 renames this block to "Greek and Coptic".
215          * @stable ICU 2.4
216          */
217         public static final int GREEK_ID = 8;
218         /**
219          * @stable ICU 2.4
220          */
221         public static final int CYRILLIC_ID = 9;
222         /**
223          * @stable ICU 2.4
224          */
225         public static final int ARMENIAN_ID = 10;
226         /**
227          * @stable ICU 2.4
228          */
229         public static final int HEBREW_ID = 11;
230         /**
231          * @stable ICU 2.4
232          */
233         public static final int ARABIC_ID = 12;
234         /**
235          * @stable ICU 2.4
236          */
237         public static final int SYRIAC_ID = 13;
238         /**
239          * @stable ICU 2.4
240          */
241         public static final int THAANA_ID = 14;
242         /**
243          * @stable ICU 2.4
244          */
245         public static final int DEVANAGARI_ID = 15;
246         /**
247          * @stable ICU 2.4
248          */
249         public static final int BENGALI_ID = 16;
250         /**
251          * @stable ICU 2.4
252          */
253         public static final int GURMUKHI_ID = 17;
254         /**
255          * @stable ICU 2.4
256          */
257         public static final int GUJARATI_ID = 18;
258         /**
259          * @stable ICU 2.4
260          */
261         public static final int ORIYA_ID = 19;
262         /**
263          * @stable ICU 2.4
264          */
265         public static final int TAMIL_ID = 20;
266         /**
267          * @stable ICU 2.4
268          */
269         public static final int TELUGU_ID = 21;
270         /**
271          * @stable ICU 2.4
272          */
273         public static final int KANNADA_ID = 22;
274         /**
275          * @stable ICU 2.4
276          */
277         public static final int MALAYALAM_ID = 23;
278         /**
279          * @stable ICU 2.4
280          */
281         public static final int SINHALA_ID = 24;
282         /**
283          * @stable ICU 2.4
284          */
285         public static final int THAI_ID = 25;
286         /**
287          * @stable ICU 2.4
288          */
289         public static final int LAO_ID = 26;
290         /**
291          * @stable ICU 2.4
292          */
293         public static final int TIBETAN_ID = 27;
294         /**
295          * @stable ICU 2.4
296          */
297         public static final int MYANMAR_ID = 28;
298         /**
299          * @stable ICU 2.4
300          */
301         public static final int GEORGIAN_ID = 29;
302         /**
303          * @stable ICU 2.4
304          */
305         public static final int HANGUL_JAMO_ID = 30;
306         /**
307          * @stable ICU 2.4
308          */
309         public static final int ETHIOPIC_ID = 31;
310         /**
311          * @stable ICU 2.4
312          */
313         public static final int CHEROKEE_ID = 32;
314         /**
315          * @stable ICU 2.4
316          */
317         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
318         /**
319          * @stable ICU 2.4
320          */
321         public static final int OGHAM_ID = 34;
322         /**
323          * @stable ICU 2.4
324          */
325         public static final int RUNIC_ID = 35;
326         /**
327          * @stable ICU 2.4
328          */
329         public static final int KHMER_ID = 36;
330         /**
331          * @stable ICU 2.4
332          */
333         public static final int MONGOLIAN_ID = 37;
334         /**
335          * @stable ICU 2.4
336          */
337         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
338         /**
339          * @stable ICU 2.4
340          */
341         public static final int GREEK_EXTENDED_ID = 39;
342         /**
343          * @stable ICU 2.4
344          */
345         public static final int GENERAL_PUNCTUATION_ID = 40;
346         /**
347          * @stable ICU 2.4
348          */
349         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
350         /**
351          * @stable ICU 2.4
352          */
353         public static final int CURRENCY_SYMBOLS_ID = 42;
354         /**
355          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
356          * Symbols".
357          * @stable ICU 2.4
358          */
359         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
360         /**
361          * @stable ICU 2.4
362          */
363         public static final int LETTERLIKE_SYMBOLS_ID = 44;
364         /**
365          * @stable ICU 2.4
366          */
367         public static final int NUMBER_FORMS_ID = 45;
368         /**
369          * @stable ICU 2.4
370          */
371         public static final int ARROWS_ID = 46;
372         /**
373          * @stable ICU 2.4
374          */
375         public static final int MATHEMATICAL_OPERATORS_ID = 47;
376         /**
377          * @stable ICU 2.4
378          */
379         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
380         /**
381          * @stable ICU 2.4
382          */
383         public static final int CONTROL_PICTURES_ID = 49;
384         /**
385          * @stable ICU 2.4
386          */
387         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
388         /**
389          * @stable ICU 2.4
390          */
391         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
392         /**
393          * @stable ICU 2.4
394          */
395         public static final int BOX_DRAWING_ID = 52;
396         /**
397          * @stable ICU 2.4
398          */
399         public static final int BLOCK_ELEMENTS_ID = 53;
400         /**
401          * @stable ICU 2.4
402          */
403         public static final int GEOMETRIC_SHAPES_ID = 54;
404         /**
405          * @stable ICU 2.4
406          */
407         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
408         /**
409          * @stable ICU 2.4
410          */
411         public static final int DINGBATS_ID = 56;
412         /**
413          * @stable ICU 2.4
414          */
415         public static final int BRAILLE_PATTERNS_ID = 57;
416         /**
417          * @stable ICU 2.4
418          */
419         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
420         /**
421          * @stable ICU 2.4
422          */
423         public static final int KANGXI_RADICALS_ID = 59;
424         /**
425          * @stable ICU 2.4
426          */
427         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
428         /**
429          * @stable ICU 2.4
430          */
431         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
432         /**
433          * @stable ICU 2.4
434          */
435         public static final int HIRAGANA_ID = 62;
436         /**
437          * @stable ICU 2.4
438          */
439         public static final int KATAKANA_ID = 63;
440         /**
441          * @stable ICU 2.4
442          */
443         public static final int BOPOMOFO_ID = 64;
444         /**
445          * @stable ICU 2.4
446          */
447         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
448         /**
449          * @stable ICU 2.4
450          */
451         public static final int KANBUN_ID = 66;
452         /**
453          * @stable ICU 2.4
454          */
455         public static final int BOPOMOFO_EXTENDED_ID = 67;
456         /**
457          * @stable ICU 2.4
458          */
459         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
460         /**
461          * @stable ICU 2.4
462          */
463         public static final int CJK_COMPATIBILITY_ID = 69;
464         /**
465          * @stable ICU 2.4
466          */
467         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
468         /**
469          * @stable ICU 2.4
470          */
471         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
472         /**
473          * @stable ICU 2.4
474          */
475         public static final int YI_SYLLABLES_ID = 72;
476         /**
477          * @stable ICU 2.4
478          */
479         public static final int YI_RADICALS_ID = 73;
480         /**
481          * @stable ICU 2.4
482          */
483         public static final int HANGUL_SYLLABLES_ID = 74;
484         /**
485          * @stable ICU 2.4
486          */
487         public static final int HIGH_SURROGATES_ID = 75;
488         /**
489          * @stable ICU 2.4
490          */
491         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
492         /**
493          * @stable ICU 2.4
494          */
495         public static final int LOW_SURROGATES_ID = 77;
496         /**
497          * Same as public static final int PRIVATE_USE_ID.
498          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
499          * and multiple code point ranges had this block.
500          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
501          * and adds separate blocks for the supplementary PUAs.
502          * @stable ICU 2.4
503          */
504         public static final int PRIVATE_USE_AREA_ID = 78;
505         /**
506          * Same as public static final int PRIVATE_USE_AREA_ID.
507          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
508          * and multiple code point ranges had this block.
509          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
510          * and adds separate blocks for the supplementary PUAs.
511          * @stable ICU 2.4
512          */
513         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
514         /**
515          * @stable ICU 2.4
516          */
517         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
518         /**
519          * @stable ICU 2.4
520          */
521         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
522         /**
523          * @stable ICU 2.4
524          */
525         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
526         /**
527          * @stable ICU 2.4
528          */
529         public static final int COMBINING_HALF_MARKS_ID = 82;
530         /**
531          * @stable ICU 2.4
532          */
533         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
534         /**
535          * @stable ICU 2.4
536          */
537         public static final int SMALL_FORM_VARIANTS_ID = 84;
538         /**
539          * @stable ICU 2.4
540          */
541         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
542         /**
543          * @stable ICU 2.4
544          */
545         public static final int SPECIALS_ID = 86;
546         /**
547          * @stable ICU 2.4
548          */
549         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
550         /**
551          * @stable ICU 2.4
552          */
553         public static final int OLD_ITALIC_ID = 88;
554         /**
555          * @stable ICU 2.4
556          */
557         public static final int GOTHIC_ID = 89;
558         /**
559          * @stable ICU 2.4
560          */
561         public static final int DESERET_ID = 90;
562         /**
563          * @stable ICU 2.4
564          */
565         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
566         /**
567          * @stable ICU 2.4
568          */
569         public static final int MUSICAL_SYMBOLS_ID = 92;
570         /**
571          * @stable ICU 2.4
572          */
573         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
574         /**
575          * @stable ICU 2.4
576          */
577         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
578         /**
579          * @stable ICU 2.4
580          */
581         public static final int
582         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
583         /**
584          * @stable ICU 2.4
585          */
586         public static final int TAGS_ID = 96;
587 
588         // New blocks in Unicode 3.2
589 
590         /**
591          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
592          * @stable ICU 2.4
593          */
594         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
595         /**
596          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
597          * @stable ICU 3.0
598          */
599 
600         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
601         /**
602          * @stable ICU 2.4
603          */
604         public static final int TAGALOG_ID = 98;
605         /**
606          * @stable ICU 2.4
607          */
608         public static final int HANUNOO_ID = 99;
609         /**
610          * @stable ICU 2.4
611          */
612         public static final int BUHID_ID = 100;
613         /**
614          * @stable ICU 2.4
615          */
616         public static final int TAGBANWA_ID = 101;
617         /**
618          * @stable ICU 2.4
619          */
620         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
621         /**
622          * @stable ICU 2.4
623          */
624         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
625         /**
626          * @stable ICU 2.4
627          */
628         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
629         /**
630          * @stable ICU 2.4
631          */
632         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
633         /**
634          * @stable ICU 2.4
635          */
636         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
637         /**
638          * @stable ICU 2.4
639          */
640         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
641         /**
642          * @stable ICU 2.4
643          */
644         public static final int VARIATION_SELECTORS_ID = 108;
645         /**
646          * @stable ICU 2.4
647          */
648         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
649         /**
650          * @stable ICU 2.4
651          */
652         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
653 
654         /**
655          * @stable ICU 2.6
656          */
657         public static final int LIMBU_ID = 111; /*[1900]*/
658         /**
659          * @stable ICU 2.6
660          */
661         public static final int TAI_LE_ID = 112; /*[1950]*/
662         /**
663          * @stable ICU 2.6
664          */
665         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
666         /**
667          * @stable ICU 2.6
668          */
669         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
670         /**
671          * @stable ICU 2.6
672          */
673         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
674         /**
675          * @stable ICU 2.6
676          */
677         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
678         /**
679          * @stable ICU 2.6
680          */
681         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
682         /**
683          * @stable ICU 2.6
684          */
685         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
686         /**
687          * @stable ICU 2.6
688          */
689         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
690         /**
691          * @stable ICU 2.6
692          */
693         public static final int UGARITIC_ID = 120; /*[10380]*/
694         /**
695          * @stable ICU 2.6
696          */
697         public static final int SHAVIAN_ID = 121; /*[10450]*/
698         /**
699          * @stable ICU 2.6
700          */
701         public static final int OSMANYA_ID = 122; /*[10480]*/
702         /**
703          * @stable ICU 2.6
704          */
705         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
706         /**
707          * @stable ICU 2.6
708          */
709         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
710         /**
711          * @stable ICU 2.6
712          */
713         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
714 
715         /* New blocks in Unicode 4.1 */
716 
717         /**
718          * @stable ICU 3.4
719          */
720         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
721 
722         /**
723          * @stable ICU 3.4
724          */
725         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
726 
727         /**
728          * @stable ICU 3.4
729          */
730         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
731 
732         /**
733          * @stable ICU 3.4
734          */
735         public static final int BUGINESE_ID = 129; /*[1A00]*/
736 
737         /**
738          * @stable ICU 3.4
739          */
740         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
741 
742         /**
743          * @stable ICU 3.4
744          */
745         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
746 
747         /**
748          * @stable ICU 3.4
749          */
750         public static final int COPTIC_ID = 132; /*[2C80]*/
751 
752         /**
753          * @stable ICU 3.4
754          */
755         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
756 
757         /**
758          * @stable ICU 3.4
759          */
760         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
761 
762         /**
763          * @stable ICU 3.4
764          */
765         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
766 
767         /**
768          * @stable ICU 3.4
769          */
770         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
771 
772         /**
773          * @stable ICU 3.4
774          */
775         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
776 
777         /**
778          * @stable ICU 3.4
779          */
780         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
781 
782         /**
783          * @stable ICU 3.4
784          */
785         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
786 
787         /**
788          * @stable ICU 3.4
789          */
790         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
791 
792         /**
793          * @stable ICU 3.4
794          */
795         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
796 
797         /**
798          * @stable ICU 3.4
799          */
800         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
801 
802         /**
803          * @stable ICU 3.4
804          */
805         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
806 
807         /**
808          * @stable ICU 3.4
809          */
810         public static final int TIFINAGH_ID = 144; /*[2D30]*/
811 
812         /**
813          * @stable ICU 3.4
814          */
815         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
816 
817         /* New blocks in Unicode 5.0 */
818 
819         /**
820          * @stable ICU 3.6
821          */
822         public static final int NKO_ID = 146; /*[07C0]*/
823         /**
824          * @stable ICU 3.6
825          */
826         public static final int BALINESE_ID = 147; /*[1B00]*/
827         /**
828          * @stable ICU 3.6
829          */
830         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
831         /**
832          * @stable ICU 3.6
833          */
834         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
835         /**
836          * @stable ICU 3.6
837          */
838         public static final int PHAGS_PA_ID = 150; /*[A840]*/
839         /**
840          * @stable ICU 3.6
841          */
842         public static final int PHOENICIAN_ID = 151; /*[10900]*/
843         /**
844          * @stable ICU 3.6
845          */
846         public static final int CUNEIFORM_ID = 152; /*[12000]*/
847         /**
848          * @stable ICU 3.6
849          */
850         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
851         /**
852          * @stable ICU 3.6
853          */
854         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
855 
856         /**
857          * @stable ICU 4.0
858          */
859         public static final int SUNDANESE_ID = 155; /* [1B80] */
860 
861         /**
862          * @stable ICU 4.0
863          */
864         public static final int LEPCHA_ID = 156; /* [1C00] */
865 
866         /**
867          * @stable ICU 4.0
868          */
869         public static final int OL_CHIKI_ID = 157; /* [1C50] */
870 
871         /**
872          * @stable ICU 4.0
873          */
874         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
875 
876         /**
877          * @stable ICU 4.0
878          */
879         public static final int VAI_ID = 159; /* [A500] */
880 
881         /**
882          * @stable ICU 4.0
883          */
884         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
885 
886         /**
887          * @stable ICU 4.0
888          */
889         public static final int SAURASHTRA_ID = 161; /* [A880] */
890 
891         /**
892          * @stable ICU 4.0
893          */
894         public static final int KAYAH_LI_ID = 162; /* [A900] */
895 
896         /**
897          * @stable ICU 4.0
898          */
899         public static final int REJANG_ID = 163; /* [A930] */
900 
901         /**
902          * @stable ICU 4.0
903          */
904         public static final int CHAM_ID = 164; /* [AA00] */
905 
906         /**
907          * @stable ICU 4.0
908          */
909         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
910 
911         /**
912          * @stable ICU 4.0
913          */
914         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
915 
916         /**
917          * @stable ICU 4.0
918          */
919         public static final int LYCIAN_ID = 167; /* [10280] */
920 
921         /**
922          * @stable ICU 4.0
923          */
924         public static final int CARIAN_ID = 168; /* [102A0] */
925 
926         /**
927          * @stable ICU 4.0
928          */
929         public static final int LYDIAN_ID = 169; /* [10920] */
930 
931         /**
932          * @stable ICU 4.0
933          */
934         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
935 
936         /**
937          * @stable ICU 4.0
938          */
939         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
940 
941         /* New blocks in Unicode 5.2 */
942 
943         /** @stable ICU 4.4 */
944         public static final int SAMARITAN_ID = 172; /*[0800]*/
945         /** @stable ICU 4.4 */
946         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
947         /** @stable ICU 4.4 */
948         public static final int TAI_THAM_ID = 174; /*[1A20]*/
949         /** @stable ICU 4.4 */
950         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
951         /** @stable ICU 4.4 */
952         public static final int LISU_ID = 176; /*[A4D0]*/
953         /** @stable ICU 4.4 */
954         public static final int BAMUM_ID = 177; /*[A6A0]*/
955         /** @stable ICU 4.4 */
956         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
957         /** @stable ICU 4.4 */
958         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
959         /** @stable ICU 4.4 */
960         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
961         /** @stable ICU 4.4 */
962         public static final int JAVANESE_ID = 181; /*[A980]*/
963         /** @stable ICU 4.4 */
964         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
965         /** @stable ICU 4.4 */
966         public static final int TAI_VIET_ID = 183; /*[AA80]*/
967         /** @stable ICU 4.4 */
968         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
969         /** @stable ICU 4.4 */
970         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
971         /** @stable ICU 4.4 */
972         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
973         /** @stable ICU 4.4 */
974         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
975         /** @stable ICU 4.4 */
976         public static final int AVESTAN_ID = 188; /*[10B00]*/
977         /** @stable ICU 4.4 */
978         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
979         /** @stable ICU 4.4 */
980         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
981         /** @stable ICU 4.4 */
982         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
983         /** @stable ICU 4.4 */
984         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
985         /** @stable ICU 4.4 */
986         public static final int KAITHI_ID = 193; /*[11080]*/
987         /** @stable ICU 4.4 */
988         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
989         /** @stable ICU 4.4 */
990         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
991         /** @stable ICU 4.4 */
992         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
993         /** @stable ICU 4.4 */
994         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
995 
996         /* New blocks in Unicode 6.0 */
997 
998         /** @stable ICU 4.6 */
999         public static final int MANDAIC_ID = 198; /*[0840]*/
1000         /** @stable ICU 4.6 */
1001         public static final int BATAK_ID = 199; /*[1BC0]*/
1002         /** @stable ICU 4.6 */
1003         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
1004         /** @stable ICU 4.6 */
1005         public static final int BRAHMI_ID = 201; /*[11000]*/
1006         /** @stable ICU 4.6 */
1007         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
1008         /** @stable ICU 4.6 */
1009         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
1010         /** @stable ICU 4.6 */
1011         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
1012         /** @stable ICU 4.6 */
1013         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
1014         /** @stable ICU 4.6 */
1015         public static final int EMOTICONS_ID = 206; /*[1F600]*/
1016         /** @stable ICU 4.6 */
1017         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
1018         /** @stable ICU 4.6 */
1019         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
1020         /** @stable ICU 4.6 */
1021         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
1022 
1023         /* New blocks in Unicode 6.1 */
1024 
1025         /** @stable ICU 49 */
1026         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
1027         /** @stable ICU 49 */
1028         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
1029         /** @stable ICU 49 */
1030         public static final int CHAKMA_ID = 212; /*[11100]*/
1031         /** @stable ICU 49 */
1032         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
1033         /** @stable ICU 49 */
1034         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
1035         /** @stable ICU 49 */
1036         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
1037         /** @stable ICU 49 */
1038         public static final int MIAO_ID = 216; /*[16F00]*/
1039         /** @stable ICU 49 */
1040         public static final int SHARADA_ID = 217; /*[11180]*/
1041         /** @stable ICU 49 */
1042         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
1043         /** @stable ICU 49 */
1044         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
1045         /** @stable ICU 49 */
1046         public static final int TAKRI_ID = 220; /*[11680]*/
1047 
1048         /* New blocks in Unicode 7.0 */
1049 
1050         /** @stable ICU 54 */
1051         public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
1052         /** @stable ICU 54 */
1053         public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
1054         /** @stable ICU 54 */
1055         public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
1056         /** @stable ICU 54 */
1057         public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
1058         /** @stable ICU 54 */
1059         public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
1060         /** @stable ICU 54 */
1061         public static final int ELBASAN_ID = 226; /*[10500]*/
1062         /** @stable ICU 54 */
1063         public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
1064         /** @stable ICU 54 */
1065         public static final int GRANTHA_ID = 228; /*[11300]*/
1066         /** @stable ICU 54 */
1067         public static final int KHOJKI_ID = 229; /*[11200]*/
1068         /** @stable ICU 54 */
1069         public static final int KHUDAWADI_ID = 230; /*[112B0]*/
1070         /** @stable ICU 54 */
1071         public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
1072         /** @stable ICU 54 */
1073         public static final int LINEAR_A_ID = 232; /*[10600]*/
1074         /** @stable ICU 54 */
1075         public static final int MAHAJANI_ID = 233; /*[11150]*/
1076         /** @stable ICU 54 */
1077         public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
1078         /** @stable ICU 54 */
1079         public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
1080         /** @stable ICU 54 */
1081         public static final int MODI_ID = 236; /*[11600]*/
1082         /** @stable ICU 54 */
1083         public static final int MRO_ID = 237; /*[16A40]*/
1084         /** @stable ICU 54 */
1085         public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
1086         /** @stable ICU 54 */
1087         public static final int NABATAEAN_ID = 239; /*[10880]*/
1088         /** @stable ICU 54 */
1089         public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
1090         /** @stable ICU 54 */
1091         public static final int OLD_PERMIC_ID = 241; /*[10350]*/
1092         /** @stable ICU 54 */
1093         public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
1094         /** @stable ICU 54 */
1095         public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
1096         /** @stable ICU 54 */
1097         public static final int PALMYRENE_ID = 244; /*[10860]*/
1098         /** @stable ICU 54 */
1099         public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
1100         /** @stable ICU 54 */
1101         public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
1102         /** @stable ICU 54 */
1103         public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
1104         /** @stable ICU 54 */
1105         public static final int SIDDHAM_ID = 248; /*[11580]*/
1106         /** @stable ICU 54 */
1107         public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
1108         /** @stable ICU 54 */
1109         public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
1110         /** @stable ICU 54 */
1111         public static final int TIRHUTA_ID = 251; /*[11480]*/
1112         /** @stable ICU 54 */
1113         public static final int WARANG_CITI_ID = 252; /*[118A0]*/
1114 
1115         /* New blocks in Unicode 8.0 */
1116 
1117         /** @stable ICU 56 */
1118         public static final int AHOM_ID = 253; /*[11700]*/
1119         /** @stable ICU 56 */
1120         public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
1121         /** @stable ICU 56 */
1122         public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
1123         /** @stable ICU 56 */
1124         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
1125         /** @stable ICU 56 */
1126         public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
1127         /** @stable ICU 56 */
1128         public static final int HATRAN_ID = 258; /*[108E0]*/
1129         /** @stable ICU 56 */
1130         public static final int MULTANI_ID = 259; /*[11280]*/
1131         /** @stable ICU 56 */
1132         public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
1133         /** @stable ICU 56 */
1134         public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
1135         /** @stable ICU 56 */
1136         public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
1137 
1138         /* New blocks in Unicode 9.0 */
1139 
1140         /** @stable ICU 58 */
1141         public static final int ADLAM_ID = 263; /*[1E900]*/
1142         /** @stable ICU 58 */
1143         public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
1144         /** @stable ICU 58 */
1145         public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
1146         /** @stable ICU 58 */
1147         public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
1148         /** @stable ICU 58 */
1149         public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
1150         /** @stable ICU 58 */
1151         public static final int MARCHEN_ID = 268; /*[11C70]*/
1152         /** @stable ICU 58 */
1153         public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
1154         /** @stable ICU 58 */
1155         public static final int NEWA_ID = 270; /*[11400]*/
1156         /** @stable ICU 58 */
1157         public static final int OSAGE_ID = 271; /*[104B0]*/
1158         /** @stable ICU 58 */
1159         public static final int TANGUT_ID = 272; /*[17000]*/
1160         /** @stable ICU 58 */
1161         public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/
1162 
1163         // New blocks in Unicode 10.0
1164 
1165         /** @stable ICU 60 */
1166         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/
1167         /** @stable ICU 60 */
1168         public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/
1169         /** @stable ICU 60 */
1170         public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/
1171         /** @stable ICU 60 */
1172         public static final int NUSHU_ID = 277; /*[1B170]*/
1173         /** @stable ICU 60 */
1174         public static final int SOYOMBO_ID = 278; /*[11A50]*/
1175         /** @stable ICU 60 */
1176         public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/
1177         /** @stable ICU 60 */
1178         public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/
1179 
1180         // New blocks in Unicode 11.0
1181 
1182         /** @stable ICU 62 */
1183         public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/
1184         /** @stable ICU 62 */
1185         public static final int DOGRA_ID = 282; /*[11800]*/
1186         /** @stable ICU 62 */
1187         public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/
1188         /** @stable ICU 62 */
1189         public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/
1190         /** @stable ICU 62 */
1191         public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/
1192         /** @stable ICU 62 */
1193         public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/
1194         /** @stable ICU 62 */
1195         public static final int MAKASAR_ID = 287; /*[11EE0]*/
1196         /** @stable ICU 62 */
1197         public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/
1198         /** @stable ICU 62 */
1199         public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/
1200         /** @stable ICU 62 */
1201         public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/
1202         /** @stable ICU 62 */
1203         public static final int SOGDIAN_ID = 291; /*[10F30]*/
1204 
1205         // New blocks in Unicode 12.0
1206 
1207         /** @stable ICU 64 */
1208         public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/
1209         /** @stable ICU 64 */
1210         public static final int ELYMAIC_ID = 293; /*[10FE0]*/
1211         /** @stable ICU 64 */
1212         public static final int NANDINAGARI_ID = 294; /*[119A0]*/
1213         /** @stable ICU 64 */
1214         public static final int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/
1215         /** @stable ICU 64 */
1216         public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/
1217         /** @stable ICU 64 */
1218         public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/
1219         /** @stable ICU 64 */
1220         public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/
1221         /** @stable ICU 64 */
1222         public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/
1223         /** @stable ICU 64 */
1224         public static final int WANCHO_ID = 300; /*[1E2C0]*/
1225 
1226         // New blocks in Unicode 13.0
1227 
1228         /** @stable ICU 66 */
1229         public static final int CHORASMIAN_ID = 301; /*[10FB0]*/
1230         /** @stable ICU 66 */
1231         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/
1232         /** @stable ICU 66 */
1233         public static final int DIVES_AKURU_ID = 303; /*[11900]*/
1234         /** @stable ICU 66 */
1235         public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/
1236         /** @stable ICU 66 */
1237         public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/
1238         /** @stable ICU 66 */
1239         public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/
1240         /** @stable ICU 66 */
1241         public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/
1242         /** @stable ICU 66 */
1243         public static final int YEZIDI_ID = 308; /*[10E80]*/
1244 
1245         /**
1246          * One more than the highest normal UnicodeBlock value.
1247          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK).
1248          *
1249          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1250          */
1251         @Deprecated
1252         public static final int COUNT = 309;
1253 
1254         // blocks objects ---------------------------------------------------
1255 
1256         /**
1257          * Array of UnicodeBlocks (COUNT slots), for easy access in getInstance(int)
1258          */
1259         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
1260 
1261         /**
1262          * @stable ICU 2.6
1263          */
1264         public static final UnicodeBlock NO_BLOCK
1265         = new UnicodeBlock("NO_BLOCK", 0);
1266 
1267         /**
1268          * @stable ICU 2.4
1269          */
1270         public static final UnicodeBlock BASIC_LATIN
1271         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
1272         /**
1273          * @stable ICU 2.4
1274          */
1275         public static final UnicodeBlock LATIN_1_SUPPLEMENT
1276         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1277         /**
1278          * @stable ICU 2.4
1279          */
1280         public static final UnicodeBlock LATIN_EXTENDED_A
1281         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1282         /**
1283          * @stable ICU 2.4
1284          */
1285         public static final UnicodeBlock LATIN_EXTENDED_B
1286         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1287         /**
1288          * @stable ICU 2.4
1289          */
1290         public static final UnicodeBlock IPA_EXTENSIONS
1291         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1292         /**
1293          * @stable ICU 2.4
1294          */
1295         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1296         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1297         /**
1298          * @stable ICU 2.4
1299          */
1300         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1301         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1302         /**
1303          * Unicode 3.2 renames this block to "Greek and Coptic".
1304          * @stable ICU 2.4
1305          */
1306         public static final UnicodeBlock GREEK
1307         = new UnicodeBlock("GREEK", GREEK_ID);
1308         /**
1309          * @stable ICU 2.4
1310          */
1311         public static final UnicodeBlock CYRILLIC
1312         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1313         /**
1314          * @stable ICU 2.4
1315          */
1316         public static final UnicodeBlock ARMENIAN
1317         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1318         /**
1319          * @stable ICU 2.4
1320          */
1321         public static final UnicodeBlock HEBREW
1322         = new UnicodeBlock("HEBREW", HEBREW_ID);
1323         /**
1324          * @stable ICU 2.4
1325          */
1326         public static final UnicodeBlock ARABIC
1327         = new UnicodeBlock("ARABIC", ARABIC_ID);
1328         /**
1329          * @stable ICU 2.4
1330          */
1331         public static final UnicodeBlock SYRIAC
1332         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1333         /**
1334          * @stable ICU 2.4
1335          */
1336         public static final UnicodeBlock THAANA
1337         = new UnicodeBlock("THAANA", THAANA_ID);
1338         /**
1339          * @stable ICU 2.4
1340          */
1341         public static final UnicodeBlock DEVANAGARI
1342         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1343         /**
1344          * @stable ICU 2.4
1345          */
1346         public static final UnicodeBlock BENGALI
1347         = new UnicodeBlock("BENGALI", BENGALI_ID);
1348         /**
1349          * @stable ICU 2.4
1350          */
1351         public static final UnicodeBlock GURMUKHI
1352         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1353         /**
1354          * @stable ICU 2.4
1355          */
1356         public static final UnicodeBlock GUJARATI
1357         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1358         /**
1359          * @stable ICU 2.4
1360          */
1361         public static final UnicodeBlock ORIYA
1362         = new UnicodeBlock("ORIYA", ORIYA_ID);
1363         /**
1364          * @stable ICU 2.4
1365          */
1366         public static final UnicodeBlock TAMIL
1367         = new UnicodeBlock("TAMIL", TAMIL_ID);
1368         /**
1369          * @stable ICU 2.4
1370          */
1371         public static final UnicodeBlock TELUGU
1372         = new UnicodeBlock("TELUGU", TELUGU_ID);
1373         /**
1374          * @stable ICU 2.4
1375          */
1376         public static final UnicodeBlock KANNADA
1377         = new UnicodeBlock("KANNADA", KANNADA_ID);
1378         /**
1379          * @stable ICU 2.4
1380          */
1381         public static final UnicodeBlock MALAYALAM
1382         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1383         /**
1384          * @stable ICU 2.4
1385          */
1386         public static final UnicodeBlock SINHALA
1387         = new UnicodeBlock("SINHALA", SINHALA_ID);
1388         /**
1389          * @stable ICU 2.4
1390          */
1391         public static final UnicodeBlock THAI
1392         = new UnicodeBlock("THAI", THAI_ID);
1393         /**
1394          * @stable ICU 2.4
1395          */
1396         public static final UnicodeBlock LAO
1397         = new UnicodeBlock("LAO", LAO_ID);
1398         /**
1399          * @stable ICU 2.4
1400          */
1401         public static final UnicodeBlock TIBETAN
1402         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1403         /**
1404          * @stable ICU 2.4
1405          */
1406         public static final UnicodeBlock MYANMAR
1407         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1408         /**
1409          * @stable ICU 2.4
1410          */
1411         public static final UnicodeBlock GEORGIAN
1412         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1413         /**
1414          * @stable ICU 2.4
1415          */
1416         public static final UnicodeBlock HANGUL_JAMO
1417         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1418         /**
1419          * @stable ICU 2.4
1420          */
1421         public static final UnicodeBlock ETHIOPIC
1422         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1423         /**
1424          * @stable ICU 2.4
1425          */
1426         public static final UnicodeBlock CHEROKEE
1427         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1428         /**
1429          * @stable ICU 2.4
1430          */
1431         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1432         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1433                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1434         /**
1435          * @stable ICU 2.4
1436          */
1437         public static final UnicodeBlock OGHAM
1438         = new UnicodeBlock("OGHAM", OGHAM_ID);
1439         /**
1440          * @stable ICU 2.4
1441          */
1442         public static final UnicodeBlock RUNIC
1443         = new UnicodeBlock("RUNIC", RUNIC_ID);
1444         /**
1445          * @stable ICU 2.4
1446          */
1447         public static final UnicodeBlock KHMER
1448         = new UnicodeBlock("KHMER", KHMER_ID);
1449         /**
1450          * @stable ICU 2.4
1451          */
1452         public static final UnicodeBlock MONGOLIAN
1453         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1454         /**
1455          * @stable ICU 2.4
1456          */
1457         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1458         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1459         /**
1460          * @stable ICU 2.4
1461          */
1462         public static final UnicodeBlock GREEK_EXTENDED
1463         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1464         /**
1465          * @stable ICU 2.4
1466          */
1467         public static final UnicodeBlock GENERAL_PUNCTUATION
1468         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1469         /**
1470          * @stable ICU 2.4
1471          */
1472         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1473         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1474         /**
1475          * @stable ICU 2.4
1476          */
1477         public static final UnicodeBlock CURRENCY_SYMBOLS
1478         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1479         /**
1480          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1481          * Symbols".
1482          * @stable ICU 2.4
1483          */
1484         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1485         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1486         /**
1487          * @stable ICU 2.4
1488          */
1489         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1490         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1491         /**
1492          * @stable ICU 2.4
1493          */
1494         public static final UnicodeBlock NUMBER_FORMS
1495         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1496         /**
1497          * @stable ICU 2.4
1498          */
1499         public static final UnicodeBlock ARROWS
1500         = new UnicodeBlock("ARROWS", ARROWS_ID);
1501         /**
1502          * @stable ICU 2.4
1503          */
1504         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1505         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1506         /**
1507          * @stable ICU 2.4
1508          */
1509         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1510         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1511         /**
1512          * @stable ICU 2.4
1513          */
1514         public static final UnicodeBlock CONTROL_PICTURES
1515         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1516         /**
1517          * @stable ICU 2.4
1518          */
1519         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1520         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1521         /**
1522          * @stable ICU 2.4
1523          */
1524         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1525         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1526         /**
1527          * @stable ICU 2.4
1528          */
1529         public static final UnicodeBlock BOX_DRAWING
1530         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1531         /**
1532          * @stable ICU 2.4
1533          */
1534         public static final UnicodeBlock BLOCK_ELEMENTS
1535         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1536         /**
1537          * @stable ICU 2.4
1538          */
1539         public static final UnicodeBlock GEOMETRIC_SHAPES
1540         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1541         /**
1542          * @stable ICU 2.4
1543          */
1544         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1545         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1546         /**
1547          * @stable ICU 2.4
1548          */
1549         public static final UnicodeBlock DINGBATS
1550         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1551         /**
1552          * @stable ICU 2.4
1553          */
1554         public static final UnicodeBlock BRAILLE_PATTERNS
1555         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1556         /**
1557          * @stable ICU 2.4
1558          */
1559         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1560         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1561         /**
1562          * @stable ICU 2.4
1563          */
1564         public static final UnicodeBlock KANGXI_RADICALS
1565         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1566         /**
1567          * @stable ICU 2.4
1568          */
1569         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1570         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1571                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1572         /**
1573          * @stable ICU 2.4
1574          */
1575         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1576         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1577         /**
1578          * @stable ICU 2.4
1579          */
1580         public static final UnicodeBlock HIRAGANA
1581         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1582         /** The KATAKANA Unicode block.
1583          * @stable ICU 2.4
1584          */
1585         public static final UnicodeBlock KATAKANA
1586         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1587         /** The BOPOMOFO Unicode block.
1588          * @stable ICU 2.4
1589          */
1590         public static final UnicodeBlock BOPOMOFO
1591         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1592         /** The HANGUL_COMPATIBILITY_JAMO Unicode block.
1593          * @stable ICU 2.4
1594          */
1595         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1596         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1597         /** The KANBUN Unicode block.
1598          * @stable ICU 2.4
1599          */
1600         public static final UnicodeBlock KANBUN
1601         = new UnicodeBlock("KANBUN", KANBUN_ID);
1602         /** The BOPOMOFO_EXTENDED Unicode block.
1603          * @stable ICU 2.4
1604          */
1605         public static final UnicodeBlock BOPOMOFO_EXTENDED
1606         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1607         /** The ENCLOSED_CJK_LETTERS_AND_MONTHS Unicode block.
1608          * @stable ICU 2.4
1609          */
1610         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1611         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1612                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1613         /** The CJK_COMPATIBILITY Unicode block.
1614          * @stable ICU 2.4
1615          */
1616         public static final UnicodeBlock CJK_COMPATIBILITY
1617         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1618         /** The CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A Unicode block.
1619          * @stable ICU 2.4
1620          */
1621         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1622         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1623                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1624         /** The CJK_UNIFIED_IDEOGRAPHS Unicode block.
1625          * @stable ICU 2.4
1626          */
1627         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1628         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1629         /** The YI_SYLLABLES Unicode block.
1630          * @stable ICU 2.4
1631          */
1632         public static final UnicodeBlock YI_SYLLABLES
1633         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1634         /** The YI_RADICALS Unicode block.
1635          * @stable ICU 2.4
1636          */
1637         public static final UnicodeBlock YI_RADICALS
1638         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1639         /** The HANGUL_SYLLABLES Unicode block.
1640          * @stable ICU 2.4
1641          */
1642         public static final UnicodeBlock HANGUL_SYLLABLES
1643         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1644         /** The HIGH_SURROGATES Unicode block.
1645          * @stable ICU 2.4
1646          */
1647         public static final UnicodeBlock HIGH_SURROGATES
1648         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1649         /** The HIGH_PRIVATE_USE_SURROGATES Unicode block.
1650          * @stable ICU 2.4
1651          */
1652         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1653         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1654         /** The LOW_SURROGATES Unicode block.
1655          * @stable ICU 2.4
1656          */
1657         public static final UnicodeBlock LOW_SURROGATES
1658         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1659         /**
1660          * The PRIVATE_USE_AREA Unicode block; {@link #PRIVATE_USE} refers to this same object.
1661          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1662          * and multiple code point ranges had this block.
1663          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1664          * and adds separate blocks for the supplementary PUAs.
1665          * @stable ICU 2.4
1666          */
1667         public static final UnicodeBlock PRIVATE_USE_AREA
1668         = new UnicodeBlock("PRIVATE_USE_AREA",  78); // NOTE(review): bare literal id; sibling declarations pass a named *_ID constant -- confirm one exists for this block and use it
1669         /**
1670          * Alias for {@link #PRIVATE_USE_AREA}: both fields refer to the same UnicodeBlock object.
1671          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1672          * and multiple code point ranges had this block.
1673          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1674          * and adds separate blocks for the supplementary PUAs.
1675          * @stable ICU 2.4
1676          */
1677         public static final UnicodeBlock PRIVATE_USE
1678         = PRIVATE_USE_AREA;
1679         /** The CJK_COMPATIBILITY_IDEOGRAPHS Unicode block.
1680          * @stable ICU 2.4
1681          */
1682         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1683         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1684         /** The ALPHABETIC_PRESENTATION_FORMS Unicode block.
1685          * @stable ICU 2.4
1686          */
1687         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1688         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1689         /** The ARABIC_PRESENTATION_FORMS_A Unicode block.
1690          * @stable ICU 2.4
1691          */
1692         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1693         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1694         /** The COMBINING_HALF_MARKS Unicode block.
1695          * @stable ICU 2.4
1696          */
1697         public static final UnicodeBlock COMBINING_HALF_MARKS
1698         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1699         /** The CJK_COMPATIBILITY_FORMS Unicode block.
1700          * @stable ICU 2.4
1701          */
1702         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1703         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1704         /** The SMALL_FORM_VARIANTS Unicode block.
1705          * @stable ICU 2.4
1706          */
1707         public static final UnicodeBlock SMALL_FORM_VARIANTS
1708         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1709         /** The ARABIC_PRESENTATION_FORMS_B Unicode block.
1710          * @stable ICU 2.4
1711          */
1712         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1713         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1714         /** The SPECIALS Unicode block.
1715          * @stable ICU 2.4
1716          */
1717         public static final UnicodeBlock SPECIALS
1718         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1719         /** The HALFWIDTH_AND_FULLWIDTH_FORMS Unicode block.
1720          * @stable ICU 2.4
1721          */
1722         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1723         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1724         /** The OLD_ITALIC Unicode block.
1725          * @stable ICU 2.4
1726          */
1727         public static final UnicodeBlock OLD_ITALIC
1728         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1729         /** The GOTHIC Unicode block.
1730          * @stable ICU 2.4
1731          */
1732         public static final UnicodeBlock GOTHIC
1733         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1734         /** The DESERET Unicode block.
1735          * @stable ICU 2.4
1736          */
1737         public static final UnicodeBlock DESERET
1738         = new UnicodeBlock("DESERET", DESERET_ID);
1739         /** The BYZANTINE_MUSICAL_SYMBOLS Unicode block.
1740          * @stable ICU 2.4
1741          */
1742         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1743         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1744         /** The MUSICAL_SYMBOLS Unicode block.
1745          * @stable ICU 2.4
1746          */
1747         public static final UnicodeBlock MUSICAL_SYMBOLS
1748         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1749         /** The MATHEMATICAL_ALPHANUMERIC_SYMBOLS Unicode block.
1750          * @stable ICU 2.4
1751          */
1752         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1753         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1754                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1755         /** The CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B Unicode block.
1756          * @stable ICU 2.4
1757          */
1758         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1759         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1760                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1761         /** The CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT Unicode block.
1762          * @stable ICU 2.4
1763          */
1764         public static final UnicodeBlock
1765         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1766         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1767                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1768         /** The TAGS Unicode block.
1769          * @stable ICU 2.4
1770          */
1771         public static final UnicodeBlock TAGS
1772         = new UnicodeBlock("TAGS", TAGS_ID);
1773 
1774         // New blocks in Unicode 3.2
1775 
1776         /**
1777          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1778          * @stable ICU 2.4
1779          */
1780         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1781         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID); // separate instance from CYRILLIC_SUPPLEMENT below, not an alias
1782         /**
1783          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1784          * @stable ICU 3.0
1785          */
1786         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1787         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1788         /** The TAGALOG Unicode block.
1789          * @stable ICU 2.4
1790          */
1791         public static final UnicodeBlock TAGALOG
1792         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1793         /** The HANUNOO Unicode block.
1794          * @stable ICU 2.4
1795          */
1796         public static final UnicodeBlock HANUNOO
1797         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1798         /** The BUHID Unicode block.
1799          * @stable ICU 2.4
1800          */
1801         public static final UnicodeBlock BUHID
1802         = new UnicodeBlock("BUHID", BUHID_ID);
1803         /** The TAGBANWA Unicode block.
1804          * @stable ICU 2.4
1805          */
1806         public static final UnicodeBlock TAGBANWA
1807         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1808         /** The MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A Unicode block.
1809          * @stable ICU 2.4
1810          */
1811         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1812         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1813                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1814         /** The SUPPLEMENTAL_ARROWS_A Unicode block.
1815          * @stable ICU 2.4
1816          */
1817         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1818         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1819         /** The SUPPLEMENTAL_ARROWS_B Unicode block.
1820          * @stable ICU 2.4
1821          */
1822         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1823         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1824         /** The MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B Unicode block.
1825          * @stable ICU 2.4
1826          */
1827         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1828         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1829                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1830         /** The SUPPLEMENTAL_MATHEMATICAL_OPERATORS Unicode block.
1831          * @stable ICU 2.4
1832          */
1833         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1834         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1835                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1836         /** The KATAKANA_PHONETIC_EXTENSIONS Unicode block.
1837          * @stable ICU 2.4
1838          */
1839         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1840         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1841         /** The VARIATION_SELECTORS Unicode block.
1842          * @stable ICU 2.4
1843          */
1844         public static final UnicodeBlock VARIATION_SELECTORS
1845         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1846         /** The SUPPLEMENTARY_PRIVATE_USE_AREA_A Unicode block.
1847          * @stable ICU 2.4
1848          */
1849         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1850         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1851                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1852         /** The SUPPLEMENTARY_PRIVATE_USE_AREA_B Unicode block.
1853          * @stable ICU 2.4
1854          */
1855         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1856         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1857                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1858 
1859         /** The LIMBU Unicode block.
1860          * @stable ICU 2.6
1861          */
1862         public static final UnicodeBlock LIMBU
1863         = new UnicodeBlock("LIMBU", LIMBU_ID);
1864         /** The TAI_LE Unicode block.
1865          * @stable ICU 2.6
1866          */
1867         public static final UnicodeBlock TAI_LE
1868         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1869         /** The KHMER_SYMBOLS Unicode block.
1870          * @stable ICU 2.6
1871          */
1872         public static final UnicodeBlock KHMER_SYMBOLS
1873         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1874
1875         /** The PHONETIC_EXTENSIONS Unicode block.
1876          * @stable ICU 2.6
1877          */
1878         public static final UnicodeBlock PHONETIC_EXTENSIONS
1879         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1880
1881         /** The MISCELLANEOUS_SYMBOLS_AND_ARROWS Unicode block.
1882          * @stable ICU 2.6
1883          */
1884         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1885         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1886                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1887         /** The YIJING_HEXAGRAM_SYMBOLS Unicode block.
1888          * @stable ICU 2.6
1889          */
1890         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1891         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1892         /** The LINEAR_B_SYLLABARY Unicode block.
1893          * @stable ICU 2.6
1894          */
1895         public static final UnicodeBlock LINEAR_B_SYLLABARY
1896         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1897         /** The LINEAR_B_IDEOGRAMS Unicode block.
1898          * @stable ICU 2.6
1899          */
1900         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1901         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1902         /** The AEGEAN_NUMBERS Unicode block.
1903          * @stable ICU 2.6
1904          */
1905         public static final UnicodeBlock AEGEAN_NUMBERS
1906         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1907         /** The UGARITIC Unicode block.
1908          * @stable ICU 2.6
1909          */
1910         public static final UnicodeBlock UGARITIC
1911         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1912         /** The SHAVIAN Unicode block.
1913          * @stable ICU 2.6
1914          */
1915         public static final UnicodeBlock SHAVIAN
1916         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1917         /** The OSMANYA Unicode block.
1918          * @stable ICU 2.6
1919          */
1920         public static final UnicodeBlock OSMANYA
1921         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1922         /** The CYPRIOT_SYLLABARY Unicode block.
1923          * @stable ICU 2.6
1924          */
1925         public static final UnicodeBlock CYPRIOT_SYLLABARY
1926         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1927         /** The TAI_XUAN_JING_SYMBOLS Unicode block.
1928          * @stable ICU 2.6
1929          */
1930         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1931         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1932
1933         /** The VARIATION_SELECTORS_SUPPLEMENT Unicode block.
1934          * @stable ICU 2.6
1935          */
1936         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1937         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1938 
1939         /* New blocks in Unicode 4.1. The bracketed hex value after each declaration appears to be the block's first code point. */
1940
1941         /** The ANCIENT_GREEK_MUSICAL_NOTATION Unicode block.
1942          * @stable ICU 3.4
1943          */
1944         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
1945                 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
1946                         ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
1947
1948         /** The ANCIENT_GREEK_NUMBERS Unicode block.
1949          * @stable ICU 3.4
1950          */
1951         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
1952                 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
1953
1954         /** The ARABIC_SUPPLEMENT Unicode block.
1955          * @stable ICU 3.4
1956          */
1957         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1958                 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
1959
1960         /** The BUGINESE Unicode block.
1961          * @stable ICU 3.4
1962          */
1963         public static final UnicodeBlock BUGINESE =
1964                 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
1965
1966         /** The CJK_STROKES Unicode block.
1967          * @stable ICU 3.4
1968          */
1969         public static final UnicodeBlock CJK_STROKES =
1970                 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
1971
1972         /** The COMBINING_DIACRITICAL_MARKS_SUPPLEMENT Unicode block.
1973          * @stable ICU 3.4
1974          */
1975         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1976                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1977                         COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
1978
1979         /** The COPTIC Unicode block.
1980          * @stable ICU 3.4
1981          */
1982         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
1983
1984         /** The ETHIOPIC_EXTENDED Unicode block.
1985          * @stable ICU 3.4
1986          */
1987         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1988                 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
1989
1990         /** The ETHIOPIC_SUPPLEMENT Unicode block.
1991          * @stable ICU 3.4
1992          */
1993         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1994                 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
1995
1996         /** The GEORGIAN_SUPPLEMENT Unicode block.
1997          * @stable ICU 3.4
1998          */
1999         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
2000                 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
2001
2002         /** The GLAGOLITIC Unicode block.
2003          * @stable ICU 3.4
2004          */
2005         public static final UnicodeBlock GLAGOLITIC =
2006                 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
2007
2008         /** The KHAROSHTHI Unicode block.
2009          * @stable ICU 3.4
2010          */
2011         public static final UnicodeBlock KHAROSHTHI =
2012                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
2013
2014         /** The MODIFIER_TONE_LETTERS Unicode block.
2015          * @stable ICU 3.4
2016          */
2017         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2018                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
2019
2020         /** The NEW_TAI_LUE Unicode block.
2021          * @stable ICU 3.4
2022          */
2023         public static final UnicodeBlock NEW_TAI_LUE =
2024                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
2025
2026         /** The OLD_PERSIAN Unicode block.
2027          * @stable ICU 3.4
2028          */
2029         public static final UnicodeBlock OLD_PERSIAN =
2030                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
2031
2032         /** The PHONETIC_EXTENSIONS_SUPPLEMENT Unicode block.
2033          * @stable ICU 3.4
2034          */
2035         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
2036                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
2037                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
2038
2039         /** The SUPPLEMENTAL_PUNCTUATION Unicode block.
2040          * @stable ICU 3.4
2041          */
2042         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2043                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
2044
2045         /** The SYLOTI_NAGRI Unicode block.
2046          * @stable ICU 3.4
2047          */
2048         public static final UnicodeBlock SYLOTI_NAGRI =
2049                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
2050
2051         /** The TIFINAGH Unicode block.
2052          * @stable ICU 3.4
2053          */
2054         public static final UnicodeBlock TIFINAGH =
2055                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
2056
2057         /** The VERTICAL_FORMS Unicode block.
2058          * @stable ICU 3.4
2059          */
2060         public static final UnicodeBlock VERTICAL_FORMS =
2061                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
2062 
2063         /** The NKO Unicode block.
2064          * @stable ICU 3.6
2065          */
2066         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0] -- bracketed hex, here and below, appears to be the block's first code point */
2067         /** The BALINESE Unicode block.
2068          * @stable ICU 3.6
2069          */
2070         public static final UnicodeBlock BALINESE =
2071                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
2072         /** The LATIN_EXTENDED_C Unicode block.
2073          * @stable ICU 3.6
2074          */
2075         public static final UnicodeBlock LATIN_EXTENDED_C =
2076                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
2077         /** The LATIN_EXTENDED_D Unicode block.
2078          * @stable ICU 3.6
2079          */
2080         public static final UnicodeBlock LATIN_EXTENDED_D =
2081                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
2082         /** The PHAGS_PA Unicode block.
2083          * @stable ICU 3.6
2084          */
2085         public static final UnicodeBlock PHAGS_PA =
2086                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
2087         /** The PHOENICIAN Unicode block.
2088          * @stable ICU 3.6
2089          */
2090         public static final UnicodeBlock PHOENICIAN =
2091                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
2092         /** The CUNEIFORM Unicode block.
2093          * @stable ICU 3.6
2094          */
2095         public static final UnicodeBlock CUNEIFORM =
2096                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
2097         /** The CUNEIFORM_NUMBERS_AND_PUNCTUATION Unicode block.
2098          * @stable ICU 3.6
2099          */
2100         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2101                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2102                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
2103         /** The COUNTING_ROD_NUMERALS Unicode block.
2104          * @stable ICU 3.6
2105          */
2106         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2107                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
2108 
2109         /** The SUNDANESE Unicode block.
2110          * @stable ICU 4.0
2111          */
2112         public static final UnicodeBlock SUNDANESE =
2113                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] -- bracketed hex, here and below, appears to be the block's first code point */
2114
2115         /** The LEPCHA Unicode block.
2116          * @stable ICU 4.0
2117          */
2118         public static final UnicodeBlock LEPCHA =
2119                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
2120
2121         /** The OL_CHIKI Unicode block.
2122          * @stable ICU 4.0
2123          */
2124         public static final UnicodeBlock OL_CHIKI =
2125                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
2126
2127         /** The CYRILLIC_EXTENDED_A Unicode block.
2128          * @stable ICU 4.0
2129          */
2130         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2131                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
2132
2133         /** The VAI Unicode block.
2134          * @stable ICU 4.0
2135          */
2136         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
2137
2138         /** The CYRILLIC_EXTENDED_B Unicode block.
2139          * @stable ICU 4.0
2140          */
2141         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2142                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
2143
2144         /** The SAURASHTRA Unicode block.
2145          * @stable ICU 4.0
2146          */
2147         public static final UnicodeBlock SAURASHTRA =
2148                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
2149
2150         /** The KAYAH_LI Unicode block.
2151          * @stable ICU 4.0
2152          */
2153         public static final UnicodeBlock KAYAH_LI =
2154                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
2155
2156         /** The REJANG Unicode block.
2157          * @stable ICU 4.0
2158          */
2159         public static final UnicodeBlock REJANG =
2160                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
2161
2162         /** The CHAM Unicode block.
2163          * @stable ICU 4.0
2164          */
2165         public static final UnicodeBlock CHAM =
2166                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
2167
2168         /** The ANCIENT_SYMBOLS Unicode block.
2169          * @stable ICU 4.0
2170          */
2171         public static final UnicodeBlock ANCIENT_SYMBOLS =
2172                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
2173
2174         /** The PHAISTOS_DISC Unicode block.
2175          * @stable ICU 4.0
2176          */
2177         public static final UnicodeBlock PHAISTOS_DISC =
2178                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
2179
2180         /** The LYCIAN Unicode block.
2181          * @stable ICU 4.0
2182          */
2183         public static final UnicodeBlock LYCIAN =
2184                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
2185
2186         /** The CARIAN Unicode block.
2187          * @stable ICU 4.0
2188          */
2189         public static final UnicodeBlock CARIAN =
2190                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
2191
2192         /** The LYDIAN Unicode block.
2193          * @stable ICU 4.0
2194          */
2195         public static final UnicodeBlock LYDIAN =
2196                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
2197
2198         /** The MAHJONG_TILES Unicode block.
2199          * @stable ICU 4.0
2200          */
2201         public static final UnicodeBlock MAHJONG_TILES =
2202                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
2203
2204         /** The DOMINO_TILES Unicode block.
2205          * @stable ICU 4.0
2206          */
2207         public static final UnicodeBlock DOMINO_TILES =
2208                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
2209 
2210         /* New blocks in Unicode 5.2. Each constant is a UnicodeBlock built from its own name string and matching *_ID; the bracketed hex value after each declaration appears to be the block's first code point. */
2211
2212         /** @stable ICU 4.4 */
2213         public static final UnicodeBlock SAMARITAN =
2214                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
2215         /** @stable ICU 4.4 */
2216         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
2217                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
2218                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
2219         /** @stable ICU 4.4 */
2220         public static final UnicodeBlock TAI_THAM =
2221                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
2222         /** @stable ICU 4.4 */
2223         public static final UnicodeBlock VEDIC_EXTENSIONS =
2224                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
2225         /** @stable ICU 4.4 */
2226         public static final UnicodeBlock LISU =
2227                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
2228         /** @stable ICU 4.4 */
2229         public static final UnicodeBlock BAMUM =
2230                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
2231         /** @stable ICU 4.4 */
2232         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2233                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
2234         /** @stable ICU 4.4 */
2235         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2236                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
2237         /** @stable ICU 4.4 */
2238         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2239                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
2240         /** @stable ICU 4.4 */
2241         public static final UnicodeBlock JAVANESE =
2242                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
2243         /** @stable ICU 4.4 */
2244         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2245                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
2246         /** @stable ICU 4.4 */
2247         public static final UnicodeBlock TAI_VIET =
2248                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
2249         /** @stable ICU 4.4 */
2250         public static final UnicodeBlock MEETEI_MAYEK =
2251                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
2252         /** @stable ICU 4.4 */
2253         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2254                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
2255         /** @stable ICU 4.4 */
2256         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2257                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
2258         /** @stable ICU 4.4 */
2259         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2260                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
2261         /** @stable ICU 4.4 */
2262         public static final UnicodeBlock AVESTAN =
2263                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
2264         /** @stable ICU 4.4 */
2265         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2266                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
2267         /** @stable ICU 4.4 */
2268         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2269                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
2270         /** @stable ICU 4.4 */
2271         public static final UnicodeBlock OLD_TURKIC =
2272                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
2273         /** @stable ICU 4.4 */
2274         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2275                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
2276         /** @stable ICU 4.4 */
2277         public static final UnicodeBlock KAITHI =
2278                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
2279         /** @stable ICU 4.4 */
2280         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2281                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
2282         /** @stable ICU 4.4 */
2283         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2284                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2285                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
2286         /** @stable ICU 4.4 */
2287         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2288                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2289                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
2290         /** @stable ICU 4.4 */
2291         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2292                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2293                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
2294 
2295         /* New blocks in Unicode 6.0. Each constant is a UnicodeBlock built from its own name string and matching *_ID; the bracketed hex value after each declaration appears to be the block's first code point. */
2296
2297         /** @stable ICU 4.6 */
2298         public static final UnicodeBlock MANDAIC =
2299                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
2300         /** @stable ICU 4.6 */
2301         public static final UnicodeBlock BATAK =
2302                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
2303         /** @stable ICU 4.6 */
2304         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2305                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
2306         /** @stable ICU 4.6 */
2307         public static final UnicodeBlock BRAHMI =
2308                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
2309         /** @stable ICU 4.6 */
2310         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2311                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
2312         /** @stable ICU 4.6 */
2313         public static final UnicodeBlock KANA_SUPPLEMENT =
2314                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
2315         /** @stable ICU 4.6 */
2316         public static final UnicodeBlock PLAYING_CARDS =
2317                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
2318         /** @stable ICU 4.6 */
2319         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2320                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2321                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
2322         /** @stable ICU 4.6 */
2323         public static final UnicodeBlock EMOTICONS =
2324                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
2325         /** @stable ICU 4.6 */
2326         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2327                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
2328         /** @stable ICU 4.6 */
2329         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2330                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
2331         /** @stable ICU 4.6 */
2332         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2333                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2334                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
2335 
2336         /* New blocks in Unicode 6.1. Each constant is a UnicodeBlock built from its own name string and matching *_ID; the bracketed hex value after each declaration appears to be the block's first code point. */
2337
2338         /** @stable ICU 49 */
2339         public static final UnicodeBlock ARABIC_EXTENDED_A =
2340                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
2341         /** @stable ICU 49 */
2342         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2343                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
2344         /** @stable ICU 49 */
2345         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
2346         /** @stable ICU 49 */
2347         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2348                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
2349         /** @stable ICU 49 */
2350         public static final UnicodeBlock MEROITIC_CURSIVE =
2351                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
2352         /** @stable ICU 49 */
2353         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2354                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
2355         /** @stable ICU 49 */
2356         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
2357         /** @stable ICU 49 */
2358         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
2359         /** @stable ICU 49 */
2360         public static final UnicodeBlock SORA_SOMPENG =
2361                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
2362         /** @stable ICU 49 */
2363         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2364                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
2365         /** @stable ICU 49 */
2366         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
2367 
        /*
         * New blocks in Unicode 7.0; every constant here is tagged @stable ICU 54.
         * The trailing [hex] comment is presumably each block's first code point (TODO confirm).
         */

        /** @stable ICU 54 */
        public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock CAUCASIAN_ALBANIAN =
                new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
                new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
                new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
                new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock LATIN_EXTENDED_E =
                new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock MENDE_KIKAKUI =
                new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock MYANMAR_EXTENDED_B =
                new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock OLD_NORTH_ARABIAN =
                new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock ORNAMENTAL_DINGBATS =
                new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock PSALTER_PAHLAVI =
                new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
                new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
                new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
                new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
        /** @stable ICU 54 */
        public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
2447 
        /*
         * New blocks in Unicode 8.0; every constant here is tagged @stable ICU 56.
         * The trailing [hex] comment is presumably each block's first code point (TODO confirm).
         */

        /** @stable ICU 56 */
        public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
                new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
                new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
                new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock OLD_HUNGARIAN =
                new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
                new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
                        SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
        /** @stable ICU 56 */
        public static final UnicodeBlock SUTTON_SIGNWRITING =
                new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
2479 
        /*
         * New blocks in Unicode 9.0; every constant here is tagged @stable ICU 58.
         * The trailing [hex] comment is presumably each block's first code point (TODO confirm).
         */

        /** @stable ICU 58 */
        public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
        /** @stable ICU 58 */
        public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
        /** @stable ICU 58 */
        public static final UnicodeBlock CYRILLIC_EXTENDED_C =
                new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
        /** @stable ICU 58 */
        public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
                new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
        /** @stable ICU 58 */
        public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
                new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
        /** @stable ICU 58 */
        public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
        /** @stable ICU 58 */
        public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
                new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
        /** @stable ICU 58 */
        public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
        /** @stable ICU 58 */
        public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
        /** @stable ICU 58 */
        public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
        /** @stable ICU 58 */
        public static final UnicodeBlock TANGUT_COMPONENTS =
                new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/
2509 
        // New blocks in Unicode 10.0; every constant here is tagged @stable ICU 60.
        // The trailing [hex] comment is presumably each block's first code point (TODO confirm).

        /** @stable ICU 60 */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock KANA_EXTENDED_A =
                new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock MASARAM_GONDI =
                new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock SYRIAC_SUPPLEMENT =
                new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/
        /** @stable ICU 60 */
        public static final UnicodeBlock ZANABAZAR_SQUARE =
                new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/
2531 
        // New blocks in Unicode 11.0; every constant here is tagged @stable ICU 62.
        // The trailing [hex] comment is presumably each block's first code point (TODO confirm).

        /** @stable ICU 62 */
        public static final UnicodeBlock CHESS_SYMBOLS =
                new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock GEORGIAN_EXTENDED =
                new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock GUNJALA_GONDI =
                new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock HANIFI_ROHINGYA =
                new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
                new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock MAYAN_NUMERALS =
                new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock MEDEFAIDRIN =
                new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock OLD_SOGDIAN =
                new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/
        /** @stable ICU 62 */
        public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/
2564 
        // New blocks in Unicode 12.0; every constant here is tagged @stable ICU 64.
        // The trailing [hex] comment is presumably each block's first code point (TODO confirm).

        /** @stable ICU 64 */
        public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
                new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock NANDINAGARI =
                new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
                new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
                new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock SMALL_KANA_EXTENSION =
                new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
                new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock TAMIL_SUPPLEMENT =
                new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/
        /** @stable ICU 64 */
        public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/
2592 
        // New blocks in Unicode 13.0; every constant here is tagged @stable ICU 66.
        // The trailing [hex] comment is presumably each block's first code point (TODO confirm).

        /** @stable ICU 66 */
        public static final UnicodeBlock CHORASMIAN =
                new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
                new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
                        CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock DIVES_AKURU =
                new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
                new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock LISU_SUPPLEMENT =
                new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
                new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock TANGUT_SUPPLEMENT =
                new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/
        /** @stable ICU 66 */
        public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/
2619 
        /**
         * Block returned by {@link #getInstance(int)} for IDs outside the {@code BLOCKS_}
         * index range, and by {@link #of(int)} for values greater than {@code MAX_VALUE}.
         * @stable ICU 2.4
         */
        public static final UnicodeBlock INVALID_CODE
        = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2625 
2626         static {
2627             for (int blockId = 0; blockId < COUNT; ++blockId) {
2628                 if (BLOCKS_[blockId] == null) {
2629                     throw new java.lang.IllegalStateException(
2630                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2631                 }
2632             }
2633         }
2634 
2635         // public methods --------------------------------------------------
2636 
2637         /**
2638          * {@icu} Returns the only instance of the UnicodeBlock with the argument ID.
2639          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
2640          * @param id UnicodeBlock ID
2641          * @return the only instance of the UnicodeBlock with the argument ID
2642          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
2643          *         returned.
2644          * @stable ICU 2.4
2645          */
getInstance(int id)2646         public static UnicodeBlock getInstance(int id)
2647         {
2648             if (id >= 0 && id < BLOCKS_.length) {
2649                 return BLOCKS_[id];
2650             }
2651             return INVALID_CODE;
2652         }
2653 
2654         /**
2655          * Returns the Unicode allocation block that contains the code point,
2656          * or null if the code point is not a member of a defined block.
2657          * @param ch code point to be tested
2658          * @return the Unicode allocation block that contains the code point
2659          * @stable ICU 2.4
2660          */
of(int ch)2661         public static UnicodeBlock of(int ch)
2662         {
2663             if (ch > MAX_VALUE) {
2664                 return INVALID_CODE;
2665             }
2666 
2667             return UnicodeBlock.getInstance(
2668                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2669         }
2670 
2671         /**
2672          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
2673          * Returns the Unicode block with the given name. {@icunote} Unlike
2674          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
2675          * against the official UCD name and the Java block name
2676          * (ignoring case).
2677          * @param blockName the name of the block to match
2678          * @return the UnicodeBlock with that name
2679          * @throws IllegalArgumentException if the blockName could not be matched
2680          * @stable ICU 3.0
2681          */
forName(String blockName)2682         public static final UnicodeBlock forName(String blockName) {
2683             Map<String, UnicodeBlock> m = null;
2684             if (mref != null) {
2685                 m = mref.get();
2686             }
2687             if (m == null) {
2688                 m = new HashMap<>(BLOCKS_.length);
2689                 for (int i = 0; i < BLOCKS_.length; ++i) {
2690                     UnicodeBlock b = BLOCKS_[i];
2691                     String name = trimBlockName(
2692                             getPropertyValueName(UProperty.BLOCK, b.getID(),
2693                                     UProperty.NameChoice.LONG));
2694                     m.put(name, b);
2695                 }
2696                 mref = new SoftReference<>(m);
2697             }
2698             UnicodeBlock b = m.get(trimBlockName(blockName));
2699             if (b == null) {
2700                 throw new IllegalArgumentException();
2701             }
2702             return b;
2703         }
2704         private static SoftReference<Map<String, UnicodeBlock>> mref;
2705 
trimBlockName(String name)2706         private static String trimBlockName(String name) {
2707             String upper = name.toUpperCase(Locale.ENGLISH);
2708             StringBuilder result = new StringBuilder(upper.length());
2709             for (int i = 0; i < upper.length(); i++) {
2710                 char c = upper.charAt(i);
2711                 if (c != ' ' && c != '_' && c != '-') {
2712                     result.append(c);
2713                 }
2714             }
2715             return result.toString();
2716         }
2717 
2718         /**
2719          * {icu} Returns the type ID of this Unicode block
2720          * @return integer type ID of this Unicode block
2721          * @stable ICU 2.4
2722          */
getID()2723         public int getID()
2724         {
2725             return m_id_;
2726         }
2727 
2728         // private data members ---------------------------------------------
2729 
        /**
         * Identification code for this UnicodeBlock.
         * Set by the constructor and returned by {@link #getID()}; when it is non-negative
         * the constructor also uses it as this block's index in {@code BLOCKS_}.
         */
        private int m_id_;
2734 
2735         // private constructor ----------------------------------------------
2736 
2737         /**
2738          * UnicodeBlock constructor
2739          * @param name name of this UnicodeBlock
2740          * @param id unique id of this UnicodeBlock
2741          * @exception NullPointerException if name is <code>null</code>
2742          */
UnicodeBlock(String name, int id)2743         private UnicodeBlock(String name, int id)
2744         {
2745             super(name);
2746             m_id_ = id;
2747             if (id >= 0) {
2748                 BLOCKS_[id] = this;
2749             }
2750         }
2751     }
2752 
2753     /**
2754      * East Asian Width constants.
2755      * @see UProperty#EAST_ASIAN_WIDTH
2756      * @see UCharacter#getIntPropertyValue
2757      * @stable ICU 2.4
2758      */
2759     public static interface EastAsianWidth
2760     {
2761         /**
2762          * @stable ICU 2.4
2763          */
2764         public static final int NEUTRAL = 0;
2765         /**
2766          * @stable ICU 2.4
2767          */
2768         public static final int AMBIGUOUS = 1;
2769         /**
2770          * @stable ICU 2.4
2771          */
2772         public static final int HALFWIDTH = 2;
2773         /**
2774          * @stable ICU 2.4
2775          */
2776         public static final int FULLWIDTH = 3;
2777         /**
2778          * @stable ICU 2.4
2779          */
2780         public static final int NARROW = 4;
2781         /**
2782          * @stable ICU 2.4
2783          */
2784         public static final int WIDE = 5;
2785         /**
2786          * One more than the highest normal EastAsianWidth value.
2787          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
2788          *
2789          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2790          */
2791         @Deprecated
2792         public static final int COUNT = 6;
2793     }
2794 
2795     /**
2796      * Decomposition Type constants.
2797      * @see UProperty#DECOMPOSITION_TYPE
2798      * @stable ICU 2.4
2799      */
2800     public static interface DecompositionType
2801     {
2802         /**
2803          * @stable ICU 2.4
2804          */
2805         public static final int NONE = 0;
2806         /**
2807          * @stable ICU 2.4
2808          */
2809         public static final int CANONICAL = 1;
2810         /**
2811          * @stable ICU 2.4
2812          */
2813         public static final int COMPAT = 2;
2814         /**
2815          * @stable ICU 2.4
2816          */
2817         public static final int CIRCLE = 3;
2818         /**
2819          * @stable ICU 2.4
2820          */
2821         public static final int FINAL = 4;
2822         /**
2823          * @stable ICU 2.4
2824          */
2825         public static final int FONT = 5;
2826         /**
2827          * @stable ICU 2.4
2828          */
2829         public static final int FRACTION = 6;
2830         /**
2831          * @stable ICU 2.4
2832          */
2833         public static final int INITIAL = 7;
2834         /**
2835          * @stable ICU 2.4
2836          */
2837         public static final int ISOLATED = 8;
2838         /**
2839          * @stable ICU 2.4
2840          */
2841         public static final int MEDIAL = 9;
2842         /**
2843          * @stable ICU 2.4
2844          */
2845         public static final int NARROW = 10;
2846         /**
2847          * @stable ICU 2.4
2848          */
2849         public static final int NOBREAK = 11;
2850         /**
2851          * @stable ICU 2.4
2852          */
2853         public static final int SMALL = 12;
2854         /**
2855          * @stable ICU 2.4
2856          */
2857         public static final int SQUARE = 13;
2858         /**
2859          * @stable ICU 2.4
2860          */
2861         public static final int SUB = 14;
2862         /**
2863          * @stable ICU 2.4
2864          */
2865         public static final int SUPER = 15;
2866         /**
2867          * @stable ICU 2.4
2868          */
2869         public static final int VERTICAL = 16;
2870         /**
2871          * @stable ICU 2.4
2872          */
2873         public static final int WIDE = 17;
2874         /**
2875          * One more than the highest normal DecompositionType value.
2876          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
2877          *
2878          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2879          */
2880         @Deprecated
2881         public static final int COUNT = 18;
2882     }
2883 
2884     /**
2885      * Joining Type constants.
2886      * @see UProperty#JOINING_TYPE
2887      * @stable ICU 2.4
2888      */
2889     public static interface JoiningType
2890     {
2891         /**
2892          * @stable ICU 2.4
2893          */
2894         public static final int NON_JOINING = 0;
2895         /**
2896          * @stable ICU 2.4
2897          */
2898         public static final int JOIN_CAUSING = 1;
2899         /**
2900          * @stable ICU 2.4
2901          */
2902         public static final int DUAL_JOINING = 2;
2903         /**
2904          * @stable ICU 2.4
2905          */
2906         public static final int LEFT_JOINING = 3;
2907         /**
2908          * @stable ICU 2.4
2909          */
2910         public static final int RIGHT_JOINING = 4;
2911         /**
2912          * @stable ICU 2.4
2913          */
2914         public static final int TRANSPARENT = 5;
2915         /**
2916          * One more than the highest normal JoiningType value.
2917          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
2918          *
2919          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2920          */
2921         @Deprecated
2922         public static final int COUNT = 6;
2923     }
2924 
2925     /**
2926      * Joining Group constants.
2927      * @see UProperty#JOINING_GROUP
2928      * @stable ICU 2.4
2929      */
2930     public static interface JoiningGroup
2931     {
2932         /**
2933          * @stable ICU 2.4
2934          */
2935         public static final int NO_JOINING_GROUP = 0;
2936         /**
2937          * @stable ICU 2.4
2938          */
2939         public static final int AIN = 1;
2940         /**
2941          * @stable ICU 2.4
2942          */
2943         public static final int ALAPH = 2;
2944         /**
2945          * @stable ICU 2.4
2946          */
2947         public static final int ALEF = 3;
2948         /**
2949          * @stable ICU 2.4
2950          */
2951         public static final int BEH = 4;
2952         /**
2953          * @stable ICU 2.4
2954          */
2955         public static final int BETH = 5;
2956         /**
2957          * @stable ICU 2.4
2958          */
2959         public static final int DAL = 6;
2960         /**
2961          * @stable ICU 2.4
2962          */
2963         public static final int DALATH_RISH = 7;
2964         /**
2965          * @stable ICU 2.4
2966          */
2967         public static final int E = 8;
2968         /**
2969          * @stable ICU 2.4
2970          */
2971         public static final int FEH = 9;
2972         /**
2973          * @stable ICU 2.4
2974          */
2975         public static final int FINAL_SEMKATH = 10;
2976         /**
2977          * @stable ICU 2.4
2978          */
2979         public static final int GAF = 11;
2980         /**
2981          * @stable ICU 2.4
2982          */
2983         public static final int GAMAL = 12;
2984         /**
2985          * @stable ICU 2.4
2986          */
2987         public static final int HAH = 13;
2988         /** @stable ICU 4.6 */
2989         public static final int TEH_MARBUTA_GOAL = 14;
2990         /**
2991          * @stable ICU 2.4
2992          */
2993         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
2994         /**
2995          * @stable ICU 2.4
2996          */
2997         public static final int HE = 15;
2998         /**
2999          * @stable ICU 2.4
3000          */
3001         public static final int HEH = 16;
3002         /**
3003          * @stable ICU 2.4
3004          */
3005         public static final int HEH_GOAL = 17;
3006         /**
3007          * @stable ICU 2.4
3008          */
3009         public static final int HETH = 18;
3010         /**
3011          * @stable ICU 2.4
3012          */
3013         public static final int KAF = 19;
3014         /**
3015          * @stable ICU 2.4
3016          */
3017         public static final int KAPH = 20;
3018         /**
3019          * @stable ICU 2.4
3020          */
3021         public static final int KNOTTED_HEH = 21;
3022         /**
3023          * @stable ICU 2.4
3024          */
3025         public static final int LAM = 22;
3026         /**
3027          * @stable ICU 2.4
3028          */
3029         public static final int LAMADH = 23;
3030         /**
3031          * @stable ICU 2.4
3032          */
3033         public static final int MEEM = 24;
3034         /**
3035          * @stable ICU 2.4
3036          */
3037         public static final int MIM = 25;
3038         /**
3039          * @stable ICU 2.4
3040          */
3041         public static final int NOON = 26;
3042         /**
3043          * @stable ICU 2.4
3044          */
3045         public static final int NUN = 27;
3046         /**
3047          * @stable ICU 2.4
3048          */
3049         public static final int PE = 28;
3050         /**
3051          * @stable ICU 2.4
3052          */
3053         public static final int QAF = 29;
3054         /**
3055          * @stable ICU 2.4
3056          */
3057         public static final int QAPH = 30;
3058         /**
3059          * @stable ICU 2.4
3060          */
3061         public static final int REH = 31;
3062         /**
3063          * @stable ICU 2.4
3064          */
3065         public static final int REVERSED_PE = 32;
3066         /**
3067          * @stable ICU 2.4
3068          */
3069         public static final int SAD = 33;
3070         /**
3071          * @stable ICU 2.4
3072          */
3073         public static final int SADHE = 34;
3074         /**
3075          * @stable ICU 2.4
3076          */
3077         public static final int SEEN = 35;
3078         /**
3079          * @stable ICU 2.4
3080          */
3081         public static final int SEMKATH = 36;
3082         /**
3083          * @stable ICU 2.4
3084          */
3085         public static final int SHIN = 37;
3086         /**
3087          * @stable ICU 2.4
3088          */
3089         public static final int SWASH_KAF = 38;
3090         /**
3091          * @stable ICU 2.4
3092          */
3093         public static final int SYRIAC_WAW = 39;
3094         /**
3095          * @stable ICU 2.4
3096          */
3097         public static final int TAH = 40;
3098         /**
3099          * @stable ICU 2.4
3100          */
3101         public static final int TAW = 41;
3102         /**
3103          * @stable ICU 2.4
3104          */
3105         public static final int TEH_MARBUTA = 42;
3106         /**
3107          * @stable ICU 2.4
3108          */
3109         public static final int TETH = 43;
3110         /**
3111          * @stable ICU 2.4
3112          */
3113         public static final int WAW = 44;
3114         /**
3115          * @stable ICU 2.4
3116          */
3117         public static final int YEH = 45;
3118         /**
3119          * @stable ICU 2.4
3120          */
3121         public static final int YEH_BARREE = 46;
3122         /**
3123          * @stable ICU 2.4
3124          */
3125         public static final int YEH_WITH_TAIL = 47;
3126         /**
3127          * @stable ICU 2.4
3128          */
3129         public static final int YUDH = 48;
3130         /**
3131          * @stable ICU 2.4
3132          */
3133         public static final int YUDH_HE = 49;
3134         /**
3135          * @stable ICU 2.4
3136          */
3137         public static final int ZAIN = 50;
3138         /**
3139          * @stable ICU 2.6
3140          */
3141         public static final int FE = 51;
3142         /**
3143          * @stable ICU 2.6
3144          */
3145         public static final int KHAPH = 52;
3146         /**
3147          * @stable ICU 2.6
3148          */
3149         public static final int ZHAIN = 53;
3150         /**
3151          * @stable ICU 4.0
3152          */
3153         public static final int BURUSHASKI_YEH_BARREE = 54;
3154         /** @stable ICU 4.4 */
3155         public static final int FARSI_YEH = 55;
3156         /** @stable ICU 4.4 */
3157         public static final int NYA = 56;
3158         /** @stable ICU 49 */
3159         public static final int ROHINGYA_YEH = 57;
3160 
3161         /** @stable ICU 54 */
3162         public static final int MANICHAEAN_ALEPH = 58;
3163         /** @stable ICU 54 */
3164         public static final int MANICHAEAN_AYIN = 59;
3165         /** @stable ICU 54 */
3166         public static final int MANICHAEAN_BETH = 60;
3167         /** @stable ICU 54 */
3168         public static final int MANICHAEAN_DALETH = 61;
3169         /** @stable ICU 54 */
3170         public static final int MANICHAEAN_DHAMEDH = 62;
3171         /** @stable ICU 54 */
3172         public static final int MANICHAEAN_FIVE = 63;
3173         /** @stable ICU 54 */
3174         public static final int MANICHAEAN_GIMEL = 64;
3175         /** @stable ICU 54 */
3176         public static final int MANICHAEAN_HETH = 65;
3177         /** @stable ICU 54 */
3178         public static final int MANICHAEAN_HUNDRED = 66;
3179         /** @stable ICU 54 */
3180         public static final int MANICHAEAN_KAPH = 67;
3181         /** @stable ICU 54 */
3182         public static final int MANICHAEAN_LAMEDH = 68;
3183         /** @stable ICU 54 */
3184         public static final int MANICHAEAN_MEM = 69;
3185         /** @stable ICU 54 */
3186         public static final int MANICHAEAN_NUN = 70;
3187         /** @stable ICU 54 */
3188         public static final int MANICHAEAN_ONE = 71;
3189         /** @stable ICU 54 */
3190         public static final int MANICHAEAN_PE = 72;
3191         /** @stable ICU 54 */
3192         public static final int MANICHAEAN_QOPH = 73;
3193         /** @stable ICU 54 */
3194         public static final int MANICHAEAN_RESH = 74;
3195         /** @stable ICU 54 */
3196         public static final int MANICHAEAN_SADHE = 75;
3197         /** @stable ICU 54 */
3198         public static final int MANICHAEAN_SAMEKH = 76;
3199         /** @stable ICU 54 */
3200         public static final int MANICHAEAN_TAW = 77;
3201         /** @stable ICU 54 */
3202         public static final int MANICHAEAN_TEN = 78;
3203         /** @stable ICU 54 */
3204         public static final int MANICHAEAN_TETH = 79;
3205         /** @stable ICU 54 */
3206         public static final int MANICHAEAN_THAMEDH = 80;
3207         /** @stable ICU 54 */
3208         public static final int MANICHAEAN_TWENTY = 81;
3209         /** @stable ICU 54 */
3210         public static final int MANICHAEAN_WAW = 82;
3211         /** @stable ICU 54 */
3212         public static final int MANICHAEAN_YODH = 83;
3213         /** @stable ICU 54 */
3214         public static final int MANICHAEAN_ZAYIN = 84;
3215         /** @stable ICU 54 */
3216         public static final int STRAIGHT_WAW = 85;
3217 
3218         /** @stable ICU 58 */
3219         public static final int AFRICAN_FEH = 86;
3220         /** @stable ICU 58 */
3221         public static final int AFRICAN_NOON = 87;
3222         /** @stable ICU 58 */
3223         public static final int AFRICAN_QAF = 88;
3224 
3225         /** @stable ICU 60 */
3226         public static final int MALAYALAM_BHA = 89;
3227         /** @stable ICU 60 */
3228         public static final int MALAYALAM_JA = 90;
3229         /** @stable ICU 60 */
3230         public static final int MALAYALAM_LLA = 91;
3231         /** @stable ICU 60 */
3232         public static final int MALAYALAM_LLLA = 92;
3233         /** @stable ICU 60 */
3234         public static final int MALAYALAM_NGA = 93;
3235         /** @stable ICU 60 */
3236         public static final int MALAYALAM_NNA = 94;
3237         /** @stable ICU 60 */
3238         public static final int MALAYALAM_NNNA = 95;
3239         /** @stable ICU 60 */
3240         public static final int MALAYALAM_NYA = 96;
3241         /** @stable ICU 60 */
3242         public static final int MALAYALAM_RA = 97;
3243         /** @stable ICU 60 */
3244         public static final int MALAYALAM_SSA = 98;
3245         /** @stable ICU 60 */
3246         public static final int MALAYALAM_TTA = 99;
3247 
3248         /** @stable ICU 62 */
3249         public static final int HANIFI_ROHINGYA_KINNA_YA = 100;
3250         /** @stable ICU 62 */
3251         public static final int HANIFI_ROHINGYA_PA = 101;
3252 
3253         /**
3254          * One more than the highest normal JoiningGroup value.
3255          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP).
3256          *
3257          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3258          */
3259         @Deprecated
3260         public static final int COUNT = 102;
3261     }
3262 
/**
 * Value constants for the Grapheme Cluster Break property.
 *
 * <p>As interface members, all fields below are implicitly {@code public static final}.
 *
 * @see UProperty#GRAPHEME_CLUSTER_BREAK
 * @stable ICU 3.4
 */
public interface GraphemeClusterBreak {
    /** @stable ICU 3.4 */
    int OTHER = 0;
    /** @stable ICU 3.4 */
    int CONTROL = 1;
    /** @stable ICU 3.4 */
    int CR = 2;
    /** @stable ICU 3.4 */
    int EXTEND = 3;
    /** @stable ICU 3.4 */
    int L = 4;
    /** @stable ICU 3.4 */
    int LF = 5;
    /** @stable ICU 3.4 */
    int LV = 6;
    /** @stable ICU 3.4 */
    int LVT = 7;
    /** @stable ICU 3.4 */
    int T = 8;
    /** @stable ICU 3.4 */
    int V = 9;
    /** @stable ICU 4.0 */
    int SPACING_MARK = 10;
    /** @stable ICU 4.0 */
    int PREPEND = 11;

    // Added in Unicode 6.2/ICU 50.
    /** @stable ICU 50 */
    int REGIONAL_INDICATOR = 12; // [RI]

    // Added in Unicode 9.0/ICU 58.
    /** @stable ICU 58 */
    int E_BASE = 13; // [EB]
    /** @stable ICU 58 */
    int E_BASE_GAZ = 14; // [EBG]
    /** @stable ICU 58 */
    int E_MODIFIER = 15; // [EM]
    /** @stable ICU 58 */
    int GLUE_AFTER_ZWJ = 16; // [GAZ]
    /** @stable ICU 58 */
    int ZWJ = 17; // [ZWJ]

    /**
     * One more than the highest normal GraphemeClusterBreak value.
     * Use UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK)
     * to obtain the highest value instead.
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 18;
}
3339 
/**
 * Value constants for the Word Break property.
 *
 * <p>As interface members, all fields below are implicitly {@code public static final}.
 *
 * @see UProperty#WORD_BREAK
 * @stable ICU 3.4
 */
public interface WordBreak {
    /** @stable ICU 3.8 */
    int OTHER = 0;
    /** @stable ICU 3.8 */
    int ALETTER = 1;
    /** @stable ICU 3.8 */
    int FORMAT = 2;
    /** @stable ICU 3.8 */
    int KATAKANA = 3;
    /** @stable ICU 3.8 */
    int MIDLETTER = 4;
    /** @stable ICU 3.8 */
    int MIDNUM = 5;
    /** @stable ICU 3.8 */
    int NUMERIC = 6;
    /** @stable ICU 3.8 */
    int EXTENDNUMLET = 7;
    /** @stable ICU 4.0 */
    int CR = 8;
    /** @stable ICU 4.0 */
    int EXTEND = 9;
    /** @stable ICU 4.0 */
    int LF = 10;
    /** @stable ICU 4.0 */
    int MIDNUMLET = 11;
    /** @stable ICU 4.0 */
    int NEWLINE = 12;

    // Added in Unicode 6.2/ICU 50.
    /** @stable ICU 50 */
    int REGIONAL_INDICATOR = 13; // [RI]

    // Added in Unicode 6.3/ICU 52.
    /** @stable ICU 52 */
    int HEBREW_LETTER = 14; // [HL]
    /** @stable ICU 52 */
    int SINGLE_QUOTE = 15; // [SQ]
    /** @stable ICU 52 */
    int DOUBLE_QUOTE = 16; // [DQ]

    // Added in Unicode 9.0/ICU 58.
    /** @stable ICU 58 */
    int E_BASE = 17; // [EB]
    /** @stable ICU 58 */
    int E_BASE_GAZ = 18; // [EBG]
    /** @stable ICU 58 */
    int E_MODIFIER = 19; // [EM]
    /** @stable ICU 58 */
    int GLUE_AFTER_ZWJ = 20; // [GAZ]
    /** @stable ICU 58 */
    int ZWJ = 21; // [ZWJ]

    /** @stable ICU 62 */
    int WSEGSPACE = 22; // [WSEGSPACE]

    /**
     * One more than the highest normal WordBreak value.
     * Use UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK)
     * to obtain the highest value instead.
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 23;
}
3427 
/**
 * Value constants for the Sentence Break property.
 *
 * <p>As interface members, all fields below are implicitly {@code public static final}.
 *
 * @see UProperty#SENTENCE_BREAK
 * @stable ICU 3.4
 */
public interface SentenceBreak {
    /** @stable ICU 3.8 */
    int OTHER = 0;
    /** @stable ICU 3.8 */
    int ATERM = 1;
    /** @stable ICU 3.8 */
    int CLOSE = 2;
    /** @stable ICU 3.8 */
    int FORMAT = 3;
    /** @stable ICU 3.8 */
    int LOWER = 4;
    /** @stable ICU 3.8 */
    int NUMERIC = 5;
    /** @stable ICU 3.8 */
    int OLETTER = 6;
    /** @stable ICU 3.8 */
    int SEP = 7;
    /** @stable ICU 3.8 */
    int SP = 8;
    /** @stable ICU 3.8 */
    int STERM = 9;
    /** @stable ICU 3.8 */
    int UPPER = 10;
    /** @stable ICU 4.0 */
    int CR = 11;
    /** @stable ICU 4.0 */
    int EXTEND = 12;
    /** @stable ICU 4.0 */
    int LF = 13;
    /** @stable ICU 4.0 */
    int SCONTINUE = 14;

    /**
     * One more than the highest normal SentenceBreak value.
     * Use UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK)
     * to obtain the highest value instead.
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 15;
}
3503 
/**
 * Value constants for the Line Break property.
 *
 * <p>As interface members, all fields below are implicitly {@code public static final}.
 *
 * @see UProperty#LINE_BREAK
 * @stable ICU 2.4
 */
public interface LineBreak {
    /** @stable ICU 2.4 */
    int UNKNOWN = 0;
    /** @stable ICU 2.4 */
    int AMBIGUOUS = 1;
    /** @stable ICU 2.4 */
    int ALPHABETIC = 2;
    /** @stable ICU 2.4 */
    int BREAK_BOTH = 3;
    /** @stable ICU 2.4 */
    int BREAK_AFTER = 4;
    /** @stable ICU 2.4 */
    int BREAK_BEFORE = 5;
    /** @stable ICU 2.4 */
    int MANDATORY_BREAK = 6;
    /** @stable ICU 2.4 */
    int CONTINGENT_BREAK = 7;
    /** @stable ICU 2.4 */
    int CLOSE_PUNCTUATION = 8;
    /** @stable ICU 2.4 */
    int COMBINING_MARK = 9;
    /** @stable ICU 2.4 */
    int CARRIAGE_RETURN = 10;
    /** @stable ICU 2.4 */
    int EXCLAMATION = 11;
    /** @stable ICU 2.4 */
    int GLUE = 12;
    /** @stable ICU 2.4 */
    int HYPHEN = 13;
    /** @stable ICU 2.4 */
    int IDEOGRAPHIC = 14;
    /**
     * Original (misspelled) name; same value as {@link #INSEPARABLE}.
     *
     * @see #INSEPARABLE
     * @stable ICU 2.4
     */
    int INSEPERABLE = 15;
    /**
     * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
     *
     * @stable ICU 3.0
     */
    int INSEPARABLE = INSEPERABLE;
    /** @stable ICU 2.4 */
    int INFIX_NUMERIC = 16;
    /** @stable ICU 2.4 */
    int LINE_FEED = 17;
    /** @stable ICU 2.4 */
    int NONSTARTER = 18;
    /** @stable ICU 2.4 */
    int NUMERIC = 19;
    /** @stable ICU 2.4 */
    int OPEN_PUNCTUATION = 20;
    /** @stable ICU 2.4 */
    int POSTFIX_NUMERIC = 21;
    /** @stable ICU 2.4 */
    int PREFIX_NUMERIC = 22;
    /** @stable ICU 2.4 */
    int QUOTATION = 23;
    /** @stable ICU 2.4 */
    int COMPLEX_CONTEXT = 24;
    /** @stable ICU 2.4 */
    int SURROGATE = 25;
    /** @stable ICU 2.4 */
    int SPACE = 26;
    /** @stable ICU 2.4 */
    int BREAK_SYMBOLS = 27;
    /** @stable ICU 2.4 */
    int ZWSPACE = 28;

    // Added in Unicode 4/ICU 2.6.
    /** @stable ICU 2.6 */
    int NEXT_LINE = 29; // [NL]
    /** @stable ICU 2.6 */
    int WORD_JOINER = 30; // [WJ]

    // Added in Unicode 4.1/ICU 3.4.
    /** @stable ICU 3.4 */
    int H2 = 31;
    /** @stable ICU 3.4 */
    int H3 = 32;
    /** @stable ICU 3.4 */
    int JL = 33;
    /** @stable ICU 3.4 */
    int JT = 34;
    /** @stable ICU 3.4 */
    int JV = 35;

    /** @stable ICU 4.4 */
    int CLOSE_PARENTHESIS = 36; // [CP], new in Unicode 5.2/ICU 4.4
    /** @stable ICU 49 */
    int CONDITIONAL_JAPANESE_STARTER = 37; // [CJ], new in Unicode 6.1/ICU 49
    /** @stable ICU 49 */
    int HEBREW_LETTER = 38; // [HL], new in Unicode 6.1/ICU 49
    /** @stable ICU 50 */
    int REGIONAL_INDICATOR = 39; // [RI], new in Unicode 6.2/ICU 50

    // Added in Unicode 9.0/ICU 58.
    /** @stable ICU 58 */
    int E_BASE = 40; // [EB]
    /** @stable ICU 58 */
    int E_MODIFIER = 41; // [EM]
    /** @stable ICU 58 */
    int ZWJ = 42; // [ZWJ]

    /**
     * One more than the highest normal LineBreak value.
     * Use UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK)
     * to obtain the highest value instead.
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 43;
}
3684 
/**
 * Value constants for the Numeric Type property.
 *
 * <p>As interface members, all fields below are implicitly {@code public static final}.
 *
 * @see UProperty#NUMERIC_TYPE
 * @stable ICU 2.4
 */
public interface NumericType {
    /** @stable ICU 2.4 */
    int NONE = 0;
    /** @stable ICU 2.4 */
    int DECIMAL = 1;
    /** @stable ICU 2.4 */
    int DIGIT = 2;
    /** @stable ICU 2.4 */
    int NUMERIC = 3;

    /**
     * One more than the highest normal NumericType value.
     * Use UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE)
     * to obtain the highest value instead.
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 4;
}
3717 
/**
 * Value constants for the Hangul Syllable Type property.
 *
 * <p>As interface members, all fields below are implicitly {@code public static final}.
 *
 * @see UProperty#HANGUL_SYLLABLE_TYPE
 * @stable ICU 2.6
 */
public interface HangulSyllableType {
    /** @stable ICU 2.6 */
    int NOT_APPLICABLE = 0; // [NA]; see note !!
    /** @stable ICU 2.6 */
    int LEADING_JAMO = 1; // [L]
    /** @stable ICU 2.6 */
    int VOWEL_JAMO = 2; // [V]
    /** @stable ICU 2.6 */
    int TRAILING_JAMO = 3; // [T]
    /** @stable ICU 2.6 */
    int LV_SYLLABLE = 4; // [LV]
    /** @stable ICU 2.6 */
    int LVT_SYLLABLE = 5; // [LVT]

    /**
     * One more than the highest normal HangulSyllableType value.
     * Use UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE)
     * to obtain the highest value instead.
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 6;
}
3759 
/**
 * Value constants for the Bidi Paired Bracket Type property.
 *
 * <p>As interface members, all fields below are implicitly {@code public static final}.
 *
 * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
 * @stable ICU 52
 */
public interface BidiPairedBracketType {
    /**
     * The character is not a paired bracket.
     *
     * @stable ICU 52
     */
    int NONE = 0;

    /**
     * The character is an open paired bracket.
     *
     * @stable ICU 52
     */
    int OPEN = 1;

    /**
     * The character is a close paired bracket.
     *
     * @stable ICU 52
     */
    int CLOSE = 2;

    /**
     * One more than the highest normal BidiPairedBracketType value.
     * Use UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE)
     * to obtain the highest value instead.
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    int COUNT = 3;
}
3791 
/**
 * Value constants for the Indic Positional Category property.
 *
 * <p>As interface members, all fields below are implicitly {@code public static final}.
 *
 * @see UProperty#INDIC_POSITIONAL_CATEGORY
 * @stable ICU 63
 */
public interface IndicPositionalCategory {
    /** @stable ICU 63 */
    int NA = 0;
    /** @stable ICU 63 */
    int BOTTOM = 1;
    /** @stable ICU 63 */
    int BOTTOM_AND_LEFT = 2;
    /** @stable ICU 63 */
    int BOTTOM_AND_RIGHT = 3;
    /** @stable ICU 63 */
    int LEFT = 4;
    /** @stable ICU 63 */
    int LEFT_AND_RIGHT = 5;
    /** @stable ICU 63 */
    int OVERSTRUCK = 6;
    /** @stable ICU 63 */
    int RIGHT = 7;
    /** @stable ICU 63 */
    int TOP = 8;
    /** @stable ICU 63 */
    int TOP_AND_BOTTOM = 9;
    /** @stable ICU 63 */
    int TOP_AND_BOTTOM_AND_RIGHT = 10;
    /** @stable ICU 63 */
    int TOP_AND_LEFT = 11;
    /** @stable ICU 63 */
    int TOP_AND_LEFT_AND_RIGHT = 12;
    /** @stable ICU 63 */
    int TOP_AND_RIGHT = 13;
    /** @stable ICU 63 */
    int VISUAL_ORDER_LEFT = 14;

    /** @stable ICU 66 */
    int TOP_AND_BOTTOM_AND_LEFT = 15;
}
3832 
/**
 * Value constants for the Indic Syllabic Category property.
 *
 * <p>As interface members, all fields below are implicitly {@code public static final}.
 *
 * @see UProperty#INDIC_SYLLABIC_CATEGORY
 * @stable ICU 63
 */
public interface IndicSyllabicCategory {
    /** @stable ICU 63 */
    int OTHER = 0;
    /** @stable ICU 63 */
    int AVAGRAHA = 1;
    /** @stable ICU 63 */
    int BINDU = 2;
    /** @stable ICU 63 */
    int BRAHMI_JOINING_NUMBER = 3;
    /** @stable ICU 63 */
    int CANTILLATION_MARK = 4;
    /** @stable ICU 63 */
    int CONSONANT = 5;
    /** @stable ICU 63 */
    int CONSONANT_DEAD = 6;
    /** @stable ICU 63 */
    int CONSONANT_FINAL = 7;
    /** @stable ICU 63 */
    int CONSONANT_HEAD_LETTER = 8;
    /** @stable ICU 63 */
    int CONSONANT_INITIAL_POSTFIXED = 9;
    /** @stable ICU 63 */
    int CONSONANT_KILLER = 10;
    /** @stable ICU 63 */
    int CONSONANT_MEDIAL = 11;
    /** @stable ICU 63 */
    int CONSONANT_PLACEHOLDER = 12;
    /** @stable ICU 63 */
    int CONSONANT_PRECEDING_REPHA = 13;
    /** @stable ICU 63 */
    int CONSONANT_PREFIXED = 14;
    /** @stable ICU 63 */
    int CONSONANT_SUBJOINED = 15;
    /** @stable ICU 63 */
    int CONSONANT_SUCCEEDING_REPHA = 16;
    /** @stable ICU 63 */
    int CONSONANT_WITH_STACKER = 17;
    /** @stable ICU 63 */
    int GEMINATION_MARK = 18;
    /** @stable ICU 63 */
    int INVISIBLE_STACKER = 19;
    /** @stable ICU 63 */
    int JOINER = 20;
    /** @stable ICU 63 */
    int MODIFYING_LETTER = 21;
    /** @stable ICU 63 */
    int NON_JOINER = 22;
    /** @stable ICU 63 */
    int NUKTA = 23;
    /** @stable ICU 63 */
    int NUMBER = 24;
    /** @stable ICU 63 */
    int NUMBER_JOINER = 25;
    /** @stable ICU 63 */
    int PURE_KILLER = 26;
    /** @stable ICU 63 */
    int REGISTER_SHIFTER = 27;
    /** @stable ICU 63 */
    int SYLLABLE_MODIFIER = 28;
    /** @stable ICU 63 */
    int TONE_LETTER = 29;
    /** @stable ICU 63 */
    int TONE_MARK = 30;
    /** @stable ICU 63 */
    int VIRAMA = 31;
    /** @stable ICU 63 */
    int VISARGA = 32;
    /** @stable ICU 63 */
    int VOWEL = 33;
    /** @stable ICU 63 */
    int VOWEL_DEPENDENT = 34;
    /** @stable ICU 63 */
    int VOWEL_INDEPENDENT = 35;
}
3913 
/**
 * Value constants for the Vertical Orientation property.
 *
 * <p>As interface members, all fields below are implicitly {@code public static final}.
 *
 * @see UProperty#VERTICAL_ORIENTATION
 * @stable ICU 63
 */
public interface VerticalOrientation {
    /** @stable ICU 63 */
    int ROTATED = 0;
    /** @stable ICU 63 */
    int TRANSFORMED_ROTATED = 1;
    /** @stable ICU 63 */
    int TRANSFORMED_UPRIGHT = 2;
    /** @stable ICU 63 */
    int UPRIGHT = 3;
}
3930 
3931     // public data members -----------------------------------------------
3932 
3933     /**
3934      * The lowest Unicode code point value, constant 0.
3935      * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
3936      *
3937      * @stable ICU 2.1
3938      */
3939     public static final int MIN_VALUE = Character.MIN_CODE_POINT;
3940 
3941     /**
3942      * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
3943      * Same as {@link Character#MAX_CODE_POINT}.
3944      *
3945      * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
3946      * which is still a char with the value U+FFFF.
3947      *
3948      * @stable ICU 2.1
3949      */
3950     public static final int MAX_VALUE = Character.MAX_CODE_POINT;
3951 
3952     /**
3953      * The minimum value for Supplementary code points, constant U+10000.
3954      * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
3955      *
3956      * @stable ICU 2.1
3957      */
3958     public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
3959 
3960     /**
3961      * Unicode value used when translating into Unicode encoding form and there
3962      * is no existing character.
3963      * @stable ICU 2.1
3964      */
3965     public static final int REPLACEMENT_CHAR = '\uFFFD';
3966 
3967     /**
3968      * Special value that is returned by getUnicodeNumericValue(int) when no
3969      * numeric value is defined for a code point.
3970      * @stable ICU 2.4
3971      * @see #getUnicodeNumericValue
3972      */
3973     public static final double NO_NUMERIC_VALUE = -123456789;
3974 
3975     /**
3976      * Compatibility constant for Java Character's MIN_RADIX.
3977      * @stable ICU 3.4
3978      */
3979     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
3980 
3981     /**
3982      * Compatibility constant for Java Character's MAX_RADIX.
3983      * @stable ICU 3.4
3984      */
3985     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
3986 
3987     /**
3988      * Do not lowercase non-initial parts of words when titlecasing.
3989      * Option bit for titlecasing APIs that take an options bit set.
3990      *
3991      * By default, titlecasing will titlecase the first cased character
3992      * of a word and lowercase all other characters.
3993      * With this option, the other characters will not be modified.
3994      *
3995      * @see #toTitleCase
3996      * @stable ICU 3.8
3997      */
3998     public static final int TITLECASE_NO_LOWERCASE = 0x100;
3999 
4000     /**
4001      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
4002      * titlecase exactly the characters at breaks from the iterator.
4003      * Option bit for titlecasing APIs that take an options bit set.
4004      *
4005      * By default, titlecasing will take each break iterator index,
4006      * adjust it by looking for the next cased character, and titlecase that one.
4007      * Other characters are lowercased.
4008      *
4009      * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
4010      *
4011      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
4012      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
4013      * cased character F. If F exists, map F to default_title(F); then map each
4014      * subsequent character C to default_lower(C).
4015      *
4016      * @see #toTitleCase
4017      * @see #TITLECASE_NO_LOWERCASE
4018      * @stable ICU 3.8
4019      */
4020     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
4021 
4022     // public methods ----------------------------------------------------
4023 
4024     /**
4025      * Returnss the numeric value of a decimal digit code point.
4026      * <br>This method observes the semantics of
4027      * <code>java.lang.Character.digit()</code>.  Note that this
4028      * will return positive values for code points for which isDigit
4029      * returns false, just like java.lang.Character.
4030      * <br><em>Semantic Change:</em> In release 1.3.1 and
4031      * prior, this did not treat the European letters as having a
4032      * digit value, and also treated numeric letters and other numbers as
4033      * digits.
4034      * This has been changed to conform to the java semantics.
4035      * <br>A code point is a valid digit if and only if:
4036      * <ul>
4037      *   <li>ch is a decimal digit or one of the european letters, and
4038      *   <li>the value of ch is less than the specified radix.
4039      * </ul>
4040      * @param ch the code point to query
4041      * @param radix the radix
4042      * @return the numeric value represented by the code point in the
4043      * specified radix, or -1 if the code point is not a decimal digit
4044      * or if its value is too large for the radix
4045      * @stable ICU 2.1
4046      */
digit(int ch, int radix)4047     public static int digit(int ch, int radix)
4048     {
4049         if (2 <= radix && radix <= 36) {
4050             int value = digit(ch);
4051             if (value < 0) {
4052                 // ch is not a decimal digit, try latin letters
4053                 value = UCharacterProperty.getEuropeanDigit(ch);
4054             }
4055             return (value < radix) ? value : -1;
4056         } else {
4057             return -1;  // invalid radix
4058         }
4059     }
4060 
4061     /**
4062      * Returnss the numeric value of a decimal digit code point.
4063      * <br>This is a convenience overload of <code>digit(int, int)</code>
4064      * that provides a decimal radix.
4065      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
4066      * treated numeric letters and other numbers as digits.  This has
4067      * been changed to conform to the java semantics.
4068      * @param ch the code point to query
4069      * @return the numeric value represented by the code point,
4070      * or -1 if the code point is not a decimal digit or if its
4071      * value is too large for a decimal radix
4072      * @stable ICU 2.1
4073      */
digit(int ch)4074     public static int digit(int ch)
4075     {
4076         return UCharacterProperty.INSTANCE.digit(ch);
4077     }
4078 
4079     /**
4080      * Returns the numeric value of the code point as a nonnegative
4081      * integer.
4082      * <br>If the code point does not have a numeric value, then -1 is returned.
4083      * <br>
4084      * If the code point has a numeric value that cannot be represented as a
4085      * nonnegative integer (for example, a fractional value), then -2 is
4086      * returned.
4087      * @param ch the code point to query
4088      * @return the numeric value of the code point, or -1 if it has no numeric
4089      * value, or -2 if it has a numeric value that cannot be represented as a
4090      * nonnegative integer
4091      * @stable ICU 2.1
4092      */
getNumericValue(int ch)4093     public static int getNumericValue(int ch)
4094     {
4095         return UCharacterProperty.INSTANCE.getNumericValue(ch);
4096     }
4097 
4098     /**
4099      * {@icu} Returns the numeric value for a Unicode code point as defined in the
4100      * Unicode Character Database.
4101      * <p>A "double" return type is necessary because some numeric values are
4102      * fractions, negative, or too large for int.
4103      * <p>For characters without any numeric values in the Unicode Character
4104      * Database, this function will return NO_NUMERIC_VALUE.
4105      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
4106      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
4107      * return type int and returns -1 when the argument ch does not have a
4108      * corresponding numeric value. This has been changed to synch with ICU4C
4109      *
4110      * This corresponds to the ICU4C function u_getNumericValue.
4111      * @param ch Code point to get the numeric value for.
4112      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
4113      * @stable ICU 2.4
4114      */
getUnicodeNumericValue(int ch)4115     public static double getUnicodeNumericValue(int ch)
4116     {
4117         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
4118     }
4119 
4120     /**
4121      * Compatibility override of Java deprecated method.  This
4122      * method will always remain deprecated.
4123      * Same as java.lang.Character.isSpace().
4124      * @param ch the code point
4125      * @return true if the code point is a space character as
4126      * defined by java.lang.Character.isSpace.
4127      * @deprecated ICU 3.4 (Java)
4128      */
4129     @Deprecated
isSpace(int ch)4130     public static boolean isSpace(int ch) {
4131         return ch <= 0x20 &&
4132                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
4133     }
4134 
4135     /**
4136      * Returns a value indicating a code point's Unicode category.
4137      * Up-to-date Unicode implementation of java.lang.Character.getType()
4138      * except for the above mentioned code points that had their category
4139      * changed.<br>
4140      * Return results are constants from the interface
4141      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
4142      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
4143      * those returned by java.lang.Character.getType.  UCharacterCategory values
4144      * match the ones used in ICU4C, while java.lang.Character type
4145      * values, though similar, skip the value 17.
4146      * @param ch code point whose type is to be determined
4147      * @return category which is a value of UCharacterCategory
4148      * @stable ICU 2.1
4149      */
getType(int ch)4150     public static int getType(int ch)
4151     {
4152         return UCharacterProperty.INSTANCE.getType(ch);
4153     }
4154 
4155     /**
4156      * Determines if a code point has a defined meaning in the up-to-date
4157      * Unicode standard.
4158      * E.g. supplementary code points though allocated space are not defined in
4159      * Unicode yet.<br>
4160      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
4161      * @param ch code point to be determined if it is defined in the most
4162      *        current version of Unicode
4163      * @return true if this code point is defined in unicode
4164      * @stable ICU 2.1
4165      */
isDefined(int ch)4166     public static boolean isDefined(int ch)
4167     {
4168         return getType(ch) != 0;
4169     }
4170 
4171     /**
4172      * Determines if a code point is a Java digit.
4173      * <br>This method observes the semantics of
4174      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
4175      * digits only.
4176      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
4177      * numeric letters and other numbers as digits.
4178      * This has been changed to conform to the java semantics.
4179      * @param ch code point to query
4180      * @return true if this code point is a digit
4181      * @stable ICU 2.1
4182      */
isDigit(int ch)4183     public static boolean isDigit(int ch)
4184     {
4185         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
4186     }
4187 
4188     /**
4189      * Determines if the specified code point is an ISO control character.
4190      * A code point is considered to be an ISO control character if it is in
4191      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
4192      * &#92;u009F.<br>
4193      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
4194      * @param ch code point to determine if it is an ISO control character
4195      * @return true if code point is a ISO control character
4196      * @stable ICU 2.1
4197      */
isISOControl(int ch)4198     public static boolean isISOControl(int ch)
4199     {
4200         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
4201                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
4202     }
4203 
4204     /**
4205      * Determines if the specified code point is a letter.
4206      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
4207      * @param ch code point to determine if it is a letter
4208      * @return true if code point is a letter
4209      * @stable ICU 2.1
4210      */
isLetter(int ch)4211     public static boolean isLetter(int ch)
4212     {
4213         // if props == 0, it will just fall through and return false
4214         return ((1 << getType(ch))
4215                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4216                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4217                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4218                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4219                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
4220     }
4221 
4222     /**
4223      * Determines if the specified code point is a letter or digit.
4224      * {@icunote} This method, unlike java.lang.Character does not regard the ascii
4225      * characters 'A' - 'Z' and 'a' - 'z' as digits.
4226      * @param ch code point to determine if it is a letter or a digit
4227      * @return true if code point is a letter or a digit
4228      * @stable ICU 2.1
4229      */
isLetterOrDigit(int ch)4230     public static boolean isLetterOrDigit(int ch)
4231     {
4232         return ((1 << getType(ch))
4233                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4234                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4235                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4236                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4237                         | (1 << UCharacterCategory.OTHER_LETTER)
4238                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
4239     }
4240 
4241     /**
4242      * Compatibility override of Java deprecated method.  This
4243      * method will always remain deprecated.  Delegates to
4244      * java.lang.Character.isJavaIdentifierStart.
4245      * @param cp the code point
4246      * @return true if the code point can start a java identifier.
4247      * @deprecated ICU 3.4 (Java)
4248      */
4249     @Deprecated
isJavaLetter(int cp)4250     public static boolean isJavaLetter(int cp) {
4251         return isJavaIdentifierStart(cp);
4252     }
4253 
4254     /**
4255      * Compatibility override of Java deprecated method.  This
4256      * method will always remain deprecated.  Delegates to
4257      * java.lang.Character.isJavaIdentifierPart.
4258      * @param cp the code point
4259      * @return true if the code point can continue a java identifier.
4260      * @deprecated ICU 3.4 (Java)
4261      */
4262     @Deprecated
isJavaLetterOrDigit(int cp)4263     public static boolean isJavaLetterOrDigit(int cp) {
4264         return isJavaIdentifierPart(cp);
4265     }
4266 
4267     /**
4268      * Compatibility override of Java method, delegates to
4269      * java.lang.Character.isJavaIdentifierStart.
4270      * @param cp the code point
4271      * @return true if the code point can start a java identifier.
4272      * @stable ICU 3.4
4273      */
isJavaIdentifierStart(int cp)4274     public static boolean isJavaIdentifierStart(int cp) {
4275         // note, downcast to char for jdk 1.4 compatibility
4276         return java.lang.Character.isJavaIdentifierStart((char)cp);
4277     }
4278 
4279     /**
4280      * Compatibility override of Java method, delegates to
4281      * java.lang.Character.isJavaIdentifierPart.
4282      * @param cp the code point
4283      * @return true if the code point can continue a java identifier.
4284      * @stable ICU 3.4
4285      */
isJavaIdentifierPart(int cp)4286     public static boolean isJavaIdentifierPart(int cp) {
4287         // note, downcast to char for jdk 1.4 compatibility
4288         return java.lang.Character.isJavaIdentifierPart((char)cp);
4289     }
4290 
4291     /**
4292      * Determines if the specified code point is a lowercase character.
4293      * UnicodeData only contains case mappings for code points where they are
4294      * one-to-one mappings; it also omits information about context-sensitive
4295      * case mappings.<br> For more information about Unicode case mapping
4296      * please refer to the
4297      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
4298      * #21</a>.<br>
4299      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
4300      * @param ch code point to determine if it is in lowercase
4301      * @return true if code point is a lowercase character
4302      * @stable ICU 2.1
4303      */
isLowerCase(int ch)4304     public static boolean isLowerCase(int ch)
4305     {
4306         // if props == 0, it will just fall through and return false
4307         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
4308     }
4309 
4310     /**
4311      * Determines if the specified code point is a white space character.
4312      * A code point is considered to be an whitespace character if and only
4313      * if it satisfies one of the following criteria:
4314      * <ul>
4315      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
4316      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
4317      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
4318      * <li> It is &#92;u000A, LINE FEED.
4319      * <li> It is &#92;u000B, VERTICAL TABULATION.
4320      * <li> It is &#92;u000C, FORM FEED.
4321      * <li> It is &#92;u000D, CARRIAGE RETURN.
4322      * <li> It is &#92;u001C, FILE SEPARATOR.
4323      * <li> It is &#92;u001D, GROUP SEPARATOR.
4324      * <li> It is &#92;u001E, RECORD SEPARATOR.
4325      * <li> It is &#92;u001F, UNIT SEPARATOR.
4326      * </ul>
4327      *
4328      * This API tries to sync with the semantics of Java's
4329      * java.lang.Character.isWhitespace(), but it may not return
4330      * the exact same results because of the Unicode version
4331      * difference.
4332      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
4333      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
4334      * See http://www.unicode.org/versions/Unicode4.0.1/
4335      * @param ch code point to determine if it is a white space
4336      * @return true if the specified code point is a white space character
4337      * @stable ICU 2.1
4338      */
isWhitespace(int ch)4339     public static boolean isWhitespace(int ch)
4340     {
4341         // exclude no-break spaces
4342         // if props == 0, it will just fall through and return false
4343         return ((1 << getType(ch)) &
4344                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
4345                         | (1 << UCharacterCategory.LINE_SEPARATOR)
4346                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
4347                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
4348                         // TAB VT LF FF CR FS GS RS US NL are all control characters
4349                         // that are white spaces.
4350                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
4351     }
4352 
4353     /**
4354      * Determines if the specified code point is a Unicode specified space
4355      * character, i.e. if code point is in the category Zs, Zl and Zp.
4356      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
4357      * @param ch code point to determine if it is a space
4358      * @return true if the specified code point is a space character
4359      * @stable ICU 2.1
4360      */
isSpaceChar(int ch)4361     public static boolean isSpaceChar(int ch)
4362     {
4363         // if props == 0, it will just fall through and return false
4364         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
4365                 | (1 << UCharacterCategory.LINE_SEPARATOR)
4366                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
4367                 != 0;
4368     }
4369 
4370     /**
4371      * Determines if the specified code point is a titlecase character.
4372      * UnicodeData only contains case mappings for code points where they are
4373      * one-to-one mappings; it also omits information about context-sensitive
4374      * case mappings.<br>
4375      * For more information about Unicode case mapping please refer to the
4376      * <a href=http://www.unicode.org/unicode/reports/tr21/>
4377      * Technical report #21</a>.<br>
4378      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
4379      * @param ch code point to determine if it is in title case
4380      * @return true if the specified code point is a titlecase character
4381      * @stable ICU 2.1
4382      */
isTitleCase(int ch)4383     public static boolean isTitleCase(int ch)
4384     {
4385         // if props == 0, it will just fall through and return false
4386         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
4387     }
4388 
4389     /**
4390      * Determines if the specified code point may be any part of a Unicode
4391      * identifier other than the starting character.
4392      * A code point may be part of a Unicode identifier if and only if it is
4393      * one of the following:
4394      * <ul>
4395      * <li> Lu Uppercase letter
4396      * <li> Ll Lowercase letter
4397      * <li> Lt Titlecase letter
4398      * <li> Lm Modifier letter
4399      * <li> Lo Other letter
4400      * <li> Nl Letter number
4401      * <li> Pc Connecting punctuation character
4402      * <li> Nd decimal number
4403      * <li> Mc Spacing combining mark
4404      * <li> Mn Non-spacing mark
4405      * <li> Cf formatting code
4406      * </ul>
4407      * Up-to-date Unicode implementation of
4408      * java.lang.Character.isUnicodeIdentifierPart().<br>
4409      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
4410      * @param ch code point to determine if is can be part of a Unicode
4411      *        identifier
4412      * @return true if code point is any character belonging a unicode
4413      *         identifier suffix after the first character
4414      * @stable ICU 2.1
4415      */
isUnicodeIdentifierPart(int ch)4416     public static boolean isUnicodeIdentifierPart(int ch)
4417     {
4418         // if props == 0, it will just fall through and return false
4419         // cat == format
4420         return ((1 << getType(ch))
4421                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4422                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4423                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4424                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4425                         | (1 << UCharacterCategory.OTHER_LETTER)
4426                         | (1 << UCharacterCategory.LETTER_NUMBER)
4427                         | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
4428                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
4429                         | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
4430                         | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
4431                         || isIdentifierIgnorable(ch);
4432     }
4433 
4434     /**
4435      * Determines if the specified code point is permissible as the first
4436      * character in a Unicode identifier.
4437      * A code point may start a Unicode identifier if it is of type either
4438      * <ul>
4439      * <li> Lu Uppercase letter
4440      * <li> Ll Lowercase letter
4441      * <li> Lt Titlecase letter
4442      * <li> Lm Modifier letter
4443      * <li> Lo Other letter
4444      * <li> Nl Letter number
4445      * </ul>
4446      * Up-to-date Unicode implementation of
4447      * java.lang.Character.isUnicodeIdentifierStart().<br>
4448      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
4449      * @param ch code point to determine if it can start a Unicode identifier
4450      * @return true if code point is the first character belonging a unicode
4451      *              identifier
4452      * @stable ICU 2.1
4453      */
isUnicodeIdentifierStart(int ch)4454     public static boolean isUnicodeIdentifierStart(int ch)
4455     {
4456         /*int cat = getType(ch);*/
4457         // if props == 0, it will just fall through and return false
4458         return ((1 << getType(ch))
4459                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4460                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4461                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4462                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4463                         | (1 << UCharacterCategory.OTHER_LETTER)
4464                         | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
4465     }
4466 
4467     /**
4468      * Determines if the specified code point should be regarded as an
4469      * ignorable character in a Java identifier.
4470      * A character is Java-identifier-ignorable if it has the general category
4471      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
4472      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
4473      * Up-to-date Unicode implementation of
4474      * java.lang.Character.isIdentifierIgnorable().<br>
4475      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
4476      * <p>Note that Unicode just recommends to ignore Cf (format controls).
4477      * @param ch code point to be determined if it can be ignored in a Unicode
4478      *        identifier.
4479      * @return true if the code point is ignorable
4480      * @stable ICU 2.1
4481      */
isIdentifierIgnorable(int ch)4482     public static boolean isIdentifierIgnorable(int ch)
4483     {
4484         // see java.lang.Character.isIdentifierIgnorable() on range of
4485         // ignorable characters.
4486         if (ch <= 0x9f) {
4487             return isISOControl(ch)
4488                     && !((ch >= 0x9 && ch <= 0xd)
4489                             || (ch >= 0x1c && ch <= 0x1f));
4490         }
4491         return getType(ch) == UCharacterCategory.FORMAT;
4492     }
4493 
4494     /**
4495      * Determines if the specified code point is an uppercase character.
4496      * UnicodeData only contains case mappings for code point where they are
4497      * one-to-one mappings; it also omits information about context-sensitive
4498      * case mappings.<br>
4499      * For language specific case conversion behavior, use
4500      * toUpperCase(locale, str). <br>
4501      * For example, the case conversion for dot-less i and dotted I in Turkish,
4502      * or for final sigma in Greek.
4503      * For more information about Unicode case mapping please refer to the
4504      * <a href=http://www.unicode.org/unicode/reports/tr21/>
4505      * Technical report #21</a>.<br>
4506      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
4507      * @param ch code point to determine if it is in uppercase
4508      * @return true if the code point is an uppercase character
4509      * @stable ICU 2.1
4510      */
isUpperCase(int ch)4511     public static boolean isUpperCase(int ch)
4512     {
4513         // if props == 0, it will just fall through and return false
4514         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
4515     }
4516 
4517     /**
4518      * The given code point is mapped to its lowercase equivalent; if the code
4519      * point has no lowercase equivalent, the code point itself is returned.
4520      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
4521      *
4522      * <p>This function only returns the simple, single-code point case mapping.
4523      * Full case mappings should be used whenever possible because they produce
4524      * better results by working on whole strings.
4525      * They take into account the string context and the language and can map
4526      * to a result string with a different length as appropriate.
4527      * Full case mappings are applied by the case mapping functions
4528      * that take String parameters rather than code points (int).
4529      * See also the User Guide chapter on C/POSIX migration:
4530      * http://www.icu-project.org/userguide/posix.html#case_mappings
4531      *
4532      * @param ch code point whose lowercase equivalent is to be retrieved
4533      * @return the lowercase equivalent code point
4534      * @stable ICU 2.1
4535      */
toLowerCase(int ch)4536     public static int toLowerCase(int ch) {
4537         return UCaseProps.INSTANCE.tolower(ch);
4538     }
4539 
4540     /**
4541      * Converts argument code point and returns a String object representing
4542      * the code point's value in UTF-16 format.
4543      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
4544      *
4545      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
4546      *
4547      * @param ch code point
4548      * @return string representation of the code point, null if code point is not
4549      *         defined in unicode
4550      * @stable ICU 2.1
4551      */
toString(int ch)4552     public static String toString(int ch)
4553     {
4554         if (ch < MIN_VALUE || ch > MAX_VALUE) {
4555             return null;
4556         }
4557 
4558         if (ch < SUPPLEMENTARY_MIN_VALUE) {
4559             return String.valueOf((char)ch);
4560         }
4561 
4562         return new String(Character.toChars(ch));
4563     }
4564 
4565     /**
4566      * Converts the code point argument to titlecase.
4567      * If no titlecase is available, the uppercase is returned. If no uppercase
4568      * is available, the code point itself is returned.
4569      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
4570      *
4571      * <p>This function only returns the simple, single-code point case mapping.
4572      * Full case mappings should be used whenever possible because they produce
4573      * better results by working on whole strings.
4574      * They take into account the string context and the language and can map
4575      * to a result string with a different length as appropriate.
4576      * Full case mappings are applied by the case mapping functions
4577      * that take String parameters rather than code points (int).
4578      * See also the User Guide chapter on C/POSIX migration:
4579      * http://www.icu-project.org/userguide/posix.html#case_mappings
4580      *
4581      * @param ch code point  whose title case is to be retrieved
4582      * @return titlecase code point
4583      * @stable ICU 2.1
4584      */
toTitleCase(int ch)4585     public static int toTitleCase(int ch) {
4586         return UCaseProps.INSTANCE.totitle(ch);
4587     }
4588 
4589     /**
4590      * Converts the character argument to uppercase.
4591      * If no uppercase is available, the character itself is returned.
4592      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
4593      *
4594      * <p>This function only returns the simple, single-code point case mapping.
4595      * Full case mappings should be used whenever possible because they produce
4596      * better results by working on whole strings.
4597      * They take into account the string context and the language and can map
4598      * to a result string with a different length as appropriate.
4599      * Full case mappings are applied by the case mapping functions
4600      * that take String parameters rather than code points (int).
4601      * See also the User Guide chapter on C/POSIX migration:
4602      * http://www.icu-project.org/userguide/posix.html#case_mappings
4603      *
4604      * @param ch code point whose uppercase is to be retrieved
4605      * @return uppercase code point
4606      * @stable ICU 2.1
4607      */
toUpperCase(int ch)4608     public static int toUpperCase(int ch) {
4609         return UCaseProps.INSTANCE.toupper(ch);
4610     }
4611 
4612     // extra methods not in java.lang.Character --------------------------
4613 
4614     /**
4615      * {@icu} Determines if the code point is a supplementary character.
4616      * A code point is a supplementary character if and only if it is greater
4617      * than or equal to <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
4618      * @param ch code point to be determined if it is in the supplementary
4619      *        plane
4620      * @return true if code point is a supplementary character
4621      * @stable ICU 2.1
4622      */
isSupplementary(int ch)4623     public static boolean isSupplementary(int ch)
4624     {
4625         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
4626                 ch <= UCharacter.MAX_VALUE;
4627     }
4628 
4629     /**
4630      * {@icu} Determines if the code point is in the BMP plane.
4631      * @param ch code point to be determined if it is not a supplementary
4632      *        character
4633      * @return true if code point is not a supplementary character
4634      * @stable ICU 2.1
4635      */
isBMP(int ch)4636     public static boolean isBMP(int ch)
4637     {
4638         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
4639     }
4640 
4641     /**
4642      * {@icu} Determines whether the specified code point is a printable character
4643      * according to the Unicode standard.
4644      * @param ch code point to be determined if it is printable
4645      * @return true if the code point is a printable character
4646      * @stable ICU 2.1
4647      */
isPrintable(int ch)4648     public static boolean isPrintable(int ch)
4649     {
4650         int cat = getType(ch);
4651         // if props == 0, it will just fall through and return false
4652         return (cat != UCharacterCategory.UNASSIGNED &&
4653                 cat != UCharacterCategory.CONTROL &&
4654                 cat != UCharacterCategory.FORMAT &&
4655                 cat != UCharacterCategory.PRIVATE_USE &&
4656                 cat != UCharacterCategory.SURROGATE &&
4657                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
4658     }
4659 
4660     /**
4661      * {@icu} Determines whether the specified code point is of base form.
4662      * A code point of base form does not graphically combine with preceding
4663      * characters, and is neither a control nor a format character.
4664      * @param ch code point to be determined if it is of base form
4665      * @return true if the code point is of base form
4666      * @stable ICU 2.1
4667      */
isBaseForm(int ch)4668     public static boolean isBaseForm(int ch)
4669     {
4670         int cat = getType(ch);
4671         // if props == 0, it will just fall through and return false
4672         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
4673                 cat == UCharacterCategory.OTHER_NUMBER ||
4674                 cat == UCharacterCategory.LETTER_NUMBER ||
4675                 cat == UCharacterCategory.UPPERCASE_LETTER ||
4676                 cat == UCharacterCategory.LOWERCASE_LETTER ||
4677                 cat == UCharacterCategory.TITLECASE_LETTER ||
4678                 cat == UCharacterCategory.MODIFIER_LETTER ||
4679                 cat == UCharacterCategory.OTHER_LETTER ||
4680                 cat == UCharacterCategory.NON_SPACING_MARK ||
4681                 cat == UCharacterCategory.ENCLOSING_MARK ||
4682                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
4683     }
4684 
4685     /**
4686      * {@icu} Returns the Bidirection property of a code point.
4687      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
4688      * property.<br>
4689      * Result returned belongs to the interface
4690      * <a href=UCharacterDirection.html>UCharacterDirection</a>
4691      * @param ch the code point whose direction is to be determined
4692      * @return direction constant from UCharacterDirection.
4693      * @stable ICU 2.1
4694      */
getDirection(int ch)4695     public static int getDirection(int ch)
4696     {
4697         return UBiDiProps.INSTANCE.getClass(ch);
4698     }
4699 
4700     /**
4701      * Determines whether the code point has the "mirrored" property.
4702      * This property is set for characters that are commonly used in
4703      * Right-To-Left contexts and need to be displayed with a "mirrored"
4704      * glyph.
4705      * @param ch code point whose mirror is to be determined
4706      * @return true if the code point has the "mirrored" property
4707      * @stable ICU 2.1
4708      */
isMirrored(int ch)4709     public static boolean isMirrored(int ch)
4710     {
4711         return UBiDiProps.INSTANCE.isMirrored(ch);
4712     }
4713 
4714     /**
4715      * {@icu} Maps the specified code point to a "mirror-image" code point.
4716      * For code points with the "mirrored" property, implementations sometimes
4717      * need a "poor man's" mapping to another code point such that the default
4718      * glyph may serve as the mirror-image of the default glyph of the
4719      * specified code point.<br>
4720      * This is useful for text conversion to and from codepages with visual
4721      * order, and for displays without glyph selection capabilities.
4722      * @param ch code point whose mirror is to be retrieved
4723      * @return another code point that may serve as a mirror-image substitute,
4724      *         or ch itself if there is no such mapping or ch does not have the
4725      *         "mirrored" property
4726      * @stable ICU 2.1
4727      */
getMirror(int ch)4728     public static int getMirror(int ch)
4729     {
4730         return UBiDiProps.INSTANCE.getMirror(ch);
4731     }
4732 
4733     /**
4734      * {@icu} Maps the specified character to its paired bracket character.
4735      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
4736      * Otherwise c itself is returned.
4737      * See http://www.unicode.org/reports/tr9/
4738      *
4739      * @param c the code point to be mapped
4740      * @return the paired bracket code point,
4741      *         or c itself if there is no such mapping
4742      *         (Bidi_Paired_Bracket_Type=None)
4743      *
4744      * @see UProperty#BIDI_PAIRED_BRACKET
4745      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
4746      * @see #getMirror(int)
4747      * @stable ICU 52
4748      */
getBidiPairedBracket(int c)4749     public static int getBidiPairedBracket(int c) {
4750         return UBiDiProps.INSTANCE.getPairedBracket(c);
4751     }
4752 
4753     /**
4754      * {@icu} Returns the combining class of the argument codepoint
4755      * @param ch code point whose combining is to be retrieved
4756      * @return the combining class of the codepoint
4757      * @stable ICU 2.1
4758      */
getCombiningClass(int ch)4759     public static int getCombiningClass(int ch)
4760     {
4761         return Normalizer2.getNFDInstance().getCombiningClass(ch);
4762     }
4763 
4764     /**
4765      * {@icu} A code point is illegal if and only if
4766      * <ul>
4767      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4768      * <li> A surrogate value, 0xD800 to 0xDFFF
4769      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4770      * </ul>
4771      * Note: legal does not mean that it is assigned in this version of Unicode.
4772      * @param ch code point to determine if it is a legal code point by itself
4773      * @return true if and only if legal.
4774      * @stable ICU 2.1
4775      */
isLegal(int ch)4776     public static boolean isLegal(int ch)
4777     {
4778         if (ch < MIN_VALUE) {
4779             return false;
4780         }
4781         if (ch < Character.MIN_SURROGATE) {
4782             return true;
4783         }
4784         if (ch <= Character.MAX_SURROGATE) {
4785             return false;
4786         }
4787         if (UCharacterUtility.isNonCharacter(ch)) {
4788             return false;
4789         }
4790         return (ch <= MAX_VALUE);
4791     }
4792 
4793     /**
4794      * {@icu} A string is legal iff all its code points are legal.
4795      * A code point is illegal if and only if
4796      * <ul>
4797      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4798      * <li> A surrogate value, 0xD800 to 0xDFFF
4799      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4800      * </ul>
4801      * Note: legal does not mean that it is assigned in this version of Unicode.
4802      * @param str containing code points to examine
4803      * @return true if and only if legal.
4804      * @stable ICU 2.1
4805      */
isLegal(String str)4806     public static boolean isLegal(String str)
4807     {
4808         int size = str.length();
4809         int codepoint;
4810         for (int i = 0; i < size; i += Character.charCount(codepoint))
4811         {
4812             codepoint = str.codePointAt(i);
4813             if (!isLegal(codepoint)) {
4814                 return false;
4815             }
4816         }
4817         return true;
4818     }
4819 
4820     /**
4821      * {@icu} Returns the version of Unicode data used.
4822      * @return the unicode version number used
4823      * @stable ICU 2.1
4824      */
getUnicodeVersion()4825     public static VersionInfo getUnicodeVersion()
4826     {
4827         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
4828     }
4829 
4830     /**
4831      * {@icu} Returns the most current Unicode name of the argument code point, or
4832      * null if the character is unassigned or outside the range
4833      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4834      * <br>
4835      * Note calling any methods related to code point names, e.g. get*Name*()
4836      * incurs a one-time initialisation cost to construct the name tables.
4837      * @param ch the code point for which to get the name
4838      * @return most current Unicode name
4839      * @stable ICU 2.1
4840      */
getName(int ch)4841     public static String getName(int ch)
4842     {
4843         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
4844     }
4845 
4846     /**
4847      * {@icu} Returns the names for each of the characters in a string
4848      * @param s string to format
4849      * @param separator string to go between names
4850      * @return string of names
4851      * @stable ICU 3.8
4852      */
getName(String s, String separator)4853     public static String getName(String s, String separator) {
4854         if (s.length() == 1) { // handle common case
4855             return getName(s.charAt(0));
4856         }
4857         int cp;
4858         StringBuilder sb = new StringBuilder();
4859         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
4860             cp = s.codePointAt(i);
4861             if (i != 0) sb.append(separator);
4862             sb.append(UCharacter.getName(cp));
4863         }
4864         return sb.toString();
4865     }
4866 
4867     /**
4868      * {@icu} Returns null.
4869      * Used to return the Unicode_1_Name property value which was of little practical value.
4870      * @param ch the code point for which to get the name
4871      * @return null
4872      * @deprecated ICU 49
4873      */
4874     @Deprecated
getName1_0(int ch)4875     public static String getName1_0(int ch)
4876     {
4877         return null;
4878     }
4879 
4880     /**
4881      * {@icu} Returns a name for a valid codepoint. Unlike getName(int) and
4882      * getName1_0(int), this method will return a name even for codepoints that
4883      * are not assigned a name in UnicodeData.txt.
4884      *
4885      * <p>The names are returned in the following order.
4886      * <ul>
4887      * <li> Most current Unicode name if there is any
4888      * <li> Unicode 1.0 name if there is any
4889      * <li> Extended name in the form of
4890      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
4891      * </ul>
4892      * Note calling any methods related to code point names, e.g. get*Name*()
4893      * incurs a one-time initialisation cost to construct the name tables.
4894      * @param ch the code point for which to get the name
4895      * @return a name for the argument codepoint
4896      * @stable ICU 2.6
4897      */
getExtendedName(int ch)4898     public static String getExtendedName(int ch) {
4899         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
4900     }
4901 
4902     /**
4903      * {@icu} Returns the corrected name from NameAliases.txt if there is one.
4904      * Returns null if the character is unassigned or outside the range
4905      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4906      * <br>
4907      * Note calling any methods related to code point names, e.g. get*Name*()
4908      * incurs a one-time initialisation cost to construct the name tables.
4909      * @param ch the code point for which to get the name alias
4910      * @return Unicode name alias, or null
4911      * @stable ICU 4.4
4912      */
getNameAlias(int ch)4913     public static String getNameAlias(int ch)
4914     {
4915         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
4916     }
4917 
4918     /**
4919      * {@icu} Returns null.
4920      * Used to return the ISO 10646 comment for a character.
4921      * The Unicode ISO_Comment property is deprecated and has no values.
4922      *
4923      * @param ch The code point for which to get the ISO comment.
4924      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
4925      * @return null
4926      * @deprecated ICU 49
4927      */
4928     @Deprecated
getISOComment(int ch)4929     public static String getISOComment(int ch)
4930     {
4931         return null;
4932     }
4933 
4934     /**
4935      * {@icu} <p>Finds a Unicode code point by its most current Unicode name and
4936      * return its code point value. All Unicode names are in uppercase.
4937      * Note calling any methods related to code point names, e.g. get*Name*()
4938      * incurs a one-time initialisation cost to construct the name tables.
4939      * @param name most current Unicode character name whose code point is to
4940      *        be returned
4941      * @return code point or -1 if name is not found
4942      * @stable ICU 2.1
4943      */
getCharFromName(String name)4944     public static int getCharFromName(String name){
4945         return UCharacterName.INSTANCE.getCharFromName(
4946                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
4947     }
4948 
4949     /**
4950      * {@icu} Returns -1.
4951      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
4952      * its code point value.
4953      * @param name Unicode 1.0 code point name whose code point is to be
4954      *             returned
4955      * @return -1
4956      * @deprecated ICU 49
4957      * @see #getName1_0(int)
4958      */
4959     @Deprecated
getCharFromName1_0(String name)4960     public static int getCharFromName1_0(String name){
4961         return -1;
4962     }
4963 
4964     /**
4965      * {@icu} <p>Find a Unicode character by any of its names and return its code
4966      * point value. All Unicode names are in uppercase.
4967      * Extended names are all lowercase except for numbers and are contained
4968      * within angle brackets.
4969      * The names are searched in the following order
4970      * <ul>
4971      * <li> Most current Unicode name if there is any
4972      * <li> Unicode 1.0 name if there is any
4973      * <li> Extended name in the form of
4974      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
4975      * </ul>
4976      * Note calling any methods related to code point names, e.g. get*Name*()
4977      * incurs a one-time initialisation cost to construct the name tables.
4978      * @param name codepoint name
4979      * @return code point associated with the name or -1 if the name is not
4980      *         found.
4981      * @stable ICU 2.6
4982      */
getCharFromExtendedName(String name)4983     public static int getCharFromExtendedName(String name){
4984         return UCharacterName.INSTANCE.getCharFromName(
4985                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
4986     }
4987 
4988     /**
4989      * {@icu} <p>Find a Unicode character by its corrected name alias and return
4990      * its code point value. All Unicode names are in uppercase.
4991      * Note calling any methods related to code point names, e.g. get*Name*()
4992      * incurs a one-time initialisation cost to construct the name tables.
4993      * @param name Unicode name alias whose code point is to be returned
4994      * @return code point or -1 if name is not found
4995      * @stable ICU 4.4
4996      */
getCharFromNameAlias(String name)4997     public static int getCharFromNameAlias(String name){
4998         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
4999     }
5000 
5001     /**
5002      * {@icu} Return the Unicode name for a given property, as given in the
5003      * Unicode database file PropertyAliases.txt.  Most properties
5004      * have more than one name.  The nameChoice determines which one
5005      * is returned.
5006      *
5007      * In addition, this function maps the property
5008      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
5009      * "General_Category_Mask".  These names are not in
5010      * PropertyAliases.txt.
5011      *
5012      * @param property UProperty selector.
5013      *
5014      * @param nameChoice UProperty.NameChoice selector for which name
5015      * to get.  All properties have a long name.  Most have a short
5016      * name, but some do not.  Unicode allows for additional names; if
5017      * present these will be returned by UProperty.NameChoice.LONG + i,
5018      * where i=1, 2,...
5019      *
5020      * @return a name, or null if Unicode explicitly defines no name
5021      * ("n/a") for a given property/nameChoice.  If a given nameChoice
5022      * throws an exception, then all larger values of nameChoice will
5023      * throw an exception.  If null is returned for a given
5024      * nameChoice, then other nameChoice values may return non-null
5025      * results.
5026      *
5027      * @exception IllegalArgumentException thrown if property or
5028      * nameChoice are invalid.
5029      *
5030      * @see UProperty
5031      * @see UProperty.NameChoice
5032      * @stable ICU 2.4
5033      */
getPropertyName(int property, int nameChoice)5034     public static String getPropertyName(int property,
5035             int nameChoice) {
5036         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
5037     }
5038 
5039     /**
5040      * {@icu} Return the UProperty selector for a given property name, as
5041      * specified in the Unicode database file PropertyAliases.txt.
5042      * Short, long, and any other variants are recognized.
5043      *
5044      * In addition, this function maps the synthetic names "gcm" /
5045      * "General_Category_Mask" to the property
5046      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
5047      * PropertyAliases.txt.
5048      *
5049      * @param propertyAlias the property name to be matched.  The name
5050      * is compared using "loose matching" as described in
5051      * PropertyAliases.txt.
5052      *
5053      * @return a UProperty enum.
5054      *
5055      * @exception IllegalArgumentException thrown if propertyAlias
5056      * is not recognized.
5057      *
5058      * @see UProperty
5059      * @stable ICU 2.4
5060      */
getPropertyEnum(CharSequence propertyAlias)5061     public static int getPropertyEnum(CharSequence propertyAlias) {
5062         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
5063         if (propEnum == UProperty.UNDEFINED) {
5064             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
5065         }
5066         return propEnum;
5067     }
5068 
5069     /**
5070      * {@icu} Return the Unicode name for a given property value, as given in
5071      * the Unicode database file PropertyValueAliases.txt.  Most
5072      * values have more than one name.  The nameChoice determines
5073      * which one is returned.
5074      *
5075      * Note: Some of the names in PropertyValueAliases.txt can only be
5076      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
5077      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
5078      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
5079      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
5080      *
5081      * @param property UProperty selector constant.
5082      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
5083      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
5084      * UProperty.MASK_START &lt;= property &lt; UProperty.MASK_LIMIT.
5085      * If out of range, null is returned.
5086      *
5087      * @param value selector for a value for the given property.  In
5088      * general, valid values range from 0 up to some maximum.  There
5089      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
5090      * non-zero value BASIC_LATIN.getID().  (2.)
5091      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
5092      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
5093      * are mask values produced by left-shifting 1 by
5094      * UCharacter.getType().  This allows grouped categories such as
5095      * [:L:] to be represented.  Mask values are non-contiguous.
5096      *
5097      * @param nameChoice UProperty.NameChoice selector for which name
5098      * to get.  All values have a long name.  Most have a short name,
5099      * but some do not.  Unicode allows for additional names; if
5100      * present these will be returned by UProperty.NameChoice.LONG + i,
5101      * where i=1, 2,...
5102      *
5103      * @return a name, or null if Unicode explicitly defines no name
5104      * ("n/a") for a given property/value/nameChoice.  If a given
5105      * nameChoice throws an exception, then all larger values of
5106      * nameChoice will throw an exception.  If null is returned for a
5107      * given nameChoice, then other nameChoice values may return
5108      * non-null results.
5109      *
5110      * @exception IllegalArgumentException thrown if property, value,
5111      * or nameChoice are invalid.
5112      *
5113      * @see UProperty
5114      * @see UProperty.NameChoice
5115      * @stable ICU 2.4
5116      */
getPropertyValueName(int property, int value, int nameChoice)5117     public static String getPropertyValueName(int property,
5118             int value,
5119             int nameChoice)
5120     {
5121         if ((property == UProperty.CANONICAL_COMBINING_CLASS
5122                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
5123                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
5124                 && value >= UCharacter.getIntPropertyMinValue(
5125                         UProperty.CANONICAL_COMBINING_CLASS)
5126                         && value <= UCharacter.getIntPropertyMaxValue(
5127                                 UProperty.CANONICAL_COMBINING_CLASS)
5128                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
5129             // this is hard coded for the valid cc
5130             // because PropertyValueAliases.txt does not contain all of them
5131             try {
5132                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
5133                         nameChoice);
5134             }
5135             catch (IllegalArgumentException e) {
5136                 return null;
5137             }
5138         }
5139         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
5140     }
5141 
5142     /**
5143      * {@icu} Return the property value integer for a given value name, as
5144      * specified in the Unicode database file PropertyValueAliases.txt.
5145      * Short, long, and any other variants are recognized.
5146      *
5147      * Note: Some of the names in PropertyValueAliases.txt will only be
5148      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
5149      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
5150      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
5151      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
5152      *
5153      * @param property UProperty selector constant.
5154      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
5155      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
5156      * UProperty.MASK_START &lt;= property &lt; UProperty.MASK_LIMIT.
5157      * Only these properties can be enumerated.
5158      *
5159      * @param valueAlias the value name to be matched.  The name is
5160      * compared using "loose matching" as described in
5161      * PropertyValueAliases.txt.
5162      *
5163      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY_MASK
5164      * values are mask values produced by left-shifting 1 by
5165      * UCharacter.getType().  This allows grouped categories such as
5166      * [:L:] to be represented.
5167      *
5168      * @see UProperty
5169      * @throws IllegalArgumentException if property is not a valid UProperty
5170      *         selector or valueAlias is not a value of this property
5171      * @stable ICU 2.4
5172      */
getPropertyValueEnum(int property, CharSequence valueAlias)5173     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
5174         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
5175         if (propEnum == UProperty.UNDEFINED) {
5176             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
5177         }
5178         return propEnum;
5179     }
5180 
5181     /**
5182      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
5183      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
5184      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
5185      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
5186      * @internal
5187      * @deprecated This API is ICU internal only.
5188      */
5189     @Deprecated
getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)5190     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
5191         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
5192     }
5193 
5194 
5195     /**
5196      * {@icu} Returns a code point corresponding to the two surrogate code units.
5197      *
5198      * @param lead the lead char
5199      * @param trail the trail char
5200      * @return code point if surrogate characters are valid.
5201      * @exception IllegalArgumentException thrown when the code units do
5202      *            not form a valid code point
5203      * @stable ICU 2.1
5204      */
getCodePoint(char lead, char trail)5205     public static int getCodePoint(char lead, char trail)
5206     {
5207         if (Character.isSurrogatePair(lead, trail)) {
5208             return Character.toCodePoint(lead, trail);
5209         }
5210         throw new IllegalArgumentException("Illegal surrogate characters");
5211     }
5212 
5213     /**
5214      * {@icu} Returns the code point corresponding to the BMP code point.
5215      *
5216      * @param char16 the BMP code point
5217      * @return code point if argument is a valid character.
5218      * @exception IllegalArgumentException thrown when char16 is not a valid
5219      *            code point
5220      * @stable ICU 2.1
5221      */
getCodePoint(char char16)5222     public static int getCodePoint(char char16)
5223     {
5224         if (UCharacter.isLegal(char16)) {
5225             return char16;
5226         }
5227         throw new IllegalArgumentException("Illegal codepoint");
5228     }
5229 
5230     /**
5231      * Returns the uppercase version of the argument string.
5232      * Casing is dependent on the default locale and context-sensitive.
5233      * @param str source string to be performed on
5234      * @return uppercase version of the argument string
5235      * @stable ICU 2.1
5236      */
toUpperCase(String str)5237     public static String toUpperCase(String str)
5238     {
5239         return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str);
5240     }
5241 
5242     /**
5243      * Returns the lowercase version of the argument string.
5244      * Casing is dependent on the default locale and context-sensitive
5245      * @param str source string to be performed on
5246      * @return lowercase version of the argument string
5247      * @stable ICU 2.1
5248      */
toLowerCase(String str)5249     public static String toLowerCase(String str)
5250     {
5251         return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str);
5252     }
5253 
5254     /**
5255      * <p>Returns the titlecase version of the argument string.
5256      * <p>Position for titlecasing is determined by the argument break
5257      * iterator, hence the user can customize his break iterator for
5258      * a specialized titlecasing. In this case only the forward iteration
5259      * needs to be implemented.
5260      * If the break iterator passed in is null, the default Unicode algorithm
5261      * will be used to determine the titlecase positions.
5262      *
5263      * <p>Only positions returned by the break iterator will be title cased,
5264      * character in between the positions will all be in lower case.
5265      * <p>Casing is dependent on the default locale and context-sensitive
5266      * @param str source string to be performed on
5267      * @param breakiter break iterator to determine the positions in which
5268      *        the character should be title cased.
5269      * @return titlecase version of the argument string
5270      * @stable ICU 2.6
5271      */
toTitleCase(String str, BreakIterator breakiter)5272     public static String toTitleCase(String str, BreakIterator breakiter)
5273     {
5274         return toTitleCase(Locale.getDefault(), str, breakiter, 0);
5275     }
5276 
getDefaultCaseLocale()5277     private static int getDefaultCaseLocale() {
5278         return UCaseProps.getCaseLocale(Locale.getDefault());
5279     }
5280 
getCaseLocale(Locale locale)5281     private static int getCaseLocale(Locale locale) {
5282         if (locale == null) {
5283             locale = Locale.getDefault();
5284         }
5285         return UCaseProps.getCaseLocale(locale);
5286     }
5287 
getCaseLocale(ULocale locale)5288     private static int getCaseLocale(ULocale locale) {
5289         if (locale == null) {
5290             locale = ULocale.getDefault();
5291         }
5292         return UCaseProps.getCaseLocale(locale);
5293     }
5294 
5295     /**
5296      * Returns the uppercase version of the argument string.
5297      * Casing is dependent on the argument locale and context-sensitive.
5298      * @param locale which string is to be converted in
5299      * @param str source string to be performed on
5300      * @return uppercase version of the argument string
5301      * @stable ICU 2.1
5302      */
toUpperCase(Locale locale, String str)5303     public static String toUpperCase(Locale locale, String str)
5304     {
5305         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
5306     }
5307 
5308     /**
5309      * Returns the uppercase version of the argument string.
5310      * Casing is dependent on the argument locale and context-sensitive.
5311      * @param locale which string is to be converted in
5312      * @param str source string to be performed on
5313      * @return uppercase version of the argument string
5314      * @stable ICU 3.2
5315      */
toUpperCase(ULocale locale, String str)5316     public static String toUpperCase(ULocale locale, String str) {
5317         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
5318     }
5319 
5320     /**
5321      * Returns the lowercase version of the argument string.
5322      * Casing is dependent on the argument locale and context-sensitive
5323      * @param locale which string is to be converted in
5324      * @param str source string to be performed on
5325      * @return lowercase version of the argument string
5326      * @stable ICU 2.1
5327      */
toLowerCase(Locale locale, String str)5328     public static String toLowerCase(Locale locale, String str)
5329     {
5330         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
5331     }
5332 
5333     /**
5334      * Returns the lowercase version of the argument string.
5335      * Casing is dependent on the argument locale and context-sensitive
5336      * @param locale which string is to be converted in
5337      * @param str source string to be performed on
5338      * @return lowercase version of the argument string
5339      * @stable ICU 3.2
5340      */
toLowerCase(ULocale locale, String str)5341     public static String toLowerCase(ULocale locale, String str) {
5342         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
5343     }
5344 
5345     /**
5346      * <p>Returns the titlecase version of the argument string.
5347      * <p>Position for titlecasing is determined by the argument break
5348      * iterator, hence the user can customize his break iterator for
5349      * a specialized titlecasing. In this case only the forward iteration
5350      * needs to be implemented.
5351      * If the break iterator passed in is null, the default Unicode algorithm
5352      * will be used to determine the titlecase positions.
5353      *
5354      * <p>Only positions returned by the break iterator will be title cased,
5355      * character in between the positions will all be in lower case.
5356      * <p>Casing is dependent on the argument locale and context-sensitive
5357      * @param locale which string is to be converted in
5358      * @param str source string to be performed on
5359      * @param breakiter break iterator to determine the positions in which
5360      *        the character should be title cased.
5361      * @return titlecase version of the argument string
5362      * @stable ICU 2.6
5363      */
toTitleCase(Locale locale, String str, BreakIterator breakiter)5364     public static String toTitleCase(Locale locale, String str,
5365             BreakIterator breakiter)
5366     {
5367         return toTitleCase(locale, str, breakiter, 0);
5368     }
5369 
5370     /**
5371      * <p>Returns the titlecase version of the argument string.
5372      * <p>Position for titlecasing is determined by the argument break
5373      * iterator, hence the user can customize his break iterator for
5374      * a specialized titlecasing. In this case only the forward iteration
5375      * needs to be implemented.
5376      * If the break iterator passed in is null, the default Unicode algorithm
5377      * will be used to determine the titlecase positions.
5378      *
5379      * <p>Only positions returned by the break iterator will be title cased,
5380      * character in between the positions will all be in lower case.
5381      * <p>Casing is dependent on the argument locale and context-sensitive
5382      * @param locale which string is to be converted in
5383      * @param str source string to be performed on
5384      * @param titleIter break iterator to determine the positions in which
5385      *        the character should be title cased.
5386      * @return titlecase version of the argument string
5387      * @stable ICU 3.2
5388      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter)5389     public static String toTitleCase(ULocale locale, String str,
5390             BreakIterator titleIter) {
5391         return toTitleCase(locale, str, titleIter, 0);
5392     }
5393 
5394     /**
5395      * <p>Returns the titlecase version of the argument string.
5396      * <p>Position for titlecasing is determined by the argument break
5397      * iterator, hence the user can customize his break iterator for
5398      * a specialized titlecasing. In this case only the forward iteration
5399      * needs to be implemented.
5400      * If the break iterator passed in is null, the default Unicode algorithm
5401      * will be used to determine the titlecase positions.
5402      *
5403      * <p>Only positions returned by the break iterator will be title cased,
5404      * character in between the positions will all be in lower case.
5405      * <p>Casing is dependent on the argument locale and context-sensitive
5406      * @param locale which string is to be converted in
5407      * @param str source string to be performed on
5408      * @param titleIter break iterator to determine the positions in which
5409      *        the character should be title cased.
5410      * @param options bit set to modify the titlecasing operation
5411      * @return titlecase version of the argument string
5412      * @stable ICU 3.8
5413      * @see #TITLECASE_NO_LOWERCASE
5414      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5415      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5416     public static String toTitleCase(ULocale locale, String str,
5417             BreakIterator titleIter, int options) {
5418         if (titleIter == null && locale == null) {
5419             locale = ULocale.getDefault();
5420         }
5421         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
5422         titleIter.setText(str);
5423         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
5424     }
5425 
5426     /**
5427      * {@icu} <p>Returns the titlecase version of the argument string.
5428      * <p>Position for titlecasing is determined by the argument break
5429      * iterator, hence the user can customize his break iterator for
5430      * a specialized titlecasing. In this case only the forward iteration
5431      * needs to be implemented.
5432      * If the break iterator passed in is null, the default Unicode algorithm
5433      * will be used to determine the titlecase positions.
5434      *
5435      * <p>Only positions returned by the break iterator will be title cased,
5436      * character in between the positions will all be in lower case.
5437      * <p>Casing is dependent on the argument locale and context-sensitive
5438      * @param locale which string is to be converted in
5439      * @param str source string to be performed on
5440      * @param titleIter break iterator to determine the positions in which
5441      *        the character should be title cased.
5442      * @param options bit set to modify the titlecasing operation
5443      * @return titlecase version of the argument string
5444      * @see #TITLECASE_NO_LOWERCASE
5445      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5446      * @stable ICU 54
5447      */
toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5448     public static String toTitleCase(Locale locale, String str,
5449             BreakIterator titleIter,
5450             int options) {
5451         if (titleIter == null && locale == null) {
5452             locale = Locale.getDefault();
5453         }
5454         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
5455         titleIter.setText(str);
5456         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
5457     }
5458 
5459     /**
5460      * {@icu} The given character is mapped to its case folding equivalent according
5461      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5462      * folding equivalent, the character itself is returned.
5463      *
5464      * <p>This function only returns the simple, single-code point case mapping.
5465      * Full case mappings should be used whenever possible because they produce
5466      * better results by working on whole strings.
5467      * They can map to a result string with a different length as appropriate.
5468      * Full case mappings are applied by the case mapping functions
5469      * that take String parameters rather than code points (int).
5470      * See also the User Guide chapter on C/POSIX migration:
5471      * http://www.icu-project.org/userguide/posix.html#case_mappings
5472      *
5473      * @param ch             the character to be converted
5474      * @param defaultmapping Indicates whether the default mappings defined in
5475      *                       CaseFolding.txt are to be used, otherwise the
5476      *                       mappings for dotted I and dotless i marked with
5477      *                       'T' in CaseFolding.txt are included.
5478      * @return               the case folding equivalent of the character, if
5479      *                       any; otherwise the character itself.
5480      * @see                  #foldCase(String, boolean)
5481      * @stable ICU 2.1
5482      */
foldCase(int ch, boolean defaultmapping)5483     public static int foldCase(int ch, boolean defaultmapping) {
5484         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5485     }
5486 
5487     /**
5488      * {@icu} The given string is mapped to its case folding equivalent according to
5489      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5490      * folding equivalent, the character itself is returned.
5491      * "Full", multiple-code point case folding mappings are returned here.
5492      * For "simple" single-code point mappings use the API
5493      * foldCase(int ch, boolean defaultmapping).
5494      * @param str            the String to be converted
5495      * @param defaultmapping Indicates whether the default mappings defined in
5496      *                       CaseFolding.txt are to be used, otherwise the
5497      *                       mappings for dotted I and dotless i marked with
5498      *                       'T' in CaseFolding.txt are included.
5499      * @return               the case folding equivalent of the character, if
5500      *                       any; otherwise the character itself.
5501      * @see                  #foldCase(int, boolean)
5502      * @stable ICU 2.1
5503      */
foldCase(String str, boolean defaultmapping)5504     public static String foldCase(String str, boolean defaultmapping) {
5505         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5506     }
5507 
5508     /**
5509      * {@icu} Option value for case folding: use default mappings defined in
5510      * CaseFolding.txt.
5511      * @stable ICU 2.6
5512      */
5513     public static final int FOLD_CASE_DEFAULT    =      0x0000;
5514     /**
5515      * {@icu} Option value for case folding:
5516      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
5517      * and dotless i appropriately for Turkic languages (tr, az).
5518      *
5519      * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
5520      * are to be included for default mappings and
5521      * excluded for the Turkic-specific mappings.
5522      *
5523      * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
5524      * are to be excluded for default mappings and
5525      * included for the Turkic-specific mappings.
5526      *
5527      * @stable ICU 2.6
5528      */
5529     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
5530 
5531     /**
5532      * {@icu} The given character is mapped to its case folding equivalent according
5533      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5534      * folding equivalent, the character itself is returned.
5535      *
5536      * <p>This function only returns the simple, single-code point case mapping.
5537      * Full case mappings should be used whenever possible because they produce
5538      * better results by working on whole strings.
5539      * They can map to a result string with a different length as appropriate.
5540      * Full case mappings are applied by the case mapping functions
5541      * that take String parameters rather than code points (int).
5542      * See also the User Guide chapter on C/POSIX migration:
5543      * http://www.icu-project.org/userguide/posix.html#case_mappings
5544      *
5545      * @param ch the character to be converted
5546      * @param options A bit set for special processing. Currently the recognised options
5547      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5548      * @return the case folding equivalent of the character, if any; otherwise the
5549      * character itself.
5550      * @see #foldCase(String, boolean)
5551      * @stable ICU 2.6
5552      */
foldCase(int ch, int options)5553     public static int foldCase(int ch, int options) {
5554         return UCaseProps.INSTANCE.fold(ch, options);
5555     }
5556 
5557     /**
5558      * {@icu} The given string is mapped to its case folding equivalent according to
5559      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5560      * folding equivalent, the character itself is returned.
5561      * "Full", multiple-code point case folding mappings are returned here.
5562      * For "simple" single-code point mappings use the API
5563      * foldCase(int ch, boolean defaultmapping).
5564      * @param str the String to be converted
5565      * @param options A bit set for special processing. Currently the recognised options
5566      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5567      * @return the case folding equivalent of the character, if any; otherwise the
5568      *         character itself.
5569      * @see #foldCase(int, boolean)
5570      * @stable ICU 2.6
5571      */
foldCase(String str, int options)5572     public static final String foldCase(String str, int options) {
5573         return CaseMapImpl.fold(options, str);
5574     }
5575 
5576     /**
5577      * {@icu} Returns the numeric value of a Han character.
5578      *
5579      * <p>This returns the value of Han 'numeric' code points,
5580      * including those for zero, ten, hundred, thousand, ten thousand,
5581      * and hundred million.
5582      * This includes both the standard and 'checkwriting'
5583      * characters, the 'big circle' zero character, and the standard
5584      * zero character.
5585      *
5586      * <p>Note: The Unicode Standard has numeric values for more
5587      * Han characters recognized by this method
5588      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
5589      * and a {@link com.ibm.icu.text.NumberFormat} can be used with
5590      * a Chinese {@link com.ibm.icu.text.NumberingSystem}.
5591      *
5592      * @param ch code point to query
5593      * @return value if it is a Han 'numeric character,' otherwise return -1.
5594      * @stable ICU 2.4
5595      */
getHanNumericValue(int ch)5596     public static int getHanNumericValue(int ch)
5597     {
5598         switch(ch)
5599         {
5600         case IDEOGRAPHIC_NUMBER_ZERO_ :
5601         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
5602             return 0; // Han Zero
5603         case CJK_IDEOGRAPH_FIRST_ :
5604         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
5605             return 1; // Han One
5606         case CJK_IDEOGRAPH_SECOND_ :
5607         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
5608             return 2; // Han Two
5609         case CJK_IDEOGRAPH_THIRD_ :
5610         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
5611             return 3; // Han Three
5612         case CJK_IDEOGRAPH_FOURTH_ :
5613         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
5614             return 4; // Han Four
5615         case CJK_IDEOGRAPH_FIFTH_ :
5616         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
5617             return 5; // Han Five
5618         case CJK_IDEOGRAPH_SIXTH_ :
5619         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
5620             return 6; // Han Six
5621         case CJK_IDEOGRAPH_SEVENTH_ :
5622         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
5623             return 7; // Han Seven
5624         case CJK_IDEOGRAPH_EIGHTH_ :
5625         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
5626             return 8; // Han Eight
5627         case CJK_IDEOGRAPH_NINETH_ :
5628         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
5629             return 9; // Han Nine
5630         case CJK_IDEOGRAPH_TEN_ :
5631         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
5632             return 10;
5633         case CJK_IDEOGRAPH_HUNDRED_ :
5634         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
5635             return 100;
5636         case CJK_IDEOGRAPH_THOUSAND_ :
5637         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
5638             return 1000;
5639         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
5640             return 10000;
5641         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
5642             return 100000000;
5643         }
5644         return -1; // no value
5645     }
5646 
5647     /**
5648      * {@icu} <p>Returns an iterator for character types, iterating over codepoints.
5649      * <p>Example of use:<br>
5650      * <pre>
5651      * RangeValueIterator iterator = UCharacter.getTypeIterator();
5652      * RangeValueIterator.Element element = new RangeValueIterator.Element();
5653      * while (iterator.next(element)) {
5654      *     System.out.println("Codepoint \\u" +
5655      *                        Integer.toHexString(element.start) +
5656      *                        " to codepoint \\u" +
5657      *                        Integer.toHexString(element.limit - 1) +
5658      *                        " has the character type " +
5659      *                        element.value);
5660      * }
5661      * </pre>
5662      * @return an iterator
5663      * @stable ICU 2.6
5664      */
getTypeIterator()5665     public static RangeValueIterator getTypeIterator()
5666     {
5667         return new UCharacterTypeIterator();
5668     }
5669 
5670     private static final class UCharacterTypeIterator implements RangeValueIterator {
UCharacterTypeIterator()5671         UCharacterTypeIterator() {
5672             reset();
5673         }
5674 
5675         // implements RangeValueIterator
5676         @Override
next(Element element)5677         public boolean next(Element element) {
5678             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5679                 element.start=range.startCodePoint;
5680                 element.limit=range.endCodePoint+1;
5681                 element.value=range.value;
5682                 return true;
5683             } else {
5684                 return false;
5685             }
5686         }
5687 
5688         // implements RangeValueIterator
5689         @Override
reset()5690         public void reset() {
5691             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
5692         }
5693 
5694         private Iterator<Trie2.Range> trieIterator;
5695         private Trie2.Range range;
5696 
5697         private static final class MaskType implements Trie2.ValueMapper {
5698             // Extracts the general category ("character type") from the trie value.
5699             @Override
map(int value)5700             public int map(int value) {
5701                 return value & UCharacterProperty.TYPE_MASK;
5702             }
5703         }
5704         private static final MaskType MASK_TYPE=new MaskType();
5705     }
5706 
5707     /**
5708      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
5709      * <p>This API only gets the iterator for the modern, most up-to-date
5710      * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
5711      * for extended names use getExtendedNameIterator().
5712      * <p>Example of use:<br>
5713      * <pre>
5714      * ValueIterator iterator = UCharacter.getNameIterator();
5715      * ValueIterator.Element element = new ValueIterator.Element();
5716      * while (iterator.next(element)) {
5717      *     System.out.println("Codepoint \\u" +
5718      *                        Integer.toHexString(element.codepoint) +
5719      *                        " has the name " + (String)element.value);
5720      * }
5721      * </pre>
5722      * <p>The maximal range which the name iterator iterates is from
5723      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
5724      * @return an iterator
5725      * @stable ICU 2.6
5726      */
getNameIterator()5727     public static ValueIterator getNameIterator(){
5728         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5729                 UCharacterNameChoice.UNICODE_CHAR_NAME);
5730     }
5731 
5732     /**
5733      * {@icu} Returns an empty iterator.
5734      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
5735      * @return an empty iterator
5736      * @deprecated ICU 49
5737      * @see #getName1_0(int)
5738      */
5739     @Deprecated
getName1_0Iterator()5740     public static ValueIterator getName1_0Iterator(){
5741         return new DummyValueIterator();
5742     }
5743 
5744     private static final class DummyValueIterator implements ValueIterator {
5745         @Override
next(Element element)5746         public boolean next(Element element) { return false; }
5747         @Override
reset()5748         public void reset() {}
5749         @Override
setRange(int start, int limit)5750         public void setRange(int start, int limit) {}
5751     }
5752 
5753     /**
5754      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
5755      * <p>This API only gets the iterator for the extended names.
5756      * For modern, most up-to-date Unicode names use getNameIterator() or
5757      * for older 1.0 Unicode names use get1_0NameIterator().
5758      * <p>Example of use:<br>
5759      * <pre>
5760      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
5761      * ValueIterator.Element element = new ValueIterator.Element();
5762      * while (iterator.next(element)) {
5763      *     System.out.println("Codepoint \\u" +
5764      *                        Integer.toHexString(element.codepoint) +
5765      *                        " has the name " + (String)element.value);
5766      * }
5767      * </pre>
5768      * <p>The maximal range which the name iterator iterates is from
5769      * @return an iterator
5770      * @stable ICU 2.6
5771      */
getExtendedNameIterator()5772     public static ValueIterator getExtendedNameIterator(){
5773         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5774                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
5775     }
5776 
5777     /**
5778      * {@icu} Returns the "age" of the code point.
5779      * <p>The "age" is the Unicode version when the code point was first
5780      * designated (as a non-character or for Private Use) or assigned a
5781      * character.
5782      * <p>This can be useful to avoid emitting code points to receiving
5783      * processes that do not accept newer characters.
5784      * <p>The data is from the UCD file DerivedAge.txt.
5785      * @param ch The code point.
5786      * @return the Unicode version number
5787      * @stable ICU 2.6
5788      */
getAge(int ch)5789     public static VersionInfo getAge(int ch)
5790     {
5791         if (ch < MIN_VALUE || ch > MAX_VALUE) {
5792             throw new IllegalArgumentException("Codepoint out of bounds");
5793         }
5794         return UCharacterProperty.INSTANCE.getAge(ch);
5795     }
5796 
5797     /**
5798      * {@icu} Check a binary Unicode property for a code point.
5799      * <p>Unicode, especially in version 3.2, defines many more properties
5800      * than the original set in UnicodeData.txt.
5801      * <p>This API is intended to reflect Unicode properties as defined in
5802      * the Unicode Character Database (UCD) and Unicode Technical Reports
5803      * (UTR).
5804      * <p>For details about the properties see
5805      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
5806      * <p>For names of Unicode properties see the UCD file
5807      * PropertyAliases.txt.
5808      * <p>This API does not check the validity of the codepoint.
5809      * <p>Important: If ICU is built with UCD files from Unicode versions
5810      * below 3.2, then properties marked with "new" are not or
5811      * not fully available.
5812      * @param ch code point to test.
5813      * @param property selector constant from com.ibm.icu.lang.UProperty,
5814      *        identifies which binary property to check.
5815      * @return true or false according to the binary Unicode property value
5816      *         for ch. Also false if property is out of bounds or if the
5817      *         Unicode version does not have data for the property at all, or
5818      *         not for this code point.
5819      * @see com.ibm.icu.lang.UProperty
5820      * @see CharacterProperties#getBinaryPropertySet(int)
5821      * @stable ICU 2.6
5822      */
hasBinaryProperty(int ch, int property)5823     public static boolean hasBinaryProperty(int ch, int property)
5824     {
5825         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5826     }
5827 
5828     /**
5829      * {@icu} <p>Check if a code point has the Alphabetic Unicode property.
5830      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
5831      * <p>Different from UCharacter.isLetter(ch)!
5832      * @stable ICU 2.6
5833      * @param ch codepoint to be tested
5834      */
isUAlphabetic(int ch)5835     public static boolean isUAlphabetic(int ch)
5836     {
5837         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5838     }
5839 
5840     /**
5841      * {@icu} <p>Check if a code point has the Lowercase Unicode property.
5842      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
5843      * <p>This is different from UCharacter.isLowerCase(ch)!
5844      * @param ch codepoint to be tested
5845      * @stable ICU 2.6
5846      */
isULowercase(int ch)5847     public static boolean isULowercase(int ch)
5848     {
5849         return hasBinaryProperty(ch, UProperty.LOWERCASE);
5850     }
5851 
5852     /**
5853      * {@icu} <p>Check if a code point has the Uppercase Unicode property.
5854      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
5855      * <p>This is different from UCharacter.isUpperCase(ch)!
5856      * @param ch codepoint to be tested
5857      * @stable ICU 2.6
5858      */
isUUppercase(int ch)5859     public static boolean isUUppercase(int ch)
5860     {
5861         return hasBinaryProperty(ch, UProperty.UPPERCASE);
5862     }
5863 
5864     /**
5865      * {@icu} <p>Check if a code point has the White_Space Unicode property.
5866      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
5867      * <p>This is different from both UCharacter.isSpace(ch) and
5868      * UCharacter.isWhitespace(ch)!
5869      * @param ch codepoint to be tested
5870      * @stable ICU 2.6
5871      */
isUWhiteSpace(int ch)5872     public static boolean isUWhiteSpace(int ch)
5873     {
5874         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5875     }
5876 
5877     /**
5878      * {@icu} Returns the property value for a Unicode property type of a code point.
5879      * Also returns binary and mask property values.
5880      * <p>Unicode, especially in version 3.2, defines many more properties than
5881      * the original set in UnicodeData.txt.
5882      * <p>The properties APIs are intended to reflect Unicode properties as
5883      * defined in the Unicode Character Database (UCD) and Unicode Technical
5884      * Reports (UTR). For details about the properties see
5885      * http://www.unicode.org/.
5886      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5887      *
5888      * <pre>
5889      * Sample usage:
5890      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5891      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5892      * boolean b = (ideo == 1) ? true : false;
5893      * </pre>
5894      * @param ch code point to test.
5895      * @param type UProperty selector constant, identifies which binary
5896      *        property to check. Must be
5897      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5898      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5899      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5900      * @return numeric value that is directly the property value or,
5901      *         for enumerated properties, corresponds to the numeric value of
5902      *         the enumerated constant of the respective property value type
5903      *         ({@link ECharacterCategory}, {@link ECharacterDirection},
5904      *         {@link DecompositionType}, etc.).
5905      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5906      *         Returns a bit-mask for mask properties.
5907      *         Returns 0 if 'type' is out of bounds or if the Unicode version
5908      *         does not have data for the property at all, or not for this code
5909      *         point.
5910      * @see UProperty
5911      * @see #hasBinaryProperty
5912      * @see #getIntPropertyMinValue
5913      * @see #getIntPropertyMaxValue
5914      * @see CharacterProperties#getIntPropertyMap(int)
5915      * @see #getUnicodeVersion
5916      * @stable ICU 2.4
5917      */
getIntPropertyValue(int ch, int type)5918     public static int getIntPropertyValue(int ch, int type)
5919     {
5920         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
5921     }
5922     /**
5923      * {@icu} Returns a string version of the property value.
5924      * @param propertyEnum The property enum value.
5925      * @param codepoint The codepoint value.
5926      * @param nameChoice The choice of the name.
5927      * @return value as string
5928      * @internal
5929      * @deprecated This API is ICU internal only.
5930      */
5931     @Deprecated
5932     ///CLOVER:OFF
getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5933     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5934         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5935                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5936             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
5937                     nameChoice);
5938         }
5939         if (propertyEnum == UProperty.NUMERIC_VALUE) {
5940             return String.valueOf(getUnicodeNumericValue(codepoint));
5941         }
5942         // otherwise must be string property
5943         switch (propertyEnum) {
5944         case UProperty.AGE: return getAge(codepoint).toString();
5945         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5946         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
5947         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
5948         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5949         case UProperty.NAME: return getName(codepoint);
5950         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
5951         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5952         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5953         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5954         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5955         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5956         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5957         }
5958         throw new IllegalArgumentException("Illegal Property Enum");
5959     }
5960     ///CLOVER:ON
5961 
5962     /**
5963      * {@icu} Returns the minimum value for an integer/binary Unicode property type.
5964      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5965      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5966      * @param type UProperty selector constant, identifies which binary
5967      *        property to check. Must be
5968      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5969      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5970      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5971      *         for a Unicode property. 0 if the property
5972      *         selector 'type' is out of range.
5973      * @see UProperty
5974      * @see #hasBinaryProperty
5975      * @see #getUnicodeVersion
5976      * @see #getIntPropertyMaxValue
5977      * @see #getIntPropertyValue
5978      * @stable ICU 2.4
5979      */
getIntPropertyMinValue(int type)5980     public static int getIntPropertyMinValue(int type){
5981 
5982         return 0; // undefined; and: all other properties have a minimum value of 0
5983     }
5984 
5985 
5986     /**
5987      * {@icu} Returns the maximum value for an integer/binary Unicode property.
5988      * Can be used together with UCharacter.getIntPropertyMinValue(int)
5989      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5990      * Examples for min/max values (for Unicode 3.2):
5991      * <ul>
5992      * <li> UProperty.BIDI_CLASS:    0/18
5993      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5994      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5995      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5996      * </ul>
5997      * For undefined UProperty constant values, min/max values will be 0/-1.
5998      * @param type UProperty selector constant, identifies which binary
5999      *        property to check. Must be
6000      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
6001      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
6002      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
6003      *         property. &lt;= 0 if the property selector 'type' is out of range.
6004      * @see UProperty
6005      * @see #hasBinaryProperty
6006      * @see #getUnicodeVersion
6007      * @see #getIntPropertyMaxValue
6008      * @see #getIntPropertyValue
6009      * @stable ICU 2.4
6010      */
getIntPropertyMaxValue(int type)6011     public static int getIntPropertyMaxValue(int type)
6012     {
6013         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
6014     }
6015 
6016     /**
6017      * Provide the java.lang.Character forDigit API, for convenience.
6018      * @stable ICU 3.0
6019      */
forDigit(int digit, int radix)6020     public static char forDigit(int digit, int radix) {
6021         return java.lang.Character.forDigit(digit, radix);
6022     }
6023 
6024     // JDK 1.5 API coverage
6025 
6026     /**
6027      * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}.
6028      *
6029      * @stable ICU 3.0
6030      */
6031     public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;
6032 
6033     /**
6034      * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}.
6035      *
6036      * @stable ICU 3.0
6037      */
6038     public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;
6039 
6040     /**
6041      * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}.
6042      *
6043      * @stable ICU 3.0
6044      */
6045     public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;
6046 
6047     /**
6048      * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}.
6049      *
6050      * @stable ICU 3.0
6051      */
6052     public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;
6053 
6054     /**
6055      * Constant U+D800, same as {@link Character#MIN_SURROGATE}.
6056      *
6057      * @stable ICU 3.0
6058      */
6059     public static final char MIN_SURROGATE = Character.MIN_SURROGATE;
6060 
6061     /**
6062      * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}.
6063      *
6064      * @stable ICU 3.0
6065      */
6066     public static final char MAX_SURROGATE = Character.MAX_SURROGATE;
6067 
6068     /**
6069      * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
6070      *
6071      * @stable ICU 3.0
6072      */
6073     public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;
6074 
6075     /**
6076      * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}.
6077      *
6078      * @stable ICU 3.0
6079      */
6080     public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;
6081 
6082     /**
6083      * Constant U+0000, same as {@link Character#MIN_CODE_POINT}.
6084      *
6085      * @stable ICU 3.0
6086      */
6087     public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
6088 
6089     /**
6090      * Equivalent to {@link Character#isValidCodePoint}.
6091      *
6092      * @param cp the code point to check
6093      * @return true if cp is a valid code point
6094      * @stable ICU 3.0
6095      */
isValidCodePoint(int cp)6096     public static final boolean isValidCodePoint(int cp) {
6097         return cp >= 0 && cp <= MAX_CODE_POINT;
6098     }
6099 
6100     /**
6101      * Same as {@link Character#isSupplementaryCodePoint}.
6102      *
6103      * @param cp the code point to check
6104      * @return true if cp is a supplementary code point
6105      * @stable ICU 3.0
6106      */
isSupplementaryCodePoint(int cp)6107     public static final boolean isSupplementaryCodePoint(int cp) {
6108         return Character.isSupplementaryCodePoint(cp);
6109     }
6110 
6111     /**
6112      * Same as {@link Character#isHighSurrogate}.
6113      *
6114      * @param ch the char to check
6115      * @return true if ch is a high (lead) surrogate
6116      * @stable ICU 3.0
6117      */
isHighSurrogate(char ch)6118     public static boolean isHighSurrogate(char ch) {
6119         return Character.isHighSurrogate(ch);
6120     }
6121 
6122     /**
6123      * Same as {@link Character#isLowSurrogate}.
6124      *
6125      * @param ch the char to check
6126      * @return true if ch is a low (trail) surrogate
6127      * @stable ICU 3.0
6128      */
isLowSurrogate(char ch)6129     public static boolean isLowSurrogate(char ch) {
6130         return Character.isLowSurrogate(ch);
6131     }
6132 
6133     /**
6134      * Same as {@link Character#isSurrogatePair}.
6135      *
6136      * @param high the high (lead) char
6137      * @param low the low (trail) char
6138      * @return true if high, low form a surrogate pair
6139      * @stable ICU 3.0
6140      */
isSurrogatePair(char high, char low)6141     public static final boolean isSurrogatePair(char high, char low) {
6142         return Character.isSurrogatePair(high, low);
6143     }
6144 
6145     /**
6146      * Same as {@link Character#charCount}.
6147      * Returns the number of chars needed to represent the code point (1 or 2).
6148      * This does not check the code point for validity.
6149      *
6150      * @param cp the code point to check
6151      * @return the number of chars needed to represent the code point
6152      * @stable ICU 3.0
6153      */
charCount(int cp)6154     public static int charCount(int cp) {
6155         return Character.charCount(cp);
6156     }
6157 
6158     /**
6159      * Same as {@link Character#toCodePoint}.
6160      * Returns the code point represented by the two surrogate code units.
6161      * This does not check the surrogate pair for validity.
6162      *
6163      * @param high the high (lead) surrogate
6164      * @param low the low (trail) surrogate
6165      * @return the code point formed by the surrogate pair
6166      * @stable ICU 3.0
6167      */
toCodePoint(char high, char low)6168     public static final int toCodePoint(char high, char low) {
6169         return Character.toCodePoint(high, low);
6170     }
6171 
6172     /**
6173      * Same as {@link Character#codePointAt(CharSequence, int)}.
6174      * Returns the code point at index.
6175      * This examines only the characters at index and index+1.
6176      *
6177      * @param seq the characters to check
6178      * @param index the index of the first or only char forming the code point
6179      * @return the code point at the index
6180      * @stable ICU 3.0
6181      */
codePointAt(CharSequence seq, int index)6182     public static final int codePointAt(CharSequence seq, int index) {
6183         char c1 = seq.charAt(index++);
6184         if (isHighSurrogate(c1)) {
6185             if (index < seq.length()) {
6186                 char c2 = seq.charAt(index);
6187                 if (isLowSurrogate(c2)) {
6188                     return toCodePoint(c1, c2);
6189                 }
6190             }
6191         }
6192         return c1;
6193     }
6194 
6195     /**
6196      * Same as {@link Character#codePointAt(char[], int)}.
6197      * Returns the code point at index.
6198      * This examines only the characters at index and index+1.
6199      *
6200      * @param text the characters to check
6201      * @param index the index of the first or only char forming the code point
6202      * @return the code point at the index
6203      * @stable ICU 3.0
6204      */
codePointAt(char[] text, int index)6205     public static final int codePointAt(char[] text, int index) {
6206         char c1 = text[index++];
6207         if (isHighSurrogate(c1)) {
6208             if (index < text.length) {
6209                 char c2 = text[index];
6210                 if (isLowSurrogate(c2)) {
6211                     return toCodePoint(c1, c2);
6212                 }
6213             }
6214         }
6215         return c1;
6216     }
6217 
6218     /**
6219      * Same as {@link Character#codePointAt(char[], int, int)}.
6220      * Returns the code point at index.
6221      * This examines only the characters at index and index+1.
6222      *
6223      * @param text the characters to check
6224      * @param index the index of the first or only char forming the code point
6225      * @param limit the limit of the valid text
6226      * @return the code point at the index
6227      * @stable ICU 3.0
6228      */
codePointAt(char[] text, int index, int limit)6229     public static final int codePointAt(char[] text, int index, int limit) {
6230         if (index >= limit || limit > text.length) {
6231             throw new IndexOutOfBoundsException();
6232         }
6233         char c1 = text[index++];
6234         if (isHighSurrogate(c1)) {
6235             if (index < limit) {
6236                 char c2 = text[index];
6237                 if (isLowSurrogate(c2)) {
6238                     return toCodePoint(c1, c2);
6239                 }
6240             }
6241         }
6242         return c1;
6243     }
6244 
6245     /**
6246      * Same as {@link Character#codePointBefore(CharSequence, int)}.
6247      * Return the code point before index.
6248      * This examines only the characters at index-1 and index-2.
6249      *
6250      * @param seq the characters to check
6251      * @param index the index after the last or only char forming the code point
6252      * @return the code point before the index
6253      * @stable ICU 3.0
6254      */
codePointBefore(CharSequence seq, int index)6255     public static final int codePointBefore(CharSequence seq, int index) {
6256         char c2 = seq.charAt(--index);
6257         if (isLowSurrogate(c2)) {
6258             if (index > 0) {
6259                 char c1 = seq.charAt(--index);
6260                 if (isHighSurrogate(c1)) {
6261                     return toCodePoint(c1, c2);
6262                 }
6263             }
6264         }
6265         return c2;
6266     }
6267 
6268     /**
6269      * Same as {@link Character#codePointBefore(char[], int)}.
6270      * Returns the code point before index.
6271      * This examines only the characters at index-1 and index-2.
6272      *
6273      * @param text the characters to check
6274      * @param index the index after the last or only char forming the code point
6275      * @return the code point before the index
6276      * @stable ICU 3.0
6277      */
codePointBefore(char[] text, int index)6278     public static final int codePointBefore(char[] text, int index) {
6279         char c2 = text[--index];
6280         if (isLowSurrogate(c2)) {
6281             if (index > 0) {
6282                 char c1 = text[--index];
6283                 if (isHighSurrogate(c1)) {
6284                     return toCodePoint(c1, c2);
6285                 }
6286             }
6287         }
6288         return c2;
6289     }
6290 
6291     /**
6292      * Same as {@link Character#codePointBefore(char[], int, int)}.
6293      * Return the code point before index.
6294      * This examines only the characters at index-1 and index-2.
6295      *
6296      * @param text the characters to check
6297      * @param index the index after the last or only char forming the code point
6298      * @param limit the start of the valid text
6299      * @return the code point before the index
6300      * @stable ICU 3.0
6301      */
codePointBefore(char[] text, int index, int limit)6302     public static final int codePointBefore(char[] text, int index, int limit) {
6303         if (index <= limit || limit < 0) {
6304             throw new IndexOutOfBoundsException();
6305         }
6306         char c2 = text[--index];
6307         if (isLowSurrogate(c2)) {
6308             if (index > limit) {
6309                 char c1 = text[--index];
6310                 if (isHighSurrogate(c1)) {
6311                     return toCodePoint(c1, c2);
6312                 }
6313             }
6314         }
6315         return c2;
6316     }
6317 
6318     /**
6319      * Same as {@link Character#toChars(int, char[], int)}.
6320      * Writes the chars representing the
6321      * code point into the destination at the given index.
6322      *
6323      * @param cp the code point to convert
6324      * @param dst the destination array into which to put the char(s) representing the code point
6325      * @param dstIndex the index at which to put the first (or only) char
6326      * @return the count of the number of chars written (1 or 2)
6327      * @throws IllegalArgumentException if cp is not a valid code point
6328      * @stable ICU 3.0
6329      */
toChars(int cp, char[] dst, int dstIndex)6330     public static final int toChars(int cp, char[] dst, int dstIndex) {
6331         return Character.toChars(cp, dst, dstIndex);
6332     }
6333 
6334     /**
6335      * Same as {@link Character#toChars(int)}.
6336      * Returns a char array representing the code point.
6337      *
6338      * @param cp the code point to convert
6339      * @return an array containing the char(s) representing the code point
6340      * @throws IllegalArgumentException if cp is not a valid code point
6341      * @stable ICU 3.0
6342      */
toChars(int cp)6343     public static final char[] toChars(int cp) {
6344         return Character.toChars(cp);
6345     }
6346 
6347     /**
6348      * Equivalent to the {@link Character#getDirectionality(char)} method, for
6349      * convenience. Returns a byte representing the directionality of the
6350      * character.
6351      *
6352      * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns
6353      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
6354      *
6355      * {@icunote} The return value must be tested using the constants defined in {@link
6356      * UCharacterDirection} and its interface {@link
6357      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
6358      * defined by <code>java.lang.Character</code>.
6359      * @param cp the code point to check
6360      * @return the directionality of the code point
6361      * @see #getDirection
6362      * @stable ICU 3.0
6363      */
getDirectionality(int cp)6364     public static byte getDirectionality(int cp)
6365     {
6366         return (byte)getDirection(cp);
6367     }
6368 
6369     /**
6370      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
6371      * method, for convenience.  Counts the number of code points in the range
6372      * of text.
6373      * @param text the characters to check
6374      * @param start the start of the range
6375      * @param limit the limit of the range
6376      * @return the number of code points in the range
6377      * @stable ICU 3.0
6378      */
codePointCount(CharSequence text, int start, int limit)6379     public static int codePointCount(CharSequence text, int start, int limit) {
6380         if (start < 0 || limit < start || limit > text.length()) {
6381             throw new IndexOutOfBoundsException("start (" + start +
6382                     ") or limit (" + limit +
6383                     ") invalid or out of range 0, " + text.length());
6384         }
6385 
6386         int len = limit - start;
6387         while (limit > start) {
6388             char ch = text.charAt(--limit);
6389             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6390                 ch = text.charAt(--limit);
6391                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6392                     --len;
6393                     break;
6394                 }
6395             }
6396         }
6397         return len;
6398     }
6399 
6400     /**
6401      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
6402      * convenience. Counts the number of code points in the range of text.
6403      * @param text the characters to check
6404      * @param start the start of the range
6405      * @param limit the limit of the range
6406      * @return the number of code points in the range
6407      * @stable ICU 3.0
6408      */
codePointCount(char[] text, int start, int limit)6409     public static int codePointCount(char[] text, int start, int limit) {
6410         if (start < 0 || limit < start || limit > text.length) {
6411             throw new IndexOutOfBoundsException("start (" + start +
6412                     ") or limit (" + limit +
6413                     ") invalid or out of range 0, " + text.length);
6414         }
6415 
6416         int len = limit - start;
6417         while (limit > start) {
6418             char ch = text[--limit];
6419             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6420                 ch = text[--limit];
6421                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6422                     --len;
6423                     break;
6424                 }
6425             }
6426         }
6427         return len;
6428     }
6429 
6430     /**
6431      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
6432      * method, for convenience.  Adjusts the char index by a code point offset.
6433      * @param text the characters to check
6434      * @param index the index to adjust
6435      * @param codePointOffset the number of code points by which to offset the index
6436      * @return the adjusted index
6437      * @stable ICU 3.0
6438      */
offsetByCodePoints(CharSequence text, int index, int codePointOffset)6439     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
6440         if (index < 0 || index > text.length()) {
6441             throw new IndexOutOfBoundsException("index ( " + index +
6442                     ") out of range 0, " + text.length());
6443         }
6444 
6445         if (codePointOffset < 0) {
6446             while (++codePointOffset <= 0) {
6447                 char ch = text.charAt(--index);
6448                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
6449                     ch = text.charAt(--index);
6450                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6451                         if (++codePointOffset > 0) {
6452                             return index+1;
6453                         }
6454                     }
6455                 }
6456             }
6457         } else {
6458             int limit = text.length();
6459             while (--codePointOffset >= 0) {
6460                 char ch = text.charAt(index++);
6461                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6462                     ch = text.charAt(index++);
6463                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6464                         if (--codePointOffset < 0) {
6465                             return index-1;
6466                         }
6467                     }
6468                 }
6469             }
6470         }
6471 
6472         return index;
6473     }
6474 
6475     /**
6476      * Equivalent to the
6477      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
6478      * method, for convenience.  Adjusts the char index by a code point offset.
6479      * @param text the characters to check
6480      * @param start the start of the range to check
6481      * @param count the length of the range to check
6482      * @param index the index to adjust
6483      * @param codePointOffset the number of code points by which to offset the index
6484      * @return the adjusted index
6485      * @stable ICU 3.0
6486      */
offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6487     public static int offsetByCodePoints(char[] text, int start, int count, int index,
6488             int codePointOffset) {
6489         int limit = start + count;
6490         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
6491             throw new IndexOutOfBoundsException("index ( " + index +
6492                     ") out of range " + start +
6493                     ", " + limit +
6494                     " in array 0, " + text.length);
6495         }
6496 
6497         if (codePointOffset < 0) {
6498             while (++codePointOffset <= 0) {
6499                 char ch = text[--index];
6500                 if (index < start) {
6501                     throw new IndexOutOfBoundsException("index ( " + index +
6502                             ") < start (" + start +
6503                             ")");
6504                 }
6505                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
6506                     ch = text[--index];
6507                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6508                         if (++codePointOffset > 0) {
6509                             return index+1;
6510                         }
6511                     }
6512                 }
6513             }
6514         } else {
6515             while (--codePointOffset >= 0) {
6516                 char ch = text[index++];
6517                 if (index > limit) {
6518                     throw new IndexOutOfBoundsException("index ( " + index +
6519                             ") > limit (" + limit +
6520                             ")");
6521                 }
6522                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6523                     ch = text[index++];
6524                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6525                         if (--codePointOffset < 0) {
6526                             return index-1;
6527                         }
6528                     }
6529                 }
6530             }
6531         }
6532 
6533         return index;
6534     }
6535 
6536     // private variables -------------------------------------------------
6537 
6538     /**
6539      * To get the last character out from a data type
6540      */
6541     private static final int LAST_CHAR_MASK_ = 0xFFFF;
6542 
6543     //    /**
6544     //     * To get the last byte out from a data type
6545     //     */
6546     //    private static final int LAST_BYTE_MASK_ = 0xFF;
6547     //
6548     //    /**
6549     //     * Shift 16 bits
6550     //     */
6551     //    private static final int SHIFT_16_ = 16;
6552     //
6553     //    /**
6554     //     * Shift 24 bits
6555     //     */
6556     //    private static final int SHIFT_24_ = 24;
6557     //
6558     //    /**
6559     //     * Decimal radix
6560     //     */
6561     //    private static final int DECIMAL_RADIX_ = 10;
6562 
6563     /**
6564      * No break space code point
6565      */
6566     private static final int NO_BREAK_SPACE_ = 0xA0;
6567 
6568     /**
6569      * Figure space code point
6570      */
6571     private static final int FIGURE_SPACE_ = 0x2007;
6572 
6573     /**
6574      * Narrow no break space code point
6575      */
6576     private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
6577 
6578     /**
6579      * Ideographic number zero code point
6580      */
6581     private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
6582 
6583     /**
6584      * CJK Ideograph, First code point
6585      */
6586     private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
6587 
6588     /**
6589      * CJK Ideograph, Second code point
6590      */
6591     private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
6592 
6593     /**
6594      * CJK Ideograph, Third code point
6595      */
6596     private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
6597 
6598     /**
6599      * CJK Ideograph, Fourth code point
6600      */
6601     private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;
6602 
6603     /**
6604      * CJK Ideograph, FIFTH code point
6605      */
6606     private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
6607 
6608     /**
6609      * CJK Ideograph, Sixth code point
6610      */
6611     private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
6612 
6613     /**
6614      * CJK Ideograph, Seventh code point
6615      */
6616     private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
6617 
6618     /**
6619      * CJK Ideograph, Eighth code point
6620      */
6621     private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
6622 
6623     /**
6624      * CJK Ideograph, Nineth code point
6625      */
6626     private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
6627 
6628     /**
6629      * Application Program command code point
6630      */
6631     private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;
6632 
6633     /**
6634      * Unit separator code point
6635      */
6636     private static final int UNIT_SEPARATOR_ = 0x001F;
6637 
6638     /**
6639      * Delete code point
6640      */
6641     private static final int DELETE_ = 0x007F;
6642 
6643     /**
6644      * Han digit characters
6645      */
6646     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
6647     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
6648     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
6649     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
6650     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
6651     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
6652     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
6653     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
6654     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
6655     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
6656     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
6657     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
6658     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
6659     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
6660     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
6661     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
6662     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
6663     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6664 
6665     // private constructor -----------------------------------------------
6666     ///CLOVER:OFF
6667     /**
6668      * Private constructor to prevent instantiation
6669      */
UCharacter()6670     private UCharacter()
6671     {
6672     }
6673     ///CLOVER:ON
6674 }
6675