• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4  *******************************************************************************
5  * Copyright (C) 1996-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.lang;
11 
12 import java.lang.ref.SoftReference;
13 import java.util.HashMap;
14 import java.util.Iterator;
15 import java.util.Locale;
16 import java.util.Map;
17 
18 import com.ibm.icu.impl.CaseMapImpl;
19 import com.ibm.icu.impl.EmojiProps;
20 import com.ibm.icu.impl.IllegalIcuArgumentException;
21 import com.ibm.icu.impl.Trie2;
22 import com.ibm.icu.impl.UBiDiProps;
23 import com.ibm.icu.impl.UCaseProps;
24 import com.ibm.icu.impl.UCharacterName;
25 import com.ibm.icu.impl.UCharacterNameChoice;
26 import com.ibm.icu.impl.UCharacterProperty;
27 import com.ibm.icu.impl.UCharacterUtility;
28 import com.ibm.icu.impl.UPropertyAliases;
29 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
30 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
31 import com.ibm.icu.text.BreakIterator;
32 import com.ibm.icu.text.Normalizer2;
33 import com.ibm.icu.util.RangeValueIterator;
34 import com.ibm.icu.util.ULocale;
35 import com.ibm.icu.util.ValueIterator;
36 import com.ibm.icu.util.VersionInfo;
37 
38 /**
39  * {@icuenhanced java.lang.Character}.{@icu _usage_}
40  *
41  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
42  * These extensions provide support for more Unicode properties.
43  * Each ICU release supports the latest version of Unicode available at that time.
44  *
45  * <p>For some time before Java 5 added support for supplementary Unicode code points,
46  * the ICU UCharacter class and many other ICU classes already supported them.
47  * Some UCharacter methods and constants were widened slightly differently than
48  * how the Character class methods and constants were widened later.
49  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
50  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
51  *
52  * <p>Code points are represented in these API using ints. While it would be
53  * more convenient in Java to have a separate primitive datatype for them,
54  * ints suffice in the meantime.
55  *
56  * <p>To use this class please add the jar file name icu4j.jar to the
57  * class path, since it contains data files which supply the information used
58  * by this file.<br>
59  * E.g. In Windows <br>
60  * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
61  * Otherwise, another method would be to copy the files uprops.dat and
62  * unames.icu from the icu4j source subdirectory
63  * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
64  * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
65  *
66  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
67  * properties, the main differences between UCharacter and Character are:
68  * <ul>
69  * <li> UCharacter is not designed to be a char wrapper and does not have
70  *      APIs that involve management of that single char.<br>
71  *      These include:
72  *      <ul>
73  *        <li> char charValue(),
74  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
75  *      </ul>
76  * <li> UCharacter does not include Character APIs that are deprecated, nor
77  *      does it include the Java-specific character information, such as
78  *      boolean isJavaIdentifierPart(char ch).
79  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
80  *      values '10' - '35'. UCharacter also does this in digit and
81  *      getNumericValue, to adhere to the java semantics of these
82  *      methods.  New methods unicodeDigit, and
83  *      getUnicodeNumericValue do not treat the above code points
84  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
85  * </ul>
86  * <p>
87  * Further detail on differences can be determined using the program
88  *        <a href=
89  * "https://github.com/unicode-org/icu/blob/main/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
90  *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
91  * <p>
92  * In addition to Java compatibility functions, which calculate derived properties,
93  * this API provides low-level access to the Unicode Character Database.
94  * <p>
95  * Unicode assigns each code point (not just assigned character) values for
96  * many properties.
97  * Most of them are simple boolean flags, or constants from a small enumerated list.
98  * For some properties, values are strings or other relatively more complex types.
99  * <p>
100  * For more information see
101  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
102  * (http://www.unicode.org/ucd/)
103  * and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU
104  * User Guide chapter on Properties</a>
105  * (https://unicode-org.github.io/icu/userguide/strings/properties).
106  * <p>
107  * There are also functions that provide easy migration from C/POSIX functions
108  * like isblank(). Their use is generally discouraged because the C/POSIX
109  * standards do not define their semantics beyond the ASCII range, which means
110  * that different implementations exhibit very different behavior.
111  * Instead, Unicode properties should be used directly.
112  * <p>
113  * There are also only a few, broad C/POSIX character classes, and they tend
114  * to be used for conflicting purposes. For example, the "isalpha()" class
115  * is sometimes used to determine word boundaries, while a more sophisticated
116  * approach would at least distinguish initial letters from continuation
117  * characters (the latter including combining marks).
118  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
119  * Another example: There is no "istitle()" class for titlecase characters.
120  * <p>
121  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
122  * ICU implements them according to the Standard Recommendations in
123  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
124  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
125  * <p>
126  * API access for C/POSIX character classes is as follows:
127  * <pre>{@code
128  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
129  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
130  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
131  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
132  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
133  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
134  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
135  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
136  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
137  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
138  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
139  * - cntrl:     getType(c)==CONTROL
140  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
141  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
142  * <p>
143  * The C/POSIX character classes are also available in UnicodeSet patterns,
144  * using patterns like [:graph:] or \p{graph}.
145  *
146  * <p>{@icunote} There are several ICU (and Java) whitespace functions.
147  * Comparison:<ul>
148  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
149  *       most of general categories "Z" (separators) + most whitespace ISO controls
150  *       (including no-break spaces, but excluding IS1..IS4)
151  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
152  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
153  *
154  * <p>
155  * This class is not subclassable.
156  *
157  * @author Syn Wee Quek
158  * @stable ICU 2.1
159  * @see com.ibm.icu.lang.UCharacterEnums
160  */
161 
162 public final class UCharacter implements ECharacterCategory, ECharacterDirection
163 {
164     /**
165      * Lead surrogate bitmask: 0xFFFFFC00 keeps every bit of an int except the low 10.
166      */
167     private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00;
168
169     /**
170      * Trail surrogate bitmask: same value as the lead mask (all bits except the low 10).
171      */
172     private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00;
173
174     /**
175      * Lead surrogate bits: 0xD800, the first code unit value of the UTF-16 lead surrogate range.
176      */
177     private static final int LEAD_SURROGATE_BITS = 0xD800;
178
179     /**
180      * Trail surrogate bits: 0xDC00, the first code unit value of the UTF-16 trail surrogate range.
181      */
182     private static final int TRAIL_SURROGATE_BITS = 0xDC00;
183     // (0xD800 << 10) + 0xDC00 - 0x10000; presumably subtracted from (lead << 10) + trail to get the code point - confirm at use sites.
184     private static final int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000);
185 
186     // public inner classes ----------------------------------------------
187 
188     /**
189      * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_}
190      *
191      * A family of character subsets representing the character blocks in the
192      * Unicode specification, generated from Unicode Data file Blocks.txt.
193      * Character blocks generally define characters used for a specific script
194      * or purpose. A character is contained by at most one Unicode block.
195      *
196      * {@icunote} All fields named XXX_ID are specific to ICU.
197      *
198      * @stable ICU 2.4
199      */
200     public static final class UnicodeBlock extends Character.Subset
201     {
202         // block id corresponding to icu4c -----------------------------------
203 
204         /**
205          * @stable ICU 2.4
206          */
207         public static final int INVALID_CODE_ID = -1;
208         /**
209          * @stable ICU 2.4
210          */
211         public static final int BASIC_LATIN_ID = 1;
212         /**
213          * @stable ICU 2.4
214          */
215         public static final int LATIN_1_SUPPLEMENT_ID = 2;
216         /**
217          * @stable ICU 2.4
218          */
219         public static final int LATIN_EXTENDED_A_ID = 3;
220         /**
221          * @stable ICU 2.4
222          */
223         public static final int LATIN_EXTENDED_B_ID = 4;
224         /**
225          * @stable ICU 2.4
226          */
227         public static final int IPA_EXTENSIONS_ID = 5;
228         /**
229          * @stable ICU 2.4
230          */
231         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
232         /**
233          * @stable ICU 2.4
234          */
235         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
236         /**
237          * Unicode 3.2 renames this block to "Greek and Coptic".
238          * @stable ICU 2.4
239          */
240         public static final int GREEK_ID = 8;
241         /**
242          * @stable ICU 2.4
243          */
244         public static final int CYRILLIC_ID = 9;
245         /**
246          * @stable ICU 2.4
247          */
248         public static final int ARMENIAN_ID = 10;
249         /**
250          * @stable ICU 2.4
251          */
252         public static final int HEBREW_ID = 11;
253         /**
254          * @stable ICU 2.4
255          */
256         public static final int ARABIC_ID = 12;
257         /**
258          * @stable ICU 2.4
259          */
260         public static final int SYRIAC_ID = 13;
261         /**
262          * @stable ICU 2.4
263          */
264         public static final int THAANA_ID = 14;
265         /**
266          * @stable ICU 2.4
267          */
268         public static final int DEVANAGARI_ID = 15;
269         /**
270          * @stable ICU 2.4
271          */
272         public static final int BENGALI_ID = 16;
273         /**
274          * @stable ICU 2.4
275          */
276         public static final int GURMUKHI_ID = 17;
277         /**
278          * @stable ICU 2.4
279          */
280         public static final int GUJARATI_ID = 18;
281         /**
282          * @stable ICU 2.4
283          */
284         public static final int ORIYA_ID = 19;
285         /**
286          * @stable ICU 2.4
287          */
288         public static final int TAMIL_ID = 20;
289         /**
290          * @stable ICU 2.4
291          */
292         public static final int TELUGU_ID = 21;
293         /**
294          * @stable ICU 2.4
295          */
296         public static final int KANNADA_ID = 22;
297         /**
298          * @stable ICU 2.4
299          */
300         public static final int MALAYALAM_ID = 23;
301         /**
302          * @stable ICU 2.4
303          */
304         public static final int SINHALA_ID = 24;
305         /**
306          * @stable ICU 2.4
307          */
308         public static final int THAI_ID = 25;
309         /**
310          * @stable ICU 2.4
311          */
312         public static final int LAO_ID = 26;
313         /**
314          * @stable ICU 2.4
315          */
316         public static final int TIBETAN_ID = 27;
317         /**
318          * @stable ICU 2.4
319          */
320         public static final int MYANMAR_ID = 28;
321         /**
322          * @stable ICU 2.4
323          */
324         public static final int GEORGIAN_ID = 29;
325         /**
326          * @stable ICU 2.4
327          */
328         public static final int HANGUL_JAMO_ID = 30;
329         /**
330          * @stable ICU 2.4
331          */
332         public static final int ETHIOPIC_ID = 31;
333         /**
334          * @stable ICU 2.4
335          */
336         public static final int CHEROKEE_ID = 32;
337         /**
338          * @stable ICU 2.4
339          */
340         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
341         /**
342          * @stable ICU 2.4
343          */
344         public static final int OGHAM_ID = 34;
345         /**
346          * @stable ICU 2.4
347          */
348         public static final int RUNIC_ID = 35;
349         /**
350          * @stable ICU 2.4
351          */
352         public static final int KHMER_ID = 36;
353         /**
354          * @stable ICU 2.4
355          */
356         public static final int MONGOLIAN_ID = 37;
357         /**
358          * @stable ICU 2.4
359          */
360         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
361         /**
362          * @stable ICU 2.4
363          */
364         public static final int GREEK_EXTENDED_ID = 39;
365         /**
366          * @stable ICU 2.4
367          */
368         public static final int GENERAL_PUNCTUATION_ID = 40;
369         /**
370          * @stable ICU 2.4
371          */
372         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
373         /**
374          * @stable ICU 2.4
375          */
376         public static final int CURRENCY_SYMBOLS_ID = 42;
377         /**
378          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
379          * Symbols".
380          * @stable ICU 2.4
381          */
382         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
383         /**
384          * @stable ICU 2.4
385          */
386         public static final int LETTERLIKE_SYMBOLS_ID = 44;
387         /**
388          * @stable ICU 2.4
389          */
390         public static final int NUMBER_FORMS_ID = 45;
391         /**
392          * @stable ICU 2.4
393          */
394         public static final int ARROWS_ID = 46;
395         /**
396          * @stable ICU 2.4
397          */
398         public static final int MATHEMATICAL_OPERATORS_ID = 47;
399         /**
400          * @stable ICU 2.4
401          */
402         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
403         /**
404          * @stable ICU 2.4
405          */
406         public static final int CONTROL_PICTURES_ID = 49;
407         /**
408          * @stable ICU 2.4
409          */
410         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
411         /**
412          * @stable ICU 2.4
413          */
414         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
415         /**
416          * @stable ICU 2.4
417          */
418         public static final int BOX_DRAWING_ID = 52;
419         /**
420          * @stable ICU 2.4
421          */
422         public static final int BLOCK_ELEMENTS_ID = 53;
423         /**
424          * @stable ICU 2.4
425          */
426         public static final int GEOMETRIC_SHAPES_ID = 54;
427         /**
428          * @stable ICU 2.4
429          */
430         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
431         /**
432          * @stable ICU 2.4
433          */
434         public static final int DINGBATS_ID = 56;
435         /**
436          * @stable ICU 2.4
437          */
438         public static final int BRAILLE_PATTERNS_ID = 57;
439         /**
440          * @stable ICU 2.4
441          */
442         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
443         /**
444          * @stable ICU 2.4
445          */
446         public static final int KANGXI_RADICALS_ID = 59;
447         /**
448          * @stable ICU 2.4
449          */
450         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
451         /**
452          * @stable ICU 2.4
453          */
454         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
455         /**
456          * @stable ICU 2.4
457          */
458         public static final int HIRAGANA_ID = 62;
459         /**
460          * @stable ICU 2.4
461          */
462         public static final int KATAKANA_ID = 63;
463         /**
464          * @stable ICU 2.4
465          */
466         public static final int BOPOMOFO_ID = 64;
467         /**
468          * @stable ICU 2.4
469          */
470         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
471         /**
472          * @stable ICU 2.4
473          */
474         public static final int KANBUN_ID = 66;
475         /**
476          * @stable ICU 2.4
477          */
478         public static final int BOPOMOFO_EXTENDED_ID = 67;
479         /**
480          * @stable ICU 2.4
481          */
482         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
483         /**
484          * @stable ICU 2.4
485          */
486         public static final int CJK_COMPATIBILITY_ID = 69;
487         /**
488          * @stable ICU 2.4
489          */
490         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
491         /**
492          * @stable ICU 2.4
493          */
494         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
495         /**
496          * @stable ICU 2.4
497          */
498         public static final int YI_SYLLABLES_ID = 72;
499         /**
500          * @stable ICU 2.4
501          */
502         public static final int YI_RADICALS_ID = 73;
503         /**
504          * @stable ICU 2.4
505          */
506         public static final int HANGUL_SYLLABLES_ID = 74;
507         /**
508          * @stable ICU 2.4
509          */
510         public static final int HIGH_SURROGATES_ID = 75;
511         /**
512          * @stable ICU 2.4
513          */
514         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
515         /**
516          * @stable ICU 2.4
517          */
518         public static final int LOW_SURROGATES_ID = 77;
519         /**
520          * Same as public static final int PRIVATE_USE_ID.
521          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
522          * and multiple code point ranges had this block.
523          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
524          * and adds separate blocks for the supplementary PUAs.
525          * @stable ICU 2.4
526          */
527         public static final int PRIVATE_USE_AREA_ID = 78;
528         /**
529          * Same as public static final int PRIVATE_USE_AREA_ID.
530          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
531          * and multiple code point ranges had this block.
532          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
533          * and adds separate blocks for the supplementary PUAs.
534          * @stable ICU 2.4
535          */
536         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
537         /**
538          * @stable ICU 2.4
539          */
540         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
541         /**
542          * @stable ICU 2.4
543          */
544         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
545         /**
546          * @stable ICU 2.4
547          */
548         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
549         /**
550          * @stable ICU 2.4
551          */
552         public static final int COMBINING_HALF_MARKS_ID = 82;
553         /**
554          * @stable ICU 2.4
555          */
556         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
557         /**
558          * @stable ICU 2.4
559          */
560         public static final int SMALL_FORM_VARIANTS_ID = 84;
561         /**
562          * @stable ICU 2.4
563          */
564         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
565         /**
566          * @stable ICU 2.4
567          */
568         public static final int SPECIALS_ID = 86;
569         /**
570          * @stable ICU 2.4
571          */
572         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
573         /**
574          * @stable ICU 2.4
575          */
576         public static final int OLD_ITALIC_ID = 88;
577         /**
578          * @stable ICU 2.4
579          */
580         public static final int GOTHIC_ID = 89;
581         /**
582          * @stable ICU 2.4
583          */
584         public static final int DESERET_ID = 90;
585         /**
586          * @stable ICU 2.4
587          */
588         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
589         /**
590          * @stable ICU 2.4
591          */
592         public static final int MUSICAL_SYMBOLS_ID = 92;
593         /**
594          * @stable ICU 2.4
595          */
596         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
597         /**
598          * @stable ICU 2.4
599          */
600         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
601         /**
602          * @stable ICU 2.4
603          */
604         public static final int
605         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
606         /**
607          * @stable ICU 2.4
608          */
609         public static final int TAGS_ID = 96;
610 
611         // New blocks in Unicode 3.2
612 
613         /**
614          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
615          * @stable ICU 2.4
616          */
617         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
618         /**
619          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
620          * @stable ICU 3.0
621          */
622 
623         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
624         /**
625          * @stable ICU 2.4
626          */
627         public static final int TAGALOG_ID = 98;
628         /**
629          * @stable ICU 2.4
630          */
631         public static final int HANUNOO_ID = 99;
632         /**
633          * @stable ICU 2.4
634          */
635         public static final int BUHID_ID = 100;
636         /**
637          * @stable ICU 2.4
638          */
639         public static final int TAGBANWA_ID = 101;
640         /**
641          * @stable ICU 2.4
642          */
643         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
644         /**
645          * @stable ICU 2.4
646          */
647         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
648         /**
649          * @stable ICU 2.4
650          */
651         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
652         /**
653          * @stable ICU 2.4
654          */
655         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
656         /**
657          * @stable ICU 2.4
658          */
659         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
660         /**
661          * @stable ICU 2.4
662          */
663         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
664         /**
665          * @stable ICU 2.4
666          */
667         public static final int VARIATION_SELECTORS_ID = 108;
668         /**
669          * @stable ICU 2.4
670          */
671         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
672         /**
673          * @stable ICU 2.4
674          */
675         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
676 
677         /**
678          * @stable ICU 2.6
679          */
680         public static final int LIMBU_ID = 111; /*[1900]*/
681         /**
682          * @stable ICU 2.6
683          */
684         public static final int TAI_LE_ID = 112; /*[1950]*/
685         /**
686          * @stable ICU 2.6
687          */
688         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
689         /**
690          * @stable ICU 2.6
691          */
692         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
693         /**
694          * @stable ICU 2.6
695          */
696         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
697         /**
698          * @stable ICU 2.6
699          */
700         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
701         /**
702          * @stable ICU 2.6
703          */
704         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
705         /**
706          * @stable ICU 2.6
707          */
708         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
709         /**
710          * @stable ICU 2.6
711          */
712         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
713         /**
714          * @stable ICU 2.6
715          */
716         public static final int UGARITIC_ID = 120; /*[10380]*/
717         /**
718          * @stable ICU 2.6
719          */
720         public static final int SHAVIAN_ID = 121; /*[10450]*/
721         /**
722          * @stable ICU 2.6
723          */
724         public static final int OSMANYA_ID = 122; /*[10480]*/
725         /**
726          * @stable ICU 2.6
727          */
728         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
729         /**
730          * @stable ICU 2.6
731          */
732         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
733         /**
734          * @stable ICU 2.6
735          */
736         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
737 
738         /* New blocks in Unicode 4.1 */
739 
740         /**
741          * @stable ICU 3.4
742          */
743         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
744 
745         /**
746          * @stable ICU 3.4
747          */
748         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
749 
750         /**
751          * @stable ICU 3.4
752          */
753         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
754 
755         /**
756          * @stable ICU 3.4
757          */
758         public static final int BUGINESE_ID = 129; /*[1A00]*/
759 
760         /**
761          * @stable ICU 3.4
762          */
763         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
764 
765         /**
766          * @stable ICU 3.4
767          */
768         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
769 
770         /**
771          * @stable ICU 3.4
772          */
773         public static final int COPTIC_ID = 132; /*[2C80]*/
774 
775         /**
776          * @stable ICU 3.4
777          */
778         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
779 
780         /**
781          * @stable ICU 3.4
782          */
783         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
784 
785         /**
786          * @stable ICU 3.4
787          */
788         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
789 
790         /**
791          * @stable ICU 3.4
792          */
793         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
794 
795         /**
796          * @stable ICU 3.4
797          */
798         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
799 
800         /**
801          * @stable ICU 3.4
802          */
803         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
804 
805         /**
806          * @stable ICU 3.4
807          */
808         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
809 
810         /**
811          * @stable ICU 3.4
812          */
813         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
814 
815         /**
816          * @stable ICU 3.4
817          */
818         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
819 
820         /**
821          * @stable ICU 3.4
822          */
823         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
824 
825         /**
826          * @stable ICU 3.4
827          */
828         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
829 
830         /**
831          * @stable ICU 3.4
832          */
833         public static final int TIFINAGH_ID = 144; /*[2D30]*/
834 
835         /**
836          * @stable ICU 3.4
837          */
838         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
839 
840         /* New blocks in Unicode 5.0 */
841 
842         /**
843          * @stable ICU 3.6
844          */
845         public static final int NKO_ID = 146; /*[07C0]*/
846         /**
847          * @stable ICU 3.6
848          */
849         public static final int BALINESE_ID = 147; /*[1B00]*/
850         /**
851          * @stable ICU 3.6
852          */
853         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
854         /**
855          * @stable ICU 3.6
856          */
857         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
858         /**
859          * @stable ICU 3.6
860          */
861         public static final int PHAGS_PA_ID = 150; /*[A840]*/
862         /**
863          * @stable ICU 3.6
864          */
865         public static final int PHOENICIAN_ID = 151; /*[10900]*/
866         /**
867          * @stable ICU 3.6
868          */
869         public static final int CUNEIFORM_ID = 152; /*[12000]*/
870         /**
871          * @stable ICU 3.6
872          */
873         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
874         /**
875          * @stable ICU 3.6
876          */
877         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
878         /* New blocks in Unicode 5.1 (stable since ICU 4.0): IDs 155-171 */
879         /**
880          * @stable ICU 4.0
881          */
882         public static final int SUNDANESE_ID = 155; /* [1B80] */
883 
884         /**
885          * @stable ICU 4.0
886          */
887         public static final int LEPCHA_ID = 156; /* [1C00] */
888 
889         /**
890          * @stable ICU 4.0
891          */
892         public static final int OL_CHIKI_ID = 157; /* [1C50] */
893 
894         /**
895          * @stable ICU 4.0
896          */
897         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
898 
899         /**
900          * @stable ICU 4.0
901          */
902         public static final int VAI_ID = 159; /* [A500] */
903 
904         /**
905          * @stable ICU 4.0
906          */
907         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
908 
909         /**
910          * @stable ICU 4.0
911          */
912         public static final int SAURASHTRA_ID = 161; /* [A880] */
913 
914         /**
915          * @stable ICU 4.0
916          */
917         public static final int KAYAH_LI_ID = 162; /* [A900] */
918 
919         /**
920          * @stable ICU 4.0
921          */
922         public static final int REJANG_ID = 163; /* [A930] */
923 
924         /**
925          * @stable ICU 4.0
926          */
927         public static final int CHAM_ID = 164; /* [AA00] */
928 
929         /**
930          * @stable ICU 4.0
931          */
932         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
933 
934         /**
935          * @stable ICU 4.0
936          */
937         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
938 
939         /**
940          * @stable ICU 4.0
941          */
942         public static final int LYCIAN_ID = 167; /* [10280] */
943 
944         /**
945          * @stable ICU 4.0
946          */
947         public static final int CARIAN_ID = 168; /* [102A0] */
948 
949         /**
950          * @stable ICU 4.0
951          */
952         public static final int LYDIAN_ID = 169; /* [10920] */
953 
954         /**
955          * @stable ICU 4.0
956          */
957         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
958 
959         /**
960          * @stable ICU 4.0
961          */
962         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
963 
964         /* New blocks in Unicode 5.2 (stable since ICU 4.4): IDs 172-197 */
965 
966         /** @stable ICU 4.4 */
967         public static final int SAMARITAN_ID = 172; /*[0800]*/
968         /** @stable ICU 4.4 */
969         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
970         /** @stable ICU 4.4 */
971         public static final int TAI_THAM_ID = 174; /*[1A20]*/
972         /** @stable ICU 4.4 */
973         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
974         /** @stable ICU 4.4 */
975         public static final int LISU_ID = 176; /*[A4D0]*/
976         /** @stable ICU 4.4 */
977         public static final int BAMUM_ID = 177; /*[A6A0]*/
978         /** @stable ICU 4.4 */
979         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
980         /** @stable ICU 4.4 */
981         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
982         /** @stable ICU 4.4 */
983         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
984         /** @stable ICU 4.4 */
985         public static final int JAVANESE_ID = 181; /*[A980]*/
986         /** @stable ICU 4.4 */
987         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
988         /** @stable ICU 4.4 */
989         public static final int TAI_VIET_ID = 183; /*[AA80]*/
990         /** @stable ICU 4.4 */
991         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
992         /** @stable ICU 4.4 */
993         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
994         /** @stable ICU 4.4 */
995         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
996         /** @stable ICU 4.4 */
997         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
998         /** @stable ICU 4.4 */
999         public static final int AVESTAN_ID = 188; /*[10B00]*/
1000         /** @stable ICU 4.4 */
1001         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
1002         /** @stable ICU 4.4 */
1003         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
1004         /** @stable ICU 4.4 */
1005         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
1006         /** @stable ICU 4.4 */
1007         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
1008         /** @stable ICU 4.4 */
1009         public static final int KAITHI_ID = 193; /*[11080]*/
1010         /** @stable ICU 4.4 */
1011         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
1012         /** @stable ICU 4.4 */
1013         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
1014         /** @stable ICU 4.4 */
1015         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
1016         /** @stable ICU 4.4 */
1017         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
1018 
1019         /* New blocks in Unicode 6.0 (stable since ICU 4.6): IDs 198-209 */
1020 
1021         /** @stable ICU 4.6 */
1022         public static final int MANDAIC_ID = 198; /*[0840]*/
1023         /** @stable ICU 4.6 */
1024         public static final int BATAK_ID = 199; /*[1BC0]*/
1025         /** @stable ICU 4.6 */
1026         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
1027         /** @stable ICU 4.6 */
1028         public static final int BRAHMI_ID = 201; /*[11000]*/
1029         /** @stable ICU 4.6 */
1030         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
1031         /** @stable ICU 4.6 */
1032         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
1033         /** @stable ICU 4.6 */
1034         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
1035         /** @stable ICU 4.6 */
1036         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
1037         /** @stable ICU 4.6 */
1038         public static final int EMOTICONS_ID = 206; /*[1F600]*/
1039         /** @stable ICU 4.6 */
1040         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
1041         /** @stable ICU 4.6 */
1042         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
1043         /** @stable ICU 4.6 */
1044         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
1045 
1046         /* New blocks in Unicode 6.1 (stable since ICU 49): IDs 210-220 */
1047 
1048         /** @stable ICU 49 */
1049         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
1050         /** @stable ICU 49 */
1051         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
1052         /** @stable ICU 49 */
1053         public static final int CHAKMA_ID = 212; /*[11100]*/
1054         /** @stable ICU 49 */
1055         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
1056         /** @stable ICU 49 */
1057         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
1058         /** @stable ICU 49 */
1059         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
1060         /** @stable ICU 49 */
1061         public static final int MIAO_ID = 216; /*[16F00]*/
1062         /** @stable ICU 49 */
1063         public static final int SHARADA_ID = 217; /*[11180]*/
1064         /** @stable ICU 49 */
1065         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
1066         /** @stable ICU 49 */
1067         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
1068         /** @stable ICU 49 */
1069         public static final int TAKRI_ID = 220; /*[11680]*/
1070 
1071         /* New blocks in Unicode 7.0 (stable since ICU 54): IDs 221-252 */
1072 
1073         /** @stable ICU 54 */
1074         public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
1075         /** @stable ICU 54 */
1076         public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
1077         /** @stable ICU 54 */
1078         public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
1079         /** @stable ICU 54 */
1080         public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
1081         /** @stable ICU 54 */
1082         public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
1083         /** @stable ICU 54 */
1084         public static final int ELBASAN_ID = 226; /*[10500]*/
1085         /** @stable ICU 54 */
1086         public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
1087         /** @stable ICU 54 */
1088         public static final int GRANTHA_ID = 228; /*[11300]*/
1089         /** @stable ICU 54 */
1090         public static final int KHOJKI_ID = 229; /*[11200]*/
1091         /** @stable ICU 54 */
1092         public static final int KHUDAWADI_ID = 230; /*[112B0]*/
1093         /** @stable ICU 54 */
1094         public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
1095         /** @stable ICU 54 */
1096         public static final int LINEAR_A_ID = 232; /*[10600]*/
1097         /** @stable ICU 54 */
1098         public static final int MAHAJANI_ID = 233; /*[11150]*/
1099         /** @stable ICU 54 */
1100         public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
1101         /** @stable ICU 54 */
1102         public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
1103         /** @stable ICU 54 */
1104         public static final int MODI_ID = 236; /*[11600]*/
1105         /** @stable ICU 54 */
1106         public static final int MRO_ID = 237; /*[16A40]*/
1107         /** @stable ICU 54 */
1108         public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
1109         /** @stable ICU 54 */
1110         public static final int NABATAEAN_ID = 239; /*[10880]*/
1111         /** @stable ICU 54 */
1112         public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
1113         /** @stable ICU 54 */
1114         public static final int OLD_PERMIC_ID = 241; /*[10350]*/
1115         /** @stable ICU 54 */
1116         public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
1117         /** @stable ICU 54 */
1118         public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
1119         /** @stable ICU 54 */
1120         public static final int PALMYRENE_ID = 244; /*[10860]*/
1121         /** @stable ICU 54 */
1122         public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
1123         /** @stable ICU 54 */
1124         public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
1125         /** @stable ICU 54 */
1126         public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
1127         /** @stable ICU 54 */
1128         public static final int SIDDHAM_ID = 248; /*[11580]*/
1129         /** @stable ICU 54 */
1130         public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
1131         /** @stable ICU 54 */
1132         public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
1133         /** @stable ICU 54 */
1134         public static final int TIRHUTA_ID = 251; /*[11480]*/
1135         /** @stable ICU 54 */
1136         public static final int WARANG_CITI_ID = 252; /*[118A0]*/
1137 
1138         /* New blocks in Unicode 8.0 (stable since ICU 56): IDs 253-262 */
1139 
1140         /** @stable ICU 56 */
1141         public static final int AHOM_ID = 253; /*[11700]*/
1142         /** @stable ICU 56 */
1143         public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
1144         /** @stable ICU 56 */
1145         public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
1146         /** @stable ICU 56 */
1147         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
1148         /** @stable ICU 56 */
1149         public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
1150         /** @stable ICU 56 */
1151         public static final int HATRAN_ID = 258; /*[108E0]*/
1152         /** @stable ICU 56 */
1153         public static final int MULTANI_ID = 259; /*[11280]*/
1154         /** @stable ICU 56 */
1155         public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
1156         /** @stable ICU 56 */
1157         public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
1158         /** @stable ICU 56 */
1159         public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
1160 
1161         /* New blocks in Unicode 9.0 (stable since ICU 58): IDs 263-273 */
1162 
1163         /** @stable ICU 58 */
1164         public static final int ADLAM_ID = 263; /*[1E900]*/
1165         /** @stable ICU 58 */
1166         public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
1167         /** @stable ICU 58 */
1168         public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
1169         /** @stable ICU 58 */
1170         public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
1171         /** @stable ICU 58 */
1172         public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
1173         /** @stable ICU 58 */
1174         public static final int MARCHEN_ID = 268; /*[11C70]*/
1175         /** @stable ICU 58 */
1176         public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
1177         /** @stable ICU 58 */
1178         public static final int NEWA_ID = 270; /*[11400]*/
1179         /** @stable ICU 58 */
1180         public static final int OSAGE_ID = 271; /*[104B0]*/
1181         /** @stable ICU 58 */
1182         public static final int TANGUT_ID = 272; /*[17000]*/
1183         /** @stable ICU 58 */
1184         public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/
1185 
1186         // New blocks in Unicode 10.0 (stable since ICU 60): IDs 274-280
1187 
1188         /** @stable ICU 60 */
1189         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/
1190         /** @stable ICU 60 */
1191         public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/
1192         /** @stable ICU 60 */
1193         public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/
1194         /** @stable ICU 60 */
1195         public static final int NUSHU_ID = 277; /*[1B170]*/
1196         /** @stable ICU 60 */
1197         public static final int SOYOMBO_ID = 278; /*[11A50]*/
1198         /** @stable ICU 60 */
1199         public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/
1200         /** @stable ICU 60 */
1201         public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/
1202 
1203         // New blocks in Unicode 11.0 (stable since ICU 62): IDs 281-291
1204 
1205         /** @stable ICU 62 */
1206         public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/
1207         /** @stable ICU 62 */
1208         public static final int DOGRA_ID = 282; /*[11800]*/
1209         /** @stable ICU 62 */
1210         public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/
1211         /** @stable ICU 62 */
1212         public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/
1213         /** @stable ICU 62 */
1214         public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/
1215         /** @stable ICU 62 */
1216         public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/
1217         /** @stable ICU 62 */
1218         public static final int MAKASAR_ID = 287; /*[11EE0]*/
1219         /** @stable ICU 62 */
1220         public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/
1221         /** @stable ICU 62 */
1222         public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/
1223         /** @stable ICU 62 */
1224         public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/
1225         /** @stable ICU 62 */
1226         public static final int SOGDIAN_ID = 291; /*[10F30]*/
1227 
1228         // New blocks in Unicode 12.0 (stable since ICU 64): IDs 292-300
1229 
1230         /** @stable ICU 64 */
1231         public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/
1232         /** @stable ICU 64 */
1233         public static final int ELYMAIC_ID = 293; /*[10FE0]*/
1234         /** @stable ICU 64 */
1235         public static final int NANDINAGARI_ID = 294; /*[119A0]*/
1236         /** @stable ICU 64 */
1237         public static final int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/
1238         /** @stable ICU 64 */
1239         public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/
1240         /** @stable ICU 64 */
1241         public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/
1242         /** @stable ICU 64 */
1243         public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/
1244         /** @stable ICU 64 */
1245         public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/
1246         /** @stable ICU 64 */
1247         public static final int WANCHO_ID = 300; /*[1E2C0]*/
1248 
1249         // New blocks in Unicode 13.0 (stable since ICU 66): IDs 301-308
1250 
1251         /** @stable ICU 66 */
1252         public static final int CHORASMIAN_ID = 301; /*[10FB0]*/
1253         /** @stable ICU 66 */
1254         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/
1255         /** @stable ICU 66 */
1256         public static final int DIVES_AKURU_ID = 303; /*[11900]*/
1257         /** @stable ICU 66 */
1258         public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/
1259         /** @stable ICU 66 */
1260         public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/
1261         /** @stable ICU 66 */
1262         public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/
1263         /** @stable ICU 66 */
1264         public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/
1265         /** @stable ICU 66 */
1266         public static final int YEZIDI_ID = 308; /*[10E80]*/
1267 
1268         // New blocks in Unicode 14.0 (stable since ICU 70): IDs 309-320
1269 
1270         /** @stable ICU 70 */
1271         public static final int ARABIC_EXTENDED_B_ID = 309; /*[0870]*/
1272         /** @stable ICU 70 */
1273         public static final int CYPRO_MINOAN_ID = 310; /*[12F90]*/
1274         /** @stable ICU 70 */
1275         public static final int ETHIOPIC_EXTENDED_B_ID = 311; /*[1E7E0]*/
1276         /** @stable ICU 70 */
1277         public static final int KANA_EXTENDED_B_ID = 312; /*[1AFF0]*/
1278         /** @stable ICU 70 */
1279         public static final int LATIN_EXTENDED_F_ID = 313; /*[10780]*/
1280         /** @stable ICU 70 */
1281         public static final int LATIN_EXTENDED_G_ID = 314; /*[1DF00]*/
1282         /** @stable ICU 70 */
1283         public static final int OLD_UYGHUR_ID = 315; /*[10F70]*/
1284         /** @stable ICU 70 */
1285         public static final int TANGSA_ID = 316; /*[16A70]*/
1286         /** @stable ICU 70 */
1287         public static final int TOTO_ID = 317; /*[1E290]*/
1288         /** @stable ICU 70 */
1289         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID = 318; /*[11AB0]*/
1290         /** @stable ICU 70 */
1291         public static final int VITHKUQI_ID = 319; /*[10570]*/
1292         /** @stable ICU 70 */
1293         public static final int ZNAMENNY_MUSICAL_NOTATION_ID = 320; /*[1CF00]*/
1294 
1295         // New blocks in Unicode 15.0 (stable since ICU 72): IDs 321-327
1296 
1297         /** @stable ICU 72 */
1298         public static final int ARABIC_EXTENDED_C_ID = 321; /*[10EC0]*/
1299         /** @stable ICU 72 */
1300         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID = 322; /*[31350]*/
1301         /** @stable ICU 72 */
1302         public static final int CYRILLIC_EXTENDED_D_ID = 323; /*[1E030]*/
1303         /** @stable ICU 72 */
1304         public static final int DEVANAGARI_EXTENDED_A_ID = 324; /*[11B00]*/
1305         /** @stable ICU 72 */
1306         public static final int KAKTOVIK_NUMERALS_ID = 325; /*[1D2C0]*/
1307         /** @stable ICU 72 */
1308         public static final int KAWI_ID = 326; /*[11F00]*/
1309         /** @stable ICU 72 */
1310         public static final int NAG_MUNDARI_ID = 327; /*[1E4D0]*/
1311 
1312         /**
1313          * One more than the highest normal UnicodeBlock value.
1314          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK).
1315          * <p>Currently equal to {@link #NAG_MUNDARI_ID} + 1; also used as the length of the internal BLOCKS_ array.
1316          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1317          */
1318         @Deprecated
1319         public static final int COUNT = 328;
1320 
1321         // blocks objects ---------------------------------------------------
1322 
1323         /**
1324          * Array of UnicodeBlocks, for easy access in getInstance(int)
1325          */
1326         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
1327 
1328         /** Block object for {@code NO_BLOCK}, constructed with ID 0.
1329          * @stable ICU 2.6
1330          */
1331         public static final UnicodeBlock NO_BLOCK
1332         = new UnicodeBlock("NO_BLOCK", 0);
1333 
1334         /** Block object for {@code BASIC_LATIN}, constructed with ID {@link #BASIC_LATIN_ID}.
1335          * @stable ICU 2.4
1336          */
1337         public static final UnicodeBlock BASIC_LATIN
1338         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
1339         /** Block object for {@code LATIN_1_SUPPLEMENT}, constructed with ID {@link #LATIN_1_SUPPLEMENT_ID}.
1340          * @stable ICU 2.4
1341          */
1342         public static final UnicodeBlock LATIN_1_SUPPLEMENT
1343         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1344         /** Block object for {@code LATIN_EXTENDED_A}, constructed with ID {@link #LATIN_EXTENDED_A_ID}.
1345          * @stable ICU 2.4
1346          */
1347         public static final UnicodeBlock LATIN_EXTENDED_A
1348         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1349         /** Block object for {@code LATIN_EXTENDED_B}, constructed with ID {@link #LATIN_EXTENDED_B_ID}.
1350          * @stable ICU 2.4
1351          */
1352         public static final UnicodeBlock LATIN_EXTENDED_B
1353         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1354         /** Block object for {@code IPA_EXTENSIONS}, constructed with ID {@link #IPA_EXTENSIONS_ID}.
1355          * @stable ICU 2.4
1356          */
1357         public static final UnicodeBlock IPA_EXTENSIONS
1358         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1359         /** Block object for {@code SPACING_MODIFIER_LETTERS}, constructed with ID {@link #SPACING_MODIFIER_LETTERS_ID}.
1360          * @stable ICU 2.4
1361          */
1362         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1363         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1364         /** Block object for {@code COMBINING_DIACRITICAL_MARKS}, constructed with ID {@link #COMBINING_DIACRITICAL_MARKS_ID}.
1365          * @stable ICU 2.4
1366          */
1367         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1368         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1369         /** Block object for {@code GREEK}, constructed with ID {@link #GREEK_ID}.
1370          * Unicode 3.2 renames this block to "Greek and Coptic".
1371          * @stable ICU 2.4
1372          */
1373         public static final UnicodeBlock GREEK
1374         = new UnicodeBlock("GREEK", GREEK_ID);
1375         /** Block object for {@code CYRILLIC}, constructed with ID {@link #CYRILLIC_ID}.
1376          * @stable ICU 2.4
1377          */
1378         public static final UnicodeBlock CYRILLIC
1379         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1380         /** Block object for {@code ARMENIAN}, constructed with ID {@link #ARMENIAN_ID}.
1381          * @stable ICU 2.4
1382          */
1383         public static final UnicodeBlock ARMENIAN
1384         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1385         /** Block object for {@code HEBREW}, constructed with ID {@link #HEBREW_ID}.
1386          * @stable ICU 2.4
1387          */
1388         public static final UnicodeBlock HEBREW
1389         = new UnicodeBlock("HEBREW", HEBREW_ID);
1390         /** Block object for {@code ARABIC}, constructed with ID {@link #ARABIC_ID}.
1391          * @stable ICU 2.4
1392          */
1393         public static final UnicodeBlock ARABIC
1394         = new UnicodeBlock("ARABIC", ARABIC_ID);
1395         /** Block object for {@code SYRIAC}, constructed with ID {@link #SYRIAC_ID}.
1396          * @stable ICU 2.4
1397          */
1398         public static final UnicodeBlock SYRIAC
1399         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1400         /** Block object for {@code THAANA}, constructed with ID {@link #THAANA_ID}.
1401          * @stable ICU 2.4
1402          */
1403         public static final UnicodeBlock THAANA
1404         = new UnicodeBlock("THAANA", THAANA_ID);
1405         /** Block object for {@code DEVANAGARI}, constructed with ID {@link #DEVANAGARI_ID}.
1406          * @stable ICU 2.4
1407          */
1408         public static final UnicodeBlock DEVANAGARI
1409         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1410         /** Block object for {@code BENGALI}, constructed with ID {@link #BENGALI_ID}.
1411          * @stable ICU 2.4
1412          */
1413         public static final UnicodeBlock BENGALI
1414         = new UnicodeBlock("BENGALI", BENGALI_ID);
1415         /** Block object for {@code GURMUKHI}, constructed with ID {@link #GURMUKHI_ID}.
1416          * @stable ICU 2.4
1417          */
1418         public static final UnicodeBlock GURMUKHI
1419         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1420         /** Block object for {@code GUJARATI}, constructed with ID {@link #GUJARATI_ID}.
1421          * @stable ICU 2.4
1422          */
1423         public static final UnicodeBlock GUJARATI
1424         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1425         /** Block object for {@code ORIYA}, constructed with ID {@link #ORIYA_ID}.
1426          * @stable ICU 2.4
1427          */
1428         public static final UnicodeBlock ORIYA
1429         = new UnicodeBlock("ORIYA", ORIYA_ID);
1430         /** Block object for {@code TAMIL}, constructed with ID {@link #TAMIL_ID}.
1431          * @stable ICU 2.4
1432          */
1433         public static final UnicodeBlock TAMIL
1434         = new UnicodeBlock("TAMIL", TAMIL_ID);
1435         /** Block object for {@code TELUGU}, constructed with ID {@link #TELUGU_ID}.
1436          * @stable ICU 2.4
1437          */
1438         public static final UnicodeBlock TELUGU
1439         = new UnicodeBlock("TELUGU", TELUGU_ID);
1440         /** Block object for {@code KANNADA}, constructed with ID {@link #KANNADA_ID}.
1441          * @stable ICU 2.4
1442          */
1443         public static final UnicodeBlock KANNADA
1444         = new UnicodeBlock("KANNADA", KANNADA_ID);
1445         /** Block object for {@code MALAYALAM}, constructed with ID {@link #MALAYALAM_ID}.
1446          * @stable ICU 2.4
1447          */
1448         public static final UnicodeBlock MALAYALAM
1449         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1450         /** Block object for {@code SINHALA}, constructed with ID {@link #SINHALA_ID}.
1451          * @stable ICU 2.4
1452          */
1453         public static final UnicodeBlock SINHALA
1454         = new UnicodeBlock("SINHALA", SINHALA_ID);
1455         /** Block object for {@code THAI}, constructed with ID {@link #THAI_ID}.
1456          * @stable ICU 2.4
1457          */
1458         public static final UnicodeBlock THAI
1459         = new UnicodeBlock("THAI", THAI_ID);
1460         /** Block object for {@code LAO}, constructed with ID {@link #LAO_ID}.
1461          * @stable ICU 2.4
1462          */
1463         public static final UnicodeBlock LAO
1464         = new UnicodeBlock("LAO", LAO_ID);
1465         /** Block object for {@code TIBETAN}, constructed with ID {@link #TIBETAN_ID}.
1466          * @stable ICU 2.4
1467          */
1468         public static final UnicodeBlock TIBETAN
1469         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1470         /** Block object for {@code MYANMAR}, constructed with ID {@link #MYANMAR_ID}.
1471          * @stable ICU 2.4
1472          */
1473         public static final UnicodeBlock MYANMAR
1474         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1475         /** Block object for {@code GEORGIAN}, constructed with ID {@link #GEORGIAN_ID}.
1476          * @stable ICU 2.4
1477          */
1478         public static final UnicodeBlock GEORGIAN
1479         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1480         /** Block object for {@code HANGUL_JAMO}, constructed with ID {@link #HANGUL_JAMO_ID}.
1481          * @stable ICU 2.4
1482          */
1483         public static final UnicodeBlock HANGUL_JAMO
1484         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1485         /** Block object for {@code ETHIOPIC}, constructed with ID {@link #ETHIOPIC_ID}.
1486          * @stable ICU 2.4
1487          */
1488         public static final UnicodeBlock ETHIOPIC
1489         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1490         /** Block object for {@code CHEROKEE}, constructed with ID {@link #CHEROKEE_ID}.
1491          * @stable ICU 2.4
1492          */
1493         public static final UnicodeBlock CHEROKEE
1494         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1495         /** Block object for {@code UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS}; ID {@link #UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID}.
1496          * @stable ICU 2.4
1497          */
1498         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1499         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1500                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1501         /** Block object for {@code OGHAM}, constructed with ID {@link #OGHAM_ID}.
1502          * @stable ICU 2.4
1503          */
1504         public static final UnicodeBlock OGHAM
1505         = new UnicodeBlock("OGHAM", OGHAM_ID);
1506         /** Block object for {@code RUNIC}, constructed with ID {@link #RUNIC_ID}.
1507          * @stable ICU 2.4
1508          */
1509         public static final UnicodeBlock RUNIC
1510         = new UnicodeBlock("RUNIC", RUNIC_ID);
1511         /** Block object for {@code KHMER}, constructed with ID {@link #KHMER_ID}.
1512          * @stable ICU 2.4
1513          */
1514         public static final UnicodeBlock KHMER
1515         = new UnicodeBlock("KHMER", KHMER_ID);
1516         /** Block object for {@code MONGOLIAN}, constructed with ID {@link #MONGOLIAN_ID}.
1517          * @stable ICU 2.4
1518          */
1519         public static final UnicodeBlock MONGOLIAN
1520         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1521         /** Block object for {@code LATIN_EXTENDED_ADDITIONAL}, constructed with ID {@link #LATIN_EXTENDED_ADDITIONAL_ID}.
1522          * @stable ICU 2.4
1523          */
1524         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1525         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1526         /** Block object for {@code GREEK_EXTENDED}, constructed with ID {@link #GREEK_EXTENDED_ID}.
1527          * @stable ICU 2.4
1528          */
1529         public static final UnicodeBlock GREEK_EXTENDED
1530         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1531         /** Block object for {@code GENERAL_PUNCTUATION}, constructed with ID {@link #GENERAL_PUNCTUATION_ID}.
1532          * @stable ICU 2.4
1533          */
1534         public static final UnicodeBlock GENERAL_PUNCTUATION
1535         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1536         /** Block object for {@code SUPERSCRIPTS_AND_SUBSCRIPTS}, constructed with ID {@link #SUPERSCRIPTS_AND_SUBSCRIPTS_ID}.
1537          * @stable ICU 2.4
1538          */
1539         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1540         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1541         /** Block object for {@code CURRENCY_SYMBOLS}, constructed with ID {@link #CURRENCY_SYMBOLS_ID}.
1542          * @stable ICU 2.4
1543          */
1544         public static final UnicodeBlock CURRENCY_SYMBOLS
1545         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1546         /** Block object for {@code COMBINING_MARKS_FOR_SYMBOLS}, constructed with ID {@link #COMBINING_MARKS_FOR_SYMBOLS_ID}.
1547          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1548          * Symbols".
1549          * @stable ICU 2.4
1550          */
1551         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1552         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1553         /** Block object for {@code LETTERLIKE_SYMBOLS}, constructed with ID {@link #LETTERLIKE_SYMBOLS_ID}.
1554          * @stable ICU 2.4
1555          */
1556         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1557         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1558         /** Block object for {@code NUMBER_FORMS}, constructed with ID {@link #NUMBER_FORMS_ID}.
1559          * @stable ICU 2.4
1560          */
1561         public static final UnicodeBlock NUMBER_FORMS
1562         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1563         /** Block object for {@code ARROWS}, constructed with ID {@link #ARROWS_ID}.
1564          * @stable ICU 2.4
1565          */
1566         public static final UnicodeBlock ARROWS
1567         = new UnicodeBlock("ARROWS", ARROWS_ID);
1568         /** Block object for {@code MATHEMATICAL_OPERATORS}, constructed with ID {@link #MATHEMATICAL_OPERATORS_ID}.
1569          * @stable ICU 2.4
1570          */
1571         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1572         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1573         /** Block object for {@code MISCELLANEOUS_TECHNICAL}, constructed with ID {@link #MISCELLANEOUS_TECHNICAL_ID}.
1574          * @stable ICU 2.4
1575          */
1576         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1577         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1578         /** Block object for {@code CONTROL_PICTURES}, constructed with ID {@link #CONTROL_PICTURES_ID}.
1579          * @stable ICU 2.4
1580          */
1581         public static final UnicodeBlock CONTROL_PICTURES
1582         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1583         /** Block object for {@code OPTICAL_CHARACTER_RECOGNITION}, constructed with ID {@link #OPTICAL_CHARACTER_RECOGNITION_ID}.
1584          * @stable ICU 2.4
1585          */
1586         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1587         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1588         /** Block object for {@code ENCLOSED_ALPHANUMERICS}, constructed with ID {@link #ENCLOSED_ALPHANUMERICS_ID}.
1589          * @stable ICU 2.4
1590          */
1591         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1592         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1593         /** Block object for {@code BOX_DRAWING}, constructed with ID {@link #BOX_DRAWING_ID}.
1594          * @stable ICU 2.4
1595          */
1596         public static final UnicodeBlock BOX_DRAWING
1597         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1598         /** Block object for {@code BLOCK_ELEMENTS}, constructed with ID {@link #BLOCK_ELEMENTS_ID}.
1599          * @stable ICU 2.4
1600          */
1601         public static final UnicodeBlock BLOCK_ELEMENTS
1602         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1603         /** Block object for {@code GEOMETRIC_SHAPES}, constructed with ID {@link #GEOMETRIC_SHAPES_ID}.
1604          * @stable ICU 2.4
1605          */
1606         public static final UnicodeBlock GEOMETRIC_SHAPES
1607         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1608         /** Block object for {@code MISCELLANEOUS_SYMBOLS}, constructed with ID {@link #MISCELLANEOUS_SYMBOLS_ID}.
1609          * @stable ICU 2.4
1610          */
1611         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1612         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1613         /** Block object for {@code DINGBATS}, constructed with ID {@link #DINGBATS_ID}.
1614          * @stable ICU 2.4
1615          */
1616         public static final UnicodeBlock DINGBATS
1617         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1618         /** Block object for {@code BRAILLE_PATTERNS}, constructed with ID {@link #BRAILLE_PATTERNS_ID}.
1619          * @stable ICU 2.4
1620          */
1621         public static final UnicodeBlock BRAILLE_PATTERNS
1622         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1623         /** Block object for {@code CJK_RADICALS_SUPPLEMENT}, constructed with ID {@link #CJK_RADICALS_SUPPLEMENT_ID}.
1624          * @stable ICU 2.4
1625          */
1626         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1627         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1628         /** Block object for {@code KANGXI_RADICALS}, constructed with ID {@link #KANGXI_RADICALS_ID}.
1629          * @stable ICU 2.4
1630          */
1631         public static final UnicodeBlock KANGXI_RADICALS
1632         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1633         /** Block object for {@code IDEOGRAPHIC_DESCRIPTION_CHARACTERS}; ID {@link #IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID}.
1634          * @stable ICU 2.4
1635          */
1636         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1637         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1638                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1639         /** Block object for {@code CJK_SYMBOLS_AND_PUNCTUATION}, constructed with ID {@link #CJK_SYMBOLS_AND_PUNCTUATION_ID}.
1640          * @stable ICU 2.4
1641          */
1642         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1643         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1644         /** Block object for {@code HIRAGANA}, constructed with ID {@link #HIRAGANA_ID}.
1645          * @stable ICU 2.4
1646          */
1647         public static final UnicodeBlock HIRAGANA
1648         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1649         /** Block object for {@code KATAKANA}, constructed with ID {@link #KATAKANA_ID}.
1650          * @stable ICU 2.4
1651          */
1652         public static final UnicodeBlock KATAKANA
1653         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1654         /** Block object for {@code BOPOMOFO}, constructed with ID {@link #BOPOMOFO_ID}.
1655          * @stable ICU 2.4
1656          */
1657         public static final UnicodeBlock BOPOMOFO
1658         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1659         /** Block object for {@code HANGUL_COMPATIBILITY_JAMO}, constructed with ID {@link #HANGUL_COMPATIBILITY_JAMO_ID}.
1660          * @stable ICU 2.4
1661          */
1662         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1663         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1664         /** Block object for {@code KANBUN}, constructed with ID {@link #KANBUN_ID}.
1665          * @stable ICU 2.4
1666          */
1667         public static final UnicodeBlock KANBUN
1668         = new UnicodeBlock("KANBUN", KANBUN_ID);
1669         /** Block object for {@code BOPOMOFO_EXTENDED}, constructed with ID {@link #BOPOMOFO_EXTENDED_ID}.
1670          * @stable ICU 2.4
1671          */
1672         public static final UnicodeBlock BOPOMOFO_EXTENDED
1673         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1674         /**
1675          * @stable ICU 2.4
1676          */
1677         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1678         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1679                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1680         /**
1681          * @stable ICU 2.4
1682          */
1683         public static final UnicodeBlock CJK_COMPATIBILITY
1684         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1685         /**
1686          * @stable ICU 2.4
1687          */
1688         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1689         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1690                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1691         /**
1692          * @stable ICU 2.4
1693          */
1694         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1695         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1696         /**
1697          * @stable ICU 2.4
1698          */
1699         public static final UnicodeBlock YI_SYLLABLES
1700         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1701         /**
1702          * @stable ICU 2.4
1703          */
1704         public static final UnicodeBlock YI_RADICALS
1705         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1706         /**
1707          * @stable ICU 2.4
1708          */
1709         public static final UnicodeBlock HANGUL_SYLLABLES
1710         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1711         /**
1712          * @stable ICU 2.4
1713          */
1714         public static final UnicodeBlock HIGH_SURROGATES
1715         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1716         /**
1717          * @stable ICU 2.4
1718          */
1719         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1720         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1721         /**
1722          * @stable ICU 2.4
1723          */
1724         public static final UnicodeBlock LOW_SURROGATES
1725         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1726         /**
1727          * Same as public static final int PRIVATE_USE.
1728          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1729          * and multiple code point ranges had this block.
1730          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1731          * and adds separate blocks for the supplementary PUAs.
1732          * @stable ICU 2.4
1733          */
1734         public static final UnicodeBlock PRIVATE_USE_AREA
1735         = new UnicodeBlock("PRIVATE_USE_AREA",  78);
1736         /**
1737          * Same as public static final int PRIVATE_USE_AREA.
1738          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1739          * and multiple code point ranges had this block.
1740          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1741          * and adds separate blocks for the supplementary PUAs.
1742          * @stable ICU 2.4
1743          */
1744         public static final UnicodeBlock PRIVATE_USE
1745         = PRIVATE_USE_AREA;
1746         /**
1747          * @stable ICU 2.4
1748          */
1749         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1750         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1751         /**
1752          * @stable ICU 2.4
1753          */
1754         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1755         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1756         /**
1757          * @stable ICU 2.4
1758          */
1759         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1760         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1761         /**
1762          * @stable ICU 2.4
1763          */
1764         public static final UnicodeBlock COMBINING_HALF_MARKS
1765         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1766         /**
1767          * @stable ICU 2.4
1768          */
1769         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1770         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1771         /**
1772          * @stable ICU 2.4
1773          */
1774         public static final UnicodeBlock SMALL_FORM_VARIANTS
1775         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1776         /**
1777          * @stable ICU 2.4
1778          */
1779         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1780         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1781         /**
1782          * @stable ICU 2.4
1783          */
1784         public static final UnicodeBlock SPECIALS
1785         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1786         /**
1787          * @stable ICU 2.4
1788          */
1789         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1790         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1791         /**
1792          * @stable ICU 2.4
1793          */
1794         public static final UnicodeBlock OLD_ITALIC
1795         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1796         /**
1797          * @stable ICU 2.4
1798          */
1799         public static final UnicodeBlock GOTHIC
1800         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1801         /**
1802          * @stable ICU 2.4
1803          */
1804         public static final UnicodeBlock DESERET
1805         = new UnicodeBlock("DESERET", DESERET_ID);
1806         /**
1807          * @stable ICU 2.4
1808          */
1809         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1810         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1811         /**
1812          * @stable ICU 2.4
1813          */
1814         public static final UnicodeBlock MUSICAL_SYMBOLS
1815         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1816         /**
1817          * @stable ICU 2.4
1818          */
1819         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1820         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1821                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1822         /**
1823          * @stable ICU 2.4
1824          */
1825         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1826         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1827                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1828         /**
1829          * @stable ICU 2.4
1830          */
1831         public static final UnicodeBlock
1832         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1833         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1834                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1835         /**
1836          * @stable ICU 2.4
1837          */
1838         public static final UnicodeBlock TAGS
1839         = new UnicodeBlock("TAGS", TAGS_ID);
1840 
1841         // New blocks in Unicode 3.2
1842 
1843         /**
1844          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1845          * @stable ICU 2.4
1846          */
1847         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1848         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1849         /**
1850          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1851          * @stable ICU 3.0
1852          */
1853         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1854         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1855         /**
1856          * @stable ICU 2.4
1857          */
1858         public static final UnicodeBlock TAGALOG
1859         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1860         /**
1861          * @stable ICU 2.4
1862          */
1863         public static final UnicodeBlock HANUNOO
1864         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1865         /**
1866          * @stable ICU 2.4
1867          */
1868         public static final UnicodeBlock BUHID
1869         = new UnicodeBlock("BUHID", BUHID_ID);
1870         /**
1871          * @stable ICU 2.4
1872          */
1873         public static final UnicodeBlock TAGBANWA
1874         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1875         /**
1876          * @stable ICU 2.4
1877          */
1878         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1879         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1880                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1881         /**
1882          * @stable ICU 2.4
1883          */
1884         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1885         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1886         /**
1887          * @stable ICU 2.4
1888          */
1889         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1890         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1891         /**
1892          * @stable ICU 2.4
1893          */
1894         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1895         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1896                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1897         /**
1898          * @stable ICU 2.4
1899          */
1900         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1901         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1902                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1903         /**
1904          * @stable ICU 2.4
1905          */
1906         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1907         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1908         /**
1909          * @stable ICU 2.4
1910          */
1911         public static final UnicodeBlock VARIATION_SELECTORS
1912         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1913         /**
1914          * @stable ICU 2.4
1915          */
1916         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1917         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1918                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1919         /**
1920          * @stable ICU 2.4
1921          */
1922         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1923         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1924                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1925 
1926         /**
1927          * @stable ICU 2.6
1928          */
1929         public static final UnicodeBlock LIMBU
1930         = new UnicodeBlock("LIMBU", LIMBU_ID);
1931         /**
1932          * @stable ICU 2.6
1933          */
1934         public static final UnicodeBlock TAI_LE
1935         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1936         /**
1937          * @stable ICU 2.6
1938          */
1939         public static final UnicodeBlock KHMER_SYMBOLS
1940         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1941 
1942         /**
1943          * @stable ICU 2.6
1944          */
1945         public static final UnicodeBlock PHONETIC_EXTENSIONS
1946         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1947 
1948         /**
1949          * @stable ICU 2.6
1950          */
1951         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1952         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1953                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1954         /**
1955          * @stable ICU 2.6
1956          */
1957         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1958         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1959         /**
1960          * @stable ICU 2.6
1961          */
1962         public static final UnicodeBlock LINEAR_B_SYLLABARY
1963         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1964         /**
1965          * @stable ICU 2.6
1966          */
1967         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1968         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1969         /**
1970          * @stable ICU 2.6
1971          */
1972         public static final UnicodeBlock AEGEAN_NUMBERS
1973         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1974         /**
1975          * @stable ICU 2.6
1976          */
1977         public static final UnicodeBlock UGARITIC
1978         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1979         /**
1980          * @stable ICU 2.6
1981          */
1982         public static final UnicodeBlock SHAVIAN
1983         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1984         /**
1985          * @stable ICU 2.6
1986          */
1987         public static final UnicodeBlock OSMANYA
1988         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1989         /**
1990          * @stable ICU 2.6
1991          */
1992         public static final UnicodeBlock CYPRIOT_SYLLABARY
1993         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1994         /**
1995          * @stable ICU 2.6
1996          */
1997         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1998         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1999 
2000         /**
2001          * @stable ICU 2.6
2002          */
2003         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
2004         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
2005 
2006         /* New blocks in Unicode 4.1 */
2007 
2008         /**
2009          * @stable ICU 3.4
2010          */
2011         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2012                 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2013                         ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
2014 
2015         /**
2016          * @stable ICU 3.4
2017          */
2018         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2019                 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
2020 
2021         /**
2022          * @stable ICU 3.4
2023          */
2024         public static final UnicodeBlock ARABIC_SUPPLEMENT =
2025                 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
2026 
2027         /**
2028          * @stable ICU 3.4
2029          */
2030         public static final UnicodeBlock BUGINESE =
2031                 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
2032 
2033         /**
2034          * @stable ICU 3.4
2035          */
2036         public static final UnicodeBlock CJK_STROKES =
2037                 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
2038 
2039         /**
2040          * @stable ICU 3.4
2041          */
2042         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
2043                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
2044                         COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
2045 
2046         /**
2047          * @stable ICU 3.4
2048          */
2049         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
2050 
2051         /**
2052          * @stable ICU 3.4
2053          */
2054         public static final UnicodeBlock ETHIOPIC_EXTENDED =
2055                 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
2056 
2057         /**
2058          * @stable ICU 3.4
2059          */
2060         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
2061                 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
2062 
2063         /**
2064          * @stable ICU 3.4
2065          */
2066         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
2067                 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
2068 
2069         /**
2070          * @stable ICU 3.4
2071          */
2072         public static final UnicodeBlock GLAGOLITIC =
2073                 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
2074 
2075         /**
2076          * @stable ICU 3.4
2077          */
2078         public static final UnicodeBlock KHAROSHTHI =
2079                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
2080 
2081         /**
2082          * @stable ICU 3.4
2083          */
2084         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2085                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
2086 
2087         /**
2088          * @stable ICU 3.4
2089          */
2090         public static final UnicodeBlock NEW_TAI_LUE =
2091                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
2092 
2093         /**
2094          * @stable ICU 3.4
2095          */
2096         public static final UnicodeBlock OLD_PERSIAN =
2097                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
2098 
2099         /**
2100          * @stable ICU 3.4
2101          */
2102         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
2103                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
2104                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
2105 
2106         /**
2107          * @stable ICU 3.4
2108          */
2109         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2110                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
2111 
2112         /**
2113          * @stable ICU 3.4
2114          */
2115         public static final UnicodeBlock SYLOTI_NAGRI =
2116                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
2117 
2118         /**
2119          * @stable ICU 3.4
2120          */
2121         public static final UnicodeBlock TIFINAGH =
2122                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
2123 
2124         /**
2125          * @stable ICU 3.4
2126          */
2127         public static final UnicodeBlock VERTICAL_FORMS =
2128                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
2129 
2130         /**
2131          * @stable ICU 3.6
2132          */
2133         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
2134         /**
2135          * @stable ICU 3.6
2136          */
2137         public static final UnicodeBlock BALINESE =
2138                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
2139         /**
2140          * @stable ICU 3.6
2141          */
2142         public static final UnicodeBlock LATIN_EXTENDED_C =
2143                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
2144         /**
2145          * @stable ICU 3.6
2146          */
2147         public static final UnicodeBlock LATIN_EXTENDED_D =
2148                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
2149         /**
2150          * @stable ICU 3.6
2151          */
2152         public static final UnicodeBlock PHAGS_PA =
2153                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
2154         /**
2155          * @stable ICU 3.6
2156          */
2157         public static final UnicodeBlock PHOENICIAN =
2158                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
2159         /**
2160          * @stable ICU 3.6
2161          */
2162         public static final UnicodeBlock CUNEIFORM =
2163                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
2164         /**
2165          * @stable ICU 3.6
2166          */
2167         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2168                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2169                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
2170         /**
2171          * @stable ICU 3.6
2172          */
2173         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2174                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
2175 
2176         /**
2177          * @stable ICU 4.0
2178          */
2179         public static final UnicodeBlock SUNDANESE =
2180                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
2181 
2182         /**
2183          * @stable ICU 4.0
2184          */
2185         public static final UnicodeBlock LEPCHA =
2186                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
2187 
2188         /**
2189          * @stable ICU 4.0
2190          */
2191         public static final UnicodeBlock OL_CHIKI =
2192                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
2193 
2194         /**
2195          * @stable ICU 4.0
2196          */
2197         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2198                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
2199 
2200         /**
2201          * @stable ICU 4.0
2202          */
2203         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
2204 
2205         /**
2206          * @stable ICU 4.0
2207          */
2208         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2209                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
2210 
2211         /**
2212          * @stable ICU 4.0
2213          */
2214         public static final UnicodeBlock SAURASHTRA =
2215                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
2216 
2217         /**
2218          * @stable ICU 4.0
2219          */
2220         public static final UnicodeBlock KAYAH_LI =
2221                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
2222 
2223         /**
2224          * @stable ICU 4.0
2225          */
2226         public static final UnicodeBlock REJANG =
2227                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
2228 
2229         /**
2230          * @stable ICU 4.0
2231          */
2232         public static final UnicodeBlock CHAM =
2233                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
2234 
2235         /**
2236          * @stable ICU 4.0
2237          */
2238         public static final UnicodeBlock ANCIENT_SYMBOLS =
2239                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
2240 
2241         /**
2242          * @stable ICU 4.0
2243          */
2244         public static final UnicodeBlock PHAISTOS_DISC =
2245                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
2246 
2247         /**
2248          * @stable ICU 4.0
2249          */
2250         public static final UnicodeBlock LYCIAN =
2251                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
2252 
2253         /**
2254          * @stable ICU 4.0
2255          */
2256         public static final UnicodeBlock CARIAN =
2257                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
2258 
2259         /**
2260          * @stable ICU 4.0
2261          */
2262         public static final UnicodeBlock LYDIAN =
2263                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
2264 
2265         /**
2266          * @stable ICU 4.0
2267          */
2268         public static final UnicodeBlock MAHJONG_TILES =
2269                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
2270 
2271         /**
2272          * @stable ICU 4.0
2273          */
2274         public static final UnicodeBlock DOMINO_TILES =
2275                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
2276 
2277         /* New blocks in Unicode 5.2 */
2278 
2279         /** @stable ICU 4.4 */
2280         public static final UnicodeBlock SAMARITAN =
2281                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
2282         /** @stable ICU 4.4 */
2283         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
2284                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
2285                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
2286         /** @stable ICU 4.4 */
2287         public static final UnicodeBlock TAI_THAM =
2288                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
2289         /** @stable ICU 4.4 */
2290         public static final UnicodeBlock VEDIC_EXTENSIONS =
2291                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
2292         /** @stable ICU 4.4 */
2293         public static final UnicodeBlock LISU =
2294                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
2295         /** @stable ICU 4.4 */
2296         public static final UnicodeBlock BAMUM =
2297                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
2298         /** @stable ICU 4.4 */
2299         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2300                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
2301         /** @stable ICU 4.4 */
2302         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2303                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
2304         /** @stable ICU 4.4 */
2305         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2306                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
2307         /** @stable ICU 4.4 */
2308         public static final UnicodeBlock JAVANESE =
2309                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
2310         /** @stable ICU 4.4 */
2311         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2312                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
2313         /** @stable ICU 4.4 */
2314         public static final UnicodeBlock TAI_VIET =
2315                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
2316         /** @stable ICU 4.4 */
2317         public static final UnicodeBlock MEETEI_MAYEK =
2318                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
2319         /** @stable ICU 4.4 */
2320         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2321                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
2322         /** @stable ICU 4.4 */
2323         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2324                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
2325         /** @stable ICU 4.4 */
2326         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2327                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
2328         /** @stable ICU 4.4 */
2329         public static final UnicodeBlock AVESTAN =
2330                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
2331         /** @stable ICU 4.4 */
2332         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2333                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
2334         /** @stable ICU 4.4 */
2335         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2336                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
2337         /** @stable ICU 4.4 */
2338         public static final UnicodeBlock OLD_TURKIC =
2339                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
2340         /** @stable ICU 4.4 */
2341         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2342                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
2343         /** @stable ICU 4.4 */
2344         public static final UnicodeBlock KAITHI =
2345                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
2346         /** @stable ICU 4.4 */
2347         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2348                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
2349         /** @stable ICU 4.4 */
2350         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2351                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2352                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
2353         /** @stable ICU 4.4 */
2354         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2355                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2356                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
2357         /** @stable ICU 4.4 */
2358         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2359                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2360                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
2361 
2362         /* New blocks in Unicode 6.0 */
2363 
2364         /** @stable ICU 4.6 */
2365         public static final UnicodeBlock MANDAIC =
2366                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
2367         /** @stable ICU 4.6 */
2368         public static final UnicodeBlock BATAK =
2369                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
2370         /** @stable ICU 4.6 */
2371         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2372                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
2373         /** @stable ICU 4.6 */
2374         public static final UnicodeBlock BRAHMI =
2375                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
2376         /** @stable ICU 4.6 */
2377         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2378                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
2379         /** @stable ICU 4.6 */
2380         public static final UnicodeBlock KANA_SUPPLEMENT =
2381                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
2382         /** @stable ICU 4.6 */
2383         public static final UnicodeBlock PLAYING_CARDS =
2384                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
2385         /** @stable ICU 4.6 */
2386         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2387                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2388                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
2389         /** @stable ICU 4.6 */
2390         public static final UnicodeBlock EMOTICONS =
2391                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
2392         /** @stable ICU 4.6 */
2393         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2394                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
2395         /** @stable ICU 4.6 */
2396         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2397                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
2398         /** @stable ICU 4.6 */
2399         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2400                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2401                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
2402 
2403         /* New blocks in Unicode 6.1 */
2404 
2405         /** @stable ICU 49 */
2406         public static final UnicodeBlock ARABIC_EXTENDED_A =
2407                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
2408         /** @stable ICU 49 */
2409         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2410                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
2411         /** @stable ICU 49 */
2412         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
2413         /** @stable ICU 49 */
2414         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2415                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
2416         /** @stable ICU 49 */
2417         public static final UnicodeBlock MEROITIC_CURSIVE =
2418                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
2419         /** @stable ICU 49 */
2420         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2421                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
2422         /** @stable ICU 49 */
2423         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
2424         /** @stable ICU 49 */
2425         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
2426         /** @stable ICU 49 */
2427         public static final UnicodeBlock SORA_SOMPENG =
2428                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
2429         /** @stable ICU 49 */
2430         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2431                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
2432         /** @stable ICU 49 */
2433         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
2434 
2435         /* New blocks in Unicode 7.0 */
2436 
2437         /** @stable ICU 54 */
2438         public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
2439         /** @stable ICU 54 */
2440         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2441                 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
2442         /** @stable ICU 54 */
2443         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2444                 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
2445         /** @stable ICU 54 */
2446         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2447                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
2448         /** @stable ICU 54 */
2449         public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
2450         /** @stable ICU 54 */
2451         public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
2452         /** @stable ICU 54 */
2453         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2454                 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
2455         /** @stable ICU 54 */
2456         public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
2457         /** @stable ICU 54 */
2458         public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
2459         /** @stable ICU 54 */
2460         public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
2461         /** @stable ICU 54 */
2462         public static final UnicodeBlock LATIN_EXTENDED_E =
2463                 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
2464         /** @stable ICU 54 */
2465         public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
2466         /** @stable ICU 54 */
2467         public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
2468         /** @stable ICU 54 */
2469         public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
2470         /** @stable ICU 54 */
2471         public static final UnicodeBlock MENDE_KIKAKUI =
2472                 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
2473         /** @stable ICU 54 */
2474         public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
2475         /** @stable ICU 54 */
2476         public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
2477         /** @stable ICU 54 */
2478         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2479                 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
2480         /** @stable ICU 54 */
2481         public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
2482         /** @stable ICU 54 */
2483         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2484                 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
2485         /** @stable ICU 54 */
2486         public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
2487         /** @stable ICU 54 */
2488         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2489                 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
2490         /** @stable ICU 54 */
2491         public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
2492         /** @stable ICU 54 */
2493         public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
2494         /** @stable ICU 54 */
2495         public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
2496         /** @stable ICU 54 */
2497         public static final UnicodeBlock PSALTER_PAHLAVI =
2498                 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
2499         /** @stable ICU 54 */
2500         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2501                 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
2502         /** @stable ICU 54 */
2503         public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
2504         /** @stable ICU 54 */
2505         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2506                 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
2507         /** @stable ICU 54 */
2508         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2509                 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
2510         /** @stable ICU 54 */
2511         public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
2512         /** @stable ICU 54 */
2513         public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
2514 
2515         /* New blocks in Unicode 8.0 */
2516 
2517         /** @stable ICU 56 */
2518         public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
2519         /** @stable ICU 56 */
2520         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2521                 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
2522         /** @stable ICU 56 */
2523         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2524                 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
2525         /** @stable ICU 56 */
2526         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2527                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2528                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
2529         /** @stable ICU 56 */
2530         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2531                 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
2532         /** @stable ICU 56 */
2533         public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
2534         /** @stable ICU 56 */
2535         public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
2536         /** @stable ICU 56 */
2537         public static final UnicodeBlock OLD_HUNGARIAN =
2538                 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
2539         /** @stable ICU 56 */
2540         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2541                 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2542                         SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
2543         /** @stable ICU 56 */
2544         public static final UnicodeBlock SUTTON_SIGNWRITING =
2545                 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
2546 
2547         /* New blocks in Unicode 9.0 */
2548 
2549         /** @stable ICU 58 */
2550         public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
2551         /** @stable ICU 58 */
2552         public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
2553         /** @stable ICU 58 */
2554         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
2555                 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
2556         /** @stable ICU 58 */
2557         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
2558                 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
2559         /** @stable ICU 58 */
2560         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
2561                 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
2562         /** @stable ICU 58 */
2563         public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
2564         /** @stable ICU 58 */
2565         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2566                 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
2567         /** @stable ICU 58 */
2568         public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
2569         /** @stable ICU 58 */
2570         public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
2571         /** @stable ICU 58 */
2572         public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
2573         /** @stable ICU 58 */
2574         public static final UnicodeBlock TANGUT_COMPONENTS =
2575                 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/
2576 
2577         // New blocks in Unicode 10.0
2578 
2579         /** @stable ICU 60 */
2580         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
2581                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/
2582         /** @stable ICU 60 */
2583         public static final UnicodeBlock KANA_EXTENDED_A =
2584                 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/
2585         /** @stable ICU 60 */
2586         public static final UnicodeBlock MASARAM_GONDI =
2587                 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/
2588         /** @stable ICU 60 */
2589         public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/
2590         /** @stable ICU 60 */
2591         public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/
2592         /** @stable ICU 60 */
2593         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
2594                 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/
2595         /** @stable ICU 60 */
2596         public static final UnicodeBlock ZANABAZAR_SQUARE =
2597                 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/
2598 
2599         // New blocks in Unicode 11.0
2600 
2601         /** @stable ICU 62 */
2602         public static final UnicodeBlock CHESS_SYMBOLS =
2603                 new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/
2604         /** @stable ICU 62 */
2605         public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/
2606         /** @stable ICU 62 */
2607         public static final UnicodeBlock GEORGIAN_EXTENDED =
2608                 new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/
2609         /** @stable ICU 62 */
2610         public static final UnicodeBlock GUNJALA_GONDI =
2611                 new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/
2612         /** @stable ICU 62 */
2613         public static final UnicodeBlock HANIFI_ROHINGYA =
2614                 new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/
2615         /** @stable ICU 62 */
2616         public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
2617                 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/
2618         /** @stable ICU 62 */
2619         public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/
2620         /** @stable ICU 62 */
2621         public static final UnicodeBlock MAYAN_NUMERALS =
2622                 new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/
2623         /** @stable ICU 62 */
2624         public static final UnicodeBlock MEDEFAIDRIN =
2625                 new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/
2626         /** @stable ICU 62 */
2627         public static final UnicodeBlock OLD_SOGDIAN =
2628                 new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/
2629         /** @stable ICU 62 */
2630         public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/
2631 
2632         // New blocks in Unicode 12.0
2633 
2634         /** @stable ICU 64 */
2635         public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
2636                 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/
2637         /** @stable ICU 64 */
2638         public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/
2639         /** @stable ICU 64 */
2640         public static final UnicodeBlock NANDINAGARI =
2641                 new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/
2642         /** @stable ICU 64 */
2643         public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
2644                 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/
2645         /** @stable ICU 64 */
2646         public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
2647                 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/
2648         /** @stable ICU 64 */
2649         public static final UnicodeBlock SMALL_KANA_EXTENSION =
2650                 new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/
2651         /** @stable ICU 64 */
2652         public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
2653                 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/
2654         /** @stable ICU 64 */
2655         public static final UnicodeBlock TAMIL_SUPPLEMENT =
2656                 new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/
2657         /** @stable ICU 64 */
2658         public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/
2659 
2660         // New blocks in Unicode 13.0
2661 
2662         /** @stable ICU 66 */
2663         public static final UnicodeBlock CHORASMIAN =
2664                 new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/
2665         /** @stable ICU 66 */
2666         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
2667                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
2668                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/
2669         /** @stable ICU 66 */
2670         public static final UnicodeBlock DIVES_AKURU =
2671                 new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/
2672         /** @stable ICU 66 */
2673         public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
2674                 new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/
2675         /** @stable ICU 66 */
2676         public static final UnicodeBlock LISU_SUPPLEMENT =
2677                 new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/
2678         /** @stable ICU 66 */
2679         public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
2680                 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/
2681         /** @stable ICU 66 */
2682         public static final UnicodeBlock TANGUT_SUPPLEMENT =
2683                 new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/
2684         /** @stable ICU 66 */
2685         public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/
2686 
2687         // New blocks in Unicode 14.0
2688 
2689         /** @stable ICU 70 */
2690         public static final UnicodeBlock ARABIC_EXTENDED_B =
2691                 new UnicodeBlock("ARABIC_EXTENDED_B", ARABIC_EXTENDED_B_ID); /*[0870]*/
2692         /** @stable ICU 70 */
2693         public static final UnicodeBlock CYPRO_MINOAN =
2694                 new UnicodeBlock("CYPRO_MINOAN", CYPRO_MINOAN_ID); /*[12F90]*/
2695         /** @stable ICU 70 */
2696         public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
2697                 new UnicodeBlock("ETHIOPIC_EXTENDED_B", ETHIOPIC_EXTENDED_B_ID); /*[1E7E0]*/
2698         /** @stable ICU 70 */
2699         public static final UnicodeBlock KANA_EXTENDED_B =
2700                 new UnicodeBlock("KANA_EXTENDED_B", KANA_EXTENDED_B_ID); /*[1AFF0]*/
2701         /** @stable ICU 70 */
2702         public static final UnicodeBlock LATIN_EXTENDED_F =
2703                 new UnicodeBlock("LATIN_EXTENDED_F", LATIN_EXTENDED_F_ID); /*[10780]*/
2704         /** @stable ICU 70 */
2705         public static final UnicodeBlock LATIN_EXTENDED_G =
2706                 new UnicodeBlock("LATIN_EXTENDED_G", LATIN_EXTENDED_G_ID); /*[1DF00]*/
2707         /** @stable ICU 70 */
2708         public static final UnicodeBlock OLD_UYGHUR =
2709                 new UnicodeBlock("OLD_UYGHUR", OLD_UYGHUR_ID); /*[10F70]*/
2710         /** @stable ICU 70 */
2711         public static final UnicodeBlock TANGSA = new UnicodeBlock("TANGSA", TANGSA_ID); /*[16A70]*/
2712         /** @stable ICU 70 */
2713         public static final UnicodeBlock TOTO = new UnicodeBlock("TOTO", TOTO_ID); /*[1E290]*/
2714         /** @stable ICU 70 */
2715         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
2716                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
2717                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID); /*[11AB0]*/
2718         /** @stable ICU 70 */
2719         public static final UnicodeBlock VITHKUQI =
2720                 new UnicodeBlock("VITHKUQI", VITHKUQI_ID); /*[10570]*/
2721         /** @stable ICU 70 */
2722         public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
2723                 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
2724                         ZNAMENNY_MUSICAL_NOTATION_ID); /*[1CF00]*/
2725 
2726         // New blocks in Unicode 15.0
2727 
2728         /** @stable ICU 72 */
2729         public static final UnicodeBlock ARABIC_EXTENDED_C =
2730                 new UnicodeBlock("ARABIC_EXTENDED_C", ARABIC_EXTENDED_C_ID); /*[10EC0]*/
2731         /** @stable ICU 72 */
2732         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
2733                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
2734                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID); /*[31350]*/
2735         /** @stable ICU 72 */
2736         public static final UnicodeBlock CYRILLIC_EXTENDED_D =
2737                 new UnicodeBlock("CYRILLIC_EXTENDED_D", CYRILLIC_EXTENDED_D_ID); /*[1E030]*/
2738         /** @stable ICU 72 */
2739         public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
2740                 new UnicodeBlock("DEVANAGARI_EXTENDED_A", DEVANAGARI_EXTENDED_A_ID); /*[11B00]*/
2741         /** @stable ICU 72 */
2742         public static final UnicodeBlock KAKTOVIK_NUMERALS =
2743                 new UnicodeBlock("KAKTOVIK_NUMERALS", KAKTOVIK_NUMERALS_ID); /*[1D2C0]*/
2744         /** @stable ICU 72 */
2745         public static final UnicodeBlock KAWI = new UnicodeBlock("KAWI", KAWI_ID); /*[11F00]*/
2746         /** @stable ICU 72 */
2747         public static final UnicodeBlock NAG_MUNDARI =
2748                 new UnicodeBlock("NAG_MUNDARI", NAG_MUNDARI_ID); /*[1E4D0]*/
2749 
2750         /**
2751          * @stable ICU 2.4
2752          */
2753         public static final UnicodeBlock INVALID_CODE
2754         = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2755 
2756         static {
2757             for (int blockId = 0; blockId < COUNT; ++blockId) {
2758                 if (BLOCKS_[blockId] == null) {
2759                     throw new java.lang.IllegalStateException(
2760                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2761                 }
2762             }
2763         }
2764 
2765         // public methods --------------------------------------------------
2766 
2767         /**
2768          * {@icu} Returns the only instance of the UnicodeBlock with the argument ID.
2769          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
2770          * @param id UnicodeBlock ID
2771          * @return the only instance of the UnicodeBlock with the argument ID
2772          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
2773          *         returned.
2774          * @stable ICU 2.4
2775          */
getInstance(int id)2776         public static UnicodeBlock getInstance(int id)
2777         {
2778             if (id >= 0 && id < BLOCKS_.length) {
2779                 return BLOCKS_[id];
2780             }
2781             return INVALID_CODE;
2782         }
2783 
2784         /**
2785          * Returns the Unicode allocation block that contains the code point,
2786          * or null if the code point is not a member of a defined block.
2787          * @param ch code point to be tested
2788          * @return the Unicode allocation block that contains the code point
2789          * @stable ICU 2.4
2790          */
of(int ch)2791         public static UnicodeBlock of(int ch)
2792         {
2793             if (ch > MAX_VALUE) {
2794                 return INVALID_CODE;
2795             }
2796 
2797             return UnicodeBlock.getInstance(
2798                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2799         }
2800 
2801         /**
2802          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
2803          * Returns the Unicode block with the given name. {@icunote} Unlike
2804          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
2805          * against the official UCD name and the Java block name
2806          * (ignoring case).
2807          * @param blockName the name of the block to match
2808          * @return the UnicodeBlock with that name
2809          * @throws IllegalArgumentException if the blockName could not be matched
2810          * @stable ICU 3.0
2811          */
forName(String blockName)2812         public static final UnicodeBlock forName(String blockName) {
2813             Map<String, UnicodeBlock> m = null;
2814             if (mref != null) {
2815                 m = mref.get();
2816             }
2817             if (m == null) {
2818                 m = new HashMap<>(BLOCKS_.length);
2819                 for (int i = 0; i < BLOCKS_.length; ++i) {
2820                     UnicodeBlock b = BLOCKS_[i];
2821                     String name = trimBlockName(
2822                             getPropertyValueName(UProperty.BLOCK, b.getID(),
2823                                     UProperty.NameChoice.LONG));
2824                     m.put(name, b);
2825                 }
2826                 mref = new SoftReference<>(m);
2827             }
2828             UnicodeBlock b = m.get(trimBlockName(blockName));
2829             if (b == null) {
2830                 throw new IllegalArgumentException();
2831             }
2832             return b;
2833         }
2834         private static SoftReference<Map<String, UnicodeBlock>> mref;
2835 
trimBlockName(String name)2836         private static String trimBlockName(String name) {
2837             String upper = name.toUpperCase(Locale.ENGLISH);
2838             StringBuilder result = new StringBuilder(upper.length());
2839             for (int i = 0; i < upper.length(); i++) {
2840                 char c = upper.charAt(i);
2841                 if (c != ' ' && c != '_' && c != '-') {
2842                     result.append(c);
2843                 }
2844             }
2845             return result.toString();
2846         }
2847 
2848         /**
2849          * {icu} Returns the type ID of this Unicode block
2850          * @return integer type ID of this Unicode block
2851          * @stable ICU 2.4
2852          */
getID()2853         public int getID()
2854         {
2855             return m_id_;
2856         }
2857 
2858         // private data members ---------------------------------------------
2859 
2860         /**
2861          * Identification code for this UnicodeBlock
2862          */
2863         private int m_id_;
2864 
2865         // private constructor ----------------------------------------------
2866 
2867         /**
2868          * UnicodeBlock constructor
2869          * @param name name of this UnicodeBlock
2870          * @param id unique id of this UnicodeBlock
2871          * @exception NullPointerException if name is <code>null</code>
2872          */
UnicodeBlock(String name, int id)2873         private UnicodeBlock(String name, int id)
2874         {
2875             super(name);
2876             m_id_ = id;
2877             if (id >= 0) {
2878                 BLOCKS_[id] = this;
2879             }
2880         }
2881     }
2882 
2883     /**
2884      * East Asian Width constants.
2885      * @see UProperty#EAST_ASIAN_WIDTH
2886      * @see UCharacter#getIntPropertyValue
2887      * @stable ICU 2.4
2888      */
2889     public static interface EastAsianWidth
2890     {
2891         /**
2892          * @stable ICU 2.4
2893          */
2894         public static final int NEUTRAL = 0;
2895         /**
2896          * @stable ICU 2.4
2897          */
2898         public static final int AMBIGUOUS = 1;
2899         /**
2900          * @stable ICU 2.4
2901          */
2902         public static final int HALFWIDTH = 2;
2903         /**
2904          * @stable ICU 2.4
2905          */
2906         public static final int FULLWIDTH = 3;
2907         /**
2908          * @stable ICU 2.4
2909          */
2910         public static final int NARROW = 4;
2911         /**
2912          * @stable ICU 2.4
2913          */
2914         public static final int WIDE = 5;
2915         /**
2916          * One more than the highest normal EastAsianWidth value.
2917          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
2918          *
2919          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2920          */
2921         @Deprecated
2922         public static final int COUNT = 6;
2923     }
2924 
2925     /**
2926      * Decomposition Type constants: the int values of the {@link UProperty#DECOMPOSITION_TYPE} property.
2927      * @see UProperty#DECOMPOSITION_TYPE
2928      * @stable ICU 2.4
2929      */
2930     public static interface DecompositionType
2931     {
2932         /**
2933          * @stable ICU 2.4
2934          */
2935         public static final int NONE = 0;
2936         /**
2937          * @stable ICU 2.4
2938          */
2939         public static final int CANONICAL = 1;
2940         /**
2941          * @stable ICU 2.4
2942          */
2943         public static final int COMPAT = 2;
2944         /**
2945          * @stable ICU 2.4
2946          */
2947         public static final int CIRCLE = 3;
2948         /**
2949          * @stable ICU 2.4
2950          */
2951         public static final int FINAL = 4;
2952         /**
2953          * @stable ICU 2.4
2954          */
2955         public static final int FONT = 5;
2956         /**
2957          * @stable ICU 2.4
2958          */
2959         public static final int FRACTION = 6;
2960         /**
2961          * @stable ICU 2.4
2962          */
2963         public static final int INITIAL = 7;
2964         /**
2965          * @stable ICU 2.4
2966          */
2967         public static final int ISOLATED = 8;
2968         /**
2969          * @stable ICU 2.4
2970          */
2971         public static final int MEDIAL = 9;
2972         /**
2973          * @stable ICU 2.4
2974          */
2975         public static final int NARROW = 10;
2976         /**
2977          * @stable ICU 2.4
2978          */
2979         public static final int NOBREAK = 11;
2980         /**
2981          * @stable ICU 2.4
2982          */
2983         public static final int SMALL = 12;
2984         /**
2985          * @stable ICU 2.4
2986          */
2987         public static final int SQUARE = 13;
2988         /**
2989          * @stable ICU 2.4
2990          */
2991         public static final int SUB = 14;
2992         /**
2993          * @stable ICU 2.4
2994          */
2995         public static final int SUPER = 15;
2996         /**
2997          * @stable ICU 2.4
2998          */
2999         public static final int VERTICAL = 16;
3000         /**
3001          * @stable ICU 2.4
3002          */
3003         public static final int WIDE = 17;
3004         /**
3005          * One more than the highest normal DecompositionType value.
3006          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
3007          *
3008          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3009          */
3010         @Deprecated
3011         public static final int COUNT = 18;
3012     }
3013 
3014     /**
3015      * Joining Type constants: the int values of the {@link UProperty#JOINING_TYPE} property.
3016      * @see UProperty#JOINING_TYPE
3017      * @stable ICU 2.4
3018      */
3019     public static interface JoiningType
3020     {
3021         /**
3022          * @stable ICU 2.4
3023          */
3024         public static final int NON_JOINING = 0;
3025         /**
3026          * @stable ICU 2.4
3027          */
3028         public static final int JOIN_CAUSING = 1;
3029         /**
3030          * @stable ICU 2.4
3031          */
3032         public static final int DUAL_JOINING = 2;
3033         /**
3034          * @stable ICU 2.4
3035          */
3036         public static final int LEFT_JOINING = 3;
3037         /**
3038          * @stable ICU 2.4
3039          */
3040         public static final int RIGHT_JOINING = 4;
3041         /**
3042          * @stable ICU 2.4
3043          */
3044         public static final int TRANSPARENT = 5;
3045         /**
3046          * One more than the highest normal JoiningType value.
3047          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
3048          *
3049          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3050          */
3051         @Deprecated
3052         public static final int COUNT = 6;
3053     }
3054 
3055     /**
3056      * Joining Group constants: the int values of the {@link UProperty#JOINING_GROUP} property.
3057      * @see UProperty#JOINING_GROUP
3058      * @stable ICU 2.4
3059      */
3060     public static interface JoiningGroup
3061     {
3062         /**
3063          * @stable ICU 2.4
3064          */
3065         public static final int NO_JOINING_GROUP = 0;
3066         /**
3067          * @stable ICU 2.4
3068          */
3069         public static final int AIN = 1;
3070         /**
3071          * @stable ICU 2.4
3072          */
3073         public static final int ALAPH = 2;
3074         /**
3075          * @stable ICU 2.4
3076          */
3077         public static final int ALEF = 3;
3078         /**
3079          * @stable ICU 2.4
3080          */
3081         public static final int BEH = 4;
3082         /**
3083          * @stable ICU 2.4
3084          */
3085         public static final int BETH = 5;
3086         /**
3087          * @stable ICU 2.4
3088          */
3089         public static final int DAL = 6;
3090         /**
3091          * @stable ICU 2.4
3092          */
3093         public static final int DALATH_RISH = 7;
3094         /**
3095          * @stable ICU 2.4
3096          */
3097         public static final int E = 8;
3098         /**
3099          * @stable ICU 2.4
3100          */
3101         public static final int FEH = 9;
3102         /**
3103          * @stable ICU 2.4
3104          */
3105         public static final int FINAL_SEMKATH = 10;
3106         /**
3107          * @stable ICU 2.4
3108          */
3109         public static final int GAF = 11;
3110         /**
3111          * @stable ICU 2.4
3112          */
3113         public static final int GAMAL = 12;
3114         /**
3115          * @stable ICU 2.4
3116          */
3117         public static final int HAH = 13;
3118         /** @stable ICU 4.6 */
3119         public static final int TEH_MARBUTA_GOAL = 14;
3120         /** Alias: has the same value as {@link #TEH_MARBUTA_GOAL}.
3121          * @stable ICU 2.4
3122          */
3123         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
3124         /**
3125          * @stable ICU 2.4
3126          */
3127         public static final int HE = 15;
3128         /**
3129          * @stable ICU 2.4
3130          */
3131         public static final int HEH = 16;
3132         /**
3133          * @stable ICU 2.4
3134          */
3135         public static final int HEH_GOAL = 17;
3136         /**
3137          * @stable ICU 2.4
3138          */
3139         public static final int HETH = 18;
3140         /**
3141          * @stable ICU 2.4
3142          */
3143         public static final int KAF = 19;
3144         /**
3145          * @stable ICU 2.4
3146          */
3147         public static final int KAPH = 20;
3148         /**
3149          * @stable ICU 2.4
3150          */
3151         public static final int KNOTTED_HEH = 21;
3152         /**
3153          * @stable ICU 2.4
3154          */
3155         public static final int LAM = 22;
3156         /**
3157          * @stable ICU 2.4
3158          */
3159         public static final int LAMADH = 23;
3160         /**
3161          * @stable ICU 2.4
3162          */
3163         public static final int MEEM = 24;
3164         /**
3165          * @stable ICU 2.4
3166          */
3167         public static final int MIM = 25;
3168         /**
3169          * @stable ICU 2.4
3170          */
3171         public static final int NOON = 26;
3172         /**
3173          * @stable ICU 2.4
3174          */
3175         public static final int NUN = 27;
3176         /**
3177          * @stable ICU 2.4
3178          */
3179         public static final int PE = 28;
3180         /**
3181          * @stable ICU 2.4
3182          */
3183         public static final int QAF = 29;
3184         /**
3185          * @stable ICU 2.4
3186          */
3187         public static final int QAPH = 30;
3188         /**
3189          * @stable ICU 2.4
3190          */
3191         public static final int REH = 31;
3192         /**
3193          * @stable ICU 2.4
3194          */
3195         public static final int REVERSED_PE = 32;
3196         /**
3197          * @stable ICU 2.4
3198          */
3199         public static final int SAD = 33;
3200         /**
3201          * @stable ICU 2.4
3202          */
3203         public static final int SADHE = 34;
3204         /**
3205          * @stable ICU 2.4
3206          */
3207         public static final int SEEN = 35;
3208         /**
3209          * @stable ICU 2.4
3210          */
3211         public static final int SEMKATH = 36;
3212         /**
3213          * @stable ICU 2.4
3214          */
3215         public static final int SHIN = 37;
3216         /**
3217          * @stable ICU 2.4
3218          */
3219         public static final int SWASH_KAF = 38;
3220         /**
3221          * @stable ICU 2.4
3222          */
3223         public static final int SYRIAC_WAW = 39;
3224         /**
3225          * @stable ICU 2.4
3226          */
3227         public static final int TAH = 40;
3228         /**
3229          * @stable ICU 2.4
3230          */
3231         public static final int TAW = 41;
3232         /**
3233          * @stable ICU 2.4
3234          */
3235         public static final int TEH_MARBUTA = 42;
3236         /**
3237          * @stable ICU 2.4
3238          */
3239         public static final int TETH = 43;
3240         /**
3241          * @stable ICU 2.4
3242          */
3243         public static final int WAW = 44;
3244         /**
3245          * @stable ICU 2.4
3246          */
3247         public static final int YEH = 45;
3248         /**
3249          * @stable ICU 2.4
3250          */
3251         public static final int YEH_BARREE = 46;
3252         /**
3253          * @stable ICU 2.4
3254          */
3255         public static final int YEH_WITH_TAIL = 47;
3256         /**
3257          * @stable ICU 2.4
3258          */
3259         public static final int YUDH = 48;
3260         /**
3261          * @stable ICU 2.4
3262          */
3263         public static final int YUDH_HE = 49;
3264         /**
3265          * @stable ICU 2.4
3266          */
3267         public static final int ZAIN = 50;
3268         /**
3269          * @stable ICU 2.6
3270          */
3271         public static final int FE = 51;
3272         /**
3273          * @stable ICU 2.6
3274          */
3275         public static final int KHAPH = 52;
3276         /**
3277          * @stable ICU 2.6
3278          */
3279         public static final int ZHAIN = 53;
3280         /**
3281          * @stable ICU 4.0
3282          */
3283         public static final int BURUSHASKI_YEH_BARREE = 54;
3284         /** @stable ICU 4.4 */
3285         public static final int FARSI_YEH = 55;
3286         /** @stable ICU 4.4 */
3287         public static final int NYA = 56;
3288         /** @stable ICU 49 */
3289         public static final int ROHINGYA_YEH = 57;
3290 
3291         /** @stable ICU 54 */
3292         public static final int MANICHAEAN_ALEPH = 58;
3293         /** @stable ICU 54 */
3294         public static final int MANICHAEAN_AYIN = 59;
3295         /** @stable ICU 54 */
3296         public static final int MANICHAEAN_BETH = 60;
3297         /** @stable ICU 54 */
3298         public static final int MANICHAEAN_DALETH = 61;
3299         /** @stable ICU 54 */
3300         public static final int MANICHAEAN_DHAMEDH = 62;
3301         /** @stable ICU 54 */
3302         public static final int MANICHAEAN_FIVE = 63;
3303         /** @stable ICU 54 */
3304         public static final int MANICHAEAN_GIMEL = 64;
3305         /** @stable ICU 54 */
3306         public static final int MANICHAEAN_HETH = 65;
3307         /** @stable ICU 54 */
3308         public static final int MANICHAEAN_HUNDRED = 66;
3309         /** @stable ICU 54 */
3310         public static final int MANICHAEAN_KAPH = 67;
3311         /** @stable ICU 54 */
3312         public static final int MANICHAEAN_LAMEDH = 68;
3313         /** @stable ICU 54 */
3314         public static final int MANICHAEAN_MEM = 69;
3315         /** @stable ICU 54 */
3316         public static final int MANICHAEAN_NUN = 70;
3317         /** @stable ICU 54 */
3318         public static final int MANICHAEAN_ONE = 71;
3319         /** @stable ICU 54 */
3320         public static final int MANICHAEAN_PE = 72;
3321         /** @stable ICU 54 */
3322         public static final int MANICHAEAN_QOPH = 73;
3323         /** @stable ICU 54 */
3324         public static final int MANICHAEAN_RESH = 74;
3325         /** @stable ICU 54 */
3326         public static final int MANICHAEAN_SADHE = 75;
3327         /** @stable ICU 54 */
3328         public static final int MANICHAEAN_SAMEKH = 76;
3329         /** @stable ICU 54 */
3330         public static final int MANICHAEAN_TAW = 77;
3331         /** @stable ICU 54 */
3332         public static final int MANICHAEAN_TEN = 78;
3333         /** @stable ICU 54 */
3334         public static final int MANICHAEAN_TETH = 79;
3335         /** @stable ICU 54 */
3336         public static final int MANICHAEAN_THAMEDH = 80;
3337         /** @stable ICU 54 */
3338         public static final int MANICHAEAN_TWENTY = 81;
3339         /** @stable ICU 54 */
3340         public static final int MANICHAEAN_WAW = 82;
3341         /** @stable ICU 54 */
3342         public static final int MANICHAEAN_YODH = 83;
3343         /** @stable ICU 54 */
3344         public static final int MANICHAEAN_ZAYIN = 84;
3345         /** @stable ICU 54 */
3346         public static final int STRAIGHT_WAW = 85;
3347 
3348         /** @stable ICU 58 */
3349         public static final int AFRICAN_FEH = 86;
3350         /** @stable ICU 58 */
3351         public static final int AFRICAN_NOON = 87;
3352         /** @stable ICU 58 */
3353         public static final int AFRICAN_QAF = 88;
3354 
3355         /** @stable ICU 60 */
3356         public static final int MALAYALAM_BHA = 89;
3357         /** @stable ICU 60 */
3358         public static final int MALAYALAM_JA = 90;
3359         /** @stable ICU 60 */
3360         public static final int MALAYALAM_LLA = 91;
3361         /** @stable ICU 60 */
3362         public static final int MALAYALAM_LLLA = 92;
3363         /** @stable ICU 60 */
3364         public static final int MALAYALAM_NGA = 93;
3365         /** @stable ICU 60 */
3366         public static final int MALAYALAM_NNA = 94;
3367         /** @stable ICU 60 */
3368         public static final int MALAYALAM_NNNA = 95;
3369         /** @stable ICU 60 */
3370         public static final int MALAYALAM_NYA = 96;
3371         /** @stable ICU 60 */
3372         public static final int MALAYALAM_RA = 97;
3373         /** @stable ICU 60 */
3374         public static final int MALAYALAM_SSA = 98;
3375         /** @stable ICU 60 */
3376         public static final int MALAYALAM_TTA = 99;
3377 
3378         /** @stable ICU 62 */
3379         public static final int HANIFI_ROHINGYA_KINNA_YA = 100;
3380         /** @stable ICU 62 */
3381         public static final int HANIFI_ROHINGYA_PA = 101;
3382 
3383         /** @stable ICU 70 */
3384         public static final int THIN_YEH = 102;
3385         /** @stable ICU 70 */
3386         public static final int VERTICAL_TAIL = 103;
3387 
3388         /**
3389          * One more than the highest normal JoiningGroup value.
3390          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP).
3391          *
3392          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3393          */
3394         @Deprecated
3395         public static final int COUNT = 104;
3396     }
3397 
3398     /**
3399      * Grapheme Cluster Break constants: the int values of the {@link UProperty#GRAPHEME_CLUSTER_BREAK} property.
3400      * @see UProperty#GRAPHEME_CLUSTER_BREAK
3401      * @stable ICU 3.4
3402      */
3403     public static interface GraphemeClusterBreak {
3404         /**
3405          * @stable ICU 3.4
3406          */
3407         public static final int OTHER = 0;
3408         /**
3409          * @stable ICU 3.4
3410          */
3411         public static final int CONTROL = 1;
3412         /**
3413          * @stable ICU 3.4
3414          */
3415         public static final int CR = 2;
3416         /**
3417          * @stable ICU 3.4
3418          */
3419         public static final int EXTEND = 3;
3420         /**
3421          * @stable ICU 3.4
3422          */
3423         public static final int L = 4;
3424         /**
3425          * @stable ICU 3.4
3426          */
3427         public static final int LF = 5;
3428         /**
3429          * @stable ICU 3.4
3430          */
3431         public static final int LV = 6;
3432         /**
3433          * @stable ICU 3.4
3434          */
3435         public static final int LVT = 7;
3436         /**
3437          * @stable ICU 3.4
3438          */
3439         public static final int T = 8;
3440         /**
3441          * @stable ICU 3.4
3442          */
3443         public static final int V = 9;
3444         /**
3445          * @stable ICU 4.0
3446          */
3447         public static final int SPACING_MARK = 10;
3448         /**
3449          * @stable ICU 4.0
3450          */
3451         public static final int PREPEND = 11;
3452         /** @stable ICU 50 */
3453         public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3454         /** @stable ICU 58 */
3455         public static final int E_BASE = 13;          /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3456         /** @stable ICU 58 */
3457         public static final int E_BASE_GAZ = 14;      /*[EBG]*/
3458         /** @stable ICU 58 */
3459         public static final int E_MODIFIER = 15;      /*[EM]*/
3460         /** @stable ICU 58 */
3461         public static final int GLUE_AFTER_ZWJ = 16;  /*[GAZ]*/
3462         /** @stable ICU 58 */
3463         public static final int ZWJ = 17;             /*[ZWJ]*/
3464 
3465         /**
3466          * One more than the highest normal GraphemeClusterBreak value.
3467          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
3468          *
3469          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3470          */
3471         @Deprecated
3472         public static final int COUNT = 18;
3473     }
3474 
3475     /**
3476      * Word Break constants: the int values of the {@link UProperty#WORD_BREAK} property.
3477      * @see UProperty#WORD_BREAK
3478      * @stable ICU 3.4
3479      */
3480     public static interface WordBreak {
3481         /**
3482          * @stable ICU 3.8
3483          */
3484         public static final int OTHER = 0;
3485         /**
3486          * @stable ICU 3.8
3487          */
3488         public static final int ALETTER = 1;
3489         /**
3490          * @stable ICU 3.8
3491          */
3492         public static final int FORMAT = 2;
3493         /**
3494          * @stable ICU 3.8
3495          */
3496         public static final int KATAKANA = 3;
3497         /**
3498          * @stable ICU 3.8
3499          */
3500         public static final int MIDLETTER = 4;
3501         /**
3502          * @stable ICU 3.8
3503          */
3504         public static final int MIDNUM = 5;
3505         /**
3506          * @stable ICU 3.8
3507          */
3508         public static final int NUMERIC = 6;
3509         /**
3510          * @stable ICU 3.8
3511          */
3512         public static final int EXTENDNUMLET = 7;
3513         /**
3514          * @stable ICU 4.0
3515          */
3516         public static final int CR = 8;
3517         /**
3518          * @stable ICU 4.0
3519          */
3520         public static final int EXTEND = 9;
3521         /**
3522          * @stable ICU 4.0
3523          */
3524         public static final int LF = 10;
3525         /**
3526          * @stable ICU 4.0
3527          */
3528         public static final int MIDNUMLET = 11;
3529         /**
3530          * @stable ICU 4.0
3531          */
3532         public static final int NEWLINE = 12;
3533         /** @stable ICU 50 */
3534         public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3535         /** @stable ICU 52 */
3536         public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
3537         /** @stable ICU 52 */
3538         public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
3539         /** @stable ICU 52 */
3540         public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
3541         /** @stable ICU 58 */
3542         public static final int E_BASE = 17;           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3543         /** @stable ICU 58 */
3544         public static final int E_BASE_GAZ = 18;       /*[EBG]*/
3545         /** @stable ICU 58 */
3546         public static final int E_MODIFIER = 19;       /*[EM]*/
3547         /** @stable ICU 58 */
3548         public static final int GLUE_AFTER_ZWJ = 20;   /*[GAZ]*/
3549         /** @stable ICU 58 */
3550         public static final int ZWJ = 21;              /*[ZWJ]*/
3551         /** @stable ICU 62 */
3552         public static final int WSEGSPACE = 22;        /*[WSEGSPACE]*/
3553         /**
3554          * One more than the highest normal WordBreak value.
3555          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
3556          *
3557          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3558          */
3559         @Deprecated
3560         public static final int COUNT = 23;
3561     }
3562 
3563     /**
3564      * Sentence Break constants: the int values of the {@link UProperty#SENTENCE_BREAK} property.
3565      * @see UProperty#SENTENCE_BREAK
3566      * @stable ICU 3.4
3567      */
3568     public static interface SentenceBreak {
3569         /**
3570          * @stable ICU 3.8
3571          */
3572         public static final int OTHER = 0;
3573         /**
3574          * @stable ICU 3.8
3575          */
3576         public static final int ATERM = 1;
3577         /**
3578          * @stable ICU 3.8
3579          */
3580         public static final int CLOSE = 2;
3581         /**
3582          * @stable ICU 3.8
3583          */
3584         public static final int FORMAT = 3;
3585         /**
3586          * @stable ICU 3.8
3587          */
3588         public static final int LOWER = 4;
3589         /**
3590          * @stable ICU 3.8
3591          */
3592         public static final int NUMERIC = 5;
3593         /**
3594          * @stable ICU 3.8
3595          */
3596         public static final int OLETTER = 6;
3597         /**
3598          * @stable ICU 3.8
3599          */
3600         public static final int SEP = 7;
3601         /**
3602          * @stable ICU 3.8
3603          */
3604         public static final int SP = 8;
3605         /**
3606          * @stable ICU 3.8
3607          */
3608         public static final int STERM = 9;
3609         /**
3610          * @stable ICU 3.8
3611          */
3612         public static final int UPPER = 10;
3613         /**
3614          * @stable ICU 4.0
3615          */
3616         public static final int CR = 11;
3617         /**
3618          * @stable ICU 4.0
3619          */
3620         public static final int EXTEND = 12;
3621         /**
3622          * @stable ICU 4.0
3623          */
3624         public static final int LF = 13;
3625         /**
3626          * @stable ICU 4.0
3627          */
3628         public static final int SCONTINUE = 14;
3629         /**
3630          * One more than the highest normal SentenceBreak value.
3631          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
3632          *
3633          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3634          */
3635         @Deprecated
3636         public static final int COUNT = 15;
3637     }
3638 
3639     /**
3640      * Line Break constants: the int values of the {@link UProperty#LINE_BREAK} property.
3641      * @see UProperty#LINE_BREAK
3642      * @stable ICU 2.4
3643      */
3644     public static interface LineBreak
3645     {
3646         /**
3647          * @stable ICU 2.4
3648          */
3649         public static final int UNKNOWN = 0;
3650         /**
3651          * @stable ICU 2.4
3652          */
3653         public static final int AMBIGUOUS = 1;
3654         /**
3655          * @stable ICU 2.4
3656          */
3657         public static final int ALPHABETIC = 2;
3658         /**
3659          * @stable ICU 2.4
3660          */
3661         public static final int BREAK_BOTH = 3;
3662         /**
3663          * @stable ICU 2.4
3664          */
3665         public static final int BREAK_AFTER = 4;
3666         /**
3667          * @stable ICU 2.4
3668          */
3669         public static final int BREAK_BEFORE = 5;
3670         /**
3671          * @stable ICU 2.4
3672          */
3673         public static final int MANDATORY_BREAK = 6;
3674         /**
3675          * @stable ICU 2.4
3676          */
3677         public static final int CONTINGENT_BREAK = 7;
3678         /**
3679          * @stable ICU 2.4
3680          */
3681         public static final int CLOSE_PUNCTUATION = 8;
3682         /**
3683          * @stable ICU 2.4
3684          */
3685         public static final int COMBINING_MARK = 9;
3686         /**
3687          * @stable ICU 2.4
3688          */
3689         public static final int CARRIAGE_RETURN = 10;
3690         /**
3691          * @stable ICU 2.4
3692          */
3693         public static final int EXCLAMATION = 11;
3694         /**
3695          * @stable ICU 2.4
3696          */
3697         public static final int GLUE = 12;
3698         /**
3699          * @stable ICU 2.4
3700          */
3701         public static final int HYPHEN = 13;
3702         /**
3703          * @stable ICU 2.4
3704          */
3705         public static final int IDEOGRAPHIC = 14;
3706         /** Misspelled original name; has the same value as {@link #INSEPARABLE}.
3707          * @see #INSEPARABLE
3708          * @stable ICU 2.4
3709          */
3710         public static final int INSEPERABLE = 15;
3711         /**
3712          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
3713          * @stable ICU 3.0
3714          */
3715         public static final int INSEPARABLE = 15;
3716         /**
3717          * @stable ICU 2.4
3718          */
3719         public static final int INFIX_NUMERIC = 16;
3720         /**
3721          * @stable ICU 2.4
3722          */
3723         public static final int LINE_FEED = 17;
3724         /**
3725          * @stable ICU 2.4
3726          */
3727         public static final int NONSTARTER = 18;
3728         /**
3729          * @stable ICU 2.4
3730          */
3731         public static final int NUMERIC = 19;
3732         /**
3733          * @stable ICU 2.4
3734          */
3735         public static final int OPEN_PUNCTUATION = 20;
3736         /**
3737          * @stable ICU 2.4
3738          */
3739         public static final int POSTFIX_NUMERIC = 21;
3740         /**
3741          * @stable ICU 2.4
3742          */
3743         public static final int PREFIX_NUMERIC = 22;
3744         /**
3745          * @stable ICU 2.4
3746          */
3747         public static final int QUOTATION = 23;
3748         /**
3749          * @stable ICU 2.4
3750          */
3751         public static final int COMPLEX_CONTEXT = 24;
3752         /**
3753          * @stable ICU 2.4
3754          */
3755         public static final int SURROGATE = 25;
3756         /**
3757          * @stable ICU 2.4
3758          */
3759         public static final int SPACE = 26;
3760         /**
3761          * @stable ICU 2.4
3762          */
3763         public static final int BREAK_SYMBOLS = 27;
3764         /**
3765          * @stable ICU 2.4
3766          */
3767         public static final int ZWSPACE = 28;
3768         /**
3769          * @stable ICU 2.6
3770          */
3771         public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
3772         /**
3773          * @stable ICU 2.6
3774          */
3775         public static final int WORD_JOINER = 30;      /*[WJ]*/
3776         /**
3777          * @stable ICU 3.4
3778          */
3779         public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
3780         /**
3781          * @stable ICU 3.4
3782          */
3783         public static final int H3 = 32;
3784         /**
3785          * @stable ICU 3.4
3786          */
3787         public static final int JL = 33;
3788         /**
3789          * @stable ICU 3.4
3790          */
3791         public static final int JT = 34;
3792         /**
3793          * @stable ICU 3.4
3794          */
3795         public static final int JV = 35;
3796         /** @stable ICU 4.4 */
3797         public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
3798         /** @stable ICU 49 */
3799         public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
3800         /** @stable ICU 49 */
3801         public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
3802         /** @stable ICU 50 */
3803         public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3804         /** @stable ICU 58 */
3805         public static final int E_BASE = 40;  /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3806         /** @stable ICU 58 */
3807         public static final int E_MODIFIER = 41;  /*[EM]*/
3808         /** @stable ICU 58 */
3809         public static final int ZWJ = 42;  /*[ZWJ]*/
3810         /**
3811          * One more than the highest normal LineBreak value.
3812          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
3813          *
3814          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3815          */
3816         @Deprecated
3817         public static final int COUNT = 43;
3818     }
3819 
3820     /**
3821      * Numeric Type constants: the int values of the {@link UProperty#NUMERIC_TYPE} property.
3822      * @see UProperty#NUMERIC_TYPE
3823      * @stable ICU 2.4
3824      */
3825     public static interface NumericType
3826     {
3827         /**
3828          * @stable ICU 2.4
3829          */
3830         public static final int NONE = 0;
3831         /**
3832          * @stable ICU 2.4
3833          */
3834         public static final int DECIMAL = 1;
3835         /**
3836          * @stable ICU 2.4
3837          */
3838         public static final int DIGIT = 2;
3839         /**
3840          * @stable ICU 2.4
3841          */
3842         public static final int NUMERIC = 3;
3843         /**
3844          * One more than the highest normal NumericType value.
3845          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE).
3846          *
3847          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3848          */
3849         @Deprecated
3850         public static final int COUNT = 4;
3851     }
3852 
3853     /**
3854      * Hangul Syllable Type constants: the int values of the {@link UProperty#HANGUL_SYLLABLE_TYPE} property.
3855      *
3856      * @see UProperty#HANGUL_SYLLABLE_TYPE
3857      * @stable ICU 2.6
3858      */
3859     public static interface HangulSyllableType
3860     {
3861         /**
3862          * @stable ICU 2.6
3863          */
3864         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
3865         /**
3866          * @stable ICU 2.6
3867          */
3868         public static final int LEADING_JAMO        = 1;   /*[L]*/
3869         /**
3870          * @stable ICU 2.6
3871          */
3872         public static final int VOWEL_JAMO          = 2;   /*[V]*/
3873         /**
3874          * @stable ICU 2.6
3875          */
3876         public static final int TRAILING_JAMO       = 3;   /*[T]*/
3877         /**
3878          * @stable ICU 2.6
3879          */
3880         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
3881         /**
3882          * @stable ICU 2.6
3883          */
3884         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
3885         /**
3886          * One more than the highest normal HangulSyllableType value.
3887          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
3888          *
3889          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3890          */
3891         @Deprecated
3892         public static final int COUNT               = 6;
3893     }
3894 
3895     /**
3896      * Bidi Paired Bracket Type constants: the int values of the {@link UProperty#BIDI_PAIRED_BRACKET_TYPE} property.
3897      *
3898      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
3899      * @stable ICU 52
3900      */
3901     public static interface BidiPairedBracketType {
3902         /**
3903          * Not a paired bracket.
3904          * @stable ICU 52
3905          */
3906         public static final int NONE = 0;
3907         /**
3908          * Open paired bracket.
3909          * @stable ICU 52
3910          */
3911         public static final int OPEN = 1;
3912         /**
3913          * Close paired bracket.
3914          * @stable ICU 52
3915          */
3916         public static final int CLOSE = 2;
3917         /**
3918          * One more than the highest normal BidiPairedBracketType value.
3919          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
3920          *
3921          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3922          */
3923         @Deprecated
3924         public static final int COUNT = 3;
3925     }
3926 
3927     /**
3928      * Indic Positional Category constants.
3929      *
3930      * @see UProperty#INDIC_POSITIONAL_CATEGORY
3931      * @stable ICU 63
3932      */
3933     public static interface IndicPositionalCategory {
3934         /** @stable ICU 63 */
3935         public static final int NA = 0;
3936         /** @stable ICU 63 */
3937         public static final int BOTTOM = 1;
3938         /** @stable ICU 63 */
3939         public static final int BOTTOM_AND_LEFT = 2;
3940         /** @stable ICU 63 */
3941         public static final int BOTTOM_AND_RIGHT = 3;
3942         /** @stable ICU 63 */
3943         public static final int LEFT = 4;
3944         /** @stable ICU 63 */
3945         public static final int LEFT_AND_RIGHT = 5;
3946         /** @stable ICU 63 */
3947         public static final int OVERSTRUCK = 6;
3948         /** @stable ICU 63 */
3949         public static final int RIGHT = 7;
3950         /** @stable ICU 63 */
3951         public static final int TOP = 8;
3952         /** @stable ICU 63 */
3953         public static final int TOP_AND_BOTTOM = 9;
3954         /** @stable ICU 63 */
3955         public static final int TOP_AND_BOTTOM_AND_RIGHT = 10;
3956         /** @stable ICU 63 */
3957         public static final int TOP_AND_LEFT = 11;
3958         /** @stable ICU 63 */
3959         public static final int TOP_AND_LEFT_AND_RIGHT = 12;
3960         /** @stable ICU 63 */
3961         public static final int TOP_AND_RIGHT = 13;
3962         /** @stable ICU 63 */
3963         public static final int VISUAL_ORDER_LEFT = 14;
3964         /** @stable ICU 66 */
3965         public static final int TOP_AND_BOTTOM_AND_LEFT = 15;
3966     }
3967 
3968     /**
3969      * Indic Syllabic Category constants.
3970      *
3971      * @see UProperty#INDIC_SYLLABIC_CATEGORY
3972      * @stable ICU 63
3973      */
3974     public static interface IndicSyllabicCategory {
3975         /** @stable ICU 63 */
3976         public static final int OTHER = 0;
3977         /** @stable ICU 63 */
3978         public static final int AVAGRAHA = 1;
3979         /** @stable ICU 63 */
3980         public static final int BINDU = 2;
3981         /** @stable ICU 63 */
3982         public static final int BRAHMI_JOINING_NUMBER = 3;
3983         /** @stable ICU 63 */
3984         public static final int CANTILLATION_MARK = 4;
3985         /** @stable ICU 63 */
3986         public static final int CONSONANT = 5;
3987         /** @stable ICU 63 */
3988         public static final int CONSONANT_DEAD = 6;
3989         /** @stable ICU 63 */
3990         public static final int CONSONANT_FINAL = 7;
3991         /** @stable ICU 63 */
3992         public static final int CONSONANT_HEAD_LETTER = 8;
3993         /** @stable ICU 63 */
3994         public static final int CONSONANT_INITIAL_POSTFIXED = 9;
3995         /** @stable ICU 63 */
3996         public static final int CONSONANT_KILLER = 10;
3997         /** @stable ICU 63 */
3998         public static final int CONSONANT_MEDIAL = 11;
3999         /** @stable ICU 63 */
4000         public static final int CONSONANT_PLACEHOLDER = 12;
4001         /** @stable ICU 63 */
4002         public static final int CONSONANT_PRECEDING_REPHA = 13;
4003         /** @stable ICU 63 */
4004         public static final int CONSONANT_PREFIXED = 14;
4005         /** @stable ICU 63 */
4006         public static final int CONSONANT_SUBJOINED = 15;
4007         /** @stable ICU 63 */
4008         public static final int CONSONANT_SUCCEEDING_REPHA = 16;
4009         /** @stable ICU 63 */
4010         public static final int CONSONANT_WITH_STACKER = 17;
4011         /** @stable ICU 63 */
4012         public static final int GEMINATION_MARK = 18;
4013         /** @stable ICU 63 */
4014         public static final int INVISIBLE_STACKER = 19;
4015         /** @stable ICU 63 */
4016         public static final int JOINER = 20;
4017         /** @stable ICU 63 */
4018         public static final int MODIFYING_LETTER = 21;
4019         /** @stable ICU 63 */
4020         public static final int NON_JOINER = 22;
4021         /** @stable ICU 63 */
4022         public static final int NUKTA = 23;
4023         /** @stable ICU 63 */
4024         public static final int NUMBER = 24;
4025         /** @stable ICU 63 */
4026         public static final int NUMBER_JOINER = 25;
4027         /** @stable ICU 63 */
4028         public static final int PURE_KILLER = 26;
4029         /** @stable ICU 63 */
4030         public static final int REGISTER_SHIFTER = 27;
4031         /** @stable ICU 63 */
4032         public static final int SYLLABLE_MODIFIER = 28;
4033         /** @stable ICU 63 */
4034         public static final int TONE_LETTER = 29;
4035         /** @stable ICU 63 */
4036         public static final int TONE_MARK = 30;
4037         /** @stable ICU 63 */
4038         public static final int VIRAMA = 31;
4039         /** @stable ICU 63 */
4040         public static final int VISARGA = 32;
4041         /** @stable ICU 63 */
4042         public static final int VOWEL = 33;
4043         /** @stable ICU 63 */
4044         public static final int VOWEL_DEPENDENT = 34;
4045         /** @stable ICU 63 */
4046         public static final int VOWEL_INDEPENDENT = 35;
4047     }
4048 
4049     /**
4050      * Vertical Orientation constants.
4051      *
4052      * @see UProperty#VERTICAL_ORIENTATION
4053      * @stable ICU 63
4054      */
4055     public static interface VerticalOrientation {
4056         /** @stable ICU 63 */
4057         public static final int ROTATED = 0;
4058         /** @stable ICU 63 */
4059         public static final int TRANSFORMED_ROTATED = 1;
4060         /** @stable ICU 63 */
4061         public static final int TRANSFORMED_UPRIGHT = 2;
4062         /** @stable ICU 63 */
4063         public static final int UPRIGHT = 3;
4064     }
4065 
4066     // public data members -----------------------------------------------
4067 
4068     /**
4069      * The lowest Unicode code point value, constant 0.
4070      * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
4071      *
4072      * @stable ICU 2.1
4073      */
4074     public static final int MIN_VALUE = Character.MIN_CODE_POINT;
4075 
4076     /**
4077      * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
4078      * Same as {@link Character#MAX_CODE_POINT}.
4079      *
4080      * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
4081      * which is still a char with the value U+FFFF.
4082      *
4083      * @stable ICU 2.1
4084      */
4085     public static final int MAX_VALUE = Character.MAX_CODE_POINT;
4086 
4087     /**
4088      * The minimum value for Supplementary code points, constant U+10000.
4089      * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
4090      *
4091      * @stable ICU 2.1
4092      */
4093     public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
4094 
4095     /**
4096      * Unicode value used when translating into Unicode encoding form and there
4097      * is no existing character.
4098      * @stable ICU 2.1
4099      */
4100     public static final int REPLACEMENT_CHAR = '\uFFFD';
4101 
4102     /**
4103      * Special value that is returned by getUnicodeNumericValue(int) when no
4104      * numeric value is defined for a code point.
4105      * @stable ICU 2.4
4106      * @see #getUnicodeNumericValue
4107      */
4108     public static final double NO_NUMERIC_VALUE = -123456789;
4109 
4110     /**
4111      * Compatibility constant for Java Character's MIN_RADIX.
4112      * @stable ICU 3.4
4113      */
4114     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
4115 
4116     /**
4117      * Compatibility constant for Java Character's MAX_RADIX.
4118      * @stable ICU 3.4
4119      */
4120     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
4121 
4122     /**
4123      * Do not lowercase non-initial parts of words when titlecasing.
4124      * Option bit for titlecasing APIs that take an options bit set.
4125      *
4126      * By default, titlecasing will titlecase the first cased character
4127      * of a word and lowercase all other characters.
4128      * With this option, the other characters will not be modified.
4129      *
4130      * @see #toTitleCase
4131      * @stable ICU 3.8
4132      */
4133     public static final int TITLECASE_NO_LOWERCASE = 0x100;
4134 
4135     /**
4136      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
4137      * titlecase exactly the characters at breaks from the iterator.
4138      * Option bit for titlecasing APIs that take an options bit set.
4139      *
4140      * By default, titlecasing will take each break iterator index,
4141      * adjust it by looking for the next cased character, and titlecase that one.
4142      * Other characters are lowercased.
4143      *
4144      * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
4145      *
4146      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
4147      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
4148      * cased character F. If F exists, map F to default_title(F); then map each
4149      * subsequent character C to default_lower(C).
4150      *
4151      * @see #toTitleCase
4152      * @see #TITLECASE_NO_LOWERCASE
4153      * @stable ICU 3.8
4154      */
4155     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
4156 
4157     // public methods ----------------------------------------------------
4158 
4159     /**
4160      * Returnss the numeric value of a decimal digit code point.
4161      * <br>This method observes the semantics of
4162      * <code>java.lang.Character.digit()</code>.  Note that this
4163      * will return positive values for code points for which isDigit
4164      * returns false, just like java.lang.Character.
4165      * <br><em>Semantic Change:</em> In release 1.3.1 and
4166      * prior, this did not treat the European letters as having a
4167      * digit value, and also treated numeric letters and other numbers as
4168      * digits.
4169      * This has been changed to conform to the java semantics.
4170      * <br>A code point is a valid digit if and only if:
4171      * <ul>
4172      *   <li>ch is a decimal digit or one of the european letters, and
4173      *   <li>the value of ch is less than the specified radix.
4174      * </ul>
4175      * @param ch the code point to query
4176      * @param radix the radix
4177      * @return the numeric value represented by the code point in the
4178      * specified radix, or -1 if the code point is not a decimal digit
4179      * or if its value is too large for the radix
4180      * @stable ICU 2.1
4181      */
digit(int ch, int radix)4182     public static int digit(int ch, int radix)
4183     {
4184         if (2 <= radix && radix <= 36) {
4185             int value = digit(ch);
4186             if (value < 0) {
4187                 // ch is not a decimal digit, try latin letters
4188                 value = UCharacterProperty.getEuropeanDigit(ch);
4189             }
4190             return (value < radix) ? value : -1;
4191         } else {
4192             return -1;  // invalid radix
4193         }
4194     }
4195 
4196     /**
4197      * Returnss the numeric value of a decimal digit code point.
4198      * <br>This is a convenience overload of <code>digit(int, int)</code>
4199      * that provides a decimal radix.
4200      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
4201      * treated numeric letters and other numbers as digits.  This has
4202      * been changed to conform to the java semantics.
4203      * @param ch the code point to query
4204      * @return the numeric value represented by the code point,
4205      * or -1 if the code point is not a decimal digit or if its
4206      * value is too large for a decimal radix
4207      * @stable ICU 2.1
4208      */
digit(int ch)4209     public static int digit(int ch)
4210     {
4211         return UCharacterProperty.INSTANCE.digit(ch);
4212     }
4213 
4214     /**
4215      * Returns the numeric value of the code point as a nonnegative
4216      * integer.
4217      * <br>If the code point does not have a numeric value, then -1 is returned.
4218      * <br>
4219      * If the code point has a numeric value that cannot be represented as a
4220      * nonnegative integer (for example, a fractional value), then -2 is
4221      * returned.
4222      * @param ch the code point to query
4223      * @return the numeric value of the code point, or -1 if it has no numeric
4224      * value, or -2 if it has a numeric value that cannot be represented as a
4225      * nonnegative integer
4226      * @stable ICU 2.1
4227      */
getNumericValue(int ch)4228     public static int getNumericValue(int ch)
4229     {
4230         return UCharacterProperty.INSTANCE.getNumericValue(ch);
4231     }
4232 
4233     /**
4234      * {@icu} Returns the numeric value for a Unicode code point as defined in the
4235      * Unicode Character Database.
4236      * <p>A "double" return type is necessary because some numeric values are
4237      * fractions, negative, or too large for int.
4238      * <p>For characters without any numeric values in the Unicode Character
4239      * Database, this function will return NO_NUMERIC_VALUE.
4240      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
4241      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
4242      * return type int and returns -1 when the argument ch does not have a
4243      * corresponding numeric value. This has been changed to synch with ICU4C
4244      *
4245      * This corresponds to the ICU4C function u_getNumericValue.
4246      * @param ch Code point to get the numeric value for.
4247      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
4248      * @stable ICU 2.4
4249      */
getUnicodeNumericValue(int ch)4250     public static double getUnicodeNumericValue(int ch)
4251     {
4252         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
4253     }
4254 
4255     /**
4256      * Compatibility override of Java deprecated method.  This
4257      * method will always remain deprecated.
4258      * Same as java.lang.Character.isSpace().
4259      * @param ch the code point
4260      * @return true if the code point is a space character as
4261      * defined by java.lang.Character.isSpace.
4262      * @deprecated ICU 3.4 (Java)
4263      */
4264     @Deprecated
isSpace(int ch)4265     public static boolean isSpace(int ch) {
4266         return ch <= 0x20 &&
4267                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
4268     }
4269 
4270     /**
4271      * Returns a value indicating a code point's Unicode category.
4272      * Up-to-date Unicode implementation of java.lang.Character.getType()
4273      * except for the above mentioned code points that had their category
4274      * changed.<br>
4275      * Return results are constants from the interface
4276      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
4277      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
4278      * those returned by java.lang.Character.getType.  UCharacterCategory values
4279      * match the ones used in ICU4C, while java.lang.Character type
4280      * values, though similar, skip the value 17.
4281      * @param ch code point whose type is to be determined
4282      * @return category which is a value of UCharacterCategory
4283      * @stable ICU 2.1
4284      */
getType(int ch)4285     public static int getType(int ch)
4286     {
4287         return UCharacterProperty.INSTANCE.getType(ch);
4288     }
4289 
4290     /**
4291      * Determines if a code point has a defined meaning in the up-to-date
4292      * Unicode standard.
4293      * E.g. supplementary code points though allocated space are not defined in
4294      * Unicode yet.<br>
4295      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
4296      * @param ch code point to be determined if it is defined in the most
4297      *        current version of Unicode
4298      * @return true if this code point is defined in unicode
4299      * @stable ICU 2.1
4300      */
isDefined(int ch)4301     public static boolean isDefined(int ch)
4302     {
4303         return getType(ch) != 0;
4304     }
4305 
4306     /**
4307      * Determines if a code point is a Java digit.
4308      * <br>This method observes the semantics of
4309      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
4310      * digits only.
4311      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
4312      * numeric letters and other numbers as digits.
4313      * This has been changed to conform to the java semantics.
4314      * @param ch code point to query
4315      * @return true if this code point is a digit
4316      * @stable ICU 2.1
4317      */
isDigit(int ch)4318     public static boolean isDigit(int ch)
4319     {
4320         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
4321     }
4322 
4323     /**
4324      * Determines if the specified code point is an ISO control character.
4325      * A code point is considered to be an ISO control character if it is in
4326      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
4327      * &#92;u009F.<br>
4328      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
4329      * @param ch code point to determine if it is an ISO control character
4330      * @return true if code point is a ISO control character
4331      * @stable ICU 2.1
4332      */
isISOControl(int ch)4333     public static boolean isISOControl(int ch)
4334     {
4335         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
4336                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
4337     }
4338 
4339     /**
4340      * Determines if the specified code point is a letter.
4341      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
4342      * @param ch code point to determine if it is a letter
4343      * @return true if code point is a letter
4344      * @stable ICU 2.1
4345      */
isLetter(int ch)4346     public static boolean isLetter(int ch)
4347     {
4348         // if props == 0, it will just fall through and return false
4349         return ((1 << getType(ch))
4350                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4351                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4352                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4353                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4354                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
4355     }
4356 
4357     /**
4358      * Determines if the specified code point is a letter or digit.
4359      * {@icunote} This method, unlike java.lang.Character does not regard the ascii
4360      * characters 'A' - 'Z' and 'a' - 'z' as digits.
4361      * @param ch code point to determine if it is a letter or a digit
4362      * @return true if code point is a letter or a digit
4363      * @stable ICU 2.1
4364      */
isLetterOrDigit(int ch)4365     public static boolean isLetterOrDigit(int ch)
4366     {
4367         return ((1 << getType(ch))
4368                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4369                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4370                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4371                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4372                         | (1 << UCharacterCategory.OTHER_LETTER)
4373                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
4374     }
4375 
4376     /**
4377      * Compatibility override of Java deprecated method.  This
4378      * method will always remain deprecated.  Delegates to
4379      * java.lang.Character.isJavaIdentifierStart.
4380      * @param cp the code point
4381      * @return true if the code point can start a java identifier.
4382      * @deprecated ICU 3.4 (Java)
4383      */
4384     @Deprecated
isJavaLetter(int cp)4385     public static boolean isJavaLetter(int cp) {
4386         return isJavaIdentifierStart(cp);
4387     }
4388 
4389     /**
4390      * Compatibility override of Java deprecated method.  This
4391      * method will always remain deprecated.  Delegates to
4392      * java.lang.Character.isJavaIdentifierPart.
4393      * @param cp the code point
4394      * @return true if the code point can continue a java identifier.
4395      * @deprecated ICU 3.4 (Java)
4396      */
4397     @Deprecated
isJavaLetterOrDigit(int cp)4398     public static boolean isJavaLetterOrDigit(int cp) {
4399         return isJavaIdentifierPart(cp);
4400     }
4401 
4402     /**
4403      * Compatibility override of Java method, delegates to
4404      * java.lang.Character.isJavaIdentifierStart.
4405      * @param cp the code point
4406      * @return true if the code point can start a java identifier.
4407      * @stable ICU 3.4
4408      */
isJavaIdentifierStart(int cp)4409     public static boolean isJavaIdentifierStart(int cp) {
4410         // note, downcast to char for jdk 1.4 compatibility
4411         return java.lang.Character.isJavaIdentifierStart((char)cp);
4412     }
4413 
4414     /**
4415      * Compatibility override of Java method, delegates to
4416      * java.lang.Character.isJavaIdentifierPart.
4417      * @param cp the code point
4418      * @return true if the code point can continue a java identifier.
4419      * @stable ICU 3.4
4420      */
isJavaIdentifierPart(int cp)4421     public static boolean isJavaIdentifierPart(int cp) {
4422         // note, downcast to char for jdk 1.4 compatibility
4423         return java.lang.Character.isJavaIdentifierPart((char)cp);
4424     }
4425 
4426     /**
4427      * Determines if the specified code point is a lowercase character.
4428      * UnicodeData only contains case mappings for code points where they are
4429      * one-to-one mappings; it also omits information about context-sensitive
4430      * case mappings.<br> For more information about Unicode case mapping
4431      * please refer to the
4432      * <a href=https://www.unicode.org/reports/tr21/>Technical report
4433      * #21</a>.<br>
4434      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
4435      * @param ch code point to determine if it is in lowercase
4436      * @return true if code point is a lowercase character
4437      * @stable ICU 2.1
4438      */
isLowerCase(int ch)4439     public static boolean isLowerCase(int ch)
4440     {
4441         // if props == 0, it will just fall through and return false
4442         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
4443     }
4444 
4445     /**
4446      * Determines if the specified code point is a white space character.
4447      * A code point is considered to be an whitespace character if and only
4448      * if it satisfies one of the following criteria:
4449      * <ul>
4450      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
4451      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
4452      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
4453      * <li> It is &#92;u000A, LINE FEED.
4454      * <li> It is &#92;u000B, VERTICAL TABULATION.
4455      * <li> It is &#92;u000C, FORM FEED.
4456      * <li> It is &#92;u000D, CARRIAGE RETURN.
4457      * <li> It is &#92;u001C, FILE SEPARATOR.
4458      * <li> It is &#92;u001D, GROUP SEPARATOR.
4459      * <li> It is &#92;u001E, RECORD SEPARATOR.
4460      * <li> It is &#92;u001F, UNIT SEPARATOR.
4461      * </ul>
4462      *
4463      * This API tries to sync with the semantics of Java's
4464      * java.lang.Character.isWhitespace(), but it may not return
4465      * the exact same results because of the Unicode version
4466      * difference.
4467      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
4468      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
4469      * See http://www.unicode.org/versions/Unicode4.0.1/
4470      * @param ch code point to determine if it is a white space
4471      * @return true if the specified code point is a white space character
4472      * @stable ICU 2.1
4473      */
isWhitespace(int ch)4474     public static boolean isWhitespace(int ch)
4475     {
4476         // exclude no-break spaces
4477         // if props == 0, it will just fall through and return false
4478         return ((1 << getType(ch)) &
4479                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
4480                         | (1 << UCharacterCategory.LINE_SEPARATOR)
4481                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
4482                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
4483                         // TAB VT LF FF CR FS GS RS US NL are all control characters
4484                         // that are white spaces.
4485                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
4486     }
4487 
4488     /**
4489      * Determines if the specified code point is a Unicode specified space
4490      * character, i.e. if code point is in the category Zs, Zl and Zp.
4491      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
4492      * @param ch code point to determine if it is a space
4493      * @return true if the specified code point is a space character
4494      * @stable ICU 2.1
4495      */
isSpaceChar(int ch)4496     public static boolean isSpaceChar(int ch)
4497     {
4498         // if props == 0, it will just fall through and return false
4499         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
4500                 | (1 << UCharacterCategory.LINE_SEPARATOR)
4501                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
4502                 != 0;
4503     }
4504 
4505     /**
4506      * Determines if the specified code point is a titlecase character.
4507      * UnicodeData only contains case mappings for code points where they are
4508      * one-to-one mappings; it also omits information about context-sensitive
4509      * case mappings.<br>
4510      * For more information about Unicode case mapping please refer to the
4511      * <a href=https://www.unicode.org/reports/tr21/>
4512      * Technical report #21</a>.<br>
4513      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
4514      * @param ch code point to determine if it is in title case
4515      * @return true if the specified code point is a titlecase character
4516      * @stable ICU 2.1
4517      */
isTitleCase(int ch)4518     public static boolean isTitleCase(int ch)
4519     {
4520         // if props == 0, it will just fall through and return false
4521         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
4522     }
4523 
4524     /**
4525      * Determines if the specified code point may be any part of a Unicode
4526      * identifier other than the starting character.
4527      * A code point may be part of a Unicode identifier if and only if it is
4528      * one of the following:
4529      * <ul>
4530      * <li> Lu Uppercase letter
4531      * <li> Ll Lowercase letter
4532      * <li> Lt Titlecase letter
4533      * <li> Lm Modifier letter
4534      * <li> Lo Other letter
4535      * <li> Nl Letter number
4536      * <li> Pc Connecting punctuation character
4537      * <li> Nd decimal number
4538      * <li> Mc Spacing combining mark
4539      * <li> Mn Non-spacing mark
4540      * <li> Cf formatting code
4541      * </ul>
4542      * Up-to-date Unicode implementation of
4543      * java.lang.Character.isUnicodeIdentifierPart().<br>
4544      * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>.
4545      * @param ch code point to determine if is can be part of a Unicode
4546      *        identifier
4547      * @return true if code point is any character belonging a unicode
4548      *         identifier suffix after the first character
4549      * @stable ICU 2.1
4550      */
isUnicodeIdentifierPart(int ch)4551     public static boolean isUnicodeIdentifierPart(int ch)
4552     {
4553         // if props == 0, it will just fall through and return false
4554         // cat == format
4555         return ((1 << getType(ch))
4556                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4557                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4558                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4559                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4560                         | (1 << UCharacterCategory.OTHER_LETTER)
4561                         | (1 << UCharacterCategory.LETTER_NUMBER)
4562                         | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
4563                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
4564                         | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
4565                         | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
4566                         || isIdentifierIgnorable(ch);
4567     }
4568 
4569     /**
4570      * Determines if the specified code point is permissible as the first
4571      * character in a Unicode identifier.
4572      * A code point may start a Unicode identifier if it is of type either
4573      * <ul>
4574      * <li> Lu Uppercase letter
4575      * <li> Ll Lowercase letter
4576      * <li> Lt Titlecase letter
4577      * <li> Lm Modifier letter
4578      * <li> Lo Other letter
4579      * <li> Nl Letter number
4580      * </ul>
4581      * Up-to-date Unicode implementation of
4582      * java.lang.Character.isUnicodeIdentifierStart().<br>
4583      * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>.
4584      * @param ch code point to determine if it can start a Unicode identifier
4585      * @return true if code point is the first character belonging a unicode
4586      *              identifier
4587      * @stable ICU 2.1
4588      */
isUnicodeIdentifierStart(int ch)4589     public static boolean isUnicodeIdentifierStart(int ch)
4590     {
4591         /*int cat = getType(ch);*/
4592         // if props == 0, it will just fall through and return false
4593         return ((1 << getType(ch))
4594                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4595                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4596                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4597                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4598                         | (1 << UCharacterCategory.OTHER_LETTER)
4599                         | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
4600     }
4601 
4602     /**
4603      * Determines if the specified code point should be regarded as an
4604      * ignorable character in a Java identifier.
4605      * A character is Java-identifier-ignorable if it has the general category
4606      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
4607      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
4608      * Up-to-date Unicode implementation of
4609      * java.lang.Character.isIdentifierIgnorable().<br>
4610      * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>.
4611      * <p>Note that Unicode just recommends to ignore Cf (format controls).
4612      * @param ch code point to be determined if it can be ignored in a Unicode
4613      *        identifier.
4614      * @return true if the code point is ignorable
4615      * @stable ICU 2.1
4616      */
isIdentifierIgnorable(int ch)4617     public static boolean isIdentifierIgnorable(int ch)
4618     {
4619         // see java.lang.Character.isIdentifierIgnorable() on range of
4620         // ignorable characters.
4621         if (ch <= 0x9f) {
4622             return isISOControl(ch)
4623                     && !((ch >= 0x9 && ch <= 0xd)
4624                             || (ch >= 0x1c && ch <= 0x1f));
4625         }
4626         return getType(ch) == UCharacterCategory.FORMAT;
4627     }
4628 
4629     /**
4630      * Determines if the specified code point is an uppercase character.
4631      * UnicodeData only contains case mappings for code point where they are
4632      * one-to-one mappings; it also omits information about context-sensitive
4633      * case mappings.<br>
4634      * For language specific case conversion behavior, use
4635      * toUpperCase(locale, str). <br>
4636      * For example, the case conversion for dot-less i and dotted I in Turkish,
4637      * or for final sigma in Greek.
4638      * For more information about Unicode case mapping please refer to the
4639      * <a href=https://www.unicode.org/reports/tr21/>
4640      * Technical report #21</a>.<br>
4641      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
4642      * @param ch code point to determine if it is in uppercase
4643      * @return true if the code point is an uppercase character
4644      * @stable ICU 2.1
4645      */
isUpperCase(int ch)4646     public static boolean isUpperCase(int ch)
4647     {
4648         // if props == 0, it will just fall through and return false
4649         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
4650     }
4651 
4652     /**
4653      * The given code point is mapped to its lowercase equivalent; if the code
4654      * point has no lowercase equivalent, the code point itself is returned.
4655      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
4656      *
4657      * <p>This function only returns the simple, single-code point case mapping.
4658      * Full case mappings should be used whenever possible because they produce
4659      * better results by working on whole strings.
4660      * They take into account the string context and the language and can map
4661      * to a result string with a different length as appropriate.
4662      * Full case mappings are applied by the case mapping functions
4663      * that take String parameters rather than code points (int).
4664      * See also the User Guide chapter on C/POSIX migration:
4665      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
4666      *
4667      * @param ch code point whose lowercase equivalent is to be retrieved
4668      * @return the lowercase equivalent code point
4669      * @stable ICU 2.1
4670      */
toLowerCase(int ch)4671     public static int toLowerCase(int ch) {
4672         return UCaseProps.INSTANCE.tolower(ch);
4673     }
4674 
4675     /**
4676      * Converts argument code point and returns a String object representing
4677      * the code point's value in UTF-16 format.
4678      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
4679      *
4680      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
4681      *
4682      * @param ch code point
4683      * @return string representation of the code point, null if code point is not
4684      *         defined in unicode
4685      * @stable ICU 2.1
4686      */
toString(int ch)4687     public static String toString(int ch)
4688     {
4689         if (ch < MIN_VALUE || ch > MAX_VALUE) {
4690             return null;
4691         }
4692 
4693         if (ch < SUPPLEMENTARY_MIN_VALUE) {
4694             return String.valueOf((char)ch);
4695         }
4696 
4697         return new String(Character.toChars(ch));
4698     }
4699 
4700     /**
4701      * Converts the code point argument to titlecase.
4702      * If no titlecase is available, the uppercase is returned. If no uppercase
4703      * is available, the code point itself is returned.
4704      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
4705      *
4706      * <p>This function only returns the simple, single-code point case mapping.
4707      * Full case mappings should be used whenever possible because they produce
4708      * better results by working on whole strings.
4709      * They take into account the string context and the language and can map
4710      * to a result string with a different length as appropriate.
4711      * Full case mappings are applied by the case mapping functions
4712      * that take String parameters rather than code points (int).
4713      * See also the User Guide chapter on C/POSIX migration:
4714      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
4715      *
4716      * @param ch code point  whose title case is to be retrieved
4717      * @return titlecase code point
4718      * @stable ICU 2.1
4719      */
toTitleCase(int ch)4720     public static int toTitleCase(int ch) {
4721         return UCaseProps.INSTANCE.totitle(ch);
4722     }
4723 
4724     /**
4725      * Converts the character argument to uppercase.
4726      * If no uppercase is available, the character itself is returned.
4727      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
4728      *
4729      * <p>This function only returns the simple, single-code point case mapping.
4730      * Full case mappings should be used whenever possible because they produce
4731      * better results by working on whole strings.
4732      * They take into account the string context and the language and can map
4733      * to a result string with a different length as appropriate.
4734      * Full case mappings are applied by the case mapping functions
4735      * that take String parameters rather than code points (int).
4736      * See also the User Guide chapter on C/POSIX migration:
4737      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
4738      *
4739      * @param ch code point whose uppercase is to be retrieved
4740      * @return uppercase code point
4741      * @stable ICU 2.1
4742      */
toUpperCase(int ch)4743     public static int toUpperCase(int ch) {
4744         return UCaseProps.INSTANCE.toupper(ch);
4745     }
4746 
4747     // extra methods not in java.lang.Character --------------------------
4748 
4749     /**
4750      * {@icu} Determines if the code point is a supplementary character.
4751      * A code point is a supplementary character if and only if it is greater
4752      * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
4753      * @param ch code point to be determined if it is in the supplementary
4754      *        plane
4755      * @return true if code point is a supplementary character
4756      * @stable ICU 2.1
4757      */
isSupplementary(int ch)4758     public static boolean isSupplementary(int ch)
4759     {
4760         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
4761                 ch <= UCharacter.MAX_VALUE;
4762     }
4763 
4764     /**
4765      * {@icu} Determines if the code point is in the BMP plane.
4766      * @param ch code point to be determined if it is not a supplementary
4767      *        character
4768      * @return true if code point is not a supplementary character
4769      * @stable ICU 2.1
4770      */
isBMP(int ch)4771     public static boolean isBMP(int ch)
4772     {
4773         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
4774     }
4775 
4776     /**
4777      * {@icu} Determines whether the specified code point is a printable character
4778      * according to the Unicode standard.
4779      * @param ch code point to be determined if it is printable
4780      * @return true if the code point is a printable character
4781      * @stable ICU 2.1
4782      */
isPrintable(int ch)4783     public static boolean isPrintable(int ch)
4784     {
4785         int cat = getType(ch);
4786         // if props == 0, it will just fall through and return false
4787         return (cat != UCharacterCategory.UNASSIGNED &&
4788                 cat != UCharacterCategory.CONTROL &&
4789                 cat != UCharacterCategory.FORMAT &&
4790                 cat != UCharacterCategory.PRIVATE_USE &&
4791                 cat != UCharacterCategory.SURROGATE &&
4792                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
4793     }
4794 
4795     /**
4796      * {@icu} Determines whether the specified code point is of base form.
4797      * A code point of base form does not graphically combine with preceding
4798      * characters, and is neither a control nor a format character.
4799      * @param ch code point to be determined if it is of base form
4800      * @return true if the code point is of base form
4801      * @stable ICU 2.1
4802      */
isBaseForm(int ch)4803     public static boolean isBaseForm(int ch)
4804     {
4805         int cat = getType(ch);
4806         // if props == 0, it will just fall through and return false
4807         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
4808                 cat == UCharacterCategory.OTHER_NUMBER ||
4809                 cat == UCharacterCategory.LETTER_NUMBER ||
4810                 cat == UCharacterCategory.UPPERCASE_LETTER ||
4811                 cat == UCharacterCategory.LOWERCASE_LETTER ||
4812                 cat == UCharacterCategory.TITLECASE_LETTER ||
4813                 cat == UCharacterCategory.MODIFIER_LETTER ||
4814                 cat == UCharacterCategory.OTHER_LETTER ||
4815                 cat == UCharacterCategory.NON_SPACING_MARK ||
4816                 cat == UCharacterCategory.ENCLOSING_MARK ||
4817                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
4818     }
4819 
4820     /**
4821      * {@icu} Returns the Bidirection property of a code point.
4822      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
4823      * property.<br>
4824      * Result returned belongs to the interface
4825      * <a href=UCharacterDirection.html>UCharacterDirection</a>
4826      * @param ch the code point to be determined its direction
4827      * @return direction constant from UCharacterDirection.
4828      * @stable ICU 2.1
4829      */
getDirection(int ch)4830     public static int getDirection(int ch)
4831     {
4832         return UBiDiProps.INSTANCE.getClass(ch);
4833     }
4834 
4835     /**
4836      * Determines whether the code point has the "mirrored" property.
4837      * This property is set for characters that are commonly used in
4838      * Right-To-Left contexts and need to be displayed with a "mirrored"
4839      * glyph.
4840      * @param ch code point whose mirror is to be determined
4841      * @return true if the code point has the "mirrored" property
4842      * @stable ICU 2.1
4843      */
isMirrored(int ch)4844     public static boolean isMirrored(int ch)
4845     {
4846         return UBiDiProps.INSTANCE.isMirrored(ch);
4847     }
4848 
4849     /**
4850      * {@icu} Maps the specified code point to a "mirror-image" code point.
4851      * For code points with the "mirrored" property, implementations sometimes
4852      * need a "poor man's" mapping to another code point such that the default
4853      * glyph may serve as the mirror-image of the default glyph of the
4854      * specified code point.<br>
4855      * This is useful for text conversion to and from codepages with visual
4856      * order, and for displays without glyph selection capabilities.
4857      * @param ch code point whose mirror is to be retrieved
4858      * @return another code point that may serve as a mirror-image substitute,
4859      *         or ch itself if there is no such mapping or ch does not have the
4860      *         "mirrored" property
4861      * @stable ICU 2.1
4862      */
getMirror(int ch)4863     public static int getMirror(int ch)
4864     {
4865         return UBiDiProps.INSTANCE.getMirror(ch);
4866     }
4867 
4868     /**
4869      * {@icu} Maps the specified character to its paired bracket character.
4870      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
4871      * Otherwise c itself is returned.
4872      * See http://www.unicode.org/reports/tr9/
4873      *
4874      * @param c the code point to be mapped
4875      * @return the paired bracket code point,
4876      *         or c itself if there is no such mapping
4877      *         (Bidi_Paired_Bracket_Type=None)
4878      *
4879      * @see UProperty#BIDI_PAIRED_BRACKET
4880      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
4881      * @see #getMirror(int)
4882      * @stable ICU 52
4883      */
getBidiPairedBracket(int c)4884     public static int getBidiPairedBracket(int c) {
4885         return UBiDiProps.INSTANCE.getPairedBracket(c);
4886     }
4887 
4888     /**
4889      * {@icu} Returns the combining class of the argument codepoint
4890      * @param ch code point whose combining is to be retrieved
4891      * @return the combining class of the codepoint
4892      * @stable ICU 2.1
4893      */
getCombiningClass(int ch)4894     public static int getCombiningClass(int ch)
4895     {
4896         return Normalizer2.getNFDInstance().getCombiningClass(ch);
4897     }
4898 
4899     /**
4900      * {@icu} A code point is illegal if and only if
4901      * <ul>
4902      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4903      * <li> A surrogate value, 0xD800 to 0xDFFF
4904      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4905      * </ul>
4906      * Note: legal does not mean that it is assigned in this version of Unicode.
4907      * @param ch code point to determine if it is a legal code point by itself
4908      * @return true if and only if legal.
4909      * @stable ICU 2.1
4910      */
isLegal(int ch)4911     public static boolean isLegal(int ch)
4912     {
4913         if (ch < MIN_VALUE) {
4914             return false;
4915         }
4916         if (ch < Character.MIN_SURROGATE) {
4917             return true;
4918         }
4919         if (ch <= Character.MAX_SURROGATE) {
4920             return false;
4921         }
4922         if (UCharacterUtility.isNonCharacter(ch)) {
4923             return false;
4924         }
4925         return (ch <= MAX_VALUE);
4926     }
4927 
4928     /**
4929      * {@icu} A string is legal iff all its code points are legal.
4930      * A code point is illegal if and only if
4931      * <ul>
4932      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4933      * <li> A surrogate value, 0xD800 to 0xDFFF
4934      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4935      * </ul>
4936      * Note: legal does not mean that it is assigned in this version of Unicode.
4937      * @param str containing code points to examin
4938      * @return true if and only if legal.
4939      * @stable ICU 2.1
4940      */
isLegal(String str)4941     public static boolean isLegal(String str)
4942     {
4943         int size = str.length();
4944         int codepoint;
4945         for (int i = 0; i < size; i += Character.charCount(codepoint))
4946         {
4947             codepoint = str.codePointAt(i);
4948             if (!isLegal(codepoint)) {
4949                 return false;
4950             }
4951         }
4952         return true;
4953     }
4954 
4955     /**
4956      * {@icu} Returns the version of Unicode data used.
4957      * @return the unicode version number used
4958      * @stable ICU 2.1
4959      */
getUnicodeVersion()4960     public static VersionInfo getUnicodeVersion()
4961     {
4962         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
4963     }
4964 
4965     /**
4966      * {@icu} Returns the most current Unicode name of the argument code point, or
4967      * null if the character is unassigned or outside the range
4968      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4969      * <br>
4970      * Note calling any methods related to code point names, e.g. get*Name*()
4971      * incurs a one-time initialization cost to construct the name tables.
4972      * @param ch the code point for which to get the name
4973      * @return most current Unicode name
4974      * @stable ICU 2.1
4975      */
getName(int ch)4976     public static String getName(int ch)
4977     {
4978         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
4979     }
4980 
4981     /**
4982      * {@icu} Returns the names for each of the characters in a string
4983      * @param s string to format
4984      * @param separator string to go between names
4985      * @return string of names
4986      * @stable ICU 3.8
4987      */
getName(String s, String separator)4988     public static String getName(String s, String separator) {
4989         if (s.length() == 1) { // handle common case
4990             return getName(s.charAt(0));
4991         }
4992         int cp;
4993         StringBuilder sb = new StringBuilder();
4994         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
4995             cp = s.codePointAt(i);
4996             if (i != 0) sb.append(separator);
4997             sb.append(UCharacter.getName(cp));
4998         }
4999         return sb.toString();
5000     }
5001 
5002     /**
5003      * {@icu} Returns null.
5004      * Used to return the Unicode_1_Name property value which was of little practical value.
5005      * @param ch the code point for which to get the name
5006      * @return null
5007      * @deprecated ICU 49
5008      */
5009     @Deprecated
getName1_0(int ch)5010     public static String getName1_0(int ch)
5011     {
5012         return null;
5013     }
5014 
5015     /**
5016      * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and
5017      * getName1_0(int), this method will return a name even for codepoints that
5018      * are not assigned a name in UnicodeData.txt.
5019      *
5020      * <p>The names are returned in the following order.
5021      * <ul>
5022      * <li> Most current Unicode name if there is any
5023      * <li> Unicode 1.0 name if there is any
5024      * <li> Extended name in the form of
5025      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
5026      * </ul>
5027      * Note calling any methods related to code point names, e.g. get*Name*()
5028      * incurs a one-time initialization cost to construct the name tables.
5029      * @param ch the code point for which to get the name
5030      * @return a name for the argument codepoint
5031      * @stable ICU 2.6
5032      */
getExtendedName(int ch)5033     public static String getExtendedName(int ch) {
5034         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
5035     }
5036 
5037     /**
5038      * {@icu} Returns the corrected name from NameAliases.txt if there is one.
5039      * Returns null if the character is unassigned or outside the range
5040      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
5041      * <br>
5042      * Note calling any methods related to code point names, e.g. get*Name*()
5043      * incurs a one-time initialization cost to construct the name tables.
5044      * @param ch the code point for which to get the name alias
5045      * @return Unicode name alias, or null
5046      * @stable ICU 4.4
5047      */
getNameAlias(int ch)5048     public static String getNameAlias(int ch)
5049     {
5050         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
5051     }
5052 
5053     /**
5054      * {@icu} Returns null.
5055      * Used to return the ISO 10646 comment for a character.
5056      * The Unicode ISO_Comment property is deprecated and has no values.
5057      *
5058      * @param ch The code point for which to get the ISO comment.
5059      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
5060      * @return null
5061      * @deprecated ICU 49
5062      */
5063     @Deprecated
getISOComment(int ch)5064     public static String getISOComment(int ch)
5065     {
5066         return null;
5067     }
5068 
5069     /**
5070      * {@icu} <p>Finds a Unicode code point by its most current Unicode name and
5071      * return its code point value. All Unicode names are in uppercase.
5072      * Note calling any methods related to code point names, e.g. get*Name*()
5073      * incurs a one-time initialization cost to construct the name tables.
5074      * @param name most current Unicode character name whose code point is to
5075      *        be returned
5076      * @return code point or -1 if name is not found
5077      * @stable ICU 2.1
5078      */
getCharFromName(String name)5079     public static int getCharFromName(String name){
5080         return UCharacterName.INSTANCE.getCharFromName(
5081                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
5082     }
5083 
5084     /**
5085      * {@icu} Returns -1.
5086      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
5087      * its code point value.
5088      * @param name Unicode 1.0 code point name whose code point is to be
5089      *             returned
5090      * @return -1
5091      * @deprecated ICU 49
5092      * @see #getName1_0(int)
5093      */
5094     @Deprecated
getCharFromName1_0(String name)5095     public static int getCharFromName1_0(String name){
5096         return -1;
5097     }
5098 
5099     /**
5100      * {@icu} <p>Find a Unicode character by either its name and return its code
5101      * point value. All Unicode names are in uppercase.
5102      * Extended names are all lowercase except for numbers and are contained
5103      * within angle brackets.
5104      * The names are searched in the following order
5105      * <ul>
5106      * <li> Most current Unicode name if there is any
5107      * <li> Unicode 1.0 name if there is any
5108      * <li> Extended name in the form of
5109      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
5110      * </ul>
5111      * Note calling any methods related to code point names, e.g. get*Name*()
5112      * incurs a one-time initialization cost to construct the name tables.
5113      * @param name codepoint name
5114      * @return code point associated with the name or -1 if the name is not
5115      *         found.
5116      * @stable ICU 2.6
5117      */
getCharFromExtendedName(String name)5118     public static int getCharFromExtendedName(String name){
5119         return UCharacterName.INSTANCE.getCharFromName(
5120                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
5121     }
5122 
5123     /**
5124      * {@icu} <p>Find a Unicode character by its corrected name alias and return
5125      * its code point value. All Unicode names are in uppercase.
5126      * Note calling any methods related to code point names, e.g. get*Name*()
5127      * incurs a one-time initialization cost to construct the name tables.
5128      * @param name Unicode name alias whose code point is to be returned
5129      * @return code point or -1 if name is not found
5130      * @stable ICU 4.4
5131      */
getCharFromNameAlias(String name)5132     public static int getCharFromNameAlias(String name){
5133         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
5134     }
5135 
5136     /**
5137      * {@icu} Return the Unicode name for a given property, as given in the
5138      * Unicode database file PropertyAliases.txt.  Most properties
5139      * have more than one name.  The nameChoice determines which one
5140      * is returned.
5141      *
5142      * In addition, this function maps the property
5143      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
5144      * "General_Category_Mask".  These names are not in
5145      * PropertyAliases.txt.
5146      *
5147      * @param property UProperty selector.
5148      *
5149      * @param nameChoice UProperty.NameChoice selector for which name
5150      * to get.  All properties have a long name.  Most have a short
5151      * name, but some do not.  Unicode allows for additional names; if
5152      * present these will be returned by UProperty.NameChoice.LONG + i,
5153      * where i=1, 2,...
5154      *
5155      * @return a name, or null if Unicode explicitly defines no name
5156      * ("n/a") for a given property/nameChoice.  If a given nameChoice
5157      * throws an exception, then all larger values of nameChoice will
5158      * throw an exception.  If null is returned for a given
5159      * nameChoice, then other nameChoice values may return non-null
5160      * results.
5161      *
5162      * @exception IllegalArgumentException thrown if property or
5163      * nameChoice are invalid.
5164      *
5165      * @see UProperty
5166      * @see UProperty.NameChoice
5167      * @stable ICU 2.4
5168      */
getPropertyName(int property, int nameChoice)5169     public static String getPropertyName(int property,
5170             int nameChoice) {
5171         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
5172     }
5173 
5174     /**
5175      * {@icu} Return the UProperty selector for a given property name, as
5176      * specified in the Unicode database file PropertyAliases.txt.
5177      * Short, long, and any other variants are recognized.
5178      *
5179      * In addition, this function maps the synthetic names "gcm" /
5180      * "General_Category_Mask" to the property
5181      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
5182      * PropertyAliases.txt.
5183      *
5184      * @param propertyAlias the property name to be matched.  The name
5185      * is compared using "loose matching" as described in
5186      * PropertyAliases.txt.
5187      *
5188      * @return a UProperty enum.
5189      *
5190      * @exception IllegalArgumentException thrown if propertyAlias
5191      * is not recognized.
5192      *
5193      * @see UProperty
5194      * @stable ICU 2.4
5195      */
getPropertyEnum(CharSequence propertyAlias)5196     public static int getPropertyEnum(CharSequence propertyAlias) {
5197         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
5198         if (propEnum == UProperty.UNDEFINED) {
5199             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
5200         }
5201         return propEnum;
5202     }
5203 
5204     /**
5205      * {@icu} Return the Unicode name for a given property value, as given in
5206      * the Unicode database file PropertyValueAliases.txt.  Most
5207      * values have more than one name.  The nameChoice determines
5208      * which one is returned.
5209      *
5210      * Note: Some of the names in PropertyValueAliases.txt can only be
5211      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
5212      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
5213      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
5214      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
5215      *
5216      * @param property UProperty selector constant.
5217      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
5218      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
5219      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
5220      * If out of range, null is returned.
5221      *
5222      * @param value selector for a value for the given property.  In
5223      * general, valid values range from 0 up to some maximum.  There
5224      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
5225      * non-zero value BASIC_LATIN.getID().  (2.)
5226      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
5227      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
5228      * are mask values produced by left-shifting 1 by
5229      * UCharacter.getType().  This allows grouped categories such as
5230      * [:L:] to be represented.  Mask values are non-contiguous.
5231      *
5232      * @param nameChoice UProperty.NameChoice selector for which name
5233      * to get.  All values have a long name.  Most have a short name,
5234      * but some do not.  Unicode allows for additional names; if
5235      * present these will be returned by UProperty.NameChoice.LONG + i,
5236      * where i=1, 2,...
5237      *
5238      * @return a name, or null if Unicode explicitly defines no name
5239      * ("n/a") for a given property/value/nameChoice.  If a given
5240      * nameChoice throws an exception, then all larger values of
5241      * nameChoice will throw an exception.  If null is returned for a
5242      * given nameChoice, then other nameChoice values may return
5243      * non-null results.
5244      *
5245      * @exception IllegalArgumentException thrown if property, value,
5246      * or nameChoice are invalid.
5247      *
5248      * @see UProperty
5249      * @see UProperty.NameChoice
5250      * @stable ICU 2.4
5251      */
getPropertyValueName(int property, int value, int nameChoice)5252     public static String getPropertyValueName(int property,
5253             int value,
5254             int nameChoice)
5255     {
5256         if ((property == UProperty.CANONICAL_COMBINING_CLASS
5257                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
5258                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
5259                 && value >= UCharacter.getIntPropertyMinValue(
5260                         UProperty.CANONICAL_COMBINING_CLASS)
5261                         && value <= UCharacter.getIntPropertyMaxValue(
5262                                 UProperty.CANONICAL_COMBINING_CLASS)
5263                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
5264             // this is hard coded for the valid cc
5265             // because PropertyValueAliases.txt does not contain all of them
5266             try {
5267                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
5268                         nameChoice);
5269             }
5270             catch (IllegalArgumentException e) {
5271                 return null;
5272             }
5273         }
5274         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
5275     }
5276 
5277     /**
5278      * {@icu} Return the property value integer for a given value name, as
5279      * specified in the Unicode database file PropertyValueAliases.txt.
5280      * Short, long, and any other variants are recognized.
5281      *
5282      * Note: Some of the names in PropertyValueAliases.txt will only be
5283      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
5284      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
5285      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
5286      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
5287      *
5288      * @param property UProperty selector constant.
5289      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
5290      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
5291      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
5292      * Only these properties can be enumerated.
5293      *
5294      * @param valueAlias the value name to be matched.  The name is
5295      * compared using "loose matching" as described in
5296      * PropertyValueAliases.txt.
5297      *
5298      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
5299      * values are mask values produced by left-shifting 1 by
5300      * UCharacter.getType().  This allows grouped categories such as
5301      * [:L:] to be represented.
5302      *
5303      * @see UProperty
5304      * @throws IllegalArgumentException if property is not a valid UProperty
5305      *         selector or valueAlias is not a value of this property
5306      * @stable ICU 2.4
5307      */
getPropertyValueEnum(int property, CharSequence valueAlias)5308     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
5309         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
5310         if (propEnum == UProperty.UNDEFINED) {
5311             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
5312         }
5313         return propEnum;
5314     }
5315 
5316     /**
5317      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
5318      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
5319      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
5320      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
5321      * @internal
5322      * @deprecated This API is ICU internal only.
5323      */
5324     @Deprecated
getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)5325     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
5326         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
5327     }
5328 
5329 
5330     /**
5331      * {@icu} Returns a code point corresponding to the two surrogate code units.
5332      *
5333      * @param lead the lead unit
5334      *        (In ICU 2.1-69 the type of both parameters was <code>char</code>.)
5335      * @param trail the trail unit
5336      * @return code point if lead and trail form a valid surrogate pair.
5337      * @exception IllegalArgumentException thrown when the code units do
5338      *            not form a valid surrogate pair
5339      * @stable ICU 70
5340      * @see #toCodePoint(int, int)
5341      */
getCodePoint(int lead, int trail)5342     public static int getCodePoint(int lead, int trail)
5343     {
5344         if (isHighSurrogate(lead) && isLowSurrogate(trail)) {
5345             return toCodePoint(lead, trail);
5346         }
5347         throw new IllegalArgumentException("Not a valid surrogate pair");
5348     }
5349 
5350     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
5351     /**
5352      * {@icu} Returns a code point corresponding to the two surrogate code units.
5353      *
5354      * @param lead the lead char
5355      * @param trail the trail char
5356      * @return code point if surrogate characters are valid.
5357      * @exception IllegalArgumentException thrown when the code units do
5358      *            not form a valid code point
5359      * @stable ICU 2.1
5360      */
getCodePoint(char lead, char trail)5361     public static int getCodePoint(char lead, char trail)
5362     {
5363         return getCodePoint((int) lead, (int) trail);
5364     }
5365     // END Android patch: Keep the `char` version on Android. See ICU-21655
5366 
5367     /**
5368      * {@icu} Returns the code point corresponding to the BMP code point.
5369      *
5370      * @param char16 the BMP code point
5371      * @return code point if argument is a valid character.
5372      * @exception IllegalArgumentException thrown when char16 is not a valid
5373      *            code point
5374      * @stable ICU 2.1
5375      */
getCodePoint(char char16)5376     public static int getCodePoint(char char16)
5377     {
5378         if (UCharacter.isLegal(char16)) {
5379             return char16;
5380         }
5381         throw new IllegalArgumentException("Illegal codepoint");
5382     }
5383 
5384     /**
5385      * Returns the uppercase version of the argument string.
5386      * Casing is dependent on the default locale and context-sensitive.
5387      * @param str source string to be performed on
5388      * @return uppercase version of the argument string
5389      * @stable ICU 2.1
5390      */
toUpperCase(String str)5391     public static String toUpperCase(String str)
5392     {
5393         return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str);
5394     }
5395 
5396     /**
5397      * Returns the lowercase version of the argument string.
5398      * Casing is dependent on the default locale and context-sensitive
5399      * @param str source string to be performed on
5400      * @return lowercase version of the argument string
5401      * @stable ICU 2.1
5402      */
toLowerCase(String str)5403     public static String toLowerCase(String str)
5404     {
5405         return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str);
5406     }
5407 
5408     /**
5409      * <p>Returns the titlecase version of the argument string.
5410      * <p>Position for titlecasing is determined by the argument break
5411      * iterator, hence the user can customize his break iterator for
5412      * a specialized titlecasing. In this case only the forward iteration
5413      * needs to be implemented.
5414      * If the break iterator passed in is null, the default Unicode algorithm
5415      * will be used to determine the titlecase positions.
5416      *
5417      * <p>Only positions returned by the break iterator will be title cased,
5418      * character in between the positions will all be in lower case.
5419      * <p>Casing is dependent on the default locale and context-sensitive
5420      * @param str source string to be performed on
5421      * @param breakiter break iterator to determine the positions in which
5422      *        the character should be title cased.
5423      * @return titlecase version of the argument string
5424      * @stable ICU 2.6
5425      */
toTitleCase(String str, BreakIterator breakiter)5426     public static String toTitleCase(String str, BreakIterator breakiter)
5427     {
5428         return toTitleCase(Locale.getDefault(), str, breakiter, 0);
5429     }
5430 
getDefaultCaseLocale()5431     private static int getDefaultCaseLocale() {
5432         return UCaseProps.getCaseLocale(Locale.getDefault());
5433     }
5434 
getCaseLocale(Locale locale)5435     private static int getCaseLocale(Locale locale) {
5436         if (locale == null) {
5437             locale = Locale.getDefault();
5438         }
5439         return UCaseProps.getCaseLocale(locale);
5440     }
5441 
getCaseLocale(ULocale locale)5442     private static int getCaseLocale(ULocale locale) {
5443         if (locale == null) {
5444             locale = ULocale.getDefault();
5445         }
5446         return UCaseProps.getCaseLocale(locale);
5447     }
5448 
5449     /**
5450      * Returns the uppercase version of the argument string.
5451      * Casing is dependent on the argument locale and context-sensitive.
5452      * @param locale which string is to be converted in
5453      * @param str source string to be performed on
5454      * @return uppercase version of the argument string
5455      * @stable ICU 2.1
5456      */
toUpperCase(Locale locale, String str)5457     public static String toUpperCase(Locale locale, String str)
5458     {
5459         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
5460     }
5461 
5462     /**
5463      * Returns the uppercase version of the argument string.
5464      * Casing is dependent on the argument locale and context-sensitive.
5465      * @param locale which string is to be converted in
5466      * @param str source string to be performed on
5467      * @return uppercase version of the argument string
5468      * @stable ICU 3.2
5469      */
toUpperCase(ULocale locale, String str)5470     public static String toUpperCase(ULocale locale, String str) {
5471         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
5472     }
5473 
5474     /**
5475      * Returns the lowercase version of the argument string.
5476      * Casing is dependent on the argument locale and context-sensitive
5477      * @param locale which string is to be converted in
5478      * @param str source string to be performed on
5479      * @return lowercase version of the argument string
5480      * @stable ICU 2.1
5481      */
toLowerCase(Locale locale, String str)5482     public static String toLowerCase(Locale locale, String str)
5483     {
5484         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
5485     }
5486 
5487     /**
5488      * Returns the lowercase version of the argument string.
5489      * Casing is dependent on the argument locale and context-sensitive
5490      * @param locale which string is to be converted in
5491      * @param str source string to be performed on
5492      * @return lowercase version of the argument string
5493      * @stable ICU 3.2
5494      */
toLowerCase(ULocale locale, String str)5495     public static String toLowerCase(ULocale locale, String str) {
5496         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
5497     }
5498 
5499     /**
5500      * <p>Returns the titlecase version of the argument string.
5501      * <p>Position for titlecasing is determined by the argument break
5502      * iterator, hence the user can customize his break iterator for
5503      * a specialized titlecasing. In this case only the forward iteration
5504      * needs to be implemented.
5505      * If the break iterator passed in is null, the default Unicode algorithm
5506      * will be used to determine the titlecase positions.
5507      *
5508      * <p>Only positions returned by the break iterator will be title cased,
5509      * character in between the positions will all be in lower case.
5510      * <p>Casing is dependent on the argument locale and context-sensitive
5511      * @param locale which string is to be converted in
5512      * @param str source string to be performed on
5513      * @param breakiter break iterator to determine the positions in which
5514      *        the character should be title cased.
5515      * @return titlecase version of the argument string
5516      * @stable ICU 2.6
5517      */
toTitleCase(Locale locale, String str, BreakIterator breakiter)5518     public static String toTitleCase(Locale locale, String str,
5519             BreakIterator breakiter)
5520     {
5521         return toTitleCase(locale, str, breakiter, 0);
5522     }
5523 
5524     /**
5525      * <p>Returns the titlecase version of the argument string.
5526      * <p>Position for titlecasing is determined by the argument break
5527      * iterator, hence the user can customize his break iterator for
5528      * a specialized titlecasing. In this case only the forward iteration
5529      * needs to be implemented.
5530      * If the break iterator passed in is null, the default Unicode algorithm
5531      * will be used to determine the titlecase positions.
5532      *
5533      * <p>Only positions returned by the break iterator will be title cased,
5534      * character in between the positions will all be in lower case.
5535      * <p>Casing is dependent on the argument locale and context-sensitive
5536      * @param locale which string is to be converted in
5537      * @param str source string to be performed on
5538      * @param titleIter break iterator to determine the positions in which
5539      *        the character should be title cased.
5540      * @return titlecase version of the argument string
5541      * @stable ICU 3.2
5542      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter)5543     public static String toTitleCase(ULocale locale, String str,
5544             BreakIterator titleIter) {
5545         return toTitleCase(locale, str, titleIter, 0);
5546     }
5547 
5548     /**
5549      * <p>Returns the titlecase version of the argument string.
5550      * <p>Position for titlecasing is determined by the argument break
5551      * iterator, hence the user can customize his break iterator for
5552      * a specialized titlecasing. In this case only the forward iteration
5553      * needs to be implemented.
5554      * If the break iterator passed in is null, the default Unicode algorithm
5555      * will be used to determine the titlecase positions.
5556      *
5557      * <p>Only positions returned by the break iterator will be title cased,
5558      * character in between the positions will all be in lower case.
5559      * <p>Casing is dependent on the argument locale and context-sensitive
5560      * @param locale which string is to be converted in
5561      * @param str source string to be performed on
5562      * @param titleIter break iterator to determine the positions in which
5563      *        the character should be title cased.
5564      * @param options bit set to modify the titlecasing operation
5565      * @return titlecase version of the argument string
5566      * @stable ICU 3.8
5567      * @see #TITLECASE_NO_LOWERCASE
5568      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5569      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5570     public static String toTitleCase(ULocale locale, String str,
5571             BreakIterator titleIter, int options) {
5572         if (titleIter == null && locale == null) {
5573             locale = ULocale.getDefault();
5574         }
5575         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
5576         titleIter.setText(str);
5577         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
5578     }
5579 
5580     /**
5581      * {@icu} <p>Returns the titlecase version of the argument string.
5582      * <p>Position for titlecasing is determined by the argument break
5583      * iterator, hence the user can customize his break iterator for
5584      * a specialized titlecasing. In this case only the forward iteration
5585      * needs to be implemented.
5586      * If the break iterator passed in is null, the default Unicode algorithm
5587      * will be used to determine the titlecase positions.
5588      *
5589      * <p>Only positions returned by the break iterator will be title cased,
5590      * character in between the positions will all be in lower case.
5591      * <p>Casing is dependent on the argument locale and context-sensitive
5592      * @param locale which string is to be converted in
5593      * @param str source string to be performed on
5594      * @param titleIter break iterator to determine the positions in which
5595      *        the character should be title cased.
5596      * @param options bit set to modify the titlecasing operation
5597      * @return titlecase version of the argument string
5598      * @see #TITLECASE_NO_LOWERCASE
5599      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5600      * @stable ICU 54
5601      */
toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5602     public static String toTitleCase(Locale locale, String str,
5603             BreakIterator titleIter,
5604             int options) {
5605         if (titleIter == null && locale == null) {
5606             locale = Locale.getDefault();
5607         }
5608         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
5609         titleIter.setText(str);
5610         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
5611     }
5612 
5613     /**
5614      * {@icu} The given character is mapped to its case folding equivalent according
5615      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5616      * folding equivalent, the character itself is returned.
5617      *
5618      * <p>This function only returns the simple, single-code point case mapping.
5619      * Full case mappings should be used whenever possible because they produce
5620      * better results by working on whole strings.
5621      * They can map to a result string with a different length as appropriate.
5622      * Full case mappings are applied by the case mapping functions
5623      * that take String parameters rather than code points (int).
5624      * See also the User Guide chapter on C/POSIX migration:
5625      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
5626      *
5627      * @param ch             the character to be converted
5628      * @param defaultmapping Indicates whether the default mappings defined in
5629      *                       CaseFolding.txt are to be used, otherwise the
5630      *                       mappings for dotted I and dotless i marked with
5631      *                       'T' in CaseFolding.txt are included.
5632      * @return               the case folding equivalent of the character, if
5633      *                       any; otherwise the character itself.
5634      * @see                  #foldCase(String, boolean)
5635      * @stable ICU 2.1
5636      */
foldCase(int ch, boolean defaultmapping)5637     public static int foldCase(int ch, boolean defaultmapping) {
5638         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5639     }
5640 
5641     /**
5642      * {@icu} The given string is mapped to its case folding equivalent according to
5643      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5644      * folding equivalent, the character itself is returned.
5645      * "Full", multiple-code point case folding mappings are returned here.
5646      * For "simple" single-code point mappings use the API
5647      * foldCase(int ch, boolean defaultmapping).
5648      * @param str            the String to be converted
5649      * @param defaultmapping Indicates whether the default mappings defined in
5650      *                       CaseFolding.txt are to be used, otherwise the
5651      *                       mappings for dotted I and dotless i marked with
5652      *                       'T' in CaseFolding.txt are included.
5653      * @return               the case folding equivalent of the character, if
5654      *                       any; otherwise the character itself.
5655      * @see                  #foldCase(int, boolean)
5656      * @stable ICU 2.1
5657      */
foldCase(String str, boolean defaultmapping)5658     public static String foldCase(String str, boolean defaultmapping) {
5659         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5660     }
5661 
5662     /**
5663      * {@icu} Option value for case folding: use default mappings defined in
5664      * CaseFolding.txt.
5665      * @stable ICU 2.6
5666      */
5667     public static final int FOLD_CASE_DEFAULT    =      0x0000;
5668     /**
5669      * {@icu} Option value for case folding:
5670      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
5671      * and dotless i appropriately for Turkic languages (tr, az).
5672      *
5673      * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
5674      * are to be included for default mappings and
5675      * excluded for the Turkic-specific mappings.
5676      *
5677      * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
5678      * are to be excluded for default mappings and
5679      * included for the Turkic-specific mappings.
5680      *
5681      * @stable ICU 2.6
5682      */
5683     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
5684 
5685     /**
5686      * {@icu} The given character is mapped to its case folding equivalent according
5687      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5688      * folding equivalent, the character itself is returned.
5689      *
5690      * <p>This function only returns the simple, single-code point case mapping.
5691      * Full case mappings should be used whenever possible because they produce
5692      * better results by working on whole strings.
5693      * They can map to a result string with a different length as appropriate.
5694      * Full case mappings are applied by the case mapping functions
5695      * that take String parameters rather than code points (int).
5696      * See also the User Guide chapter on C/POSIX migration:
5697      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
5698      *
5699      * @param ch the character to be converted
5700      * @param options A bit set for special processing. Currently the recognised options
5701      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5702      * @return the case folding equivalent of the character, if any; otherwise the
5703      * character itself.
5704      * @see #foldCase(String, boolean)
5705      * @stable ICU 2.6
5706      */
foldCase(int ch, int options)5707     public static int foldCase(int ch, int options) {
5708         return UCaseProps.INSTANCE.fold(ch, options);
5709     }
5710 
5711     /**
5712      * {@icu} The given string is mapped to its case folding equivalent according to
5713      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5714      * folding equivalent, the character itself is returned.
5715      * "Full", multiple-code point case folding mappings are returned here.
5716      * For "simple" single-code point mappings use the API
5717      * foldCase(int ch, boolean defaultmapping).
5718      * @param str the String to be converted
5719      * @param options A bit set for special processing. Currently the recognised options
5720      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5721      * @return the case folding equivalent of the character, if any; otherwise the
5722      *         character itself.
5723      * @see #foldCase(int, boolean)
5724      * @stable ICU 2.6
5725      */
foldCase(String str, int options)5726     public static final String foldCase(String str, int options) {
5727         return CaseMapImpl.fold(options, str);
5728     }
5729 
5730     /**
5731      * {@icu} Returns the numeric value of a Han character.
5732      *
5733      * <p>This returns the value of Han 'numeric' code points,
5734      * including those for zero, ten, hundred, thousand, ten thousand,
5735      * and hundred million.
5736      * This includes both the standard and 'checkwriting'
5737      * characters, the 'big circle' zero character, and the standard
5738      * zero character.
5739      *
5740      * <p>Note: The Unicode Standard has numeric values for more
5741      * Han characters recognized by this method
5742      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
5743      * and a {@link com.ibm.icu.text.NumberFormat} can be used with
5744      * a Chinese {@link com.ibm.icu.text.NumberingSystem}.
5745      *
5746      * @param ch code point to query
5747      * @return value if it is a Han 'numeric character,' otherwise return -1.
5748      * @stable ICU 2.4
5749      */
getHanNumericValue(int ch)5750     public static int getHanNumericValue(int ch)
5751     {
5752         switch(ch)
5753         {
5754         case IDEOGRAPHIC_NUMBER_ZERO_ :
5755         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
5756             return 0; // Han Zero
5757         case CJK_IDEOGRAPH_FIRST_ :
5758         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
5759             return 1; // Han One
5760         case CJK_IDEOGRAPH_SECOND_ :
5761         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
5762             return 2; // Han Two
5763         case CJK_IDEOGRAPH_THIRD_ :
5764         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
5765             return 3; // Han Three
5766         case CJK_IDEOGRAPH_FOURTH_ :
5767         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
5768             return 4; // Han Four
5769         case CJK_IDEOGRAPH_FIFTH_ :
5770         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
5771             return 5; // Han Five
5772         case CJK_IDEOGRAPH_SIXTH_ :
5773         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
5774             return 6; // Han Six
5775         case CJK_IDEOGRAPH_SEVENTH_ :
5776         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
5777             return 7; // Han Seven
5778         case CJK_IDEOGRAPH_EIGHTH_ :
5779         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
5780             return 8; // Han Eight
5781         case CJK_IDEOGRAPH_NINETH_ :
5782         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
5783             return 9; // Han Nine
5784         case CJK_IDEOGRAPH_TEN_ :
5785         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
5786             return 10;
5787         case CJK_IDEOGRAPH_HUNDRED_ :
5788         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
5789             return 100;
5790         case CJK_IDEOGRAPH_THOUSAND_ :
5791         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
5792             return 1000;
5793         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
5794             return 10000;
5795         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
5796             return 100000000;
5797         }
5798         return -1; // no value
5799     }
5800 
5801     /**
5802      * {@icu} <p>Returns an iterator for character types, iterating over codepoints.
5803      * <p>Example of use:<br>
5804      * <pre>
5805      * RangeValueIterator iterator = UCharacter.getTypeIterator();
5806      * RangeValueIterator.Element element = new RangeValueIterator.Element();
5807      * while (iterator.next(element)) {
5808      *     System.out.println("Codepoint \\u" +
5809      *                        Integer.toHexString(element.start) +
5810      *                        " to codepoint \\u" +
5811      *                        Integer.toHexString(element.limit - 1) +
5812      *                        " has the character type " +
5813      *                        element.value);
5814      * }
5815      * </pre>
5816      * @return an iterator
5817      * @stable ICU 2.6
5818      */
getTypeIterator()5819     public static RangeValueIterator getTypeIterator()
5820     {
5821         return new UCharacterTypeIterator();
5822     }
5823 
5824     private static final class UCharacterTypeIterator implements RangeValueIterator {
UCharacterTypeIterator()5825         UCharacterTypeIterator() {
5826             reset();
5827         }
5828 
5829         // implements RangeValueIterator
5830         @Override
next(Element element)5831         public boolean next(Element element) {
5832             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5833                 element.start=range.startCodePoint;
5834                 element.limit=range.endCodePoint+1;
5835                 element.value=range.value;
5836                 return true;
5837             } else {
5838                 return false;
5839             }
5840         }
5841 
5842         // implements RangeValueIterator
5843         @Override
reset()5844         public void reset() {
5845             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
5846         }
5847 
5848         private Iterator<Trie2.Range> trieIterator;
5849         private Trie2.Range range;
5850 
5851         private static final class MaskType implements Trie2.ValueMapper {
5852             // Extracts the general category ("character type") from the trie value.
5853             @Override
map(int value)5854             public int map(int value) {
5855                 return value & UCharacterProperty.TYPE_MASK;
5856             }
5857         }
5858         private static final MaskType MASK_TYPE=new MaskType();
5859     }
5860 
5861     /**
5862      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
5863      * <p>This API only gets the iterator for the modern, most up-to-date
5864      * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
5865      * for extended names use getExtendedNameIterator().
5866      * <p>Example of use:<br>
5867      * <pre>
5868      * ValueIterator iterator = UCharacter.getNameIterator();
5869      * ValueIterator.Element element = new ValueIterator.Element();
5870      * while (iterator.next(element)) {
5871      *     System.out.println("Codepoint \\u" +
5872      *                        Integer.toHexString(element.codepoint) +
5873      *                        " has the name " + (String)element.value);
5874      * }
5875      * </pre>
5876      * <p>The maximal range which the name iterator iterates is from
5877      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
5878      * @return an iterator
5879      * @stable ICU 2.6
5880      */
getNameIterator()5881     public static ValueIterator getNameIterator(){
5882         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5883                 UCharacterNameChoice.UNICODE_CHAR_NAME);
5884     }
5885 
5886     /**
5887      * {@icu} Returns an empty iterator.
5888      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
5889      * @return an empty iterator
5890      * @deprecated ICU 49
5891      * @see #getName1_0(int)
5892      */
5893     @Deprecated
getName1_0Iterator()5894     public static ValueIterator getName1_0Iterator(){
5895         return new DummyValueIterator();
5896     }
5897 
5898     private static final class DummyValueIterator implements ValueIterator {
5899         @Override
next(Element element)5900         public boolean next(Element element) { return false; }
5901         @Override
reset()5902         public void reset() {}
5903         @Override
setRange(int start, int limit)5904         public void setRange(int start, int limit) {}
5905     }
5906 
5907     /**
5908      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
5909      * <p>This API only gets the iterator for the extended names.
5910      * For modern, most up-to-date Unicode names use getNameIterator() or
5911      * for older 1.0 Unicode names use get1_0NameIterator().
5912      * <p>Example of use:<br>
5913      * <pre>
5914      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
5915      * ValueIterator.Element element = new ValueIterator.Element();
5916      * while (iterator.next(element)) {
5917      *     System.out.println("Codepoint \\u" +
5918      *                        Integer.toHexString(element.codepoint) +
5919      *                        " has the name " + (String)element.value);
5920      * }
5921      * </pre>
5922      * <p>The maximal range which the name iterator iterates is from
5923      * @return an iterator
5924      * @stable ICU 2.6
5925      */
getExtendedNameIterator()5926     public static ValueIterator getExtendedNameIterator(){
5927         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5928                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
5929     }
5930 
5931     /**
5932      * {@icu} Returns the "age" of the code point.
5933      * <p>The "age" is the Unicode version when the code point was first
5934      * designated (as a non-character or for Private Use) or assigned a
5935      * character.
5936      * <p>This can be useful to avoid emitting code points to receiving
5937      * processes that do not accept newer characters.
5938      * <p>The data is from the UCD file DerivedAge.txt.
5939      * @param ch The code point.
5940      * @return the Unicode version number
5941      * @stable ICU 2.6
5942      */
getAge(int ch)5943     public static VersionInfo getAge(int ch)
5944     {
5945         if (ch < MIN_VALUE || ch > MAX_VALUE) {
5946             throw new IllegalArgumentException("Codepoint out of bounds");
5947         }
5948         return UCharacterProperty.INSTANCE.getAge(ch);
5949     }
5950 
5951     /**
5952      * {@icu} Check a binary Unicode property for a code point.
5953      * <p>Unicode, especially in version 3.2, defines many more properties
5954      * than the original set in UnicodeData.txt.
5955      * <p>This API is intended to reflect Unicode properties as defined in
5956      * the Unicode Character Database (UCD) and Unicode Technical Reports
5957      * (UTR).
5958      * <p>For details about the properties see
5959      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
5960      * <p>For names of Unicode properties see the UCD file
5961      * PropertyAliases.txt.
5962      * <p>This API does not check the validity of the codepoint.
5963      * <p>Important: If ICU is built with UCD files from Unicode versions
5964      * below 3.2, then properties marked with "new" are not or
5965      * not fully available.
5966      * @param ch code point to test.
5967      * @param property selector constant from com.ibm.icu.lang.UProperty,
5968      *        identifies which binary property to check.
5969      * @return true or false according to the binary Unicode property value
5970      *         for ch. Also false if property is out of bounds or if the
5971      *         Unicode version does not have data for the property at all, or
5972      *         not for this code point.
5973      * @see com.ibm.icu.lang.UProperty
5974      * @see CharacterProperties#getBinaryPropertySet(int)
5975      * @stable ICU 2.6
5976      */
hasBinaryProperty(int ch, int property)5977     public static boolean hasBinaryProperty(int ch, int property)
5978     {
5979         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5980     }
5981 
5982     /**
5983      * {@icu} Returns true if the property is true for the string.
5984      * Same as {@link #hasBinaryProperty(int, int)}
5985      * if the string contains exactly one code point.
5986      *
5987      * <p>Most properties apply only to single code points.
5988      * <a href="https://www.unicode.org/reports/tr51/#Emoji_Sets">UTS #51 Unicode Emoji</a>
5989      * defines several properties of strings.
5990      *
5991      * @param s String to test.
5992      * @param property UProperty selector constant, identifies which binary property to check.
5993      *        Must be BINARY_START&lt;=which&lt;BINARY_LIMIT.
5994      * @return true or false according to the binary Unicode property value for the string.
5995      *         Also false if <code>property</code> is out of bounds or if the Unicode version
5996      *         does not have data for the property at all.
5997      *
5998      * @see com.ibm.icu.lang.UProperty
5999      * @see CharacterProperties#getBinaryPropertySet(int)
6000      * @stable ICU 70
6001      */
hasBinaryProperty(CharSequence s, int property)6002     public static boolean hasBinaryProperty(CharSequence s, int property) {
6003         int length = s.length();
6004         if (length == 1) {
6005             return hasBinaryProperty(s.charAt(0), property);  // single code point
6006         } else if (length == 2) {
6007             // first code point
6008             int c = Character.codePointAt(s, 0);
6009             if (Character.charCount(c) == length) {
6010                 return hasBinaryProperty(c, property);  // single code point
6011             }
6012         }
6013         // Only call into EmojiProps for a relevant property,
6014         // so that we not unnecessarily try to load its data file.
6015         return UProperty.BASIC_EMOJI <= property && property <= UProperty.RGI_EMOJI &&
6016             EmojiProps.INSTANCE.hasBinaryProperty(s, property);
6017     }
6018 
6019     /**
6020      * {@icu} <p>Check if a code point has the Alphabetic Unicode property.
6021      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
6022      * <p>Different from UCharacter.isLetter(ch)!
6023      * @stable ICU 2.6
6024      * @param ch codepoint to be tested
6025      */
isUAlphabetic(int ch)6026     public static boolean isUAlphabetic(int ch)
6027     {
6028         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
6029     }
6030 
6031     /**
6032      * {@icu} <p>Check if a code point has the Lowercase Unicode property.
6033      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
6034      * <p>This is different from UCharacter.isLowerCase(ch)!
6035      * @param ch codepoint to be tested
6036      * @stable ICU 2.6
6037      */
isULowercase(int ch)6038     public static boolean isULowercase(int ch)
6039     {
6040         return hasBinaryProperty(ch, UProperty.LOWERCASE);
6041     }
6042 
6043     /**
6044      * {@icu} <p>Check if a code point has the Uppercase Unicode property.
6045      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
6046      * <p>This is different from UCharacter.isUpperCase(ch)!
6047      * @param ch codepoint to be tested
6048      * @stable ICU 2.6
6049      */
isUUppercase(int ch)6050     public static boolean isUUppercase(int ch)
6051     {
6052         return hasBinaryProperty(ch, UProperty.UPPERCASE);
6053     }
6054 
6055     /**
6056      * {@icu} <p>Check if a code point has the White_Space Unicode property.
6057      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
6058      * <p>This is different from both UCharacter.isSpace(ch) and
6059      * UCharacter.isWhitespace(ch)!
6060      * @param ch codepoint to be tested
6061      * @stable ICU 2.6
6062      */
isUWhiteSpace(int ch)6063     public static boolean isUWhiteSpace(int ch)
6064     {
6065         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
6066     }
6067 
6068     /**
6069      * {@icu} Returns the property value for a Unicode property type of a code point.
6070      * Also returns binary and mask property values.
6071      * <p>Unicode, especially in version 3.2, defines many more properties than
6072      * the original set in UnicodeData.txt.
6073      * <p>The properties APIs are intended to reflect Unicode properties as
6074      * defined in the Unicode Character Database (UCD) and Unicode Technical
6075      * Reports (UTR). For details about the properties see
6076      * http://www.unicode.org/.
6077      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
6078      *
6079      * <pre>
6080      * Sample usage:
6081      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
6082      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
6083      * boolean b = (ideo == 1) ? true : false;
6084      * </pre>
6085      * @param ch code point to test.
6086      * @param type UProperty selector constant, identifies which binary
6087      *        property to check. Must be
6088      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
6089      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
6090      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
6091      * @return numeric value that is directly the property value or,
6092      *         for enumerated properties, corresponds to the numeric value of
6093      *         the enumerated constant of the respective property value type
6094      *         ({@link ECharacterCategory}, {@link ECharacterDirection},
6095      *         {@link DecompositionType}, etc.).
6096      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
6097      *         Returns a bit-mask for mask properties.
6098      *         Returns 0 if 'type' is out of bounds or if the Unicode version
6099      *         does not have data for the property at all, or not for this code
6100      *         point.
6101      * @see UProperty
6102      * @see #hasBinaryProperty
6103      * @see #getIntPropertyMinValue
6104      * @see #getIntPropertyMaxValue
6105      * @see CharacterProperties#getIntPropertyMap(int)
6106      * @see #getUnicodeVersion
6107      * @stable ICU 2.4
6108      */
getIntPropertyValue(int ch, int type)6109     public static int getIntPropertyValue(int ch, int type)
6110     {
6111         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
6112     }
6113     /**
6114      * {@icu} Returns a string version of the property value.
6115      * @param propertyEnum The property enum value.
6116      * @param codepoint The codepoint value.
6117      * @param nameChoice The choice of the name.
6118      * @return value as string
6119      * @internal
6120      * @deprecated This API is ICU internal only.
6121      */
6122     @Deprecated
6123     ///CLOVER:OFF
getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)6124     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
6125         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
6126                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
6127             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
6128                     nameChoice);
6129         }
6130         if (propertyEnum == UProperty.NUMERIC_VALUE) {
6131             return String.valueOf(getUnicodeNumericValue(codepoint));
6132         }
6133         // otherwise must be string property
6134         switch (propertyEnum) {
6135         case UProperty.AGE: return getAge(codepoint).toString();
6136         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
6137         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
6138         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
6139         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
6140         case UProperty.NAME: return getName(codepoint);
6141         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
6142         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
6143         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
6144         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
6145         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
6146         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
6147         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
6148         }
6149         throw new IllegalArgumentException("Illegal Property Enum");
6150     }
6151     ///CLOVER:ON
6152 
6153     /**
6154      * {@icu} Returns the minimum value for an integer/binary Unicode property type.
6155      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
6156      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
6157      * @param type UProperty selector constant, identifies which binary
6158      *        property to check. Must be
6159      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
6160      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
6161      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
6162      *         for a Unicode property. 0 if the property
6163      *         selector 'type' is out of range.
6164      * @see UProperty
6165      * @see #hasBinaryProperty
6166      * @see #getUnicodeVersion
6167      * @see #getIntPropertyMaxValue
6168      * @see #getIntPropertyValue
6169      * @stable ICU 2.4
6170      */
getIntPropertyMinValue(int type)6171     public static int getIntPropertyMinValue(int type){
6172 
6173         return 0; // undefined; and: all other properties have a minimum value of 0
6174     }
6175 
6176 
6177     /**
6178      * {@icu} Returns the maximum value for an integer/binary Unicode property.
6179      * Can be used together with UCharacter.getIntPropertyMinValue(int)
6180      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
6181      * Examples for min/max values (for Unicode 3.2):
6182      * <ul>
6183      * <li> UProperty.BIDI_CLASS:    0/18
6184      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
6185      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
6186      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
6187      * </ul>
6188      * For undefined UProperty constant values, min/max values will be 0/-1.
6189      * @param type UProperty selector constant, identifies which binary
6190      *        property to check. Must be
6191      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
6192      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
6193      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
6194      *         property. &lt;= 0 if the property selector 'type' is out of range.
6195      * @see UProperty
6196      * @see #hasBinaryProperty
6197      * @see #getUnicodeVersion
6198      * @see #getIntPropertyMaxValue
6199      * @see #getIntPropertyValue
6200      * @stable ICU 2.4
6201      */
getIntPropertyMaxValue(int type)6202     public static int getIntPropertyMaxValue(int type)
6203     {
6204         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
6205     }
6206 
6207     /**
6208      * Provide the java.lang.Character forDigit API, for convenience.
6209      * @stable ICU 3.0
6210      */
forDigit(int digit, int radix)6211     public static char forDigit(int digit, int radix) {
6212         return java.lang.Character.forDigit(digit, radix);
6213     }
6214 
6215     // JDK 1.5 API coverage
6216 
    /**
     * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}:
     * the lowest high (lead) surrogate code unit.
     *
     * @stable ICU 3.0
     */
    public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;

    /**
     * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}:
     * the highest high (lead) surrogate code unit.
     *
     * @stable ICU 3.0
     */
    public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;

    /**
     * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}:
     * the lowest low (trail) surrogate code unit.
     *
     * @stable ICU 3.0
     */
    public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;

    /**
     * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}:
     * the highest low (trail) surrogate code unit.
     *
     * @stable ICU 3.0
     */
    public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;

    /**
     * Constant U+D800, same as {@link Character#MIN_SURROGATE}:
     * the lowest surrogate code unit of either kind.
     *
     * @stable ICU 3.0
     */
    public static final char MIN_SURROGATE = Character.MIN_SURROGATE;

    /**
     * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}:
     * the highest surrogate code unit of either kind.
     *
     * @stable ICU 3.0
     */
    public static final char MAX_SURROGATE = Character.MAX_SURROGATE;

    /**
     * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}:
     * the first code point that needs two chars.
     *
     * @stable ICU 3.0
     */
    public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;

    /**
     * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}:
     * the highest Unicode code point.
     *
     * @stable ICU 3.0
     */
    public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;

    /**
     * Constant U+0000, same as {@link Character#MIN_CODE_POINT}:
     * the lowest Unicode code point.
     *
     * @stable ICU 3.0
     */
    public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
6279 
6280     /**
6281      * Equivalent to {@link Character#isValidCodePoint}.
6282      *
6283      * @param cp the code point to check
6284      * @return true if cp is a valid code point
6285      * @stable ICU 3.0
6286      */
isValidCodePoint(int cp)6287     public static final boolean isValidCodePoint(int cp) {
6288         return cp >= 0 && cp <= MAX_CODE_POINT;
6289     }
6290 
6291     /**
6292      * Same as {@link Character#isSupplementaryCodePoint}.
6293      *
6294      * @param cp the code point to check
6295      * @return true if cp is a supplementary code point
6296      * @stable ICU 3.0
6297      */
isSupplementaryCodePoint(int cp)6298     public static final boolean isSupplementaryCodePoint(int cp) {
6299         return Character.isSupplementaryCodePoint(cp);
6300     }
6301 
6302     /**
6303      * Same as {@link Character#isHighSurrogate},
6304      * except that the ICU version accepts <code>int</code> for code points.
6305      *
6306      * @param codePoint the code point to check
6307      *        (In ICU 3.0-69 the type of this parameter was <code>char</code>.)
6308      * @return true if codePoint is a high (lead) surrogate
6309      * @stable ICU 70
6310      */
isHighSurrogate(int codePoint)6311     public static boolean isHighSurrogate(int codePoint) {
6312         return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
6313     }
6314 
6315     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
6316     /**
6317      * Same as {@link Character#isHighSurrogate},
6318      *
6319      * @param ch the char to check
6320      * @return true if ch is a high (lead) surrogate
6321      * @stable ICU 3.0
6322      */
isHighSurrogate(char ch)6323     public static boolean isHighSurrogate(char ch) {
6324         return isHighSurrogate((int) ch);
6325     }
6326     // END Android patch: Keep the `char` version on Android. See ICU-21655
6327 
6328     /**
6329      * Same as {@link Character#isLowSurrogate},
6330      * except that the ICU version accepts <code>int</code> for code points.
6331      *
6332      * @param codePoint the code point to check
6333      *        (In ICU 3.0-69 the type of this parameter was <code>char</code>.)
6334      * @return true if codePoint is a low (trail) surrogate
6335      * @stable ICU 70
6336      */
isLowSurrogate(int codePoint)6337     public static boolean isLowSurrogate(int codePoint) {
6338         return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
6339     }
6340 
6341     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
6342     /**
6343      * Same as {@link Character#isLowSurrogate},
6344      *
6345      * @param ch the char to check
6346      * @return true if ch is a low (trail) surrogate
6347      * @stable ICU 3.0
6348      */
isLowSurrogate(char ch)6349     public static boolean isLowSurrogate(char ch) {
6350         return isLowSurrogate((int) ch);
6351     }
6352     // END Android patch: Keep the `char` version on Android. See ICU-21655
6353 
6354     /**
6355      * Same as {@link Character#isSurrogatePair},
6356      * except that the ICU version accepts <code>int</code> for code points.
6357      *
6358      * @param high the high (lead) unit
6359      *        (In ICU 3.0-69 the type of both parameters was <code>char</code>.)
6360      * @param low the low (trail) unit
6361      * @return true if high, low form a surrogate pair
6362      * @stable ICU 70
6363      */
isSurrogatePair(int high, int low)6364     public static final boolean isSurrogatePair(int high, int low) {
6365         return isHighSurrogate(high) && isLowSurrogate(low);
6366     }
6367 
6368     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
6369     /**
6370      * Same as {@link Character#isSurrogatePair}.
6371      *
6372      * @param high the high (lead) char
6373      * @param low the low (trail) char
6374      * @return true if high, low form a surrogate pair
6375      * @stable ICU 3.0
6376      */
isSurrogatePair(char high, char low)6377     public static final boolean isSurrogatePair(char high, char low) {
6378         return isSurrogatePair((int) high, (int) low);
6379     }
6380     // END Android patch: Keep the `char` version on Android. See ICU-21655
6381 
6382     /**
6383      * Same as {@link Character#charCount}.
6384      * Returns the number of chars needed to represent the code point (1 or 2).
6385      * This does not check the code point for validity.
6386      *
6387      * @param cp the code point to check
6388      * @return the number of chars needed to represent the code point
6389      * @stable ICU 3.0
6390      */
charCount(int cp)6391     public static int charCount(int cp) {
6392         return Character.charCount(cp);
6393     }
6394 
6395     /**
6396      * Same as {@link Character#toCodePoint},
6397      * except that the ICU version accepts <code>int</code> for code points.
6398      * Returns the code point represented by the two surrogate code units.
6399      * This does not check the surrogate pair for validity.
6400      *
6401      * @param high the high (lead) surrogate
6402      *        (In ICU 3.0-69 the type of both parameters was <code>char</code>.)
6403      * @param low the low (trail) surrogate
6404      * @return the code point formed by the surrogate pair
6405      * @stable ICU 70
6406      * @see #getCodePoint(int, int)
6407      */
toCodePoint(int high, int low)6408     public static final int toCodePoint(int high, int low) {
6409         // see ICU4C U16_GET_SUPPLEMENTARY()
6410         return (high << 10) + low - U16_SURROGATE_OFFSET;
6411     }
6412 
6413     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
6414     /**
6415      * Same as {@link Character#toCodePoint}.
6416      * Returns the code point represented by the two surrogate code units.
6417      * This does not check the surrogate pair for validity.
6418      *
6419      * @param high the high (lead) surrogate
6420      * @param low the low (trail) surrogate
6421      * @return the code point formed by the surrogate pair
6422      * @stable ICU 3.0
6423      */
toCodePoint(char high, char low)6424     public static final int toCodePoint(char high, char low) {
6425         return toCodePoint((int) high, (int) low);
6426     }
6427     // END Android patch: Keep the `char` version on Android. See ICU-21655
6428 
6429     /**
6430      * Same as {@link Character#codePointAt(CharSequence, int)}.
6431      * Returns the code point at index.
6432      * This examines only the characters at index and index+1.
6433      *
6434      * @param seq the characters to check
6435      * @param index the index of the first or only char forming the code point
6436      * @return the code point at the index
6437      * @stable ICU 3.0
6438      */
codePointAt(CharSequence seq, int index)6439     public static final int codePointAt(CharSequence seq, int index) {
6440         char c1 = seq.charAt(index++);
6441         if (isHighSurrogate(c1)) {
6442             if (index < seq.length()) {
6443                 char c2 = seq.charAt(index);
6444                 if (isLowSurrogate(c2)) {
6445                     return toCodePoint(c1, c2);
6446                 }
6447             }
6448         }
6449         return c1;
6450     }
6451 
6452     /**
6453      * Same as {@link Character#codePointAt(char[], int)}.
6454      * Returns the code point at index.
6455      * This examines only the characters at index and index+1.
6456      *
6457      * @param text the characters to check
6458      * @param index the index of the first or only char forming the code point
6459      * @return the code point at the index
6460      * @stable ICU 3.0
6461      */
codePointAt(char[] text, int index)6462     public static final int codePointAt(char[] text, int index) {
6463         char c1 = text[index++];
6464         if (isHighSurrogate(c1)) {
6465             if (index < text.length) {
6466                 char c2 = text[index];
6467                 if (isLowSurrogate(c2)) {
6468                     return toCodePoint(c1, c2);
6469                 }
6470             }
6471         }
6472         return c1;
6473     }
6474 
6475     /**
6476      * Same as {@link Character#codePointAt(char[], int, int)}.
6477      * Returns the code point at index.
6478      * This examines only the characters at index and index+1.
6479      *
6480      * @param text the characters to check
6481      * @param index the index of the first or only char forming the code point
6482      * @param limit the limit of the valid text
6483      * @return the code point at the index
6484      * @stable ICU 3.0
6485      */
codePointAt(char[] text, int index, int limit)6486     public static final int codePointAt(char[] text, int index, int limit) {
6487         if (index >= limit || limit > text.length) {
6488             throw new IndexOutOfBoundsException();
6489         }
6490         char c1 = text[index++];
6491         if (isHighSurrogate(c1)) {
6492             if (index < limit) {
6493                 char c2 = text[index];
6494                 if (isLowSurrogate(c2)) {
6495                     return toCodePoint(c1, c2);
6496                 }
6497             }
6498         }
6499         return c1;
6500     }
6501 
6502     /**
6503      * Same as {@link Character#codePointBefore(CharSequence, int)}.
6504      * Return the code point before index.
6505      * This examines only the characters at index-1 and index-2.
6506      *
6507      * @param seq the characters to check
6508      * @param index the index after the last or only char forming the code point
6509      * @return the code point before the index
6510      * @stable ICU 3.0
6511      */
codePointBefore(CharSequence seq, int index)6512     public static final int codePointBefore(CharSequence seq, int index) {
6513         char c2 = seq.charAt(--index);
6514         if (isLowSurrogate(c2)) {
6515             if (index > 0) {
6516                 char c1 = seq.charAt(--index);
6517                 if (isHighSurrogate(c1)) {
6518                     return toCodePoint(c1, c2);
6519                 }
6520             }
6521         }
6522         return c2;
6523     }
6524 
6525     /**
6526      * Same as {@link Character#codePointBefore(char[], int)}.
6527      * Returns the code point before index.
6528      * This examines only the characters at index-1 and index-2.
6529      *
6530      * @param text the characters to check
6531      * @param index the index after the last or only char forming the code point
6532      * @return the code point before the index
6533      * @stable ICU 3.0
6534      */
codePointBefore(char[] text, int index)6535     public static final int codePointBefore(char[] text, int index) {
6536         char c2 = text[--index];
6537         if (isLowSurrogate(c2)) {
6538             if (index > 0) {
6539                 char c1 = text[--index];
6540                 if (isHighSurrogate(c1)) {
6541                     return toCodePoint(c1, c2);
6542                 }
6543             }
6544         }
6545         return c2;
6546     }
6547 
6548     /**
6549      * Same as {@link Character#codePointBefore(char[], int, int)}.
6550      * Return the code point before index.
6551      * This examines only the characters at index-1 and index-2.
6552      *
6553      * @param text the characters to check
6554      * @param index the index after the last or only char forming the code point
6555      * @param limit the start of the valid text
6556      * @return the code point before the index
6557      * @stable ICU 3.0
6558      */
codePointBefore(char[] text, int index, int limit)6559     public static final int codePointBefore(char[] text, int index, int limit) {
6560         if (index <= limit || limit < 0) {
6561             throw new IndexOutOfBoundsException();
6562         }
6563         char c2 = text[--index];
6564         if (isLowSurrogate(c2)) {
6565             if (index > limit) {
6566                 char c1 = text[--index];
6567                 if (isHighSurrogate(c1)) {
6568                     return toCodePoint(c1, c2);
6569                 }
6570             }
6571         }
6572         return c2;
6573     }
6574 
6575     /**
6576      * Same as {@link Character#toChars(int, char[], int)}.
6577      * Writes the chars representing the
6578      * code point into the destination at the given index.
6579      *
6580      * @param cp the code point to convert
6581      * @param dst the destination array into which to put the char(s) representing the code point
6582      * @param dstIndex the index at which to put the first (or only) char
6583      * @return the count of the number of chars written (1 or 2)
6584      * @throws IllegalArgumentException if cp is not a valid code point
6585      * @stable ICU 3.0
6586      */
toChars(int cp, char[] dst, int dstIndex)6587     public static final int toChars(int cp, char[] dst, int dstIndex) {
6588         return Character.toChars(cp, dst, dstIndex);
6589     }
6590 
6591     /**
6592      * Same as {@link Character#toChars(int)}.
6593      * Returns a char array representing the code point.
6594      *
6595      * @param cp the code point to convert
6596      * @return an array containing the char(s) representing the code point
6597      * @throws IllegalArgumentException if cp is not a valid code point
6598      * @stable ICU 3.0
6599      */
toChars(int cp)6600     public static final char[] toChars(int cp) {
6601         return Character.toChars(cp);
6602     }
6603 
6604     /**
6605      * Equivalent to the {@link Character#getDirectionality(char)} method, for
6606      * convenience. Returns a byte representing the directionality of the
6607      * character.
6608      *
6609      * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns
6610      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
6611      *
6612      * {@icunote} The return value must be tested using the constants defined in {@link
6613      * UCharacterDirection} and its interface {@link
6614      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
6615      * defined by <code>java.lang.Character</code>.
6616      * @param cp the code point to check
6617      * @return the directionality of the code point
6618      * @see #getDirection
6619      * @stable ICU 3.0
6620      */
getDirectionality(int cp)6621     public static byte getDirectionality(int cp)
6622     {
6623         return (byte)getDirection(cp);
6624     }
6625 
6626     /**
6627      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
6628      * method, for convenience.  Counts the number of code points in the range
6629      * of text.
6630      * @param text the characters to check
6631      * @param start the start of the range
6632      * @param limit the limit of the range
6633      * @return the number of code points in the range
6634      * @stable ICU 3.0
6635      */
codePointCount(CharSequence text, int start, int limit)6636     public static int codePointCount(CharSequence text, int start, int limit) {
6637         if (start < 0 || limit < start || limit > text.length()) {
6638             throw new IndexOutOfBoundsException("start (" + start +
6639                     ") or limit (" + limit +
6640                     ") invalid or out of range 0, " + text.length());
6641         }
6642 
6643         int len = limit - start;
6644         while (limit > start) {
6645             char ch = text.charAt(--limit);
6646             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6647                 ch = text.charAt(--limit);
6648                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6649                     --len;
6650                     break;
6651                 }
6652             }
6653         }
6654         return len;
6655     }
6656 
6657     /**
6658      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
6659      * convenience. Counts the number of code points in the range of text.
6660      * @param text the characters to check
6661      * @param start the start of the range
6662      * @param limit the limit of the range
6663      * @return the number of code points in the range
6664      * @stable ICU 3.0
6665      */
codePointCount(char[] text, int start, int limit)6666     public static int codePointCount(char[] text, int start, int limit) {
6667         if (start < 0 || limit < start || limit > text.length) {
6668             throw new IndexOutOfBoundsException("start (" + start +
6669                     ") or limit (" + limit +
6670                     ") invalid or out of range 0, " + text.length);
6671         }
6672 
6673         int len = limit - start;
6674         while (limit > start) {
6675             char ch = text[--limit];
6676             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6677                 ch = text[--limit];
6678                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6679                     --len;
6680                     break;
6681                 }
6682             }
6683         }
6684         return len;
6685     }
6686 
6687     /**
6688      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
6689      * method, for convenience.  Adjusts the char index by a code point offset.
6690      * @param text the characters to check
6691      * @param index the index to adjust
6692      * @param codePointOffset the number of code points by which to offset the index
6693      * @return the adjusted index
6694      * @stable ICU 3.0
6695      */
offsetByCodePoints(CharSequence text, int index, int codePointOffset)6696     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
6697         if (index < 0 || index > text.length()) {
6698             throw new IndexOutOfBoundsException("index ( " + index +
6699                     ") out of range 0, " + text.length());
6700         }
6701 
6702         if (codePointOffset < 0) {
6703             while (++codePointOffset <= 0) {
6704                 char ch = text.charAt(--index);
6705                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
6706                     ch = text.charAt(--index);
6707                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6708                         if (++codePointOffset > 0) {
6709                             return index+1;
6710                         }
6711                     }
6712                 }
6713             }
6714         } else {
6715             int limit = text.length();
6716             while (--codePointOffset >= 0) {
6717                 char ch = text.charAt(index++);
6718                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6719                     ch = text.charAt(index++);
6720                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6721                         if (--codePointOffset < 0) {
6722                             return index-1;
6723                         }
6724                     }
6725                 }
6726             }
6727         }
6728 
6729         return index;
6730     }
6731 
6732     /**
6733      * Equivalent to the
6734      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
6735      * method, for convenience.  Adjusts the char index by a code point offset.
6736      * @param text the characters to check
6737      * @param start the start of the range to check
6738      * @param count the length of the range to check
6739      * @param index the index to adjust
6740      * @param codePointOffset the number of code points by which to offset the index
6741      * @return the adjusted index
6742      * @stable ICU 3.0
6743      */
offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6744     public static int offsetByCodePoints(char[] text, int start, int count, int index,
6745             int codePointOffset) {
6746         int limit = start + count;
6747         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
6748             throw new IndexOutOfBoundsException("index ( " + index +
6749                     ") out of range " + start +
6750                     ", " + limit +
6751                     " in array 0, " + text.length);
6752         }
6753 
6754         if (codePointOffset < 0) {
6755             while (++codePointOffset <= 0) {
6756                 char ch = text[--index];
6757                 if (index < start) {
6758                     throw new IndexOutOfBoundsException("index ( " + index +
6759                             ") < start (" + start +
6760                             ")");
6761                 }
6762                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
6763                     ch = text[--index];
6764                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6765                         if (++codePointOffset > 0) {
6766                             return index+1;
6767                         }
6768                     }
6769                 }
6770             }
6771         } else {
6772             while (--codePointOffset >= 0) {
6773                 char ch = text[index++];
6774                 if (index > limit) {
6775                     throw new IndexOutOfBoundsException("index ( " + index +
6776                             ") > limit (" + limit +
6777                             ")");
6778                 }
6779                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6780                     ch = text[index++];
6781                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6782                         if (--codePointOffset < 0) {
6783                             return index-1;
6784                         }
6785                     }
6786                 }
6787             }
6788         }
6789 
6790         return index;
6791     }
6792 
6793     // private variables -------------------------------------------------
6794 
6795     /**
6796      * To get the last character out from a data type
6797      */
6798     private static final int LAST_CHAR_MASK_ = 0xFFFF;
6799 
6800     //    /**
6801     //     * To get the last byte out from a data type
6802     //     */
6803     //    private static final int LAST_BYTE_MASK_ = 0xFF;
6804     //
6805     //    /**
6806     //     * Shift 16 bits
6807     //     */
6808     //    private static final int SHIFT_16_ = 16;
6809     //
6810     //    /**
6811     //     * Shift 24 bits
6812     //     */
6813     //    private static final int SHIFT_24_ = 24;
6814     //
6815     //    /**
6816     //     * Decimal radix
6817     //     */
6818     //    private static final int DECIMAL_RADIX_ = 10;
6819 
6820     /**
6821      * No break space code point
6822      */
6823     private static final int NO_BREAK_SPACE_ = 0xA0;
6824 
6825     /**
6826      * Figure space code point
6827      */
6828     private static final int FIGURE_SPACE_ = 0x2007;
6829 
6830     /**
6831      * Narrow no break space code point
6832      */
6833     private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
6834 
6835     /**
6836      * Ideographic number zero code point
6837      */
6838     private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
6839 
6840     /**
6841      * CJK Ideograph, First code point
6842      */
6843     private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
6844 
6845     /**
6846      * CJK Ideograph, Second code point
6847      */
6848     private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
6849 
6850     /**
6851      * CJK Ideograph, Third code point
6852      */
6853     private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
6854 
6855     /**
6856      * CJK Ideograph, Fourth code point
6857      */
6858     private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;
6859 
6860     /**
6861      * CJK Ideograph, FIFTH code point
6862      */
6863     private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
6864 
6865     /**
6866      * CJK Ideograph, Sixth code point
6867      */
6868     private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
6869 
6870     /**
6871      * CJK Ideograph, Seventh code point
6872      */
6873     private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
6874 
6875     /**
6876      * CJK Ideograph, Eighth code point
6877      */
6878     private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
6879 
6880     /**
6881      * CJK Ideograph, Nineth code point
6882      */
6883     private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
6884 
6885     /**
6886      * Application Program command code point
6887      */
6888     private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;
6889 
6890     /**
6891      * Unit separator code point
6892      */
6893     private static final int UNIT_SEPARATOR_ = 0x001F;
6894 
6895     /**
6896      * Delete code point
6897      */
6898     private static final int DELETE_ = 0x007F;
6899 
6900     /**
6901      * Han digit characters
6902      */
6903     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
6904     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
6905     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
6906     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
6907     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
6908     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
6909     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
6910     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
6911     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
6912     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
6913     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
6914     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
6915     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
6916     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
6917     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
6918     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
6919     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
6920     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6921 
6922     // private constructor -----------------------------------------------
6923     ///CLOVER:OFF
6924     /**
6925      * Private constructor to prevent instantiation
6926      */
UCharacter()6927     private UCharacter()
6928     {
6929     }
6930     ///CLOVER:ON
6931 }
6932