• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4  *******************************************************************************
5  * Copyright (C) 1996-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.lang;
11 
12 import java.lang.ref.SoftReference;
13 import java.util.EnumSet;
14 import java.util.HashMap;
15 import java.util.Iterator;
16 import java.util.Locale;
17 import java.util.Map;
18 
19 import com.ibm.icu.impl.CaseMapImpl;
20 import com.ibm.icu.impl.EmojiProps;
21 import com.ibm.icu.impl.IllegalIcuArgumentException;
22 import com.ibm.icu.impl.Trie2;
23 import com.ibm.icu.impl.UBiDiProps;
24 import com.ibm.icu.impl.UCaseProps;
25 import com.ibm.icu.impl.UCharacterName;
26 import com.ibm.icu.impl.UCharacterNameChoice;
27 import com.ibm.icu.impl.UCharacterProperty;
28 import com.ibm.icu.impl.UCharacterUtility;
29 import com.ibm.icu.impl.UPropertyAliases;
30 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
31 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
32 import com.ibm.icu.text.BreakIterator;
33 import com.ibm.icu.text.Normalizer2;
34 import com.ibm.icu.util.RangeValueIterator;
35 import com.ibm.icu.util.ULocale;
36 import com.ibm.icu.util.ValueIterator;
37 import com.ibm.icu.util.VersionInfo;
38 
39 /**
40  * {@icuenhanced java.lang.Character}.{@icu _usage_}
41  *
42  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
43  * These extensions provide support for more Unicode properties.
44  * Each ICU release supports the latest version of Unicode available at that time.
45  *
46  * <p>For some time before Java 5 added support for supplementary Unicode code points,
47  * the ICU UCharacter class and many other ICU classes already supported them.
48  * Some UCharacter methods and constants were widened slightly differently than
49  * how the Character class methods and constants were widened later.
50  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
51  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
52  *
53  * <p>Code points are represented in these APIs using ints. While it would be
54  * more convenient in Java to have a separate primitive datatype for them,
55  * ints suffice in the meantime.
56  *
57  * <p>To use this class please add the jar file name icu4j.jar to the
58  * class path, since it contains data files which supply the information used
59  * by this file.<br>
60  * E.g. In Windows <br>
61  * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
62  * Otherwise, another method would be to copy the files uprops.dat and
63  * unames.icu from the icu4j source subdirectory
64  * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
65  * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
66  *
67  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
68  * properties, the main differences between UCharacter and Character are:
69  * <ul>
70  * <li> UCharacter is not designed to be a char wrapper and does not have
71  *      APIs that involve management of that single char.<br>
72  *      These include:
73  *      <ul>
74  *        <li> char charValue(),
75  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
76  *      </ul>
77  * <li> UCharacter does not include Character APIs that are deprecated, nor
78  *      does it include the Java-specific character information, such as
79  *      boolean isJavaIdentifierPart(char ch).
80  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
81  *      values '10' - '35'. UCharacter also does this in digit and
82  *      getNumericValue, to adhere to the java semantics of these
83  *      methods.  New methods unicodeDigit, and
84  *      getUnicodeNumericValue do not treat the above code points
85  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
86  * </ul>
87  * <p>
88  * Further detail on differences can be determined using the program
89  *        <a href=
90  * "https://github.com/unicode-org/icu/blob/main/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
91  *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
92  * <p>
93  * In addition to Java compatibility functions, which calculate derived properties,
94  * this API provides low-level access to the Unicode Character Database.
95  * <p>
96  * Unicode assigns each code point (not just assigned characters) values for
97  * many properties.
98  * Most of them are simple boolean flags, or constants from a small enumerated list.
99  * For some properties, values are strings or other relatively more complex types.
100  * <p>
101  * For more information see
102  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
103  * (http://www.unicode.org/ucd/)
104  * and the <a href="https://unicode-org.github.io/icu/userguide/strings/properties">ICU
105  * User Guide chapter on Properties</a>
106  * (https://unicode-org.github.io/icu/userguide/strings/properties).
107  * <p>
108  * There are also functions that provide easy migration from C/POSIX functions
109  * like isblank(). Their use is generally discouraged because the C/POSIX
110  * standards do not define their semantics beyond the ASCII range, which means
111  * that different implementations exhibit very different behavior.
112  * Instead, Unicode properties should be used directly.
113  * <p>
114  * There are also only a few, broad C/POSIX character classes, and they tend
115  * to be used for conflicting purposes. For example, the "isalpha()" class
116  * is sometimes used to determine word boundaries, while a more sophisticated
117  * approach would at least distinguish initial letters from continuation
118  * characters (the latter including combining marks).
119  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
120  * Another example: There is no "istitle()" class for titlecase characters.
121  * <p>
122  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
123  * ICU implements them according to the Standard Recommendations in
124  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
125  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
126  * <p>
127  * API access for C/POSIX character classes is as follows:
128  * <pre>{@code
129  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
130  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
131  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
132  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
133  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
134  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
135  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
136  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
137  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
138  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
139  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
140  * - cntrl:     getType(c)==CONTROL
141  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
142  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
143  * <p>
144  * The C/POSIX character classes are also available in UnicodeSet patterns,
145  * using patterns like [:graph:] or \p{graph}.
146  *
147  * <p>{@icunote} There are several ICU (and Java) whitespace functions.
148  * Comparison:<ul>
149  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
150  *       most of general categories "Z" (separators) + most whitespace ISO controls
151  *       (including no-break spaces, but excluding IS1..IS4)
152  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
153  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
154  *
155  * <p>
156  * This class is not subclassable.
157  *
158  * @author Syn Wee Quek
159  * @stable ICU 2.1
160  * @see com.ibm.icu.lang.UCharacterEnums
161  */
162 
163 public final class UCharacter implements ECharacterCategory, ECharacterDirection
164 {
165     /**
166      * Lead surrogate bitmask
167      */
168     private static final int LEAD_SURROGATE_BITMASK = 0xFFFFFC00;
169 
170     /**
171      * Trail surrogate bitmask
172      */
173     private static final int TRAIL_SURROGATE_BITMASK = 0xFFFFFC00;
174 
175     /**
176      * Lead surrogate bits
177      */
178     private static final int LEAD_SURROGATE_BITS = 0xD800;
179 
180     /**
181      * Trail surrogate bits
182      */
183     private static final int TRAIL_SURROGATE_BITS = 0xDC00;
184 
185     private static final int U16_SURROGATE_OFFSET = ((0xd800 << 10) + 0xdc00 - 0x10000);
186 
187     // public inner classes ----------------------------------------------
188 
189     /**
190      * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_}
191      *
192      * A family of character subsets representing the character blocks in the
193      * Unicode specification, generated from Unicode Data file Blocks.txt.
194      * Character blocks generally define characters used for a specific script
195      * or purpose. A character is contained by at most one Unicode block.
196      *
197      * {@icunote} All fields named XXX_ID are specific to ICU.
198      *
199      * @stable ICU 2.4
200      */
201     public static final class UnicodeBlock extends Character.Subset
202     {
203         // block id corresponding to icu4c -----------------------------------
204 
205         /**
206          * @stable ICU 2.4
207          */
208         public static final int INVALID_CODE_ID = -1;
209         /**
210          * @stable ICU 2.4
211          */
212         public static final int BASIC_LATIN_ID = 1;
213         /**
214          * @stable ICU 2.4
215          */
216         public static final int LATIN_1_SUPPLEMENT_ID = 2;
217         /**
218          * @stable ICU 2.4
219          */
220         public static final int LATIN_EXTENDED_A_ID = 3;
221         /**
222          * @stable ICU 2.4
223          */
224         public static final int LATIN_EXTENDED_B_ID = 4;
225         /**
226          * @stable ICU 2.4
227          */
228         public static final int IPA_EXTENSIONS_ID = 5;
229         /**
230          * @stable ICU 2.4
231          */
232         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
233         /**
234          * @stable ICU 2.4
235          */
236         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
237         /**
238          * Unicode 3.2 renames this block to "Greek and Coptic".
239          * @stable ICU 2.4
240          */
241         public static final int GREEK_ID = 8;
242         /**
243          * @stable ICU 2.4
244          */
245         public static final int CYRILLIC_ID = 9;
246         /**
247          * @stable ICU 2.4
248          */
249         public static final int ARMENIAN_ID = 10;
250         /**
251          * @stable ICU 2.4
252          */
253         public static final int HEBREW_ID = 11;
254         /**
255          * @stable ICU 2.4
256          */
257         public static final int ARABIC_ID = 12;
258         /**
259          * @stable ICU 2.4
260          */
261         public static final int SYRIAC_ID = 13;
262         /**
263          * @stable ICU 2.4
264          */
265         public static final int THAANA_ID = 14;
266         /**
267          * @stable ICU 2.4
268          */
269         public static final int DEVANAGARI_ID = 15;
270         /**
271          * @stable ICU 2.4
272          */
273         public static final int BENGALI_ID = 16;
274         /**
275          * @stable ICU 2.4
276          */
277         public static final int GURMUKHI_ID = 17;
278         /**
279          * @stable ICU 2.4
280          */
281         public static final int GUJARATI_ID = 18;
282         /**
283          * @stable ICU 2.4
284          */
285         public static final int ORIYA_ID = 19;
286         /**
287          * @stable ICU 2.4
288          */
289         public static final int TAMIL_ID = 20;
290         /**
291          * @stable ICU 2.4
292          */
293         public static final int TELUGU_ID = 21;
294         /**
295          * @stable ICU 2.4
296          */
297         public static final int KANNADA_ID = 22;
298         /**
299          * @stable ICU 2.4
300          */
301         public static final int MALAYALAM_ID = 23;
302         /**
303          * @stable ICU 2.4
304          */
305         public static final int SINHALA_ID = 24;
306         /**
307          * @stable ICU 2.4
308          */
309         public static final int THAI_ID = 25;
310         /**
311          * @stable ICU 2.4
312          */
313         public static final int LAO_ID = 26;
314         /**
315          * @stable ICU 2.4
316          */
317         public static final int TIBETAN_ID = 27;
318         /**
319          * @stable ICU 2.4
320          */
321         public static final int MYANMAR_ID = 28;
322         /**
323          * @stable ICU 2.4
324          */
325         public static final int GEORGIAN_ID = 29;
326         /**
327          * @stable ICU 2.4
328          */
329         public static final int HANGUL_JAMO_ID = 30;
330         /**
331          * @stable ICU 2.4
332          */
333         public static final int ETHIOPIC_ID = 31;
334         /**
335          * @stable ICU 2.4
336          */
337         public static final int CHEROKEE_ID = 32;
338         /**
339          * @stable ICU 2.4
340          */
341         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
342         /**
343          * @stable ICU 2.4
344          */
345         public static final int OGHAM_ID = 34;
346         /**
347          * @stable ICU 2.4
348          */
349         public static final int RUNIC_ID = 35;
350         /**
351          * @stable ICU 2.4
352          */
353         public static final int KHMER_ID = 36;
354         /**
355          * @stable ICU 2.4
356          */
357         public static final int MONGOLIAN_ID = 37;
358         /**
359          * @stable ICU 2.4
360          */
361         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
362         /**
363          * @stable ICU 2.4
364          */
365         public static final int GREEK_EXTENDED_ID = 39;
366         /**
367          * @stable ICU 2.4
368          */
369         public static final int GENERAL_PUNCTUATION_ID = 40;
370         /**
371          * @stable ICU 2.4
372          */
373         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
374         /**
375          * @stable ICU 2.4
376          */
377         public static final int CURRENCY_SYMBOLS_ID = 42;
378         /**
379          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
380          * Symbols".
381          * @stable ICU 2.4
382          */
383         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
384         /**
385          * @stable ICU 2.4
386          */
387         public static final int LETTERLIKE_SYMBOLS_ID = 44;
388         /**
389          * @stable ICU 2.4
390          */
391         public static final int NUMBER_FORMS_ID = 45;
392         /**
393          * @stable ICU 2.4
394          */
395         public static final int ARROWS_ID = 46;
396         /**
397          * @stable ICU 2.4
398          */
399         public static final int MATHEMATICAL_OPERATORS_ID = 47;
400         /**
401          * @stable ICU 2.4
402          */
403         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
404         /**
405          * @stable ICU 2.4
406          */
407         public static final int CONTROL_PICTURES_ID = 49;
408         /**
409          * @stable ICU 2.4
410          */
411         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
412         /**
413          * @stable ICU 2.4
414          */
415         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
416         /**
417          * @stable ICU 2.4
418          */
419         public static final int BOX_DRAWING_ID = 52;
420         /**
421          * @stable ICU 2.4
422          */
423         public static final int BLOCK_ELEMENTS_ID = 53;
424         /**
425          * @stable ICU 2.4
426          */
427         public static final int GEOMETRIC_SHAPES_ID = 54;
428         /**
429          * @stable ICU 2.4
430          */
431         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
432         /**
433          * @stable ICU 2.4
434          */
435         public static final int DINGBATS_ID = 56;
436         /**
437          * @stable ICU 2.4
438          */
439         public static final int BRAILLE_PATTERNS_ID = 57;
440         /**
441          * @stable ICU 2.4
442          */
443         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
444         /**
445          * @stable ICU 2.4
446          */
447         public static final int KANGXI_RADICALS_ID = 59;
448         /**
449          * @stable ICU 2.4
450          */
451         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
452         /**
453          * @stable ICU 2.4
454          */
455         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
456         /**
457          * @stable ICU 2.4
458          */
459         public static final int HIRAGANA_ID = 62;
460         /**
461          * @stable ICU 2.4
462          */
463         public static final int KATAKANA_ID = 63;
464         /**
465          * @stable ICU 2.4
466          */
467         public static final int BOPOMOFO_ID = 64;
468         /**
469          * @stable ICU 2.4
470          */
471         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
472         /**
473          * @stable ICU 2.4
474          */
475         public static final int KANBUN_ID = 66;
476         /**
477          * @stable ICU 2.4
478          */
479         public static final int BOPOMOFO_EXTENDED_ID = 67;
480         /**
481          * @stable ICU 2.4
482          */
483         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
484         /**
485          * @stable ICU 2.4
486          */
487         public static final int CJK_COMPATIBILITY_ID = 69;
488         /**
489          * @stable ICU 2.4
490          */
491         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
492         /**
493          * @stable ICU 2.4
494          */
495         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
496         /**
497          * @stable ICU 2.4
498          */
499         public static final int YI_SYLLABLES_ID = 72;
500         /**
501          * @stable ICU 2.4
502          */
503         public static final int YI_RADICALS_ID = 73;
504         /**
505          * @stable ICU 2.4
506          */
507         public static final int HANGUL_SYLLABLES_ID = 74;
508         /**
509          * @stable ICU 2.4
510          */
511         public static final int HIGH_SURROGATES_ID = 75;
512         /**
513          * @stable ICU 2.4
514          */
515         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
516         /**
517          * @stable ICU 2.4
518          */
519         public static final int LOW_SURROGATES_ID = 77;
520         /**
521          * Same as public static final int PRIVATE_USE.
522          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
523          * and multiple code point ranges had this block.
524          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
525          * and adds separate blocks for the supplementary PUAs.
526          * @stable ICU 2.4
527          */
528         public static final int PRIVATE_USE_AREA_ID = 78;
529         /**
530          * Same as public static final int PRIVATE_USE_AREA.
531          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
532          * and multiple code point ranges had this block.
533          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
534          * and adds separate blocks for the supplementary PUAs.
535          * @stable ICU 2.4
536          */
537         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
538         /**
539          * @stable ICU 2.4
540          */
541         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
542         /**
543          * @stable ICU 2.4
544          */
545         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
546         /**
547          * @stable ICU 2.4
548          */
549         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
550         /**
551          * @stable ICU 2.4
552          */
553         public static final int COMBINING_HALF_MARKS_ID = 82;
554         /**
555          * @stable ICU 2.4
556          */
557         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
558         /**
559          * @stable ICU 2.4
560          */
561         public static final int SMALL_FORM_VARIANTS_ID = 84;
562         /**
563          * @stable ICU 2.4
564          */
565         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
566         /**
567          * @stable ICU 2.4
568          */
569         public static final int SPECIALS_ID = 86;
570         /**
571          * @stable ICU 2.4
572          */
573         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
574         /**
575          * @stable ICU 2.4
576          */
577         public static final int OLD_ITALIC_ID = 88;
578         /**
579          * @stable ICU 2.4
580          */
581         public static final int GOTHIC_ID = 89;
582         /**
583          * @stable ICU 2.4
584          */
585         public static final int DESERET_ID = 90;
586         /**
587          * @stable ICU 2.4
588          */
589         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
590         /**
591          * @stable ICU 2.4
592          */
593         public static final int MUSICAL_SYMBOLS_ID = 92;
594         /**
595          * @stable ICU 2.4
596          */
597         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
598         /**
599          * @stable ICU 2.4
600          */
601         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
602         /**
603          * @stable ICU 2.4
604          */
605         public static final int
606         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
607         /**
608          * @stable ICU 2.4
609          */
610         public static final int TAGS_ID = 96;
611 
612         // New blocks in Unicode 3.2
613 
614         /**
615          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
616          * @stable ICU 2.4
617          */
618         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
619         /**
620          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
621          * @stable ICU 3.0
622          */
623 
624         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
625         /**
626          * @stable ICU 2.4
627          */
628         public static final int TAGALOG_ID = 98;
629         /**
630          * @stable ICU 2.4
631          */
632         public static final int HANUNOO_ID = 99;
633         /**
634          * @stable ICU 2.4
635          */
636         public static final int BUHID_ID = 100;
637         /**
638          * @stable ICU 2.4
639          */
640         public static final int TAGBANWA_ID = 101;
641         /**
642          * @stable ICU 2.4
643          */
644         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
645         /**
646          * @stable ICU 2.4
647          */
648         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
649         /**
650          * @stable ICU 2.4
651          */
652         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
653         /**
654          * @stable ICU 2.4
655          */
656         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
657         /**
658          * @stable ICU 2.4
659          */
660         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
661         /**
662          * @stable ICU 2.4
663          */
664         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
665         /**
666          * @stable ICU 2.4
667          */
668         public static final int VARIATION_SELECTORS_ID = 108;
669         /**
670          * @stable ICU 2.4
671          */
672         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
673         /**
674          * @stable ICU 2.4
675          */
676         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
677 
678         /**
679          * @stable ICU 2.6
680          */
681         public static final int LIMBU_ID = 111; /*[1900]*/
682         /**
683          * @stable ICU 2.6
684          */
685         public static final int TAI_LE_ID = 112; /*[1950]*/
686         /**
687          * @stable ICU 2.6
688          */
689         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
690         /**
691          * @stable ICU 2.6
692          */
693         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
694         /**
695          * @stable ICU 2.6
696          */
697         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
698         /**
699          * @stable ICU 2.6
700          */
701         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
702         /**
703          * @stable ICU 2.6
704          */
705         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
706         /**
707          * @stable ICU 2.6
708          */
709         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
710         /**
711          * @stable ICU 2.6
712          */
713         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
714         /**
715          * @stable ICU 2.6
716          */
717         public static final int UGARITIC_ID = 120; /*[10380]*/
718         /**
719          * @stable ICU 2.6
720          */
721         public static final int SHAVIAN_ID = 121; /*[10450]*/
722         /**
723          * @stable ICU 2.6
724          */
725         public static final int OSMANYA_ID = 122; /*[10480]*/
726         /**
727          * @stable ICU 2.6
728          */
729         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
730         /**
731          * @stable ICU 2.6
732          */
733         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
734         /**
735          * @stable ICU 2.6
736          */
737         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
738 
739         /* New blocks in Unicode 4.1 */
740 
741         /**
742          * @stable ICU 3.4
743          */
744         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
745 
746         /**
747          * @stable ICU 3.4
748          */
749         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
750 
751         /**
752          * @stable ICU 3.4
753          */
754         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
755 
756         /**
757          * @stable ICU 3.4
758          */
759         public static final int BUGINESE_ID = 129; /*[1A00]*/
760 
761         /**
762          * @stable ICU 3.4
763          */
764         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
765 
766         /**
767          * @stable ICU 3.4
768          */
769         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
770 
771         /**
772          * @stable ICU 3.4
773          */
774         public static final int COPTIC_ID = 132; /*[2C80]*/
775 
776         /**
777          * @stable ICU 3.4
778          */
779         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
780 
781         /**
782          * @stable ICU 3.4
783          */
784         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
785 
786         /**
787          * @stable ICU 3.4
788          */
789         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
790 
791         /**
792          * @stable ICU 3.4
793          */
794         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
795 
796         /**
797          * @stable ICU 3.4
798          */
799         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
800 
801         /**
802          * @stable ICU 3.4
803          */
804         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
805 
806         /**
807          * @stable ICU 3.4
808          */
809         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
810 
811         /**
812          * @stable ICU 3.4
813          */
814         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
815 
816         /**
817          * @stable ICU 3.4
818          */
819         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
820 
821         /**
822          * @stable ICU 3.4
823          */
824         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
825 
826         /**
827          * @stable ICU 3.4
828          */
829         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
830 
831         /**
832          * @stable ICU 3.4
833          */
834         public static final int TIFINAGH_ID = 144; /*[2D30]*/
835 
836         /**
837          * @stable ICU 3.4
838          */
839         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
840 
841         /* New blocks in Unicode 5.0 */
842 
843         /**
844          * @stable ICU 3.6
845          */
846         public static final int NKO_ID = 146; /*[07C0]*/
847         /**
848          * @stable ICU 3.6
849          */
850         public static final int BALINESE_ID = 147; /*[1B00]*/
851         /**
852          * @stable ICU 3.6
853          */
854         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
855         /**
856          * @stable ICU 3.6
857          */
858         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
859         /**
860          * @stable ICU 3.6
861          */
862         public static final int PHAGS_PA_ID = 150; /*[A840]*/
863         /**
864          * @stable ICU 3.6
865          */
866         public static final int PHOENICIAN_ID = 151; /*[10900]*/
867         /**
868          * @stable ICU 3.6
869          */
870         public static final int CUNEIFORM_ID = 152; /*[12000]*/
871         /**
872          * @stable ICU 3.6
873          */
874         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
875         /**
876          * @stable ICU 3.6
877          */
878         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
879         /* New blocks in Unicode 5.1 (heading added for consistency; confirm against ICU4C uchar.h): IDs 155-171, each tagged stable as of ICU 4.0. */
880         /**
881          * @stable ICU 4.0
882          */
883         public static final int SUNDANESE_ID = 155; /* [1B80] */
884 
885         /**
886          * @stable ICU 4.0
887          */
888         public static final int LEPCHA_ID = 156; /* [1C00] */
889 
890         /**
891          * @stable ICU 4.0
892          */
893         public static final int OL_CHIKI_ID = 157; /* [1C50] */
894 
895         /**
896          * @stable ICU 4.0
897          */
898         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
899 
900         /**
901          * @stable ICU 4.0
902          */
903         public static final int VAI_ID = 159; /* [A500] */
904 
905         /**
906          * @stable ICU 4.0
907          */
908         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
909 
910         /**
911          * @stable ICU 4.0
912          */
913         public static final int SAURASHTRA_ID = 161; /* [A880] */
914 
915         /**
916          * @stable ICU 4.0
917          */
918         public static final int KAYAH_LI_ID = 162; /* [A900] */
919 
920         /**
921          * @stable ICU 4.0
922          */
923         public static final int REJANG_ID = 163; /* [A930] */
924 
925         /**
926          * @stable ICU 4.0
927          */
928         public static final int CHAM_ID = 164; /* [AA00] */
929 
930         /**
931          * @stable ICU 4.0
932          */
933         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
934 
935         /**
936          * @stable ICU 4.0
937          */
938         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
939 
940         /**
941          * @stable ICU 4.0
942          */
943         public static final int LYCIAN_ID = 167; /* [10280] */
944 
945         /**
946          * @stable ICU 4.0
947          */
948         public static final int CARIAN_ID = 168; /* [102A0] */
949 
950         /**
951          * @stable ICU 4.0
952          */
953         public static final int LYDIAN_ID = 169; /* [10920] */
954 
955         /**
956          * @stable ICU 4.0
957          */
958         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
959 
960         /**
961          * @stable ICU 4.0
962          */
963         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
964 
965         /* New blocks in Unicode 5.2: IDs 172-197, each tagged stable as of ICU 4.4. */
966 
967         /** @stable ICU 4.4 */
968         public static final int SAMARITAN_ID = 172; /*[0800]*/
969         /** @stable ICU 4.4 */
970         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
971         /** @stable ICU 4.4 */
972         public static final int TAI_THAM_ID = 174; /*[1A20]*/
973         /** @stable ICU 4.4 */
974         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
975         /** @stable ICU 4.4 */
976         public static final int LISU_ID = 176; /*[A4D0]*/
977         /** @stable ICU 4.4 */
978         public static final int BAMUM_ID = 177; /*[A6A0]*/
979         /** @stable ICU 4.4 */
980         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
981         /** @stable ICU 4.4 */
982         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
983         /** @stable ICU 4.4 */
984         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
985         /** @stable ICU 4.4 */
986         public static final int JAVANESE_ID = 181; /*[A980]*/
987         /** @stable ICU 4.4 */
988         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
989         /** @stable ICU 4.4 */
990         public static final int TAI_VIET_ID = 183; /*[AA80]*/
991         /** @stable ICU 4.4 */
992         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
993         /** @stable ICU 4.4 */
994         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
995         /** @stable ICU 4.4 */
996         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
997         /** @stable ICU 4.4 */
998         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
999         /** @stable ICU 4.4 */
1000         public static final int AVESTAN_ID = 188; /*[10B00]*/
1001         /** @stable ICU 4.4 */
1002         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
1003         /** @stable ICU 4.4 */
1004         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
1005         /** @stable ICU 4.4 */
1006         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
1007         /** @stable ICU 4.4 */
1008         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
1009         /** @stable ICU 4.4 */
1010         public static final int KAITHI_ID = 193; /*[11080]*/
1011         /** @stable ICU 4.4 */
1012         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
1013         /** @stable ICU 4.4 */
1014         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
1015         /** @stable ICU 4.4 */
1016         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
1017         /** @stable ICU 4.4 */
1018         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
1019 
1020         /* New blocks in Unicode 6.0: IDs 198-209, each tagged stable as of ICU 4.6. */
1021 
1022         /** @stable ICU 4.6 */
1023         public static final int MANDAIC_ID = 198; /*[0840]*/
1024         /** @stable ICU 4.6 */
1025         public static final int BATAK_ID = 199; /*[1BC0]*/
1026         /** @stable ICU 4.6 */
1027         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
1028         /** @stable ICU 4.6 */
1029         public static final int BRAHMI_ID = 201; /*[11000]*/
1030         /** @stable ICU 4.6 */
1031         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
1032         /** @stable ICU 4.6 */
1033         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
1034         /** @stable ICU 4.6 */
1035         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
1036         /** @stable ICU 4.6 */
1037         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
1038         /** @stable ICU 4.6 */
1039         public static final int EMOTICONS_ID = 206; /*[1F600]*/
1040         /** @stable ICU 4.6 */
1041         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
1042         /** @stable ICU 4.6 */
1043         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
1044         /** @stable ICU 4.6 */
1045         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
1046 
1047         /* New blocks in Unicode 6.1: IDs 210-220, each tagged stable as of ICU 49. */
1048 
1049         /** @stable ICU 49 */
1050         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
1051         /** @stable ICU 49 */
1052         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
1053         /** @stable ICU 49 */
1054         public static final int CHAKMA_ID = 212; /*[11100]*/
1055         /** @stable ICU 49 */
1056         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
1057         /** @stable ICU 49 */
1058         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
1059         /** @stable ICU 49 */
1060         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
1061         /** @stable ICU 49 */
1062         public static final int MIAO_ID = 216; /*[16F00]*/
1063         /** @stable ICU 49 */
1064         public static final int SHARADA_ID = 217; /*[11180]*/
1065         /** @stable ICU 49 */
1066         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
1067         /** @stable ICU 49 */
1068         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
1069         /** @stable ICU 49 */
1070         public static final int TAKRI_ID = 220; /*[11680]*/
1071 
1072         /* New blocks in Unicode 7.0: IDs 221-252, each tagged stable as of ICU 54. */
1073 
1074         /** @stable ICU 54 */
1075         public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
1076         /** @stable ICU 54 */
1077         public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
1078         /** @stable ICU 54 */
1079         public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
1080         /** @stable ICU 54 */
1081         public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
1082         /** @stable ICU 54 */
1083         public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
1084         /** @stable ICU 54 */
1085         public static final int ELBASAN_ID = 226; /*[10500]*/
1086         /** @stable ICU 54 */
1087         public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
1088         /** @stable ICU 54 */
1089         public static final int GRANTHA_ID = 228; /*[11300]*/
1090         /** @stable ICU 54 */
1091         public static final int KHOJKI_ID = 229; /*[11200]*/
1092         /** @stable ICU 54 */
1093         public static final int KHUDAWADI_ID = 230; /*[112B0]*/
1094         /** @stable ICU 54 */
1095         public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
1096         /** @stable ICU 54 */
1097         public static final int LINEAR_A_ID = 232; /*[10600]*/
1098         /** @stable ICU 54 */
1099         public static final int MAHAJANI_ID = 233; /*[11150]*/
1100         /** @stable ICU 54 */
1101         public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
1102         /** @stable ICU 54 */
1103         public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
1104         /** @stable ICU 54 */
1105         public static final int MODI_ID = 236; /*[11600]*/
1106         /** @stable ICU 54 */
1107         public static final int MRO_ID = 237; /*[16A40]*/
1108         /** @stable ICU 54 */
1109         public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
1110         /** @stable ICU 54 */
1111         public static final int NABATAEAN_ID = 239; /*[10880]*/
1112         /** @stable ICU 54 */
1113         public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
1114         /** @stable ICU 54 */
1115         public static final int OLD_PERMIC_ID = 241; /*[10350]*/
1116         /** @stable ICU 54 */
1117         public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
1118         /** @stable ICU 54 */
1119         public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
1120         /** @stable ICU 54 */
1121         public static final int PALMYRENE_ID = 244; /*[10860]*/
1122         /** @stable ICU 54 */
1123         public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
1124         /** @stable ICU 54 */
1125         public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
1126         /** @stable ICU 54 */
1127         public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
1128         /** @stable ICU 54 */
1129         public static final int SIDDHAM_ID = 248; /*[11580]*/
1130         /** @stable ICU 54 */
1131         public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
1132         /** @stable ICU 54 */
1133         public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
1134         /** @stable ICU 54 */
1135         public static final int TIRHUTA_ID = 251; /*[11480]*/
1136         /** @stable ICU 54 */
1137         public static final int WARANG_CITI_ID = 252; /*[118A0]*/
1138 
1139         /* New blocks in Unicode 8.0: IDs 253-262, each tagged stable as of ICU 56. */
1140 
1141         /** @stable ICU 56 */
1142         public static final int AHOM_ID = 253; /*[11700]*/
1143         /** @stable ICU 56 */
1144         public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
1145         /** @stable ICU 56 */
1146         public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
1147         /** @stable ICU 56 */
1148         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
1149         /** @stable ICU 56 */
1150         public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
1151         /** @stable ICU 56 */
1152         public static final int HATRAN_ID = 258; /*[108E0]*/
1153         /** @stable ICU 56 */
1154         public static final int MULTANI_ID = 259; /*[11280]*/
1155         /** @stable ICU 56 */
1156         public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
1157         /** @stable ICU 56 */
1158         public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
1159         /** @stable ICU 56 */
1160         public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
1161 
1162         /* New blocks in Unicode 9.0: IDs 263-273, each tagged stable as of ICU 58. */
1163 
1164         /** @stable ICU 58 */
1165         public static final int ADLAM_ID = 263; /*[1E900]*/
1166         /** @stable ICU 58 */
1167         public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
1168         /** @stable ICU 58 */
1169         public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
1170         /** @stable ICU 58 */
1171         public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
1172         /** @stable ICU 58 */
1173         public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
1174         /** @stable ICU 58 */
1175         public static final int MARCHEN_ID = 268; /*[11C70]*/
1176         /** @stable ICU 58 */
1177         public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
1178         /** @stable ICU 58 */
1179         public static final int NEWA_ID = 270; /*[11400]*/
1180         /** @stable ICU 58 */
1181         public static final int OSAGE_ID = 271; /*[104B0]*/
1182         /** @stable ICU 58 */
1183         public static final int TANGUT_ID = 272; /*[17000]*/
1184         /** @stable ICU 58 */
1185         public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/
1186 
1187         // New blocks in Unicode 10.0: IDs 274-280, each tagged stable as of ICU 60.
1188 
1189         /** @stable ICU 60 */
1190         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/
1191         /** @stable ICU 60 */
1192         public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/
1193         /** @stable ICU 60 */
1194         public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/
1195         /** @stable ICU 60 */
1196         public static final int NUSHU_ID = 277; /*[1B170]*/
1197         /** @stable ICU 60 */
1198         public static final int SOYOMBO_ID = 278; /*[11A50]*/
1199         /** @stable ICU 60 */
1200         public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/
1201         /** @stable ICU 60 */
1202         public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/
1203 
1204         // New blocks in Unicode 11.0: IDs 281-291, each tagged stable as of ICU 62.
1205 
1206         /** @stable ICU 62 */
1207         public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/
1208         /** @stable ICU 62 */
1209         public static final int DOGRA_ID = 282; /*[11800]*/
1210         /** @stable ICU 62 */
1211         public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/
1212         /** @stable ICU 62 */
1213         public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/
1214         /** @stable ICU 62 */
1215         public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/
1216         /** @stable ICU 62 */
1217         public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/
1218         /** @stable ICU 62 */
1219         public static final int MAKASAR_ID = 287; /*[11EE0]*/
1220         /** @stable ICU 62 */
1221         public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/
1222         /** @stable ICU 62 */
1223         public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/
1224         /** @stable ICU 62 */
1225         public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/
1226         /** @stable ICU 62 */
1227         public static final int SOGDIAN_ID = 291; /*[10F30]*/
1228 
1229         // New blocks in Unicode 12.0: IDs 292-300, each tagged stable as of ICU 64.
1230 
1231         /** @stable ICU 64 */
1232         public static final int EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID = 292; /*[13430]*/
1233         /** @stable ICU 64 */
1234         public static final int ELYMAIC_ID = 293; /*[10FE0]*/
1235         /** @stable ICU 64 */
1236         public static final int NANDINAGARI_ID = 294; /*[119A0]*/
1237         /** @stable ICU 64 */
1238         public static final int NYIAKENG_PUACHUE_HMONG_ID = 295; /*[1E100]*/
1239         /** @stable ICU 64 */
1240         public static final int OTTOMAN_SIYAQ_NUMBERS_ID = 296; /*[1ED00]*/
1241         /** @stable ICU 64 */
1242         public static final int SMALL_KANA_EXTENSION_ID = 297; /*[1B130]*/
1243         /** @stable ICU 64 */
1244         public static final int SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID = 298; /*[1FA70]*/
1245         /** @stable ICU 64 */
1246         public static final int TAMIL_SUPPLEMENT_ID = 299; /*[11FC0]*/
1247         /** @stable ICU 64 */
1248         public static final int WANCHO_ID = 300; /*[1E2C0]*/
1249 
1250         // New blocks in Unicode 13.0: IDs 301-308, each tagged stable as of ICU 66.
1251 
1252         /** @stable ICU 66 */
1253         public static final int CHORASMIAN_ID = 301; /*[10FB0]*/
1254         /** @stable ICU 66 */
1255         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID = 302; /*[30000]*/
1256         /** @stable ICU 66 */
1257         public static final int DIVES_AKURU_ID = 303; /*[11900]*/
1258         /** @stable ICU 66 */
1259         public static final int KHITAN_SMALL_SCRIPT_ID = 304; /*[18B00]*/
1260         /** @stable ICU 66 */
1261         public static final int LISU_SUPPLEMENT_ID = 305; /*[11FB0]*/
1262         /** @stable ICU 66 */
1263         public static final int SYMBOLS_FOR_LEGACY_COMPUTING_ID = 306; /*[1FB00]*/
1264         /** @stable ICU 66 */
1265         public static final int TANGUT_SUPPLEMENT_ID = 307; /*[18D00]*/
1266         /** @stable ICU 66 */
1267         public static final int YEZIDI_ID = 308; /*[10E80]*/
1268 
1269         // New blocks in Unicode 14.0: IDs 309-320, each tagged stable as of ICU 70.
1270 
1271         /** @stable ICU 70 */
1272         public static final int ARABIC_EXTENDED_B_ID = 309; /*[0870]*/
1273         /** @stable ICU 70 */
1274         public static final int CYPRO_MINOAN_ID = 310; /*[12F90]*/
1275         /** @stable ICU 70 */
1276         public static final int ETHIOPIC_EXTENDED_B_ID = 311; /*[1E7E0]*/
1277         /** @stable ICU 70 */
1278         public static final int KANA_EXTENDED_B_ID = 312; /*[1AFF0]*/
1279         /** @stable ICU 70 */
1280         public static final int LATIN_EXTENDED_F_ID = 313; /*[10780]*/
1281         /** @stable ICU 70 */
1282         public static final int LATIN_EXTENDED_G_ID = 314; /*[1DF00]*/
1283         /** @stable ICU 70 */
1284         public static final int OLD_UYGHUR_ID = 315; /*[10F70]*/
1285         /** @stable ICU 70 */
1286         public static final int TANGSA_ID = 316; /*[16A70]*/
1287         /** @stable ICU 70 */
1288         public static final int TOTO_ID = 317; /*[1E290]*/
1289         /** @stable ICU 70 */
1290         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID = 318; /*[11AB0]*/
1291         /** @stable ICU 70 */
1292         public static final int VITHKUQI_ID = 319; /*[10570]*/
1293         /** @stable ICU 70 */
1294         public static final int ZNAMENNY_MUSICAL_NOTATION_ID = 320; /*[1CF00]*/
1295 
1296         // New blocks in Unicode 15.0: IDs 321-327, each tagged stable as of ICU 72.
1297 
1298         /** @stable ICU 72 */
1299         public static final int ARABIC_EXTENDED_C_ID = 321; /*[10EC0]*/
1300         /** @stable ICU 72 */
1301         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID = 322; /*[31350]*/
1302         /** @stable ICU 72 */
1303         public static final int CYRILLIC_EXTENDED_D_ID = 323; /*[1E030]*/
1304         /** @stable ICU 72 */
1305         public static final int DEVANAGARI_EXTENDED_A_ID = 324; /*[11B00]*/
1306         /** @stable ICU 72 */
1307         public static final int KAKTOVIK_NUMERALS_ID = 325; /*[1D2C0]*/
1308         /** @stable ICU 72 */
1309         public static final int KAWI_ID = 326; /*[11F00]*/
1310         /** @stable ICU 72 */
1311         public static final int NAG_MUNDARI_ID = 327; /*[1E4D0]*/
1312 
1313         // New block in Unicode 15.1: ID 328, tagged stable as of ICU 74.
1314 
1315         /** @stable ICU 74 */
1316         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_ID = 328; /*[2EBF0]*/
1317 
1318         /**
1319          * One more than the highest normal UnicodeBlock value.
1320          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK).
1321          * Here it is 329, one past CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_ID (328); it also sizes the BLOCKS_ array.
1322          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1323          */
1324         @Deprecated
1325         public static final int COUNT = 329;
1326 
1327         // blocks objects ---------------------------------------------------
1328 
1329         /**
1330          * Array of UnicodeBlocks, for easy access in getInstance(int)
1331          */
1332         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
1333 
1334         /** Block object built with name "NO_BLOCK" and the literal ID 0.
1335          * @stable ICU 2.6
1336          */
1337         public static final UnicodeBlock NO_BLOCK
1338         = new UnicodeBlock("NO_BLOCK", 0);
1339 
1340         /** Block object built with name "BASIC_LATIN" and ID BASIC_LATIN_ID.
1341          * @stable ICU 2.4
1342          */
1343         public static final UnicodeBlock BASIC_LATIN
1344         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
1345         /** Block object built with name "LATIN_1_SUPPLEMENT" and ID LATIN_1_SUPPLEMENT_ID.
1346          * @stable ICU 2.4
1347          */
1348         public static final UnicodeBlock LATIN_1_SUPPLEMENT
1349         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1350         /** Block object built with name "LATIN_EXTENDED_A" and ID LATIN_EXTENDED_A_ID.
1351          * @stable ICU 2.4
1352          */
1353         public static final UnicodeBlock LATIN_EXTENDED_A
1354         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1355         /** Block object built with name "LATIN_EXTENDED_B" and ID LATIN_EXTENDED_B_ID.
1356          * @stable ICU 2.4
1357          */
1358         public static final UnicodeBlock LATIN_EXTENDED_B
1359         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1360         /** Block object built with name "IPA_EXTENSIONS" and ID IPA_EXTENSIONS_ID.
1361          * @stable ICU 2.4
1362          */
1363         public static final UnicodeBlock IPA_EXTENSIONS
1364         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1365         /** Block object built with name "SPACING_MODIFIER_LETTERS" and ID SPACING_MODIFIER_LETTERS_ID.
1366          * @stable ICU 2.4
1367          */
1368         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1369         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1370         /** Block object built with name "COMBINING_DIACRITICAL_MARKS" and ID COMBINING_DIACRITICAL_MARKS_ID.
1371          * @stable ICU 2.4
1372          */
1373         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1374         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1375         /** Block object built with name "GREEK" and ID GREEK_ID.
1376          * Unicode 3.2 renames this block to "Greek and Coptic".
1377          * @stable ICU 2.4
1378          */
1379         public static final UnicodeBlock GREEK
1380         = new UnicodeBlock("GREEK", GREEK_ID);
1381         /** Block object built with name "CYRILLIC" and ID CYRILLIC_ID.
1382          * @stable ICU 2.4
1383          */
1384         public static final UnicodeBlock CYRILLIC
1385         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1386         /** Block object built with name "ARMENIAN" and ID ARMENIAN_ID.
1387          * @stable ICU 2.4
1388          */
1389         public static final UnicodeBlock ARMENIAN
1390         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1391         /** Block object built with name "HEBREW" and ID HEBREW_ID.
1392          * @stable ICU 2.4
1393          */
1394         public static final UnicodeBlock HEBREW
1395         = new UnicodeBlock("HEBREW", HEBREW_ID);
1396         /** Block object built with name "ARABIC" and ID ARABIC_ID.
1397          * @stable ICU 2.4
1398          */
1399         public static final UnicodeBlock ARABIC
1400         = new UnicodeBlock("ARABIC", ARABIC_ID);
1401         /** Block object built with name "SYRIAC" and ID SYRIAC_ID.
1402          * @stable ICU 2.4
1403          */
1404         public static final UnicodeBlock SYRIAC
1405         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1406         /** Block object built with name "THAANA" and ID THAANA_ID.
1407          * @stable ICU 2.4
1408          */
1409         public static final UnicodeBlock THAANA
1410         = new UnicodeBlock("THAANA", THAANA_ID);
1411         /** Block object built with name "DEVANAGARI" and ID DEVANAGARI_ID.
1412          * @stable ICU 2.4
1413          */
1414         public static final UnicodeBlock DEVANAGARI
1415         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1416         /** Block object built with name "BENGALI" and ID BENGALI_ID.
1417          * @stable ICU 2.4
1418          */
1419         public static final UnicodeBlock BENGALI
1420         = new UnicodeBlock("BENGALI", BENGALI_ID);
1421         /** Block object built with name "GURMUKHI" and ID GURMUKHI_ID.
1422          * @stable ICU 2.4
1423          */
1424         public static final UnicodeBlock GURMUKHI
1425         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1426         /** Block object built with name "GUJARATI" and ID GUJARATI_ID.
1427          * @stable ICU 2.4
1428          */
1429         public static final UnicodeBlock GUJARATI
1430         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1431         /** Block object built with name "ORIYA" and ID ORIYA_ID.
1432          * @stable ICU 2.4
1433          */
1434         public static final UnicodeBlock ORIYA
1435         = new UnicodeBlock("ORIYA", ORIYA_ID);
1436         /** Block object built with name "TAMIL" and ID TAMIL_ID.
1437          * @stable ICU 2.4
1438          */
1439         public static final UnicodeBlock TAMIL
1440         = new UnicodeBlock("TAMIL", TAMIL_ID);
1441         /** Block object built with name "TELUGU" and ID TELUGU_ID.
1442          * @stable ICU 2.4
1443          */
1444         public static final UnicodeBlock TELUGU
1445         = new UnicodeBlock("TELUGU", TELUGU_ID);
1446         /** Block object built with name "KANNADA" and ID KANNADA_ID.
1447          * @stable ICU 2.4
1448          */
1449         public static final UnicodeBlock KANNADA
1450         = new UnicodeBlock("KANNADA", KANNADA_ID);
1451         /** Block object built with name "MALAYALAM" and ID MALAYALAM_ID.
1452          * @stable ICU 2.4
1453          */
1454         public static final UnicodeBlock MALAYALAM
1455         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1456         /** Block object built with name "SINHALA" and ID SINHALA_ID.
1457          * @stable ICU 2.4
1458          */
1459         public static final UnicodeBlock SINHALA
1460         = new UnicodeBlock("SINHALA", SINHALA_ID);
1461         /** Block object built with name "THAI" and ID THAI_ID.
1462          * @stable ICU 2.4
1463          */
1464         public static final UnicodeBlock THAI
1465         = new UnicodeBlock("THAI", THAI_ID);
1466         /** Block object built with name "LAO" and ID LAO_ID.
1467          * @stable ICU 2.4
1468          */
1469         public static final UnicodeBlock LAO
1470         = new UnicodeBlock("LAO", LAO_ID);
1471         /** Block object built with name "TIBETAN" and ID TIBETAN_ID.
1472          * @stable ICU 2.4
1473          */
1474         public static final UnicodeBlock TIBETAN
1475         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1476         /** Block object built with name "MYANMAR" and ID MYANMAR_ID.
1477          * @stable ICU 2.4
1478          */
1479         public static final UnicodeBlock MYANMAR
1480         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1481         /** Block object built with name "GEORGIAN" and ID GEORGIAN_ID.
1482          * @stable ICU 2.4
1483          */
1484         public static final UnicodeBlock GEORGIAN
1485         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1486         /** Block object built with name "HANGUL_JAMO" and ID HANGUL_JAMO_ID.
1487          * @stable ICU 2.4
1488          */
1489         public static final UnicodeBlock HANGUL_JAMO
1490         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1491         /** Block object built with name "ETHIOPIC" and ID ETHIOPIC_ID.
1492          * @stable ICU 2.4
1493          */
1494         public static final UnicodeBlock ETHIOPIC
1495         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1496         /** Block object built with name "CHEROKEE" and ID CHEROKEE_ID.
1497          * @stable ICU 2.4
1498          */
1499         public static final UnicodeBlock CHEROKEE
1500         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1501         /** Block object built with name "UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS" and ID UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID.
1502          * @stable ICU 2.4
1503          */
1504         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1505         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1506                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1507         /** Block object built with name "OGHAM" and ID OGHAM_ID.
1508          * @stable ICU 2.4
1509          */
1510         public static final UnicodeBlock OGHAM
1511         = new UnicodeBlock("OGHAM", OGHAM_ID);
1512         /** Block object built with name "RUNIC" and ID RUNIC_ID.
1513          * @stable ICU 2.4
1514          */
1515         public static final UnicodeBlock RUNIC
1516         = new UnicodeBlock("RUNIC", RUNIC_ID);
1517         /** Block object built with name "KHMER" and ID KHMER_ID.
1518          * @stable ICU 2.4
1519          */
1520         public static final UnicodeBlock KHMER
1521         = new UnicodeBlock("KHMER", KHMER_ID);
1522         /** Block object built with name "MONGOLIAN" and ID MONGOLIAN_ID.
1523          * @stable ICU 2.4
1524          */
1525         public static final UnicodeBlock MONGOLIAN
1526         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1527         /** Block object built with name "LATIN_EXTENDED_ADDITIONAL" and ID LATIN_EXTENDED_ADDITIONAL_ID.
1528          * @stable ICU 2.4
1529          */
1530         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1531         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1532         /** Block object built with name "GREEK_EXTENDED" and ID GREEK_EXTENDED_ID.
1533          * @stable ICU 2.4
1534          */
1535         public static final UnicodeBlock GREEK_EXTENDED
1536         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1537         /** Block object built with name "GENERAL_PUNCTUATION" and ID GENERAL_PUNCTUATION_ID.
1538          * @stable ICU 2.4
1539          */
1540         public static final UnicodeBlock GENERAL_PUNCTUATION
1541         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1542         /** Block object built with name "SUPERSCRIPTS_AND_SUBSCRIPTS" and ID SUPERSCRIPTS_AND_SUBSCRIPTS_ID.
1543          * @stable ICU 2.4
1544          */
1545         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1546         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1547         /** Block object built with name "CURRENCY_SYMBOLS" and ID CURRENCY_SYMBOLS_ID.
1548          * @stable ICU 2.4
1549          */
1550         public static final UnicodeBlock CURRENCY_SYMBOLS
1551         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1552         /** Block object built with name "COMBINING_MARKS_FOR_SYMBOLS" and ID COMBINING_MARKS_FOR_SYMBOLS_ID.
1553          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1554          * Symbols".
1555          * @stable ICU 2.4
1556          */
1557         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1558         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1559         /** Block object built with name "LETTERLIKE_SYMBOLS" and ID LETTERLIKE_SYMBOLS_ID.
1560          * @stable ICU 2.4
1561          */
1562         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1563         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1564         /** Block object built with name "NUMBER_FORMS" and ID NUMBER_FORMS_ID.
1565          * @stable ICU 2.4
1566          */
1567         public static final UnicodeBlock NUMBER_FORMS
1568         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1569         /** Block object built with name "ARROWS" and ID ARROWS_ID.
1570          * @stable ICU 2.4
1571          */
1572         public static final UnicodeBlock ARROWS
1573         = new UnicodeBlock("ARROWS", ARROWS_ID);
1574         /** Block object built with name "MATHEMATICAL_OPERATORS" and ID MATHEMATICAL_OPERATORS_ID.
1575          * @stable ICU 2.4
1576          */
1577         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1578         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1579         /** Block object built with name "MISCELLANEOUS_TECHNICAL" and ID MISCELLANEOUS_TECHNICAL_ID.
1580          * @stable ICU 2.4
1581          */
1582         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1583         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1584         /** Block object built with name "CONTROL_PICTURES" and ID CONTROL_PICTURES_ID.
1585          * @stable ICU 2.4
1586          */
1587         public static final UnicodeBlock CONTROL_PICTURES
1588         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1589         /** Block object built with name "OPTICAL_CHARACTER_RECOGNITION" and ID OPTICAL_CHARACTER_RECOGNITION_ID.
1590          * @stable ICU 2.4
1591          */
1592         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1593         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1594         /** Block object built with name "ENCLOSED_ALPHANUMERICS" and ID ENCLOSED_ALPHANUMERICS_ID.
1595          * @stable ICU 2.4
1596          */
1597         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1598         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1599         /** Block object built with name "BOX_DRAWING" and ID BOX_DRAWING_ID.
1600          * @stable ICU 2.4
1601          */
1602         public static final UnicodeBlock BOX_DRAWING
1603         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1604         /** Block object built with name "BLOCK_ELEMENTS" and ID BLOCK_ELEMENTS_ID.
1605          * @stable ICU 2.4
1606          */
1607         public static final UnicodeBlock BLOCK_ELEMENTS
1608         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1609         /** Block object built with name "GEOMETRIC_SHAPES" and ID GEOMETRIC_SHAPES_ID.
1610          * @stable ICU 2.4
1611          */
1612         public static final UnicodeBlock GEOMETRIC_SHAPES
1613         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1614         /** Block object built with name "MISCELLANEOUS_SYMBOLS" and ID MISCELLANEOUS_SYMBOLS_ID.
1615          * @stable ICU 2.4
1616          */
1617         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1618         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1619         /** Block object built with name "DINGBATS" and ID DINGBATS_ID.
1620          * @stable ICU 2.4
1621          */
1622         public static final UnicodeBlock DINGBATS
1623         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1624         /** Block object built with name "BRAILLE_PATTERNS" and ID BRAILLE_PATTERNS_ID.
1625          * @stable ICU 2.4
1626          */
1627         public static final UnicodeBlock BRAILLE_PATTERNS
1628         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1629         /**
1630          * @stable ICU 2.4
1631          */
1632         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1633         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1634         /**
1635          * @stable ICU 2.4
1636          */
1637         public static final UnicodeBlock KANGXI_RADICALS
1638         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1639         /**
1640          * @stable ICU 2.4
1641          */
1642         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1643         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1644                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1645         /**
1646          * @stable ICU 2.4
1647          */
1648         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1649         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1650         /**
1651          * @stable ICU 2.4
1652          */
1653         public static final UnicodeBlock HIRAGANA
1654         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1655         /**
1656          * @stable ICU 2.4
1657          */
1658         public static final UnicodeBlock KATAKANA
1659         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1660         /**
1661          * @stable ICU 2.4
1662          */
1663         public static final UnicodeBlock BOPOMOFO
1664         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1665         /**
1666          * @stable ICU 2.4
1667          */
1668         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1669         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1670         /**
1671          * @stable ICU 2.4
1672          */
1673         public static final UnicodeBlock KANBUN
1674         = new UnicodeBlock("KANBUN", KANBUN_ID);
1675         /**
1676          * @stable ICU 2.4
1677          */
1678         public static final UnicodeBlock BOPOMOFO_EXTENDED
1679         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1680         /**
1681          * @stable ICU 2.4
1682          */
1683         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1684         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1685                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1686         /**
1687          * @stable ICU 2.4
1688          */
1689         public static final UnicodeBlock CJK_COMPATIBILITY
1690         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1691         /**
1692          * @stable ICU 2.4
1693          */
1694         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1695         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1696                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1697         /**
1698          * @stable ICU 2.4
1699          */
1700         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1701         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1702         /**
1703          * @stable ICU 2.4
1704          */
1705         public static final UnicodeBlock YI_SYLLABLES
1706         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1707         /**
1708          * @stable ICU 2.4
1709          */
1710         public static final UnicodeBlock YI_RADICALS
1711         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1712         /**
1713          * @stable ICU 2.4
1714          */
1715         public static final UnicodeBlock HANGUL_SYLLABLES
1716         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1717         /**
1718          * @stable ICU 2.4
1719          */
1720         public static final UnicodeBlock HIGH_SURROGATES
1721         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1722         /**
1723          * @stable ICU 2.4
1724          */
1725         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1726         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1727         /**
1728          * @stable ICU 2.4
1729          */
1730         public static final UnicodeBlock LOW_SURROGATES
1731         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1732         /**
1733          * Same as public static final int PRIVATE_USE.
1734          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1735          * and multiple code point ranges had this block.
1736          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1737          * and adds separate blocks for the supplementary PUAs.
1738          * @stable ICU 2.4
1739          */
1740         public static final UnicodeBlock PRIVATE_USE_AREA
1741         = new UnicodeBlock("PRIVATE_USE_AREA",  78);
1742         /**
1743          * Same as public static final int PRIVATE_USE_AREA.
1744          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
1745          * and multiple code point ranges had this block.
1746          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1747          * and adds separate blocks for the supplementary PUAs.
1748          * @stable ICU 2.4
1749          */
1750         public static final UnicodeBlock PRIVATE_USE
1751         = PRIVATE_USE_AREA;
1752         /**
1753          * @stable ICU 2.4
1754          */
1755         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1756         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1757         /**
1758          * @stable ICU 2.4
1759          */
1760         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1761         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1762         /**
1763          * @stable ICU 2.4
1764          */
1765         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1766         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1767         /**
1768          * @stable ICU 2.4
1769          */
1770         public static final UnicodeBlock COMBINING_HALF_MARKS
1771         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1772         /**
1773          * @stable ICU 2.4
1774          */
1775         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1776         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1777         /**
1778          * @stable ICU 2.4
1779          */
1780         public static final UnicodeBlock SMALL_FORM_VARIANTS
1781         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1782         /**
1783          * @stable ICU 2.4
1784          */
1785         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1786         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1787         /**
1788          * @stable ICU 2.4
1789          */
1790         public static final UnicodeBlock SPECIALS
1791         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1792         /**
1793          * @stable ICU 2.4
1794          */
1795         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1796         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1797         /**
1798          * @stable ICU 2.4
1799          */
1800         public static final UnicodeBlock OLD_ITALIC
1801         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1802         /**
1803          * @stable ICU 2.4
1804          */
1805         public static final UnicodeBlock GOTHIC
1806         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1807         /**
1808          * @stable ICU 2.4
1809          */
1810         public static final UnicodeBlock DESERET
1811         = new UnicodeBlock("DESERET", DESERET_ID);
1812         /**
1813          * @stable ICU 2.4
1814          */
1815         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1816         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1817         /**
1818          * @stable ICU 2.4
1819          */
1820         public static final UnicodeBlock MUSICAL_SYMBOLS
1821         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1822         /**
1823          * @stable ICU 2.4
1824          */
1825         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1826         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1827                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1828         /**
1829          * @stable ICU 2.4
1830          */
1831         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1832         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1833                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1834         /**
1835          * @stable ICU 2.4
1836          */
1837         public static final UnicodeBlock
1838         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1839         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1840                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1841         /**
1842          * @stable ICU 2.4
1843          */
1844         public static final UnicodeBlock TAGS
1845         = new UnicodeBlock("TAGS", TAGS_ID);
1846 
1847         // New blocks in Unicode 3.2
1848 
1849         /**
1850          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1851          * @stable ICU 2.4
1852          */
1853         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1854         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1855         /**
1856          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1857          * @stable ICU 3.0
1858          */
1859         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1860         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1861         /**
1862          * @stable ICU 2.4
1863          */
1864         public static final UnicodeBlock TAGALOG
1865         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1866         /**
1867          * @stable ICU 2.4
1868          */
1869         public static final UnicodeBlock HANUNOO
1870         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1871         /**
1872          * @stable ICU 2.4
1873          */
1874         public static final UnicodeBlock BUHID
1875         = new UnicodeBlock("BUHID", BUHID_ID);
1876         /**
1877          * @stable ICU 2.4
1878          */
1879         public static final UnicodeBlock TAGBANWA
1880         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1881         /**
1882          * @stable ICU 2.4
1883          */
1884         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1885         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1886                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1887         /**
1888          * @stable ICU 2.4
1889          */
1890         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1891         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1892         /**
1893          * @stable ICU 2.4
1894          */
1895         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1896         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1897         /**
1898          * @stable ICU 2.4
1899          */
1900         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1901         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1902                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1903         /**
1904          * @stable ICU 2.4
1905          */
1906         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1907         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1908                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1909         /**
1910          * @stable ICU 2.4
1911          */
1912         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1913         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1914         /**
1915          * @stable ICU 2.4
1916          */
1917         public static final UnicodeBlock VARIATION_SELECTORS
1918         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1919         /**
1920          * @stable ICU 2.4
1921          */
1922         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1923         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1924                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1925         /**
1926          * @stable ICU 2.4
1927          */
1928         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1929         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1930                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1931 
1932         /**
1933          * @stable ICU 2.6
1934          */
1935         public static final UnicodeBlock LIMBU
1936         = new UnicodeBlock("LIMBU", LIMBU_ID);
1937         /**
1938          * @stable ICU 2.6
1939          */
1940         public static final UnicodeBlock TAI_LE
1941         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1942         /**
1943          * @stable ICU 2.6
1944          */
1945         public static final UnicodeBlock KHMER_SYMBOLS
1946         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1947 
1948         /**
1949          * @stable ICU 2.6
1950          */
1951         public static final UnicodeBlock PHONETIC_EXTENSIONS
1952         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1953 
1954         /**
1955          * @stable ICU 2.6
1956          */
1957         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1958         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1959                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1960         /**
1961          * @stable ICU 2.6
1962          */
1963         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1964         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1965         /**
1966          * @stable ICU 2.6
1967          */
1968         public static final UnicodeBlock LINEAR_B_SYLLABARY
1969         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1970         /**
1971          * @stable ICU 2.6
1972          */
1973         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1974         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1975         /**
1976          * @stable ICU 2.6
1977          */
1978         public static final UnicodeBlock AEGEAN_NUMBERS
1979         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1980         /**
1981          * @stable ICU 2.6
1982          */
1983         public static final UnicodeBlock UGARITIC
1984         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1985         /**
1986          * @stable ICU 2.6
1987          */
1988         public static final UnicodeBlock SHAVIAN
1989         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1990         /**
1991          * @stable ICU 2.6
1992          */
1993         public static final UnicodeBlock OSMANYA
1994         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1995         /**
1996          * @stable ICU 2.6
1997          */
1998         public static final UnicodeBlock CYPRIOT_SYLLABARY
1999         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
2000         /**
2001          * @stable ICU 2.6
2002          */
2003         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
2004         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
2005 
2006         /**
2007          * @stable ICU 2.6
2008          */
2009         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
2010         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
2011 
2012         /* New blocks in Unicode 4.1 */
2013 
2014         /**
2015          * @stable ICU 3.4
2016          */
2017         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
2018                 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
2019                         ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
2020 
2021         /**
2022          * @stable ICU 3.4
2023          */
2024         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
2025                 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
2026 
2027         /**
2028          * @stable ICU 3.4
2029          */
2030         public static final UnicodeBlock ARABIC_SUPPLEMENT =
2031                 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
2032 
2033         /**
2034          * @stable ICU 3.4
2035          */
2036         public static final UnicodeBlock BUGINESE =
2037                 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
2038 
2039         /**
2040          * @stable ICU 3.4
2041          */
2042         public static final UnicodeBlock CJK_STROKES =
2043                 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
2044 
2045         /**
2046          * @stable ICU 3.4
2047          */
2048         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
2049                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
2050                         COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
2051 
2052         /**
2053          * @stable ICU 3.4
2054          */
2055         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
2056 
2057         /**
2058          * @stable ICU 3.4
2059          */
2060         public static final UnicodeBlock ETHIOPIC_EXTENDED =
2061                 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
2062 
2063         /**
2064          * @stable ICU 3.4
2065          */
2066         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
2067                 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
2068 
2069         /**
2070          * @stable ICU 3.4
2071          */
2072         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
2073                 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
2074 
2075         /**
2076          * @stable ICU 3.4
2077          */
2078         public static final UnicodeBlock GLAGOLITIC =
2079                 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
2080 
2081         /**
2082          * @stable ICU 3.4
2083          */
2084         public static final UnicodeBlock KHAROSHTHI =
2085                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
2086 
2087         /**
2088          * @stable ICU 3.4
2089          */
2090         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
2091                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
2092 
2093         /**
2094          * @stable ICU 3.4
2095          */
2096         public static final UnicodeBlock NEW_TAI_LUE =
2097                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
2098 
2099         /**
2100          * @stable ICU 3.4
2101          */
2102         public static final UnicodeBlock OLD_PERSIAN =
2103                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
2104 
2105         /**
2106          * @stable ICU 3.4
2107          */
2108         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
2109                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
2110                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
2111 
2112         /**
2113          * @stable ICU 3.4
2114          */
2115         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2116                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
2117 
2118         /**
2119          * @stable ICU 3.4
2120          */
2121         public static final UnicodeBlock SYLOTI_NAGRI =
2122                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
2123 
2124         /**
2125          * @stable ICU 3.4
2126          */
2127         public static final UnicodeBlock TIFINAGH =
2128                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
2129 
2130         /**
2131          * @stable ICU 3.4
2132          */
2133         public static final UnicodeBlock VERTICAL_FORMS =
2134                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
2135 
2136         /**
2137          * @stable ICU 3.6
2138          */
2139         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
2140         /**
2141          * @stable ICU 3.6
2142          */
2143         public static final UnicodeBlock BALINESE =
2144                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
2145         /**
2146          * @stable ICU 3.6
2147          */
2148         public static final UnicodeBlock LATIN_EXTENDED_C =
2149                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
2150         /**
2151          * @stable ICU 3.6
2152          */
2153         public static final UnicodeBlock LATIN_EXTENDED_D =
2154                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
2155         /**
2156          * @stable ICU 3.6
2157          */
2158         public static final UnicodeBlock PHAGS_PA =
2159                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
2160         /**
2161          * @stable ICU 3.6
2162          */
2163         public static final UnicodeBlock PHOENICIAN =
2164                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
2165         /**
2166          * @stable ICU 3.6
2167          */
2168         public static final UnicodeBlock CUNEIFORM =
2169                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
2170         /**
2171          * @stable ICU 3.6
2172          */
2173         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2174                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2175                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
2176         /**
2177          * @stable ICU 3.6
2178          */
2179         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2180                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
2181 
2182         /**
2183          * @stable ICU 4.0
2184          */
2185         public static final UnicodeBlock SUNDANESE =
2186                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
2187 
2188         /**
2189          * @stable ICU 4.0
2190          */
2191         public static final UnicodeBlock LEPCHA =
2192                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
2193 
2194         /**
2195          * @stable ICU 4.0
2196          */
2197         public static final UnicodeBlock OL_CHIKI =
2198                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
2199 
2200         /**
2201          * @stable ICU 4.0
2202          */
2203         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2204                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
2205 
2206         /**
2207          * @stable ICU 4.0
2208          */
2209         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
2210 
2211         /**
2212          * @stable ICU 4.0
2213          */
2214         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2215                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
2216 
2217         /**
2218          * @stable ICU 4.0
2219          */
2220         public static final UnicodeBlock SAURASHTRA =
2221                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
2222 
2223         /**
2224          * @stable ICU 4.0
2225          */
2226         public static final UnicodeBlock KAYAH_LI =
2227                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
2228 
2229         /**
2230          * @stable ICU 4.0
2231          */
2232         public static final UnicodeBlock REJANG =
2233                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
2234 
2235         /**
2236          * @stable ICU 4.0
2237          */
2238         public static final UnicodeBlock CHAM =
2239                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
2240 
2241         /**
2242          * @stable ICU 4.0
2243          */
2244         public static final UnicodeBlock ANCIENT_SYMBOLS =
2245                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
2246 
2247         /**
2248          * @stable ICU 4.0
2249          */
2250         public static final UnicodeBlock PHAISTOS_DISC =
2251                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
2252 
2253         /**
2254          * @stable ICU 4.0
2255          */
2256         public static final UnicodeBlock LYCIAN =
2257                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
2258 
2259         /**
2260          * @stable ICU 4.0
2261          */
2262         public static final UnicodeBlock CARIAN =
2263                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
2264 
2265         /**
2266          * @stable ICU 4.0
2267          */
2268         public static final UnicodeBlock LYDIAN =
2269                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
2270 
2271         /**
2272          * @stable ICU 4.0
2273          */
2274         public static final UnicodeBlock MAHJONG_TILES =
2275                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
2276 
2277         /**
2278          * @stable ICU 4.0
2279          */
2280         public static final UnicodeBlock DOMINO_TILES =
2281                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
2282 
2283         /* New blocks in Unicode 5.2 */
2284 
2285         /** @stable ICU 4.4 */
2286         public static final UnicodeBlock SAMARITAN =
2287                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
2288         /** @stable ICU 4.4 */
2289         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
2290                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
2291                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
2292         /** @stable ICU 4.4 */
2293         public static final UnicodeBlock TAI_THAM =
2294                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
2295         /** @stable ICU 4.4 */
2296         public static final UnicodeBlock VEDIC_EXTENSIONS =
2297                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
2298         /** @stable ICU 4.4 */
2299         public static final UnicodeBlock LISU =
2300                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
2301         /** @stable ICU 4.4 */
2302         public static final UnicodeBlock BAMUM =
2303                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
2304         /** @stable ICU 4.4 */
2305         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2306                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
2307         /** @stable ICU 4.4 */
2308         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2309                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
2310         /** @stable ICU 4.4 */
2311         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2312                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
2313         /** @stable ICU 4.4 */
2314         public static final UnicodeBlock JAVANESE =
2315                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
2316         /** @stable ICU 4.4 */
2317         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2318                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
2319         /** @stable ICU 4.4 */
2320         public static final UnicodeBlock TAI_VIET =
2321                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
2322         /** @stable ICU 4.4 */
2323         public static final UnicodeBlock MEETEI_MAYEK =
2324                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
2325         /** @stable ICU 4.4 */
2326         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2327                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
2328         /** @stable ICU 4.4 */
2329         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2330                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
2331         /** @stable ICU 4.4 */
2332         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2333                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
2334         /** @stable ICU 4.4 */
2335         public static final UnicodeBlock AVESTAN =
2336                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
2337         /** @stable ICU 4.4 */
2338         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2339                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
2340         /** @stable ICU 4.4 */
2341         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2342                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
2343         /** @stable ICU 4.4 */
2344         public static final UnicodeBlock OLD_TURKIC =
2345                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
2346         /** @stable ICU 4.4 */
2347         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2348                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
2349         /** @stable ICU 4.4 */
2350         public static final UnicodeBlock KAITHI =
2351                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
2352         /** @stable ICU 4.4 */
2353         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2354                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
2355         /** @stable ICU 4.4 */
2356         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2357                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2358                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
2359         /** @stable ICU 4.4 */
2360         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2361                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2362                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
2363         /** @stable ICU 4.4 */
2364         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2365                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2366                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
2367 
2368         /* New blocks in Unicode 6.0 */
2369 
2370         /** @stable ICU 4.6 */
2371         public static final UnicodeBlock MANDAIC =
2372                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
2373         /** @stable ICU 4.6 */
2374         public static final UnicodeBlock BATAK =
2375                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
2376         /** @stable ICU 4.6 */
2377         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2378                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
2379         /** @stable ICU 4.6 */
2380         public static final UnicodeBlock BRAHMI =
2381                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
2382         /** @stable ICU 4.6 */
2383         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2384                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
2385         /** @stable ICU 4.6 */
2386         public static final UnicodeBlock KANA_SUPPLEMENT =
2387                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
2388         /** @stable ICU 4.6 */
2389         public static final UnicodeBlock PLAYING_CARDS =
2390                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
2391         /** @stable ICU 4.6 */
2392         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2393                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2394                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
2395         /** @stable ICU 4.6 */
2396         public static final UnicodeBlock EMOTICONS =
2397                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
2398         /** @stable ICU 4.6 */
2399         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2400                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
2401         /** @stable ICU 4.6 */
2402         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2403                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
2404         /** @stable ICU 4.6 */
2405         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2406                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2407                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
2408 
2409         /* New blocks in Unicode 6.1 */
2410 
2411         /** @stable ICU 49 */
2412         public static final UnicodeBlock ARABIC_EXTENDED_A =
2413                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
2414         /** @stable ICU 49 */
2415         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2416                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
2417         /** @stable ICU 49 */
2418         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
2419         /** @stable ICU 49 */
2420         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2421                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
2422         /** @stable ICU 49 */
2423         public static final UnicodeBlock MEROITIC_CURSIVE =
2424                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
2425         /** @stable ICU 49 */
2426         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2427                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
2428         /** @stable ICU 49 */
2429         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
2430         /** @stable ICU 49 */
2431         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
2432         /** @stable ICU 49 */
2433         public static final UnicodeBlock SORA_SOMPENG =
2434                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
2435         /** @stable ICU 49 */
2436         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2437                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
2438         /** @stable ICU 49 */
2439         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
2440 
2441         /* New blocks in Unicode 7.0 */
2442 
2443         /** @stable ICU 54 */
2444         public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
2445         /** @stable ICU 54 */
2446         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2447                 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
2448         /** @stable ICU 54 */
2449         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2450                 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
2451         /** @stable ICU 54 */
2452         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2453                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
2454         /** @stable ICU 54 */
2455         public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
2456         /** @stable ICU 54 */
2457         public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
2458         /** @stable ICU 54 */
2459         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2460                 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
2461         /** @stable ICU 54 */
2462         public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
2463         /** @stable ICU 54 */
2464         public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
2465         /** @stable ICU 54 */
2466         public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
2467         /** @stable ICU 54 */
2468         public static final UnicodeBlock LATIN_EXTENDED_E =
2469                 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
2470         /** @stable ICU 54 */
2471         public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
2472         /** @stable ICU 54 */
2473         public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
2474         /** @stable ICU 54 */
2475         public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
2476         /** @stable ICU 54 */
2477         public static final UnicodeBlock MENDE_KIKAKUI =
2478                 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
2479         /** @stable ICU 54 */
2480         public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
2481         /** @stable ICU 54 */
2482         public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
2483         /** @stable ICU 54 */
2484         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2485                 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
2486         /** @stable ICU 54 */
2487         public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
2488         /** @stable ICU 54 */
2489         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2490                 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
2491         /** @stable ICU 54 */
2492         public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
2493         /** @stable ICU 54 */
2494         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2495                 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
2496         /** @stable ICU 54 */
2497         public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
2498         /** @stable ICU 54 */
2499         public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
2500         /** @stable ICU 54 */
2501         public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
2502         /** @stable ICU 54 */
2503         public static final UnicodeBlock PSALTER_PAHLAVI =
2504                 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
2505         /** @stable ICU 54 */
2506         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2507                 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
2508         /** @stable ICU 54 */
2509         public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
2510         /** @stable ICU 54 */
2511         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2512                 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
2513         /** @stable ICU 54 */
2514         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2515                 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
2516         /** @stable ICU 54 */
2517         public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
2518         /** @stable ICU 54 */
2519         public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
2520 
2521         /* New blocks in Unicode 8.0 */
2522 
2523         /** @stable ICU 56 */
2524         public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
2525         /** @stable ICU 56 */
2526         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2527                 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
2528         /** @stable ICU 56 */
2529         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2530                 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
2531         /** @stable ICU 56 */
2532         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2533                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2534                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
2535         /** @stable ICU 56 */
2536         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2537                 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
2538         /** @stable ICU 56 */
2539         public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
2540         /** @stable ICU 56 */
2541         public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
2542         /** @stable ICU 56 */
2543         public static final UnicodeBlock OLD_HUNGARIAN =
2544                 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
2545         /** @stable ICU 56 */
2546         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2547                 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2548                         SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
2549         /** @stable ICU 56 */
2550         public static final UnicodeBlock SUTTON_SIGNWRITING =
2551                 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
2552 
2553         /* New blocks in Unicode 9.0 */
2554 
2555         /** @stable ICU 58 */
2556         public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
2557         /** @stable ICU 58 */
2558         public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
2559         /** @stable ICU 58 */
2560         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
2561                 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
2562         /** @stable ICU 58 */
2563         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
2564                 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
2565         /** @stable ICU 58 */
2566         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
2567                 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
2568         /** @stable ICU 58 */
2569         public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
2570         /** @stable ICU 58 */
2571         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2572                 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
2573         /** @stable ICU 58 */
2574         public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
2575         /** @stable ICU 58 */
2576         public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
2577         /** @stable ICU 58 */
2578         public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
2579         /** @stable ICU 58 */
2580         public static final UnicodeBlock TANGUT_COMPONENTS =
2581                 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/
2582 
2583         // New blocks in Unicode 10.0
2584 
2585         /** @stable ICU 60 */
2586         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
2587                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/
2588         /** @stable ICU 60 */
2589         public static final UnicodeBlock KANA_EXTENDED_A =
2590                 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/
2591         /** @stable ICU 60 */
2592         public static final UnicodeBlock MASARAM_GONDI =
2593                 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/
2594         /** @stable ICU 60 */
2595         public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/
2596         /** @stable ICU 60 */
2597         public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/
2598         /** @stable ICU 60 */
2599         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
2600                 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/
2601         /** @stable ICU 60 */
2602         public static final UnicodeBlock ZANABAZAR_SQUARE =
2603                 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/
2604 
2605         // New blocks in Unicode 11.0
2606 
2607         /** @stable ICU 62 */
2608         public static final UnicodeBlock CHESS_SYMBOLS =
2609                 new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/
2610         /** @stable ICU 62 */
2611         public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/
2612         /** @stable ICU 62 */
2613         public static final UnicodeBlock GEORGIAN_EXTENDED =
2614                 new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/
2615         /** @stable ICU 62 */
2616         public static final UnicodeBlock GUNJALA_GONDI =
2617                 new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/
2618         /** @stable ICU 62 */
2619         public static final UnicodeBlock HANIFI_ROHINGYA =
2620                 new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/
2621         /** @stable ICU 62 */
2622         public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
2623                 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/
2624         /** @stable ICU 62 */
2625         public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/
2626         /** @stable ICU 62 */
2627         public static final UnicodeBlock MAYAN_NUMERALS =
2628                 new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/
2629         /** @stable ICU 62 */
2630         public static final UnicodeBlock MEDEFAIDRIN =
2631                 new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/
2632         /** @stable ICU 62 */
2633         public static final UnicodeBlock OLD_SOGDIAN =
2634                 new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/
2635         /** @stable ICU 62 */
2636         public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/
2637 
2638         // New blocks in Unicode 12.0
2639 
2640         /** @stable ICU 64 */
2641         public static final UnicodeBlock EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS =
2642                 new UnicodeBlock("EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS", EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS_ID); /*[13430]*/
2643         /** @stable ICU 64 */
2644         public static final UnicodeBlock ELYMAIC = new UnicodeBlock("ELYMAIC", ELYMAIC_ID); /*[10FE0]*/
2645         /** @stable ICU 64 */
2646         public static final UnicodeBlock NANDINAGARI =
2647                 new UnicodeBlock("NANDINAGARI", NANDINAGARI_ID); /*[119A0]*/
2648         /** @stable ICU 64 */
2649         public static final UnicodeBlock NYIAKENG_PUACHUE_HMONG =
2650                 new UnicodeBlock("NYIAKENG_PUACHUE_HMONG", NYIAKENG_PUACHUE_HMONG_ID); /*[1E100]*/
2651         /** @stable ICU 64 */
2652         public static final UnicodeBlock OTTOMAN_SIYAQ_NUMBERS =
2653                 new UnicodeBlock("OTTOMAN_SIYAQ_NUMBERS", OTTOMAN_SIYAQ_NUMBERS_ID); /*[1ED00]*/
2654         /** @stable ICU 64 */
2655         public static final UnicodeBlock SMALL_KANA_EXTENSION =
2656                 new UnicodeBlock("SMALL_KANA_EXTENSION", SMALL_KANA_EXTENSION_ID); /*[1B130]*/
2657         /** @stable ICU 64 */
2658         public static final UnicodeBlock SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A =
2659                 new UnicodeBlock("SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A", SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A_ID); /*[1FA70]*/
2660         /** @stable ICU 64 */
2661         public static final UnicodeBlock TAMIL_SUPPLEMENT =
2662                 new UnicodeBlock("TAMIL_SUPPLEMENT", TAMIL_SUPPLEMENT_ID); /*[11FC0]*/
2663         /** @stable ICU 64 */
2664         public static final UnicodeBlock WANCHO = new UnicodeBlock("WANCHO", WANCHO_ID); /*[1E2C0]*/
2665 
2666         // New blocks in Unicode 13.0
2667 
2668         /** @stable ICU 66 */
2669         public static final UnicodeBlock CHORASMIAN =
2670                 new UnicodeBlock("CHORASMIAN", CHORASMIAN_ID); /*[10FB0]*/
2671         /** @stable ICU 66 */
2672         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G =
2673                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G",
2674                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G_ID); /*[30000]*/
2675         /** @stable ICU 66 */
2676         public static final UnicodeBlock DIVES_AKURU =
2677                 new UnicodeBlock("DIVES_AKURU", DIVES_AKURU_ID); /*[11900]*/
2678         /** @stable ICU 66 */
2679         public static final UnicodeBlock KHITAN_SMALL_SCRIPT =
2680                 new UnicodeBlock("KHITAN_SMALL_SCRIPT", KHITAN_SMALL_SCRIPT_ID); /*[18B00]*/
2681         /** @stable ICU 66 */
2682         public static final UnicodeBlock LISU_SUPPLEMENT =
2683                 new UnicodeBlock("LISU_SUPPLEMENT", LISU_SUPPLEMENT_ID); /*[11FB0]*/
2684         /** @stable ICU 66 */
2685         public static final UnicodeBlock SYMBOLS_FOR_LEGACY_COMPUTING =
2686                 new UnicodeBlock("SYMBOLS_FOR_LEGACY_COMPUTING", SYMBOLS_FOR_LEGACY_COMPUTING_ID); /*[1FB00]*/
2687         /** @stable ICU 66 */
2688         public static final UnicodeBlock TANGUT_SUPPLEMENT =
2689                 new UnicodeBlock("TANGUT_SUPPLEMENT", TANGUT_SUPPLEMENT_ID); /*[18D00]*/
2690         /** @stable ICU 66 */
2691         public static final UnicodeBlock YEZIDI = new UnicodeBlock("YEZIDI", YEZIDI_ID); /*[10E80]*/
2692 
2693         // New blocks in Unicode 14.0
2694 
2695         /** @stable ICU 70 */
2696         public static final UnicodeBlock ARABIC_EXTENDED_B =
2697                 new UnicodeBlock("ARABIC_EXTENDED_B", ARABIC_EXTENDED_B_ID); /*[0870]*/
2698         /** @stable ICU 70 */
2699         public static final UnicodeBlock CYPRO_MINOAN =
2700                 new UnicodeBlock("CYPRO_MINOAN", CYPRO_MINOAN_ID); /*[12F90]*/
2701         /** @stable ICU 70 */
2702         public static final UnicodeBlock ETHIOPIC_EXTENDED_B =
2703                 new UnicodeBlock("ETHIOPIC_EXTENDED_B", ETHIOPIC_EXTENDED_B_ID); /*[1E7E0]*/
2704         /** @stable ICU 70 */
2705         public static final UnicodeBlock KANA_EXTENDED_B =
2706                 new UnicodeBlock("KANA_EXTENDED_B", KANA_EXTENDED_B_ID); /*[1AFF0]*/
2707         /** @stable ICU 70 */
2708         public static final UnicodeBlock LATIN_EXTENDED_F =
2709                 new UnicodeBlock("LATIN_EXTENDED_F", LATIN_EXTENDED_F_ID); /*[10780]*/
2710         /** @stable ICU 70 */
2711         public static final UnicodeBlock LATIN_EXTENDED_G =
2712                 new UnicodeBlock("LATIN_EXTENDED_G", LATIN_EXTENDED_G_ID); /*[1DF00]*/
2713         /** @stable ICU 70 */
2714         public static final UnicodeBlock OLD_UYGHUR =
2715                 new UnicodeBlock("OLD_UYGHUR", OLD_UYGHUR_ID); /*[10F70]*/
2716         /** @stable ICU 70 */
2717         public static final UnicodeBlock TANGSA = new UnicodeBlock("TANGSA", TANGSA_ID); /*[16A70]*/
2718         /** @stable ICU 70 */
2719         public static final UnicodeBlock TOTO = new UnicodeBlock("TOTO", TOTO_ID); /*[1E290]*/
2720         /** @stable ICU 70 */
2721         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A =
2722                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A",
2723                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A_ID); /*[11AB0]*/
2724         /** @stable ICU 70 */
2725         public static final UnicodeBlock VITHKUQI =
2726                 new UnicodeBlock("VITHKUQI", VITHKUQI_ID); /*[10570]*/
2727         /** @stable ICU 70 */
2728         public static final UnicodeBlock ZNAMENNY_MUSICAL_NOTATION =
2729                 new UnicodeBlock("ZNAMENNY_MUSICAL_NOTATION",
2730                         ZNAMENNY_MUSICAL_NOTATION_ID); /*[1CF00]*/
2731 
2732         // New blocks in Unicode 15.0
2733 
2734         /** @stable ICU 72 */
2735         public static final UnicodeBlock ARABIC_EXTENDED_C =
2736                 new UnicodeBlock("ARABIC_EXTENDED_C", ARABIC_EXTENDED_C_ID); /*[10EC0]*/
2737         /** @stable ICU 72 */
2738         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H =
2739                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H",
2740                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H_ID); /*[31350]*/
2741         /** @stable ICU 72 */
2742         public static final UnicodeBlock CYRILLIC_EXTENDED_D =
2743                 new UnicodeBlock("CYRILLIC_EXTENDED_D", CYRILLIC_EXTENDED_D_ID); /*[1E030]*/
2744         /** @stable ICU 72 */
2745         public static final UnicodeBlock DEVANAGARI_EXTENDED_A =
2746                 new UnicodeBlock("DEVANAGARI_EXTENDED_A", DEVANAGARI_EXTENDED_A_ID); /*[11B00]*/
2747         /** @stable ICU 72 */
2748         public static final UnicodeBlock KAKTOVIK_NUMERALS =
2749                 new UnicodeBlock("KAKTOVIK_NUMERALS", KAKTOVIK_NUMERALS_ID); /*[1D2C0]*/
2750         /** @stable ICU 72 */
2751         public static final UnicodeBlock KAWI = new UnicodeBlock("KAWI", KAWI_ID); /*[11F00]*/
2752         /** @stable ICU 72 */
2753         public static final UnicodeBlock NAG_MUNDARI =
2754                 new UnicodeBlock("NAG_MUNDARI", NAG_MUNDARI_ID); /*[1E4D0]*/
2755 
2756         // New block in Unicode 15.1
2757 
2758         /** @stable ICU 74 */
2759         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I =
2760                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I",
2761                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I_ID); /*[2EBF0]*/
2762 
2763         /**
2764          * @stable ICU 2.4
2765          */
2766         public static final UnicodeBlock INVALID_CODE
2767         = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2768 
2769         static {
2770             for (int blockId = 0; blockId < COUNT; ++blockId) {
2771                 if (BLOCKS_[blockId] == null) {
2772                     throw new java.lang.IllegalStateException(
2773                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2774                 }
2775             }
2776         }
2777 
2778         // public methods --------------------------------------------------
2779 
2780         /**
2781          * {@icu} Returns the only instance of the UnicodeBlock with the argument ID.
2782          * If no such ID exists, a INVALID_CODE UnicodeBlock will be returned.
2783          * @param id UnicodeBlock ID
2784          * @return the only instance of the UnicodeBlock with the argument ID
2785          *         if it exists, otherwise a INVALID_CODE UnicodeBlock will be
2786          *         returned.
2787          * @stable ICU 2.4
2788          */
getInstance(int id)2789         public static UnicodeBlock getInstance(int id)
2790         {
2791             if (id >= 0 && id < BLOCKS_.length) {
2792                 return BLOCKS_[id];
2793             }
2794             return INVALID_CODE;
2795         }
2796 
2797         /**
2798          * Returns the Unicode allocation block that contains the code point,
2799          * or null if the code point is not a member of a defined block.
2800          * @param ch code point to be tested
2801          * @return the Unicode allocation block that contains the code point
2802          * @stable ICU 2.4
2803          */
of(int ch)2804         public static UnicodeBlock of(int ch)
2805         {
2806             if (ch > MAX_VALUE) {
2807                 return INVALID_CODE;
2808             }
2809 
2810             return UnicodeBlock.getInstance(
2811                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2812         }
2813 
2814         /**
2815          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
2816          * Returns the Unicode block with the given name. {@icunote} Unlike
2817          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
2818          * against the official UCD name and the Java block name
2819          * (ignoring case).
2820          * @param blockName the name of the block to match
2821          * @return the UnicodeBlock with that name
2822          * @throws IllegalArgumentException if the blockName could not be matched
2823          * @stable ICU 3.0
2824          */
forName(String blockName)2825         public static final UnicodeBlock forName(String blockName) {
2826             Map<String, UnicodeBlock> m = null;
2827             if (mref != null) {
2828                 m = mref.get();
2829             }
2830             if (m == null) {
2831                 m = new HashMap<>(BLOCKS_.length);
2832                 for (int i = 0; i < BLOCKS_.length; ++i) {
2833                     UnicodeBlock b = BLOCKS_[i];
2834                     String name = trimBlockName(
2835                             getPropertyValueName(UProperty.BLOCK, b.getID(),
2836                                     UProperty.NameChoice.LONG));
2837                     m.put(name, b);
2838                 }
2839                 mref = new SoftReference<>(m);
2840             }
2841             UnicodeBlock b = m.get(trimBlockName(blockName));
2842             if (b == null) {
2843                 throw new IllegalArgumentException();
2844             }
2845             return b;
2846         }
2847         private static SoftReference<Map<String, UnicodeBlock>> mref;
2848 
trimBlockName(String name)2849         private static String trimBlockName(String name) {
2850             String upper = name.toUpperCase(Locale.ENGLISH);
2851             StringBuilder result = new StringBuilder(upper.length());
2852             for (int i = 0; i < upper.length(); i++) {
2853                 char c = upper.charAt(i);
2854                 if (c != ' ' && c != '_' && c != '-') {
2855                     result.append(c);
2856                 }
2857             }
2858             return result.toString();
2859         }
2860 
2861         /**
2862          * {icu} Returns the type ID of this Unicode block
2863          * @return integer type ID of this Unicode block
2864          * @stable ICU 2.4
2865          */
getID()2866         public int getID()
2867         {
2868             return m_id_;
2869         }
2870 
2871         // private data members ---------------------------------------------
2872 
2873         /**
2874          * Identification code for this UnicodeBlock
2875          */
2876         private int m_id_;
2877 
2878         // private constructor ----------------------------------------------
2879 
2880         /**
2881          * UnicodeBlock constructor
2882          * @param name name of this UnicodeBlock
2883          * @param id unique id of this UnicodeBlock
2884          * @exception NullPointerException if name is <code>null</code>
2885          */
UnicodeBlock(String name, int id)2886         private UnicodeBlock(String name, int id)
2887         {
2888             super(name);
2889             m_id_ = id;
2890             if (id >= 0) {
2891                 BLOCKS_[id] = this;
2892             }
2893         }
2894     }
2895 
2896     /**
2897      * East Asian Width constants.
2898      * @see UProperty#EAST_ASIAN_WIDTH
2899      * @see UCharacter#getIntPropertyValue
2900      * @stable ICU 2.4
2901      */
2902     public static interface EastAsianWidth
2903     {
2904         /**
2905          * @stable ICU 2.4
2906          */
2907         public static final int NEUTRAL = 0;
2908         /**
2909          * @stable ICU 2.4
2910          */
2911         public static final int AMBIGUOUS = 1;
2912         /**
2913          * @stable ICU 2.4
2914          */
2915         public static final int HALFWIDTH = 2;
2916         /**
2917          * @stable ICU 2.4
2918          */
2919         public static final int FULLWIDTH = 3;
2920         /**
2921          * @stable ICU 2.4
2922          */
2923         public static final int NARROW = 4;
2924         /**
2925          * @stable ICU 2.4
2926          */
2927         public static final int WIDE = 5;
2928         /**
2929          * One more than the highest normal EastAsianWidth value.
2930          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
2931          *
2932          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2933          */
2934         @Deprecated
2935         public static final int COUNT = 6;
2936     }
2937 
2938     /**
2939      * Decomposition Type constants.
2940      * @see UProperty#DECOMPOSITION_TYPE
2941      * @stable ICU 2.4
2942      */
2943     public static interface DecompositionType
2944     {
2945         /**
2946          * @stable ICU 2.4
2947          */
2948         public static final int NONE = 0;
2949         /**
2950          * @stable ICU 2.4
2951          */
2952         public static final int CANONICAL = 1;
2953         /**
2954          * @stable ICU 2.4
2955          */
2956         public static final int COMPAT = 2;
2957         /**
2958          * @stable ICU 2.4
2959          */
2960         public static final int CIRCLE = 3;
2961         /**
2962          * @stable ICU 2.4
2963          */
2964         public static final int FINAL = 4;
2965         /**
2966          * @stable ICU 2.4
2967          */
2968         public static final int FONT = 5;
2969         /**
2970          * @stable ICU 2.4
2971          */
2972         public static final int FRACTION = 6;
2973         /**
2974          * @stable ICU 2.4
2975          */
2976         public static final int INITIAL = 7;
2977         /**
2978          * @stable ICU 2.4
2979          */
2980         public static final int ISOLATED = 8;
2981         /**
2982          * @stable ICU 2.4
2983          */
2984         public static final int MEDIAL = 9;
2985         /**
2986          * @stable ICU 2.4
2987          */
2988         public static final int NARROW = 10;
2989         /**
2990          * @stable ICU 2.4
2991          */
2992         public static final int NOBREAK = 11;
2993         /**
2994          * @stable ICU 2.4
2995          */
2996         public static final int SMALL = 12;
2997         /**
2998          * @stable ICU 2.4
2999          */
3000         public static final int SQUARE = 13;
3001         /**
3002          * @stable ICU 2.4
3003          */
3004         public static final int SUB = 14;
3005         /**
3006          * @stable ICU 2.4
3007          */
3008         public static final int SUPER = 15;
3009         /**
3010          * @stable ICU 2.4
3011          */
3012         public static final int VERTICAL = 16;
3013         /**
3014          * @stable ICU 2.4
3015          */
3016         public static final int WIDE = 17;
3017         /**
3018          * One more than the highest normal DecompositionType value.
3019          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
3020          *
3021          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3022          */
3023         @Deprecated
3024         public static final int COUNT = 18;
3025     }
3026 
3027     /**
3028      * Joining Type constants.
3029      * @see UProperty#JOINING_TYPE
3030      * @stable ICU 2.4
3031      */
3032     public static interface JoiningType
3033     {
3034         /**
3035          * @stable ICU 2.4
3036          */
3037         public static final int NON_JOINING = 0;
3038         /**
3039          * @stable ICU 2.4
3040          */
3041         public static final int JOIN_CAUSING = 1;
3042         /**
3043          * @stable ICU 2.4
3044          */
3045         public static final int DUAL_JOINING = 2;
3046         /**
3047          * @stable ICU 2.4
3048          */
3049         public static final int LEFT_JOINING = 3;
3050         /**
3051          * @stable ICU 2.4
3052          */
3053         public static final int RIGHT_JOINING = 4;
3054         /**
3055          * @stable ICU 2.4
3056          */
3057         public static final int TRANSPARENT = 5;
3058         /**
3059          * One more than the highest normal JoiningType value.
3060          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
3061          *
3062          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3063          */
3064         @Deprecated
3065         public static final int COUNT = 6;
3066     }
3067 
3068     /**
3069      * Joining Group constants.
3070      * @see UProperty#JOINING_GROUP
3071      * @stable ICU 2.4
3072      */
3073     public static interface JoiningGroup
3074     {
3075         /**
3076          * @stable ICU 2.4
3077          */
3078         public static final int NO_JOINING_GROUP = 0;
3079         /**
3080          * @stable ICU 2.4
3081          */
3082         public static final int AIN = 1;
3083         /**
3084          * @stable ICU 2.4
3085          */
3086         public static final int ALAPH = 2;
3087         /**
3088          * @stable ICU 2.4
3089          */
3090         public static final int ALEF = 3;
3091         /**
3092          * @stable ICU 2.4
3093          */
3094         public static final int BEH = 4;
3095         /**
3096          * @stable ICU 2.4
3097          */
3098         public static final int BETH = 5;
3099         /**
3100          * @stable ICU 2.4
3101          */
3102         public static final int DAL = 6;
3103         /**
3104          * @stable ICU 2.4
3105          */
3106         public static final int DALATH_RISH = 7;
3107         /**
3108          * @stable ICU 2.4
3109          */
3110         public static final int E = 8;
3111         /**
3112          * @stable ICU 2.4
3113          */
3114         public static final int FEH = 9;
3115         /**
3116          * @stable ICU 2.4
3117          */
3118         public static final int FINAL_SEMKATH = 10;
3119         /**
3120          * @stable ICU 2.4
3121          */
3122         public static final int GAF = 11;
3123         /**
3124          * @stable ICU 2.4
3125          */
3126         public static final int GAMAL = 12;
3127         /**
3128          * @stable ICU 2.4
3129          */
3130         public static final int HAH = 13;
3131         /** @stable ICU 4.6 */
3132         public static final int TEH_MARBUTA_GOAL = 14;
3133         /**
3134          * @stable ICU 2.4
3135          */
3136         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
3137         /**
3138          * @stable ICU 2.4
3139          */
3140         public static final int HE = 15;
3141         /**
3142          * @stable ICU 2.4
3143          */
3144         public static final int HEH = 16;
3145         /**
3146          * @stable ICU 2.4
3147          */
3148         public static final int HEH_GOAL = 17;
3149         /**
3150          * @stable ICU 2.4
3151          */
3152         public static final int HETH = 18;
3153         /**
3154          * @stable ICU 2.4
3155          */
3156         public static final int KAF = 19;
3157         /**
3158          * @stable ICU 2.4
3159          */
3160         public static final int KAPH = 20;
3161         /**
3162          * @stable ICU 2.4
3163          */
3164         public static final int KNOTTED_HEH = 21;
3165         /**
3166          * @stable ICU 2.4
3167          */
3168         public static final int LAM = 22;
3169         /**
3170          * @stable ICU 2.4
3171          */
3172         public static final int LAMADH = 23;
3173         /**
3174          * @stable ICU 2.4
3175          */
3176         public static final int MEEM = 24;
3177         /**
3178          * @stable ICU 2.4
3179          */
3180         public static final int MIM = 25;
3181         /**
3182          * @stable ICU 2.4
3183          */
3184         public static final int NOON = 26;
3185         /**
3186          * @stable ICU 2.4
3187          */
3188         public static final int NUN = 27;
3189         /**
3190          * @stable ICU 2.4
3191          */
3192         public static final int PE = 28;
3193         /**
3194          * @stable ICU 2.4
3195          */
3196         public static final int QAF = 29;
3197         /**
3198          * @stable ICU 2.4
3199          */
3200         public static final int QAPH = 30;
3201         /**
3202          * @stable ICU 2.4
3203          */
3204         public static final int REH = 31;
3205         /**
3206          * @stable ICU 2.4
3207          */
3208         public static final int REVERSED_PE = 32;
3209         /**
3210          * @stable ICU 2.4
3211          */
3212         public static final int SAD = 33;
3213         /**
3214          * @stable ICU 2.4
3215          */
3216         public static final int SADHE = 34;
3217         /**
3218          * @stable ICU 2.4
3219          */
3220         public static final int SEEN = 35;
3221         /**
3222          * @stable ICU 2.4
3223          */
3224         public static final int SEMKATH = 36;
3225         /**
3226          * @stable ICU 2.4
3227          */
3228         public static final int SHIN = 37;
3229         /**
3230          * @stable ICU 2.4
3231          */
3232         public static final int SWASH_KAF = 38;
3233         /**
3234          * @stable ICU 2.4
3235          */
3236         public static final int SYRIAC_WAW = 39;
3237         /**
3238          * @stable ICU 2.4
3239          */
3240         public static final int TAH = 40;
3241         /**
3242          * @stable ICU 2.4
3243          */
3244         public static final int TAW = 41;
3245         /**
3246          * @stable ICU 2.4
3247          */
3248         public static final int TEH_MARBUTA = 42;
3249         /**
3250          * @stable ICU 2.4
3251          */
3252         public static final int TETH = 43;
3253         /**
3254          * @stable ICU 2.4
3255          */
3256         public static final int WAW = 44;
3257         /**
3258          * @stable ICU 2.4
3259          */
3260         public static final int YEH = 45;
3261         /**
3262          * @stable ICU 2.4
3263          */
3264         public static final int YEH_BARREE = 46;
3265         /**
3266          * @stable ICU 2.4
3267          */
3268         public static final int YEH_WITH_TAIL = 47;
3269         /**
3270          * @stable ICU 2.4
3271          */
3272         public static final int YUDH = 48;
3273         /**
3274          * @stable ICU 2.4
3275          */
3276         public static final int YUDH_HE = 49;
3277         /**
3278          * @stable ICU 2.4
3279          */
3280         public static final int ZAIN = 50;
3281         /**
3282          * @stable ICU 2.6
3283          */
3284         public static final int FE = 51;
3285         /**
3286          * @stable ICU 2.6
3287          */
3288         public static final int KHAPH = 52;
3289         /**
3290          * @stable ICU 2.6
3291          */
3292         public static final int ZHAIN = 53;
3293         /**
3294          * @stable ICU 4.0
3295          */
3296         public static final int BURUSHASKI_YEH_BARREE = 54;
3297         /** @stable ICU 4.4 */
3298         public static final int FARSI_YEH = 55;
3299         /** @stable ICU 4.4 */
3300         public static final int NYA = 56;
3301         /** @stable ICU 49 */
3302         public static final int ROHINGYA_YEH = 57;
3303 
3304         /** @stable ICU 54 */
3305         public static final int MANICHAEAN_ALEPH = 58;
3306         /** @stable ICU 54 */
3307         public static final int MANICHAEAN_AYIN = 59;
3308         /** @stable ICU 54 */
3309         public static final int MANICHAEAN_BETH = 60;
3310         /** @stable ICU 54 */
3311         public static final int MANICHAEAN_DALETH = 61;
3312         /** @stable ICU 54 */
3313         public static final int MANICHAEAN_DHAMEDH = 62;
3314         /** @stable ICU 54 */
3315         public static final int MANICHAEAN_FIVE = 63;
3316         /** @stable ICU 54 */
3317         public static final int MANICHAEAN_GIMEL = 64;
3318         /** @stable ICU 54 */
3319         public static final int MANICHAEAN_HETH = 65;
3320         /** @stable ICU 54 */
3321         public static final int MANICHAEAN_HUNDRED = 66;
3322         /** @stable ICU 54 */
3323         public static final int MANICHAEAN_KAPH = 67;
3324         /** @stable ICU 54 */
3325         public static final int MANICHAEAN_LAMEDH = 68;
3326         /** @stable ICU 54 */
3327         public static final int MANICHAEAN_MEM = 69;
3328         /** @stable ICU 54 */
3329         public static final int MANICHAEAN_NUN = 70;
3330         /** @stable ICU 54 */
3331         public static final int MANICHAEAN_ONE = 71;
3332         /** @stable ICU 54 */
3333         public static final int MANICHAEAN_PE = 72;
3334         /** @stable ICU 54 */
3335         public static final int MANICHAEAN_QOPH = 73;
3336         /** @stable ICU 54 */
3337         public static final int MANICHAEAN_RESH = 74;
3338         /** @stable ICU 54 */
3339         public static final int MANICHAEAN_SADHE = 75;
3340         /** @stable ICU 54 */
3341         public static final int MANICHAEAN_SAMEKH = 76;
3342         /** @stable ICU 54 */
3343         public static final int MANICHAEAN_TAW = 77;
3344         /** @stable ICU 54 */
3345         public static final int MANICHAEAN_TEN = 78;
3346         /** @stable ICU 54 */
3347         public static final int MANICHAEAN_TETH = 79;
3348         /** @stable ICU 54 */
3349         public static final int MANICHAEAN_THAMEDH = 80;
3350         /** @stable ICU 54 */
3351         public static final int MANICHAEAN_TWENTY = 81;
3352         /** @stable ICU 54 */
3353         public static final int MANICHAEAN_WAW = 82;
3354         /** @stable ICU 54 */
3355         public static final int MANICHAEAN_YODH = 83;
3356         /** @stable ICU 54 */
3357         public static final int MANICHAEAN_ZAYIN = 84;
3358         /** @stable ICU 54 */
3359         public static final int STRAIGHT_WAW = 85;
3360 
3361         /** @stable ICU 58 */
3362         public static final int AFRICAN_FEH = 86;
3363         /** @stable ICU 58 */
3364         public static final int AFRICAN_NOON = 87;
3365         /** @stable ICU 58 */
3366         public static final int AFRICAN_QAF = 88;
3367 
3368         /** @stable ICU 60 */
3369         public static final int MALAYALAM_BHA = 89;
3370         /** @stable ICU 60 */
3371         public static final int MALAYALAM_JA = 90;
3372         /** @stable ICU 60 */
3373         public static final int MALAYALAM_LLA = 91;
3374         /** @stable ICU 60 */
3375         public static final int MALAYALAM_LLLA = 92;
3376         /** @stable ICU 60 */
3377         public static final int MALAYALAM_NGA = 93;
3378         /** @stable ICU 60 */
3379         public static final int MALAYALAM_NNA = 94;
3380         /** @stable ICU 60 */
3381         public static final int MALAYALAM_NNNA = 95;
3382         /** @stable ICU 60 */
3383         public static final int MALAYALAM_NYA = 96;
3384         /** @stable ICU 60 */
3385         public static final int MALAYALAM_RA = 97;
3386         /** @stable ICU 60 */
3387         public static final int MALAYALAM_SSA = 98;
3388         /** @stable ICU 60 */
3389         public static final int MALAYALAM_TTA = 99;
3390 
3391         /** @stable ICU 62 */
3392         public static final int HANIFI_ROHINGYA_KINNA_YA = 100;
3393         /** @stable ICU 62 */
3394         public static final int HANIFI_ROHINGYA_PA = 101;
3395 
3396         /** @stable ICU 70 */
3397         public static final int THIN_YEH = 102;
3398         /** @stable ICU 70 */
3399         public static final int VERTICAL_TAIL = 103;
3400 
3401         /**
3402          * One more than the highest normal JoiningGroup value.
3403          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JoiningGroup).
3404          *
3405          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3406          */
3407         @Deprecated
3408         public static final int COUNT = 104;
3409     }
3410 
3411     /**
3412      * Grapheme Cluster Break constants.
3413      * @see UProperty#GRAPHEME_CLUSTER_BREAK
3414      * @stable ICU 3.4
3415      */
3416     public static interface GraphemeClusterBreak {
3417         /**
3418          * @stable ICU 3.4
3419          */
3420         public static final int OTHER = 0;
3421         /**
3422          * @stable ICU 3.4
3423          */
3424         public static final int CONTROL = 1;
3425         /**
3426          * @stable ICU 3.4
3427          */
3428         public static final int CR = 2;
3429         /**
3430          * @stable ICU 3.4
3431          */
3432         public static final int EXTEND = 3;
3433         /**
3434          * @stable ICU 3.4
3435          */
3436         public static final int L = 4;
3437         /**
3438          * @stable ICU 3.4
3439          */
3440         public static final int LF = 5;
3441         /**
3442          * @stable ICU 3.4
3443          */
3444         public static final int LV = 6;
3445         /**
3446          * @stable ICU 3.4
3447          */
3448         public static final int LVT = 7;
3449         /**
3450          * @stable ICU 3.4
3451          */
3452         public static final int T = 8;
3453         /**
3454          * @stable ICU 3.4
3455          */
3456         public static final int V = 9;
3457         /**
3458          * @stable ICU 4.0
3459          */
3460         public static final int SPACING_MARK = 10;
3461         /**
3462          * @stable ICU 4.0
3463          */
3464         public static final int PREPEND = 11;
3465         /** @stable ICU 50 */
3466         public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3467         /** @stable ICU 58 */
3468         public static final int E_BASE = 13;          /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3469         /** @stable ICU 58 */
3470         public static final int E_BASE_GAZ = 14;      /*[EBG]*/
3471         /** @stable ICU 58 */
3472         public static final int E_MODIFIER = 15;      /*[EM]*/
3473         /** @stable ICU 58 */
3474         public static final int GLUE_AFTER_ZWJ = 16;  /*[GAZ]*/
3475         /** @stable ICU 58 */
3476         public static final int ZWJ = 17;             /*[ZWJ]*/
3477 
3478         /**
3479          * One more than the highest normal GraphemeClusterBreak value.
3480          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
3481          *
3482          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3483          */
3484         @Deprecated
3485         public static final int COUNT = 18;
3486     }
3487 
3488     /**
3489      * Word Break constants.
3490      * @see UProperty#WORD_BREAK
3491      * @stable ICU 3.4
3492      */
3493     public static interface WordBreak {
3494         /**
3495          * @stable ICU 3.8
3496          */
3497         public static final int OTHER = 0;
3498         /**
3499          * @stable ICU 3.8
3500          */
3501         public static final int ALETTER = 1;
3502         /**
3503          * @stable ICU 3.8
3504          */
3505         public static final int FORMAT = 2;
3506         /**
3507          * @stable ICU 3.8
3508          */
3509         public static final int KATAKANA = 3;
3510         /**
3511          * @stable ICU 3.8
3512          */
3513         public static final int MIDLETTER = 4;
3514         /**
3515          * @stable ICU 3.8
3516          */
3517         public static final int MIDNUM = 5;
3518         /**
3519          * @stable ICU 3.8
3520          */
3521         public static final int NUMERIC = 6;
3522         /**
3523          * @stable ICU 3.8
3524          */
3525         public static final int EXTENDNUMLET = 7;
3526         /**
3527          * @stable ICU 4.0
3528          */
3529         public static final int CR = 8;
3530         /**
3531          * @stable ICU 4.0
3532          */
3533         public static final int EXTEND = 9;
3534         /**
3535          * @stable ICU 4.0
3536          */
3537         public static final int LF = 10;
3538         /**
3539          * @stable ICU 4.0
3540          */
3541         public static final int MIDNUMLET = 11;
3542         /**
3543          * @stable ICU 4.0
3544          */
3545         public static final int NEWLINE = 12;
3546         /** @stable ICU 50 */
3547         public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3548         /** @stable ICU 52 */
3549         public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
3550         /** @stable ICU 52 */
3551         public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
3552         /** @stable ICU 52 */
3553         public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
3554         /** @stable ICU 58 */
3555         public static final int E_BASE = 17;           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3556         /** @stable ICU 58 */
3557         public static final int E_BASE_GAZ = 18;       /*[EBG]*/
3558         /** @stable ICU 58 */
3559         public static final int E_MODIFIER = 19;       /*[EM]*/
3560         /** @stable ICU 58 */
3561         public static final int GLUE_AFTER_ZWJ = 20;   /*[GAZ]*/
3562         /** @stable ICU 58 */
3563         public static final int ZWJ = 21;              /*[ZWJ]*/
3564         /** @stable ICU 62 */
3565         public static final int WSEGSPACE = 22;        /*[WSEGSPACE]*/
3566         /**
3567          * One more than the highest normal WordBreak value.
3568          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
3569          *
3570          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3571          */
3572         @Deprecated
3573         public static final int COUNT = 23;
3574     }
3575 
3576     /**
3577      * Sentence Break constants.
3578      * @see UProperty#SENTENCE_BREAK
3579      * @stable ICU 3.4
3580      */
3581     public static interface SentenceBreak {
3582         /**
3583          * @stable ICU 3.8
3584          */
3585         public static final int OTHER = 0;
3586         /**
3587          * @stable ICU 3.8
3588          */
3589         public static final int ATERM = 1;
3590         /**
3591          * @stable ICU 3.8
3592          */
3593         public static final int CLOSE = 2;
3594         /**
3595          * @stable ICU 3.8
3596          */
3597         public static final int FORMAT = 3;
3598         /**
3599          * @stable ICU 3.8
3600          */
3601         public static final int LOWER = 4;
3602         /**
3603          * @stable ICU 3.8
3604          */
3605         public static final int NUMERIC = 5;
3606         /**
3607          * @stable ICU 3.8
3608          */
3609         public static final int OLETTER = 6;
3610         /**
3611          * @stable ICU 3.8
3612          */
3613         public static final int SEP = 7;
3614         /**
3615          * @stable ICU 3.8
3616          */
3617         public static final int SP = 8;
3618         /**
3619          * @stable ICU 3.8
3620          */
3621         public static final int STERM = 9;
3622         /**
3623          * @stable ICU 3.8
3624          */
3625         public static final int UPPER = 10;
3626         /**
3627          * @stable ICU 4.0
3628          */
3629         public static final int CR = 11;
3630         /**
3631          * @stable ICU 4.0
3632          */
3633         public static final int EXTEND = 12;
3634         /**
3635          * @stable ICU 4.0
3636          */
3637         public static final int LF = 13;
3638         /**
3639          * @stable ICU 4.0
3640          */
3641         public static final int SCONTINUE = 14;
3642         /**
3643          * One more than the highest normal SentenceBreak value.
3644          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
3645          *
3646          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3647          */
3648         @Deprecated
3649         public static final int COUNT = 15;
3650     }
3651 
3652     /**
3653      * Line Break constants.
3654      * @see UProperty#LINE_BREAK
3655      * @stable ICU 2.4
3656      */
3657     public static interface LineBreak
3658     {
3659         /**
3660          * @stable ICU 2.4
3661          */
3662         public static final int UNKNOWN = 0;
3663         /**
3664          * @stable ICU 2.4
3665          */
3666         public static final int AMBIGUOUS = 1;
3667         /**
3668          * @stable ICU 2.4
3669          */
3670         public static final int ALPHABETIC = 2;
3671         /**
3672          * @stable ICU 2.4
3673          */
3674         public static final int BREAK_BOTH = 3;
3675         /**
3676          * @stable ICU 2.4
3677          */
3678         public static final int BREAK_AFTER = 4;
3679         /**
3680          * @stable ICU 2.4
3681          */
3682         public static final int BREAK_BEFORE = 5;
3683         /**
3684          * @stable ICU 2.4
3685          */
3686         public static final int MANDATORY_BREAK = 6;
3687         /**
3688          * @stable ICU 2.4
3689          */
3690         public static final int CONTINGENT_BREAK = 7;
3691         /**
3692          * @stable ICU 2.4
3693          */
3694         public static final int CLOSE_PUNCTUATION = 8;
3695         /**
3696          * @stable ICU 2.4
3697          */
3698         public static final int COMBINING_MARK = 9;
3699         /**
3700          * @stable ICU 2.4
3701          */
3702         public static final int CARRIAGE_RETURN = 10;
3703         /**
3704          * @stable ICU 2.4
3705          */
3706         public static final int EXCLAMATION = 11;
3707         /**
3708          * @stable ICU 2.4
3709          */
3710         public static final int GLUE = 12;
3711         /**
3712          * @stable ICU 2.4
3713          */
3714         public static final int HYPHEN = 13;
3715         /**
3716          * @stable ICU 2.4
3717          */
3718         public static final int IDEOGRAPHIC = 14;
3719         /**
3720          * @see #INSEPARABLE
3721          * @stable ICU 2.4
3722          */
3723         public static final int INSEPERABLE = 15;
3724         /**
3725          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
3726          * @stable ICU 3.0
3727          */
3728         public static final int INSEPARABLE = 15;
3729         /**
3730          * @stable ICU 2.4
3731          */
3732         public static final int INFIX_NUMERIC = 16;
3733         /**
3734          * @stable ICU 2.4
3735          */
3736         public static final int LINE_FEED = 17;
3737         /**
3738          * @stable ICU 2.4
3739          */
3740         public static final int NONSTARTER = 18;
3741         /**
3742          * @stable ICU 2.4
3743          */
3744         public static final int NUMERIC = 19;
3745         /**
3746          * @stable ICU 2.4
3747          */
3748         public static final int OPEN_PUNCTUATION = 20;
3749         /**
3750          * @stable ICU 2.4
3751          */
3752         public static final int POSTFIX_NUMERIC = 21;
3753         /**
3754          * @stable ICU 2.4
3755          */
3756         public static final int PREFIX_NUMERIC = 22;
3757         /**
3758          * @stable ICU 2.4
3759          */
3760         public static final int QUOTATION = 23;
3761         /**
3762          * @stable ICU 2.4
3763          */
3764         public static final int COMPLEX_CONTEXT = 24;
3765         /**
3766          * @stable ICU 2.4
3767          */
3768         public static final int SURROGATE = 25;
3769         /**
3770          * @stable ICU 2.4
3771          */
3772         public static final int SPACE = 26;
3773         /**
3774          * @stable ICU 2.4
3775          */
3776         public static final int BREAK_SYMBOLS = 27;
3777         /**
3778          * @stable ICU 2.4
3779          */
3780         public static final int ZWSPACE = 28;
3781         /**
3782          * @stable ICU 2.6
3783          */
3784         public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
3785         /**
3786          * @stable ICU 2.6
3787          */
3788         public static final int WORD_JOINER = 30;      /*[WJ]*/
3789         /**
3790          * @stable ICU 3.4
3791          */
3792         public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
3793         /**
3794          * @stable ICU 3.4
3795          */
3796         public static final int H3 = 32;
3797         /**
3798          * @stable ICU 3.4
3799          */
3800         public static final int JL = 33;
3801         /**
3802          * @stable ICU 3.4
3803          */
3804         public static final int JT = 34;
3805         /**
3806          * @stable ICU 3.4
3807          */
3808         public static final int JV = 35;
3809         /** @stable ICU 4.4 */
3810         public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
3811         /** @stable ICU 49 */
3812         public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
3813         /** @stable ICU 49 */
3814         public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
3815         /** @stable ICU 50 */
3816         public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3817         /** @stable ICU 58 */
3818         public static final int E_BASE = 40;  /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3819         /** @stable ICU 58 */
3820         public static final int E_MODIFIER = 41;  /*[EM]*/
3821         /** @stable ICU 58 */
3822         public static final int ZWJ = 42;  /*[ZWJ]*/
3823         /** @stable ICU 74 */
3824         public static final int AKSARA = 43;  /*[AK]*/ /* from here on: new in Unicode 15.1/ICU 74 */
3825         /** @stable ICU 74 */
3826         public static final int AKSARA_PREBASE = 44;  /*[AP]*/
3827         /** @stable ICU 74 */
3828         public static final int AKSARA_START = 45;  /*[AS]*/
3829         /** @stable ICU 74 */
3830         public static final int VIRAMA_FINAL = 46;  /*[VF]*/
3831         /** @stable ICU 74 */
3832         public static final int VIRAMA = 47;  /*[VI]*/
3833         /**
3834          * One more than the highest normal LineBreak value.
3835          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
3836          *
3837          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3838          */
3839         @Deprecated
3840         public static final int COUNT = 48;
3841     }
3842 
3843     /**
3844      * Numeric Type constants.
3845      * @see UProperty#NUMERIC_TYPE
3846      * @stable ICU 2.4
3847      */
3848     public static interface NumericType
3849     {
3850         /**
3851          * @stable ICU 2.4
3852          */
3853         public static final int NONE = 0;
3854         /**
3855          * @stable ICU 2.4
3856          */
3857         public static final int DECIMAL = 1;
3858         /**
3859          * @stable ICU 2.4
3860          */
3861         public static final int DIGIT = 2;
3862         /**
3863          * @stable ICU 2.4
3864          */
3865         public static final int NUMERIC = 3;
3866         /**
3867          * One more than the highest normal NumericType value.
3868          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE).
3869          *
3870          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3871          */
3872         @Deprecated
3873         public static final int COUNT = 4;
3874     }
3875 
3876     /**
3877      * Hangul Syllable Type constants.
3878      *
3879      * @see UProperty#HANGUL_SYLLABLE_TYPE
3880      * @stable ICU 2.6
3881      */
3882     public static interface HangulSyllableType
3883     {
3884         /**
3885          * @stable ICU 2.6
3886          */
3887         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
3888         /**
3889          * @stable ICU 2.6
3890          */
3891         public static final int LEADING_JAMO        = 1;   /*[L]*/
3892         /**
3893          * @stable ICU 2.6
3894          */
3895         public static final int VOWEL_JAMO          = 2;   /*[V]*/
3896         /**
3897          * @stable ICU 2.6
3898          */
3899         public static final int TRAILING_JAMO       = 3;   /*[T]*/
3900         /**
3901          * @stable ICU 2.6
3902          */
3903         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
3904         /**
3905          * @stable ICU 2.6
3906          */
3907         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
3908         /**
3909          * One more than the highest normal HangulSyllableType value.
3910          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
3911          *
3912          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3913          */
3914         @Deprecated
3915         public static final int COUNT               = 6;
3916     }
3917 
3918     /**
3919      * Bidi Paired Bracket Type constants.
3920      *
3921      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
3922      * @stable ICU 52
3923      */
3924     public static interface BidiPairedBracketType {
3925         /**
3926          * Not a paired bracket.
3927          * @stable ICU 52
3928          */
3929         public static final int NONE = 0;
3930         /**
3931          * Open paired bracket.
3932          * @stable ICU 52
3933          */
3934         public static final int OPEN = 1;
3935         /**
3936          * Close paired bracket.
3937          * @stable ICU 52
3938          */
3939         public static final int CLOSE = 2;
3940         /**
3941          * One more than the highest normal BidiPairedBracketType value.
3942          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
3943          *
3944          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3945          */
3946         @Deprecated
3947         public static final int COUNT = 3;
3948     }
3949 
3950     /**
3951      * Indic Positional Category constants.
3952      *
3953      * @see UProperty#INDIC_POSITIONAL_CATEGORY
3954      * @stable ICU 63
3955      */
3956     public static interface IndicPositionalCategory {
3957         /** @stable ICU 63 */
3958         public static final int NA = 0;
3959         /** @stable ICU 63 */
3960         public static final int BOTTOM = 1;
3961         /** @stable ICU 63 */
3962         public static final int BOTTOM_AND_LEFT = 2;
3963         /** @stable ICU 63 */
3964         public static final int BOTTOM_AND_RIGHT = 3;
3965         /** @stable ICU 63 */
3966         public static final int LEFT = 4;
3967         /** @stable ICU 63 */
3968         public static final int LEFT_AND_RIGHT = 5;
3969         /** @stable ICU 63 */
3970         public static final int OVERSTRUCK = 6;
3971         /** @stable ICU 63 */
3972         public static final int RIGHT = 7;
3973         /** @stable ICU 63 */
3974         public static final int TOP = 8;
3975         /** @stable ICU 63 */
3976         public static final int TOP_AND_BOTTOM = 9;
3977         /** @stable ICU 63 */
3978         public static final int TOP_AND_BOTTOM_AND_RIGHT = 10;
3979         /** @stable ICU 63 */
3980         public static final int TOP_AND_LEFT = 11;
3981         /** @stable ICU 63 */
3982         public static final int TOP_AND_LEFT_AND_RIGHT = 12;
3983         /** @stable ICU 63 */
3984         public static final int TOP_AND_RIGHT = 13;
3985         /** @stable ICU 63 */
3986         public static final int VISUAL_ORDER_LEFT = 14;
3987         /** @stable ICU 66 */
3988         public static final int TOP_AND_BOTTOM_AND_LEFT = 15;
3989     }
3990 
3991     /**
3992      * Indic Syllabic Category constants.
3993      *
3994      * @see UProperty#INDIC_SYLLABIC_CATEGORY
3995      * @stable ICU 63
3996      */
3997     public static interface IndicSyllabicCategory {
3998         /** @stable ICU 63 */
3999         public static final int OTHER = 0;
4000         /** @stable ICU 63 */
4001         public static final int AVAGRAHA = 1;
4002         /** @stable ICU 63 */
4003         public static final int BINDU = 2;
4004         /** @stable ICU 63 */
4005         public static final int BRAHMI_JOINING_NUMBER = 3;
4006         /** @stable ICU 63 */
4007         public static final int CANTILLATION_MARK = 4;
4008         /** @stable ICU 63 */
4009         public static final int CONSONANT = 5;
4010         /** @stable ICU 63 */
4011         public static final int CONSONANT_DEAD = 6;
4012         /** @stable ICU 63 */
4013         public static final int CONSONANT_FINAL = 7;
4014         /** @stable ICU 63 */
4015         public static final int CONSONANT_HEAD_LETTER = 8;
4016         /** @stable ICU 63 */
4017         public static final int CONSONANT_INITIAL_POSTFIXED = 9;
4018         /** @stable ICU 63 */
4019         public static final int CONSONANT_KILLER = 10;
4020         /** @stable ICU 63 */
4021         public static final int CONSONANT_MEDIAL = 11;
4022         /** @stable ICU 63 */
4023         public static final int CONSONANT_PLACEHOLDER = 12;
4024         /** @stable ICU 63 */
4025         public static final int CONSONANT_PRECEDING_REPHA = 13;
4026         /** @stable ICU 63 */
4027         public static final int CONSONANT_PREFIXED = 14;
4028         /** @stable ICU 63 */
4029         public static final int CONSONANT_SUBJOINED = 15;
4030         /** @stable ICU 63 */
4031         public static final int CONSONANT_SUCCEEDING_REPHA = 16;
4032         /** @stable ICU 63 */
4033         public static final int CONSONANT_WITH_STACKER = 17;
4034         /** @stable ICU 63 */
4035         public static final int GEMINATION_MARK = 18;
4036         /** @stable ICU 63 */
4037         public static final int INVISIBLE_STACKER = 19;
4038         /** @stable ICU 63 */
4039         public static final int JOINER = 20;
4040         /** @stable ICU 63 */
4041         public static final int MODIFYING_LETTER = 21;
4042         /** @stable ICU 63 */
4043         public static final int NON_JOINER = 22;
4044         /** @stable ICU 63 */
4045         public static final int NUKTA = 23;
4046         /** @stable ICU 63 */
4047         public static final int NUMBER = 24;
4048         /** @stable ICU 63 */
4049         public static final int NUMBER_JOINER = 25;
4050         /** @stable ICU 63 */
4051         public static final int PURE_KILLER = 26;
4052         /** @stable ICU 63 */
4053         public static final int REGISTER_SHIFTER = 27;
4054         /** @stable ICU 63 */
4055         public static final int SYLLABLE_MODIFIER = 28;
4056         /** @stable ICU 63 */
4057         public static final int TONE_LETTER = 29;
4058         /** @stable ICU 63 */
4059         public static final int TONE_MARK = 30;
4060         /** @stable ICU 63 */
4061         public static final int VIRAMA = 31;
4062         /** @stable ICU 63 */
4063         public static final int VISARGA = 32;
4064         /** @stable ICU 63 */
4065         public static final int VOWEL = 33;
4066         /** @stable ICU 63 */
4067         public static final int VOWEL_DEPENDENT = 34;
4068         /** @stable ICU 63 */
4069         public static final int VOWEL_INDEPENDENT = 35;
4070     }
4071 
4072     /**
4073      * Vertical Orientation constants.
4074      *
4075      * @see UProperty#VERTICAL_ORIENTATION
4076      * @stable ICU 63
4077      */
4078     public static interface VerticalOrientation {
4079         /** @stable ICU 63 */
4080         public static final int ROTATED = 0;
4081         /** @stable ICU 63 */
4082         public static final int TRANSFORMED_ROTATED = 1;
4083         /** @stable ICU 63 */
4084         public static final int TRANSFORMED_UPRIGHT = 2;
4085         /** @stable ICU 63 */
4086         public static final int UPRIGHT = 3;
4087     }
4088 
4089     /**
4090      * Identifier Status constants.
4091      * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
4092      *
4093      * @see UProperty#IDENTIFIER_STATUS
4094      * @draft ICU 75
4095      */
4096     public enum IdentifierStatus {
4097         /** @draft ICU 75 */
4098         RESTRICTED,
4099         /** @draft ICU 75 */
4100         ALLOWED,
4101     }
4102 
4103     /**
4104      * Identifier Type constants.
4105      * See https://www.unicode.org/reports/tr39/#Identifier_Status_and_Type.
4106      *
4107      * @see UProperty#IDENTIFIER_TYPE
4108      * @draft ICU 75
4109      */
4110     public enum IdentifierType {
4111         /** @draft ICU 75 */
4112         NOT_CHARACTER,
4113         /** @draft ICU 75 */
4114         DEPRECATED,
4115         /** @draft ICU 75 */
4116         DEFAULT_IGNORABLE,
4117         /** @draft ICU 75 */
4118         NOT_NFKC,
4119         /** @draft ICU 75 */
4120         NOT_XID,
4121         /** @draft ICU 75 */
4122         EXCLUSION,
4123         /** @draft ICU 75 */
4124         OBSOLETE,
4125         /** @draft ICU 75 */
4126         TECHNICAL,
4127         /** @draft ICU 75 */
4128         UNCOMMON_USE,
4129         /** @draft ICU 75 */
4130         LIMITED_USE,
4131         /** @draft ICU 75 */
4132         INCLUSION,
4133         /** @draft ICU 75 */
4134         RECOMMENDED,
4135     }
4136 
4137     // public data members -----------------------------------------------
4138 
4139     /**
4140      * The lowest Unicode code point value, constant 0.
4141      * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
4142      *
4143      * @stable ICU 2.1
4144      */
4145     public static final int MIN_VALUE = Character.MIN_CODE_POINT;
4146 
4147     /**
4148      * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
4149      * Same as {@link Character#MAX_CODE_POINT}.
4150      *
4151      * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
4152      * which is still a char with the value U+FFFF.
4153      *
4154      * @stable ICU 2.1
4155      */
4156     public static final int MAX_VALUE = Character.MAX_CODE_POINT;
4157 
4158     /**
4159      * The minimum value for Supplementary code points, constant U+10000.
4160      * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
4161      *
4162      * @stable ICU 2.1
4163      */
4164     public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
4165 
4166     /**
4167      * Unicode value used when translating into Unicode encoding form and there
4168      * is no existing character.
4169      * @stable ICU 2.1
4170      */
4171     public static final int REPLACEMENT_CHAR = '\uFFFD';
4172 
4173     /**
4174      * Special value that is returned by getUnicodeNumericValue(int) when no
4175      * numeric value is defined for a code point.
4176      * @stable ICU 2.4
4177      * @see #getUnicodeNumericValue
4178      */
4179     public static final double NO_NUMERIC_VALUE = -123456789;
4180 
4181     /**
4182      * Compatibility constant for Java Character's MIN_RADIX.
4183      * @stable ICU 3.4
4184      */
4185     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
4186 
4187     /**
4188      * Compatibility constant for Java Character's MAX_RADIX.
4189      * @stable ICU 3.4
4190      */
4191     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
4192 
4193     /**
4194      * Do not lowercase non-initial parts of words when titlecasing.
4195      * Option bit for titlecasing APIs that take an options bit set.
4196      *
4197      * By default, titlecasing will titlecase the first cased character
4198      * of a word and lowercase all other characters.
4199      * With this option, the other characters will not be modified.
4200      *
4201      * @see #toTitleCase
4202      * @stable ICU 3.8
4203      */
4204     public static final int TITLECASE_NO_LOWERCASE = 0x100;
4205 
4206     /**
4207      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
4208      * titlecase exactly the characters at breaks from the iterator.
4209      * Option bit for titlecasing APIs that take an options bit set.
4210      *
4211      * By default, titlecasing will take each break iterator index,
4212      * adjust it by looking for the next cased character, and titlecase that one.
4213      * Other characters are lowercased.
4214      *
4215      * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
4216      *
4217      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
4218      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
4219      * cased character F. If F exists, map F to default_title(F); then map each
4220      * subsequent character C to default_lower(C).
4221      *
4222      * @see #toTitleCase
4223      * @see #TITLECASE_NO_LOWERCASE
4224      * @stable ICU 3.8
4225      */
4226     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
4227 
4228     // public methods ----------------------------------------------------
4229 
4230     /**
4231      * Returnss the numeric value of a decimal digit code point.
4232      * <br>This method observes the semantics of
4233      * <code>java.lang.Character.digit()</code>.  Note that this
4234      * will return positive values for code points for which isDigit
4235      * returns false, just like java.lang.Character.
4236      * <br><em>Semantic Change:</em> In release 1.3.1 and
4237      * prior, this did not treat the European letters as having a
4238      * digit value, and also treated numeric letters and other numbers as
4239      * digits.
4240      * This has been changed to conform to the java semantics.
4241      * <br>A code point is a valid digit if and only if:
4242      * <ul>
4243      *   <li>ch is a decimal digit or one of the european letters, and
4244      *   <li>the value of ch is less than the specified radix.
4245      * </ul>
4246      * @param ch the code point to query
4247      * @param radix the radix
4248      * @return the numeric value represented by the code point in the
4249      * specified radix, or -1 if the code point is not a decimal digit
4250      * or if its value is too large for the radix
4251      * @stable ICU 2.1
4252      */
digit(int ch, int radix)4253     public static int digit(int ch, int radix)
4254     {
4255         if (2 <= radix && radix <= 36) {
4256             int value = digit(ch);
4257             if (value < 0) {
4258                 // ch is not a decimal digit, try latin letters
4259                 value = UCharacterProperty.getEuropeanDigit(ch);
4260             }
4261             return (value < radix) ? value : -1;
4262         } else {
4263             return -1;  // invalid radix
4264         }
4265     }
4266 
4267     /**
4268      * Returnss the numeric value of a decimal digit code point.
4269      * <br>This is a convenience overload of <code>digit(int, int)</code>
4270      * that provides a decimal radix.
4271      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
4272      * treated numeric letters and other numbers as digits.  This has
4273      * been changed to conform to the java semantics.
4274      * @param ch the code point to query
4275      * @return the numeric value represented by the code point,
4276      * or -1 if the code point is not a decimal digit or if its
4277      * value is too large for a decimal radix
4278      * @stable ICU 2.1
4279      */
digit(int ch)4280     public static int digit(int ch)
4281     {
4282         return UCharacterProperty.INSTANCE.digit(ch);
4283     }
4284 
4285     /**
4286      * Returns the numeric value of the code point as a nonnegative
4287      * integer.
4288      * <br>If the code point does not have a numeric value, then -1 is returned.
4289      * <br>
4290      * If the code point has a numeric value that cannot be represented as a
4291      * nonnegative integer (for example, a fractional value), then -2 is
4292      * returned.
4293      * @param ch the code point to query
4294      * @return the numeric value of the code point, or -1 if it has no numeric
4295      * value, or -2 if it has a numeric value that cannot be represented as a
4296      * nonnegative integer
4297      * @stable ICU 2.1
4298      */
getNumericValue(int ch)4299     public static int getNumericValue(int ch)
4300     {
4301         return UCharacterProperty.INSTANCE.getNumericValue(ch);
4302     }
4303 
4304     /**
4305      * {@icu} Returns the numeric value for a Unicode code point as defined in the
4306      * Unicode Character Database.
4307      * <p>A "double" return type is necessary because some numeric values are
4308      * fractions, negative, or too large for int.
4309      * <p>For characters without any numeric values in the Unicode Character
4310      * Database, this function will return NO_NUMERIC_VALUE.
4311      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
4312      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
4313      * return type int and returns -1 when the argument ch does not have a
4314      * corresponding numeric value. This has been changed to synch with ICU4C
4315      *
4316      * This corresponds to the ICU4C function u_getNumericValue.
4317      * @param ch Code point to get the numeric value for.
4318      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
4319      * @stable ICU 2.4
4320      */
getUnicodeNumericValue(int ch)4321     public static double getUnicodeNumericValue(int ch)
4322     {
4323         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
4324     }
4325 
4326     /**
4327      * Compatibility override of Java deprecated method.  This
4328      * method will always remain deprecated.
4329      * Same as java.lang.Character.isSpace().
4330      * @param ch the code point
4331      * @return true if the code point is a space character as
4332      * defined by java.lang.Character.isSpace.
4333      * @deprecated ICU 3.4 (Java)
4334      */
4335     @Deprecated
isSpace(int ch)4336     public static boolean isSpace(int ch) {
4337         return ch <= 0x20 &&
4338                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
4339     }
4340 
4341     /**
4342      * Returns a value indicating a code point's Unicode category.
4343      * Up-to-date Unicode implementation of java.lang.Character.getType()
4344      * except for the above mentioned code points that had their category
4345      * changed.<br>
4346      * Return results are constants from the interface
4347      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
4348      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
4349      * those returned by java.lang.Character.getType.  UCharacterCategory values
4350      * match the ones used in ICU4C, while java.lang.Character type
4351      * values, though similar, skip the value 17.
4352      * @param ch code point whose type is to be determined
4353      * @return category which is a value of UCharacterCategory
4354      * @stable ICU 2.1
4355      */
getType(int ch)4356     public static int getType(int ch)
4357     {
4358         return UCharacterProperty.INSTANCE.getType(ch);
4359     }
4360 
4361     /**
4362      * Determines if a code point has a defined meaning in the up-to-date
4363      * Unicode standard.
4364      * E.g. supplementary code points though allocated space are not defined in
4365      * Unicode yet.<br>
4366      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
4367      * @param ch code point to be determined if it is defined in the most
4368      *        current version of Unicode
4369      * @return true if this code point is defined in unicode
4370      * @stable ICU 2.1
4371      */
isDefined(int ch)4372     public static boolean isDefined(int ch)
4373     {
4374         return getType(ch) != 0;
4375     }
4376 
4377     /**
4378      * Determines if a code point is a Java digit.
4379      * <br>This method observes the semantics of
4380      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
4381      * digits only.
4382      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
4383      * numeric letters and other numbers as digits.
4384      * This has been changed to conform to the java semantics.
4385      * @param ch code point to query
4386      * @return true if this code point is a digit
4387      * @stable ICU 2.1
4388      */
isDigit(int ch)4389     public static boolean isDigit(int ch)
4390     {
4391         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
4392     }
4393 
4394     /**
4395      * Determines if the specified code point is an ISO control character.
4396      * A code point is considered to be an ISO control character if it is in
4397      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
4398      * &#92;u009F.<br>
4399      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
4400      * @param ch code point to determine if it is an ISO control character
4401      * @return true if code point is a ISO control character
4402      * @stable ICU 2.1
4403      */
isISOControl(int ch)4404     public static boolean isISOControl(int ch)
4405     {
4406         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
4407                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
4408     }
4409 
4410     /**
4411      * Determines if the specified code point is a letter.
4412      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
4413      * @param ch code point to determine if it is a letter
4414      * @return true if code point is a letter
4415      * @stable ICU 2.1
4416      */
isLetter(int ch)4417     public static boolean isLetter(int ch)
4418     {
4419         // if props == 0, it will just fall through and return false
4420         return ((1 << getType(ch))
4421                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4422                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4423                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4424                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4425                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
4426     }
4427 
4428     /**
4429      * Determines if the specified code point is a letter or digit.
4430      * {@icunote} This method, unlike java.lang.Character does not regard the ascii
4431      * characters 'A' - 'Z' and 'a' - 'z' as digits.
4432      * @param ch code point to determine if it is a letter or a digit
4433      * @return true if code point is a letter or a digit
4434      * @stable ICU 2.1
4435      */
isLetterOrDigit(int ch)4436     public static boolean isLetterOrDigit(int ch)
4437     {
4438         return ((1 << getType(ch))
4439                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4440                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4441                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4442                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4443                         | (1 << UCharacterCategory.OTHER_LETTER)
4444                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
4445     }
4446 
4447     /**
4448      * Compatibility override of Java deprecated method.  This
4449      * method will always remain deprecated.  Delegates to
4450      * java.lang.Character.isJavaIdentifierStart.
4451      * @param cp the code point
4452      * @return true if the code point can start a java identifier.
4453      * @deprecated ICU 3.4 (Java)
4454      */
4455     @Deprecated
isJavaLetter(int cp)4456     public static boolean isJavaLetter(int cp) {
4457         return isJavaIdentifierStart(cp);
4458     }
4459 
4460     /**
4461      * Compatibility override of Java deprecated method.  This
4462      * method will always remain deprecated.  Delegates to
4463      * java.lang.Character.isJavaIdentifierPart.
4464      * @param cp the code point
4465      * @return true if the code point can continue a java identifier.
4466      * @deprecated ICU 3.4 (Java)
4467      */
4468     @Deprecated
isJavaLetterOrDigit(int cp)4469     public static boolean isJavaLetterOrDigit(int cp) {
4470         return isJavaIdentifierPart(cp);
4471     }
4472 
4473     /**
4474      * Compatibility override of Java method, delegates to
4475      * java.lang.Character.isJavaIdentifierStart.
4476      * @param cp the code point
4477      * @return true if the code point can start a java identifier.
4478      * @stable ICU 3.4
4479      */
isJavaIdentifierStart(int cp)4480     public static boolean isJavaIdentifierStart(int cp) {
4481         // note, downcast to char for jdk 1.4 compatibility
4482         return java.lang.Character.isJavaIdentifierStart((char)cp);
4483     }
4484 
4485     /**
4486      * Compatibility override of Java method, delegates to
4487      * java.lang.Character.isJavaIdentifierPart.
4488      * @param cp the code point
4489      * @return true if the code point can continue a java identifier.
4490      * @stable ICU 3.4
4491      */
isJavaIdentifierPart(int cp)4492     public static boolean isJavaIdentifierPart(int cp) {
4493         // note, downcast to char for jdk 1.4 compatibility
4494         return java.lang.Character.isJavaIdentifierPart((char)cp);
4495     }
4496 
4497     /**
4498      * Determines if the specified code point is a lowercase character.
4499      * UnicodeData only contains case mappings for code points where they are
4500      * one-to-one mappings; it also omits information about context-sensitive
4501      * case mappings.<br> For more information about Unicode case mapping
4502      * please refer to the
4503      * <a href=https://www.unicode.org/reports/tr21/>Technical report
4504      * #21</a>.<br>
4505      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
4506      * @param ch code point to determine if it is in lowercase
4507      * @return true if code point is a lowercase character
4508      * @stable ICU 2.1
4509      */
isLowerCase(int ch)4510     public static boolean isLowerCase(int ch)
4511     {
4512         // if props == 0, it will just fall through and return false
4513         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
4514     }
4515 
4516     /**
4517      * Determines if the specified code point is a white space character.
4518      * A code point is considered to be an whitespace character if and only
4519      * if it satisfies one of the following criteria:
4520      * <ul>
4521      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
4522      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
4523      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
4524      * <li> It is &#92;u000A, LINE FEED.
4525      * <li> It is &#92;u000B, VERTICAL TABULATION.
4526      * <li> It is &#92;u000C, FORM FEED.
4527      * <li> It is &#92;u000D, CARRIAGE RETURN.
4528      * <li> It is &#92;u001C, FILE SEPARATOR.
4529      * <li> It is &#92;u001D, GROUP SEPARATOR.
4530      * <li> It is &#92;u001E, RECORD SEPARATOR.
4531      * <li> It is &#92;u001F, UNIT SEPARATOR.
4532      * </ul>
4533      *
4534      * This API tries to sync with the semantics of Java's
4535      * java.lang.Character.isWhitespace(), but it may not return
4536      * the exact same results because of the Unicode version
4537      * difference.
4538      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
4539      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
4540      * See http://www.unicode.org/versions/Unicode4.0.1/
4541      * @param ch code point to determine if it is a white space
4542      * @return true if the specified code point is a white space character
4543      * @stable ICU 2.1
4544      */
isWhitespace(int ch)4545     public static boolean isWhitespace(int ch)
4546     {
4547         // exclude no-break spaces
4548         // if props == 0, it will just fall through and return false
4549         return ((1 << getType(ch)) &
4550                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
4551                         | (1 << UCharacterCategory.LINE_SEPARATOR)
4552                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
4553                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
4554                         // TAB VT LF FF CR FS GS RS US NL are all control characters
4555                         // that are white spaces.
4556                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
4557     }
4558 
4559     /**
4560      * Determines if the specified code point is a Unicode specified space
4561      * character, i.e. if code point is in the category Zs, Zl and Zp.
4562      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
4563      * @param ch code point to determine if it is a space
4564      * @return true if the specified code point is a space character
4565      * @stable ICU 2.1
4566      */
isSpaceChar(int ch)4567     public static boolean isSpaceChar(int ch)
4568     {
4569         // if props == 0, it will just fall through and return false
4570         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
4571                 | (1 << UCharacterCategory.LINE_SEPARATOR)
4572                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
4573                 != 0;
4574     }
4575 
4576     /**
4577      * Determines if the specified code point is a titlecase character.
4578      * UnicodeData only contains case mappings for code points where they are
4579      * one-to-one mappings; it also omits information about context-sensitive
4580      * case mappings.<br>
4581      * For more information about Unicode case mapping please refer to the
4582      * <a href=https://www.unicode.org/reports/tr21/>
4583      * Technical report #21</a>.<br>
4584      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
4585      * @param ch code point to determine if it is in title case
4586      * @return true if the specified code point is a titlecase character
4587      * @stable ICU 2.1
4588      */
isTitleCase(int ch)4589     public static boolean isTitleCase(int ch)
4590     {
4591         // if props == 0, it will just fall through and return false
4592         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
4593     }
4594 
4595     /**
4596      * Determines if the specified character is permissible as a
4597      * non-initial character of an identifier
4598      * according to UAX #31 Unicode Identifier and Pattern Syntax.
4599      *
4600      * <p>Same as Unicode ID_Continue ({@link UProperty#ID_CONTINUE}).
4601      *
4602      * <p>Note that this differs from {@link java.lang.Character#isUnicodeIdentifierPart(char)}
4603      * which implements a different identifier profile.
4604      *
4605      * @param ch the code point to be tested
4606      * @return true if the code point may occur as a non-initial character of an identifier
4607      * @stable ICU 2.1
4608      */
isUnicodeIdentifierPart(int ch)4609     public static boolean isUnicodeIdentifierPart(int ch)
4610     {
4611         return hasBinaryProperty(ch, UProperty.ID_CONTINUE);  // single code point
4612     }
4613 
4614     /**
4615      * Determines if the specified character is permissible as the first character in an identifier
4616      * according to UAX #31 Unicode Identifier and Pattern Syntax.
4617      *
4618      * <p>Same as Unicode ID_Start ({@link UProperty#ID_START}).
4619      *
4620      * <p>Note that this differs from {@link java.lang.Character#isUnicodeIdentifierStart(char)}
4621      * which implements a different identifier profile.
4622      *
4623      * @param ch the code point to be tested
4624      * @return true if the code point may start an identifier
4625      * @stable ICU 2.1
4626      */
isUnicodeIdentifierStart(int ch)4627     public static boolean isUnicodeIdentifierStart(int ch)
4628     {
4629         return hasBinaryProperty(ch, UProperty.ID_START);  // single code point
4630     }
4631 
4632     /**
4633      * Does the set of Identifier_Type values of code point c contain the given type?
4634      *
4635      * <p>Used for UTS #39 General Security Profile for Identifiers
4636      * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
4637      *
4638      * <p>Each code point maps to a <i>set</i> of UIdentifierType values.
4639      *
4640      * @param c code point
4641      * @param type Identifier_Type to check
4642      * @return true if type is in Identifier_Type(c)
4643      * @draft ICU 75
4644      */
hasIdentifierType(int c, IdentifierType type)4645     public static final boolean hasIdentifierType(int c, IdentifierType type) {
4646         return UCharacterProperty.INSTANCE.hasIDType(c, type);
4647     }
4648 
4649     /**
4650      * Writes code point c's Identifier_Type as a set of IdentifierType values and
4651      * returns the number of types.
4652      * The set is cleared before c's types are added.
4653      *
4654      * <p>Used for UTS #39 General Security Profile for Identifiers
4655      * (https://www.unicode.org/reports/tr39/#General_Security_Profile).
4656      *
4657      * <p>Each code point maps to a <i>set</i> of IdentifierType values.
4658      * There is always at least one type.
4659      * Only some of the types can be combined with others,
4660      * and usually only a small number of types occur together.
4661      * Future versions might add additional types.
4662      * See UTS #39 and its data files for details.
4663      *
4664      * @param c code point
4665      * @param types output set
4666      * @return number of values in c's Identifier_Type
4667      * @draft ICU 75
4668      */
getIdentifierTypes(int c, EnumSet<IdentifierType> types)4669     public static final int getIdentifierTypes(int c, EnumSet<IdentifierType> types) {
4670         return UCharacterProperty.INSTANCE.getIDTypes(c, types);
4671     }
4672 
4673     /**
4674      * Determines if the specified code point should be regarded as an
4675      * ignorable character in a Java identifier.
4676      * A character is Java-identifier-ignorable if it has the general category
4677      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
4678      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
4679      * Up-to-date Unicode implementation of
4680      * java.lang.Character.isIdentifierIgnorable().<br>
4681      * See <a href=https://www.unicode.org/reports/tr8/>UTR #8</a>.
4682      * <p>Note that Unicode just recommends to ignore Cf (format controls).
4683      * @param ch code point to be determined if it can be ignored in a Unicode
4684      *        identifier.
4685      * @return true if the code point is ignorable
4686      * @stable ICU 2.1
4687      */
isIdentifierIgnorable(int ch)4688     public static boolean isIdentifierIgnorable(int ch)
4689     {
4690         // see java.lang.Character.isIdentifierIgnorable() on range of
4691         // ignorable characters.
4692         if (ch <= 0x9f) {
4693             return isISOControl(ch)
4694                     && !((ch >= 0x9 && ch <= 0xd)
4695                             || (ch >= 0x1c && ch <= 0x1f));
4696         }
4697         return getType(ch) == UCharacterCategory.FORMAT;
4698     }
4699 
4700     /**
4701      * Determines if the specified code point is an uppercase character.
4702      * UnicodeData only contains case mappings for code point where they are
4703      * one-to-one mappings; it also omits information about context-sensitive
4704      * case mappings.<br>
4705      * For language specific case conversion behavior, use
4706      * toUpperCase(locale, str). <br>
4707      * For example, the case conversion for dot-less i and dotted I in Turkish,
4708      * or for final sigma in Greek.
4709      * For more information about Unicode case mapping please refer to the
4710      * <a href=https://www.unicode.org/reports/tr21/>
4711      * Technical report #21</a>.<br>
4712      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
4713      * @param ch code point to determine if it is in uppercase
4714      * @return true if the code point is an uppercase character
4715      * @stable ICU 2.1
4716      */
isUpperCase(int ch)4717     public static boolean isUpperCase(int ch)
4718     {
4719         // if props == 0, it will just fall through and return false
4720         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
4721     }
4722 
4723     /**
4724      * The given code point is mapped to its lowercase equivalent; if the code
4725      * point has no lowercase equivalent, the code point itself is returned.
4726      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
4727      *
4728      * <p>This function only returns the simple, single-code point case mapping.
4729      * Full case mappings should be used whenever possible because they produce
4730      * better results by working on whole strings.
4731      * They take into account the string context and the language and can map
4732      * to a result string with a different length as appropriate.
4733      * Full case mappings are applied by the case mapping functions
4734      * that take String parameters rather than code points (int).
4735      * See also the User Guide chapter on C/POSIX migration:
4736      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
4737      *
4738      * @param ch code point whose lowercase equivalent is to be retrieved
4739      * @return the lowercase equivalent code point
4740      * @stable ICU 2.1
4741      */
toLowerCase(int ch)4742     public static int toLowerCase(int ch) {
4743         return UCaseProps.INSTANCE.tolower(ch);
4744     }
4745 
4746     /**
4747      * Converts argument code point and returns a String object representing
4748      * the code point's value in UTF-16 format.
4749      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
4750      *
4751      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
4752      *
4753      * @param ch code point
4754      * @return string representation of the code point, or null if the code point is
4755      *         outside the range UCharacter.MIN_VALUE to UCharacter.MAX_VALUE
4756      * @stable ICU 2.1
4757      */
toString(int ch)4758     public static String toString(int ch)
4759     {
4760         if (ch < MIN_VALUE || ch > MAX_VALUE) {
4761             return null;
4762         }
4763 
4764         if (ch < SUPPLEMENTARY_MIN_VALUE) {
4765             return String.valueOf((char)ch);
4766         }
4767 
4768         return new String(Character.toChars(ch));
4769     }
4770 
4771     /**
4772      * Converts the code point argument to titlecase.
4773      * If no titlecase is available, the uppercase is returned. If no uppercase
4774      * is available, the code point itself is returned.
4775      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
4776      *
4777      * <p>This function only returns the simple, single-code point case mapping.
4778      * Full case mappings should be used whenever possible because they produce
4779      * better results by working on whole strings.
4780      * They take into account the string context and the language and can map
4781      * to a result string with a different length as appropriate.
4782      * Full case mappings are applied by the case mapping functions
4783      * that take String parameters rather than code points (int).
4784      * See also the User Guide chapter on C/POSIX migration:
4785      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
4786      *
4787      * @param ch code point  whose title case is to be retrieved
4788      * @return titlecase code point
4789      * @stable ICU 2.1
4790      */
toTitleCase(int ch)4791     public static int toTitleCase(int ch) {
4792         return UCaseProps.INSTANCE.totitle(ch);
4793     }
4794 
4795     /**
4796      * Converts the character argument to uppercase.
4797      * If no uppercase is available, the character itself is returned.
4798      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
4799      *
4800      * <p>This function only returns the simple, single-code point case mapping.
4801      * Full case mappings should be used whenever possible because they produce
4802      * better results by working on whole strings.
4803      * They take into account the string context and the language and can map
4804      * to a result string with a different length as appropriate.
4805      * Full case mappings are applied by the case mapping functions
4806      * that take String parameters rather than code points (int).
4807      * See also the User Guide chapter on C/POSIX migration:
4808      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
4809      *
4810      * @param ch code point whose uppercase is to be retrieved
4811      * @return uppercase code point
4812      * @stable ICU 2.1
4813      */
toUpperCase(int ch)4814     public static int toUpperCase(int ch) {
4815         return UCaseProps.INSTANCE.toupper(ch);
4816     }
4817 
4818     // extra methods not in java.lang.Character --------------------------
4819 
4820     /**
4821      * {@icu} Determines if the code point is a supplementary character.
4822      * A code point is a supplementary character if and only if it lies between
4823      * <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a> and MAX_VALUE, inclusive.
4824      * @param ch code point to be determined if it is in the supplementary
4825      *        plane
4826      * @return true if code point is a supplementary character
4827      * @stable ICU 2.1
4828      */
isSupplementary(int ch)4829     public static boolean isSupplementary(int ch)
4830     {
4831         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
4832                 ch <= UCharacter.MAX_VALUE;
4833     }
4834 
4835     /**
4836      * {@icu} Determines if the code point is in the BMP plane.
4837      * @param ch code point to be determined if it is not a supplementary
4838      *        character
4839      * @return true if code point is not a supplementary character
4840      * @stable ICU 2.1
4841      */
isBMP(int ch)4842     public static boolean isBMP(int ch)
4843     {
4844         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
4845     }
4846 
4847     /**
4848      * {@icu} Determines whether the specified code point is a printable character
4849      * according to the Unicode standard.
4850      * @param ch code point to be determined if it is printable
4851      * @return true if the code point is a printable character
4852      * @stable ICU 2.1
4853      */
isPrintable(int ch)4854     public static boolean isPrintable(int ch)
4855     {
4856         int cat = getType(ch);
4857         // if props == 0, it will just fall through and return false
4858         return (cat != UCharacterCategory.UNASSIGNED &&
4859                 cat != UCharacterCategory.CONTROL &&
4860                 cat != UCharacterCategory.FORMAT &&
4861                 cat != UCharacterCategory.PRIVATE_USE &&
4862                 cat != UCharacterCategory.SURROGATE &&
4863                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
4864     }
4865 
4866     /**
4867      * {@icu} Determines whether the specified code point is of base form.
4868      * A code point of base form does not graphically combine with preceding
4869      * characters, and is neither a control nor a format character.
4870      * @param ch code point to be determined if it is of base form
4871      * @return true if the code point is of base form
4872      * @stable ICU 2.1
4873      */
isBaseForm(int ch)4874     public static boolean isBaseForm(int ch)
4875     {
4876         int cat = getType(ch);
4877         // if props == 0, it will just fall through and return false
4878         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
4879                 cat == UCharacterCategory.OTHER_NUMBER ||
4880                 cat == UCharacterCategory.LETTER_NUMBER ||
4881                 cat == UCharacterCategory.UPPERCASE_LETTER ||
4882                 cat == UCharacterCategory.LOWERCASE_LETTER ||
4883                 cat == UCharacterCategory.TITLECASE_LETTER ||
4884                 cat == UCharacterCategory.MODIFIER_LETTER ||
4885                 cat == UCharacterCategory.OTHER_LETTER ||
4886                 cat == UCharacterCategory.NON_SPACING_MARK ||
4887                 cat == UCharacterCategory.ENCLOSING_MARK ||
4888                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
4889     }
4890 
4891     /**
4892      * {@icu} Returns the Bidirection property of a code point.
4893      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
4894      * property.<br>
4895      * Result returned belongs to the interface
4896      * <a href=UCharacterDirection.html>UCharacterDirection</a>
4897      * @param ch the code point whose direction is to be determined
4898      * @return direction constant from UCharacterDirection.
4899      * @stable ICU 2.1
4900      */
getDirection(int ch)4901     public static int getDirection(int ch)
4902     {
4903         return UBiDiProps.INSTANCE.getClass(ch);
4904     }
4905 
4906     /**
4907      * Determines whether the code point has the "mirrored" property.
4908      * This property is set for characters that are commonly used in
4909      * Right-To-Left contexts and need to be displayed with a "mirrored"
4910      * glyph.
4911      * @param ch code point whose mirror is to be determined
4912      * @return true if the code point has the "mirrored" property
4913      * @stable ICU 2.1
4914      */
isMirrored(int ch)4915     public static boolean isMirrored(int ch)
4916     {
4917         return UBiDiProps.INSTANCE.isMirrored(ch);
4918     }
4919 
4920     /**
4921      * {@icu} Maps the specified code point to a "mirror-image" code point.
4922      * For code points with the "mirrored" property, implementations sometimes
4923      * need a "poor man's" mapping to another code point such that the default
4924      * glyph may serve as the mirror-image of the default glyph of the
4925      * specified code point.<br>
4926      * This is useful for text conversion to and from codepages with visual
4927      * order, and for displays without glyph selection capabilities.
4928      * @param ch code point whose mirror is to be retrieved
4929      * @return another code point that may serve as a mirror-image substitute,
4930      *         or ch itself if there is no such mapping or ch does not have the
4931      *         "mirrored" property
4932      * @stable ICU 2.1
4933      */
getMirror(int ch)4934     public static int getMirror(int ch)
4935     {
4936         return UBiDiProps.INSTANCE.getMirror(ch);
4937     }
4938 
4939     /**
4940      * {@icu} Maps the specified character to its paired bracket character.
4941      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
4942      * Otherwise c itself is returned.
4943      * See http://www.unicode.org/reports/tr9/
4944      *
4945      * @param c the code point to be mapped
4946      * @return the paired bracket code point,
4947      *         or c itself if there is no such mapping
4948      *         (Bidi_Paired_Bracket_Type=None)
4949      *
4950      * @see UProperty#BIDI_PAIRED_BRACKET
4951      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
4952      * @see #getMirror(int)
4953      * @stable ICU 52
4954      */
getBidiPairedBracket(int c)4955     public static int getBidiPairedBracket(int c) {
4956         return UBiDiProps.INSTANCE.getPairedBracket(c);
4957     }
4958 
4959     /**
4960      * {@icu} Returns the combining class of the argument codepoint
4961      * @param ch code point whose combining is to be retrieved
4962      * @return the combining class of the codepoint
4963      * @stable ICU 2.1
4964      */
getCombiningClass(int ch)4965     public static int getCombiningClass(int ch)
4966     {
4967         return Normalizer2.getNFDInstance().getCombiningClass(ch);
4968     }
4969 
4970     /**
4971      * {@icu} A code point is illegal if and only if
4972      * <ul>
4973      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4974      * <li> A surrogate value, 0xD800 to 0xDFFF
4975      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4976      * </ul>
4977      * Note: legal does not mean that it is assigned in this version of Unicode.
4978      * @param ch code point to determine if it is a legal code point by itself
4979      * @return true if and only if legal.
4980      * @stable ICU 2.1
4981      */
isLegal(int ch)4982     public static boolean isLegal(int ch)
4983     {
4984         if (ch < MIN_VALUE) {
4985             return false;
4986         }
4987         if (ch < Character.MIN_SURROGATE) {
4988             return true;
4989         }
4990         if (ch <= Character.MAX_SURROGATE) {
4991             return false;
4992         }
4993         if (UCharacterUtility.isNonCharacter(ch)) {
4994             return false;
4995         }
4996         return (ch <= MAX_VALUE);
4997     }
4998 
4999     /**
5000      * {@icu} A string is legal iff all its code points are legal.
5001      * A code point is illegal if and only if
5002      * <ul>
5003      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
5004      * <li> A surrogate value, 0xD800 to 0xDFFF
5005      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
5006      * </ul>
5007      * Note: legal does not mean that it is assigned in this version of Unicode.
5008      * @param str string containing the code points to examine
5009      * @return true if and only if legal.
5010      * @stable ICU 2.1
5011      */
isLegal(String str)5012     public static boolean isLegal(String str)
5013     {
5014         int size = str.length();
5015         int codepoint;
5016         for (int i = 0; i < size; i += Character.charCount(codepoint))
5017         {
5018             codepoint = str.codePointAt(i);
5019             if (!isLegal(codepoint)) {
5020                 return false;
5021             }
5022         }
5023         return true;
5024     }
5025 
5026     /**
5027      * {@icu} Returns the version of Unicode data used.
5028      * @return the unicode version number used
5029      * @stable ICU 2.1
5030      */
getUnicodeVersion()5031     public static VersionInfo getUnicodeVersion()
5032     {
5033         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
5034     }
5035 
5036     /**
5037      * {@icu} Returns the most current Unicode name of the argument code point, or
5038      * null if the character is unassigned or outside the range
5039      * {@code UCharacter.MIN_VALUE} and {@code UCharacter.MAX_VALUE} or does not
5040      * have a name.
5041      * <br>
5042      * Note calling any methods related to code point names, e.g. {@code getName()}
5043      * incurs a one-time initialization cost to construct the name tables.
5044      * @param ch the code point for which to get the name
5045      * @return most current Unicode name
5046      * @stable ICU 2.1
5047      */
getName(int ch)5048     public static String getName(int ch)
5049     {
5050         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
5051     }
5052 
5053     /**
5054      * {@icu} Returns the names for each of the characters in a string
5055      * @param s string to format
5056      * @param separator string to go between names
5057      * @return string of names
5058      * @stable ICU 3.8
5059      */
getName(String s, String separator)5060     public static String getName(String s, String separator) {
5061         if (s.length() == 1) { // handle common case
5062             return getName(s.charAt(0));
5063         }
5064         int cp;
5065         StringBuilder sb = new StringBuilder();
5066         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
5067             cp = s.codePointAt(i);
5068             if (i != 0) sb.append(separator);
5069             sb.append(UCharacter.getName(cp));
5070         }
5071         return sb.toString();
5072     }
5073 
5074     /**
5075      * {@icu} Returns null.
5076      * Used to return the Unicode_1_Name property value which was of little practical value.
5077      * @param ch the code point for which to get the name
5078      * @return null
5079      * @deprecated ICU 49
5080      */
5081     @Deprecated
getName1_0(int ch)5082     public static String getName1_0(int ch)
5083     {
5084         return null;
5085     }
5086 
5087     /**
5088      * {@icu} Returns a name for a valid codepoint. Unlike getName(int) and
5089      * getName1_0(int), this method will return a name even for codepoints that
5090      * are not assigned a name in UnicodeData.txt.
5091      *
5092      * <p>The names are returned in the following order.
5093      * <ul>
5094      * <li> Most current Unicode name if there is any
5095      * <li> Unicode 1.0 name if there is any
5096      * <li> Extended name in the form of
5097      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
5098      * </ul>
5099      * Note calling any methods related to code point names, e.g. {@code getName()}
5100      * incurs a one-time initialization cost to construct the name tables.
5101      * @param ch the code point for which to get the name
5102      * @return a name for the argument codepoint
5103      * @stable ICU 2.6
5104      */
getExtendedName(int ch)5105     public static String getExtendedName(int ch) {
5106         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
5107     }
5108 
5109     /**
5110      * {@icu} Returns the corrected name from NameAliases.txt if there is one.
5111      * Returns null if the character is unassigned or outside the range
5112      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
5113      * <br>
5114      * Note calling any methods related to code point names, e.g. {@code getName()}
5115      * incurs a one-time initialization cost to construct the name tables.
5116      * @param ch the code point for which to get the name alias
5117      * @return Unicode name alias, or null
5118      * @stable ICU 4.4
5119      */
getNameAlias(int ch)5120     public static String getNameAlias(int ch)
5121     {
5122         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
5123     }
5124 
5125     /**
5126      * {@icu} Returns null.
5127      * Used to return the ISO 10646 comment for a character.
5128      * The Unicode ISO_Comment property is deprecated and has no values.
5129      *
5130      * @param ch The code point for which to get the ISO comment.
5131      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
5132      * @return null
5133      * @deprecated ICU 49
5134      */
5135     @Deprecated
getISOComment(int ch)5136     public static String getISOComment(int ch)
5137     {
5138         return null;
5139     }
5140 
5141     /**
5142      * {@icu} <p>Finds a Unicode code point by its most current Unicode name and
5143      * return its code point value. All Unicode names are in uppercase.
5144      * Note calling any methods related to code point names, e.g. {@code getName()}
5145      * incurs a one-time initialization cost to construct the name tables.
5146      * @param name most current Unicode character name whose code point is to
5147      *        be returned
5148      * @return code point or -1 if name is not found
5149      * @stable ICU 2.1
5150      */
getCharFromName(String name)5151     public static int getCharFromName(String name){
5152         return UCharacterName.INSTANCE.getCharFromName(
5153                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
5154     }
5155 
5156     /**
5157      * {@icu} Returns -1.
5158      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
5159      * its code point value.
5160      * @param name Unicode 1.0 code point name whose code point is to be
5161      *             returned
5162      * @return -1
5163      * @deprecated ICU 49
5164      * @see #getName1_0(int)
5165      */
5166     @Deprecated
getCharFromName1_0(String name)5167     public static int getCharFromName1_0(String name){
5168         return -1;
5169     }
5170 
5171     /**
5172      * {@icu} <p>Finds a Unicode character by its name or extended name and returns its
5173      * code point value. All Unicode names are in uppercase.
5174      * Extended names are all lowercase except for numbers and are contained
5175      * within angle brackets.
5176      * The names are searched in the following order
5177      * <ul>
5178      * <li> Most current Unicode name if there is any
5179      * <li> Unicode 1.0 name if there is any
5180      * <li> Extended name in the form of
5181      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
5182      * </ul>
5183      * Note calling any methods related to code point names, e.g. {@code getName()}
5184      * incurs a one-time initialization cost to construct the name tables.
5185      * @param name codepoint name
5186      * @return code point associated with the name or -1 if the name is not
5187      *         found.
5188      * @stable ICU 2.6
5189      */
getCharFromExtendedName(String name)5190     public static int getCharFromExtendedName(String name){
5191         return UCharacterName.INSTANCE.getCharFromName(
5192                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
5193     }
5194 
5195     /**
5196      * {@icu} <p>Find a Unicode character by its corrected name alias and return
5197      * its code point value. All Unicode names are in uppercase.
5198      * Note calling any methods related to code point names, e.g. {@code getName()}
5199      * incurs a one-time initialization cost to construct the name tables.
5200      * @param name Unicode name alias whose code point is to be returned
5201      * @return code point or -1 if name is not found
5202      * @stable ICU 4.4
5203      */
getCharFromNameAlias(String name)5204     public static int getCharFromNameAlias(String name){
5205         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
5206     }
5207 
5208     /**
5209      * {@icu} Return the Unicode name for a given property, as given in the
5210      * Unicode database file PropertyAliases.txt.  Most properties
5211      * have more than one name.  The nameChoice determines which one
5212      * is returned.
5213      *
5214      * In addition, this function maps the property
5215      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
5216      * "General_Category_Mask".  These names are not in
5217      * PropertyAliases.txt.
5218      *
5219      * @param property UProperty selector.
5220      *
5221      * @param nameChoice UProperty.NameChoice selector for which name
5222      * to get.  All properties have a long name.  Most have a short
5223      * name, but some do not.  Unicode allows for additional names; if
5224      * present these will be returned by UProperty.NameChoice.LONG + i,
5225      * where i=1, 2,...
5226      *
5227      * @return a name, or null if Unicode explicitly defines no name
5228      * ("n/a") for a given property/nameChoice.  If a given nameChoice
5229      * throws an exception, then all larger values of nameChoice will
5230      * throw an exception.  If null is returned for a given
5231      * nameChoice, then other nameChoice values may return non-null
5232      * results.
5233      *
5234      * @exception IllegalArgumentException thrown if property or
5235      * nameChoice are invalid.
5236      *
5237      * @see UProperty
5238      * @see UProperty.NameChoice
5239      * @stable ICU 2.4
5240      */
getPropertyName(int property, int nameChoice)5241     public static String getPropertyName(int property,
5242             int nameChoice) {
5243         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
5244     }
5245 
5246     /**
5247      * {@icu} Return the UProperty selector for a given property name, as
5248      * specified in the Unicode database file PropertyAliases.txt.
5249      * Short, long, and any other variants are recognized.
5250      *
5251      * In addition, this function maps the synthetic names "gcm" /
5252      * "General_Category_Mask" to the property
5253      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
5254      * PropertyAliases.txt.
5255      *
5256      * @param propertyAlias the property name to be matched.  The name
5257      * is compared using "loose matching" as described in
5258      * PropertyAliases.txt.
5259      *
5260      * @return a UProperty enum.
5261      *
5262      * @exception IllegalArgumentException thrown if propertyAlias
5263      * is not recognized.
5264      *
5265      * @see UProperty
5266      * @stable ICU 2.4
5267      */
getPropertyEnum(CharSequence propertyAlias)5268     public static int getPropertyEnum(CharSequence propertyAlias) {
5269         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
5270         if (propEnum == UProperty.UNDEFINED) {
5271             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
5272         }
5273         return propEnum;
5274     }
5275 
5276     /**
5277      * {@icu} Return the Unicode name for a given property value, as given in
5278      * the Unicode database file PropertyValueAliases.txt.  Most
5279      * values have more than one name.  The nameChoice determines
5280      * which one is returned.
5281      *
5282      * Note: Some of the names in PropertyValueAliases.txt can only be
5283      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
5284      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
5285      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
5286      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
5287      *
5288      * @param property UProperty selector constant.
5289      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
5290      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
5291      * UProperty.MASK_START &lt;= property &lt; UProperty.MASK_LIMIT.
5292      * If out of range, null is returned.
5293      *
5294      * @param value selector for a value for the given property.  In
5295      * general, valid values range from 0 up to some maximum.  There
5296      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
5297      * non-zero value BASIC_LATIN.getID().  (2.)
5298      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
5299      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
5300      * are mask values produced by left-shifting 1 by
5301      * UCharacter.getType().  This allows grouped categories such as
5302      * [:L:] to be represented.  Mask values are non-contiguous.
5303      *
5304      * @param nameChoice UProperty.NameChoice selector for which name
5305      * to get.  All values have a long name.  Most have a short name,
5306      * but some do not.  Unicode allows for additional names; if
5307      * present these will be returned by UProperty.NameChoice.LONG + i,
5308      * where i=1, 2,...
5309      *
5310      * @return a name, or null if Unicode explicitly defines no name
5311      * ("n/a") for a given property/value/nameChoice.  If a given
5312      * nameChoice throws an exception, then all larger values of
5313      * nameChoice will throw an exception.  If null is returned for a
5314      * given nameChoice, then other nameChoice values may return
5315      * non-null results.
5316      *
5317      * @exception IllegalArgumentException thrown if property, value,
5318      * or nameChoice are invalid.
5319      *
5320      * @see UProperty
5321      * @see UProperty.NameChoice
5322      * @stable ICU 2.4
5323      */
getPropertyValueName(int property, int value, int nameChoice)5324     public static String getPropertyValueName(int property,
5325             int value,
5326             int nameChoice)
5327     {
5328         if ((property == UProperty.CANONICAL_COMBINING_CLASS
5329                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
5330                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
5331                 && value >= UCharacter.getIntPropertyMinValue(
5332                         UProperty.CANONICAL_COMBINING_CLASS)
5333                         && value <= UCharacter.getIntPropertyMaxValue(
5334                                 UProperty.CANONICAL_COMBINING_CLASS)
5335                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
5336             // this is hard coded for the valid cc
5337             // because PropertyValueAliases.txt does not contain all of them
5338             try {
5339                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
5340                         nameChoice);
5341             }
5342             catch (IllegalArgumentException e) {
5343                 return null;
5344             }
5345         }
5346         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
5347     }
5348 
5349     /**
5350      * {@icu} Return the property value integer for a given value name, as
5351      * specified in the Unicode database file PropertyValueAliases.txt.
5352      * Short, long, and any other variants are recognized.
5353      *
5354      * Note: Some of the names in PropertyValueAliases.txt will only be
5355      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
5356      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
5357      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
5358      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
5359      *
5360      * @param property UProperty selector constant.
5361      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
5362      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
     * UProperty.MASK_START &lt;= property &lt; UProperty.MASK_LIMIT.
5364      * Only these properties can be enumerated.
5365      *
5366      * @param valueAlias the value name to be matched.  The name is
5367      * compared using "loose matching" as described in
5368      * PropertyValueAliases.txt.
5369      *
     * @return a value integer.  Note: UProperty.GENERAL_CATEGORY_MASK
5371      * values are mask values produced by left-shifting 1 by
5372      * UCharacter.getType().  This allows grouped categories such as
5373      * [:L:] to be represented.
5374      *
5375      * @see UProperty
5376      * @throws IllegalArgumentException if property is not a valid UProperty
5377      *         selector or valueAlias is not a value of this property
5378      * @stable ICU 2.4
5379      */
getPropertyValueEnum(int property, CharSequence valueAlias)5380     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
5381         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
5382         if (propEnum == UProperty.UNDEFINED) {
5383             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
5384         }
5385         return propEnum;
5386     }
5387 
5388     /**
5389      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
5390      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
5391      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
5392      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
5393      * @internal
5394      * @deprecated This API is ICU internal only.
5395      */
5396     @Deprecated
getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)5397     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
5398         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
5399     }
5400 
5401 
5402     /**
5403      * {@icu} Returns a code point corresponding to the two surrogate code units.
5404      *
5405      * @param lead the lead unit
5406      *        (In ICU 2.1-69 the type of both parameters was <code>char</code>.)
5407      * @param trail the trail unit
5408      * @return code point if lead and trail form a valid surrogate pair.
5409      * @exception IllegalArgumentException thrown when the code units do
5410      *            not form a valid surrogate pair
5411      * @stable ICU 70
5412      * @see #toCodePoint(int, int)
5413      */
getCodePoint(int lead, int trail)5414     public static int getCodePoint(int lead, int trail)
5415     {
5416         if (isHighSurrogate(lead) && isLowSurrogate(trail)) {
5417             return toCodePoint(lead, trail);
5418         }
5419         throw new IllegalArgumentException("Not a valid surrogate pair");
5420     }
5421 
5422     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
5423     /**
5424      * {@icu} Returns a code point corresponding to the two surrogate code units.
5425      *
5426      * @param lead the lead char
5427      * @param trail the trail char
5428      * @return code point if surrogate characters are valid.
5429      * @exception IllegalArgumentException thrown when the code units do
5430      *            not form a valid code point
5431      * @stable ICU 2.1
5432      */
getCodePoint(char lead, char trail)5433     public static int getCodePoint(char lead, char trail)
5434     {
5435         return getCodePoint((int) lead, (int) trail);
5436     }
5437     // END Android patch: Keep the `char` version on Android. See ICU-21655
5438 
5439     /**
5440      * {@icu} Returns the code point corresponding to the BMP code point.
5441      *
5442      * @param char16 the BMP code point
5443      * @return code point if argument is a valid character.
5444      * @exception IllegalArgumentException thrown when char16 is not a valid
5445      *            code point
5446      * @stable ICU 2.1
5447      */
getCodePoint(char char16)5448     public static int getCodePoint(char char16)
5449     {
5450         if (UCharacter.isLegal(char16)) {
5451             return char16;
5452         }
5453         throw new IllegalArgumentException("Illegal codepoint");
5454     }
5455 
5456     /**
5457      * Returns the uppercase version of the argument string.
5458      * Casing is dependent on the default locale and context-sensitive.
5459      * @param str source string to be performed on
5460      * @return uppercase version of the argument string
5461      * @stable ICU 2.1
5462      */
toUpperCase(String str)5463     public static String toUpperCase(String str)
5464     {
5465         return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str);
5466     }
5467 
5468     /**
5469      * Returns the lowercase version of the argument string.
5470      * Casing is dependent on the default locale and context-sensitive
5471      * @param str source string to be performed on
5472      * @return lowercase version of the argument string
5473      * @stable ICU 2.1
5474      */
toLowerCase(String str)5475     public static String toLowerCase(String str)
5476     {
5477         return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str);
5478     }
5479 
5480     /**
5481      * <p>Returns the titlecase version of the argument string.
     * <p>Position for titlecasing is determined by the argument break
     * iterator, hence the user can customize their break iterator for
     * a specialized titlecasing. In this case only the forward iteration
     * needs to be implemented.
     * If the break iterator passed in is null, the default Unicode algorithm
     * will be used to determine the titlecase positions.
     *
     * <p>Only positions returned by the break iterator will be title cased,
     * characters in between the positions will all be in lower case.
5491      * <p>Casing is dependent on the default locale and context-sensitive
5492      * @param str source string to be performed on
5493      * @param breakiter break iterator to determine the positions in which
5494      *        the character should be title cased.
5495      * @return titlecase version of the argument string
5496      * @stable ICU 2.6
5497      */
toTitleCase(String str, BreakIterator breakiter)5498     public static String toTitleCase(String str, BreakIterator breakiter)
5499     {
5500         return toTitleCase(Locale.getDefault(), str, breakiter, 0);
5501     }
5502 
getDefaultCaseLocale()5503     private static int getDefaultCaseLocale() {
5504         return UCaseProps.getCaseLocale(Locale.getDefault());
5505     }
5506 
getCaseLocale(Locale locale)5507     private static int getCaseLocale(Locale locale) {
5508         if (locale == null) {
5509             locale = Locale.getDefault();
5510         }
5511         return UCaseProps.getCaseLocale(locale);
5512     }
5513 
getCaseLocale(ULocale locale)5514     private static int getCaseLocale(ULocale locale) {
5515         if (locale == null) {
5516             locale = ULocale.getDefault();
5517         }
5518         return UCaseProps.getCaseLocale(locale);
5519     }
5520 
5521     /**
5522      * Returns the uppercase version of the argument string.
5523      * Casing is dependent on the argument locale and context-sensitive.
5524      * @param locale which string is to be converted in
5525      * @param str source string to be performed on
5526      * @return uppercase version of the argument string
5527      * @stable ICU 2.1
5528      */
toUpperCase(Locale locale, String str)5529     public static String toUpperCase(Locale locale, String str)
5530     {
5531         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
5532     }
5533 
5534     /**
5535      * Returns the uppercase version of the argument string.
5536      * Casing is dependent on the argument locale and context-sensitive.
5537      * @param locale which string is to be converted in
5538      * @param str source string to be performed on
5539      * @return uppercase version of the argument string
5540      * @stable ICU 3.2
5541      */
toUpperCase(ULocale locale, String str)5542     public static String toUpperCase(ULocale locale, String str) {
5543         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
5544     }
5545 
5546     /**
5547      * Returns the lowercase version of the argument string.
5548      * Casing is dependent on the argument locale and context-sensitive
5549      * @param locale which string is to be converted in
5550      * @param str source string to be performed on
5551      * @return lowercase version of the argument string
5552      * @stable ICU 2.1
5553      */
toLowerCase(Locale locale, String str)5554     public static String toLowerCase(Locale locale, String str)
5555     {
5556         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
5557     }
5558 
5559     /**
5560      * Returns the lowercase version of the argument string.
5561      * Casing is dependent on the argument locale and context-sensitive
5562      * @param locale which string is to be converted in
5563      * @param str source string to be performed on
5564      * @return lowercase version of the argument string
5565      * @stable ICU 3.2
5566      */
toLowerCase(ULocale locale, String str)5567     public static String toLowerCase(ULocale locale, String str) {
5568         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
5569     }
5570 
5571     /**
5572      * <p>Returns the titlecase version of the argument string.
     * <p>Position for titlecasing is determined by the argument break
     * iterator, hence the user can customize their break iterator for
     * a specialized titlecasing. In this case only the forward iteration
     * needs to be implemented.
     * If the break iterator passed in is null, the default Unicode algorithm
     * will be used to determine the titlecase positions.
     *
     * <p>Only positions returned by the break iterator will be title cased,
     * characters in between the positions will all be in lower case.
5582      * <p>Casing is dependent on the argument locale and context-sensitive
5583      * @param locale which string is to be converted in
5584      * @param str source string to be performed on
5585      * @param breakiter break iterator to determine the positions in which
5586      *        the character should be title cased.
5587      * @return titlecase version of the argument string
5588      * @stable ICU 2.6
5589      */
toTitleCase(Locale locale, String str, BreakIterator breakiter)5590     public static String toTitleCase(Locale locale, String str,
5591             BreakIterator breakiter)
5592     {
5593         return toTitleCase(locale, str, breakiter, 0);
5594     }
5595 
5596     /**
5597      * <p>Returns the titlecase version of the argument string.
     * <p>Position for titlecasing is determined by the argument break
     * iterator, hence the user can customize their break iterator for
     * a specialized titlecasing. In this case only the forward iteration
     * needs to be implemented.
     * If the break iterator passed in is null, the default Unicode algorithm
     * will be used to determine the titlecase positions.
     *
     * <p>Only positions returned by the break iterator will be title cased,
     * characters in between the positions will all be in lower case.
5607      * <p>Casing is dependent on the argument locale and context-sensitive
5608      * @param locale which string is to be converted in
5609      * @param str source string to be performed on
5610      * @param titleIter break iterator to determine the positions in which
5611      *        the character should be title cased.
5612      * @return titlecase version of the argument string
5613      * @stable ICU 3.2
5614      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter)5615     public static String toTitleCase(ULocale locale, String str,
5616             BreakIterator titleIter) {
5617         return toTitleCase(locale, str, titleIter, 0);
5618     }
5619 
5620     /**
5621      * <p>Returns the titlecase version of the argument string.
     * <p>Position for titlecasing is determined by the argument break
     * iterator, hence the user can customize their break iterator for
     * a specialized titlecasing. In this case only the forward iteration
     * needs to be implemented.
     * If the break iterator passed in is null, the default Unicode algorithm
     * will be used to determine the titlecase positions.
     *
     * <p>Only positions returned by the break iterator will be title cased,
     * characters in between the positions will all be in lower case.
5631      * <p>Casing is dependent on the argument locale and context-sensitive
5632      * @param locale which string is to be converted in
5633      * @param str source string to be performed on
5634      * @param titleIter break iterator to determine the positions in which
5635      *        the character should be title cased.
5636      * @param options bit set to modify the titlecasing operation
5637      * @return titlecase version of the argument string
5638      * @stable ICU 3.8
5639      * @see #TITLECASE_NO_LOWERCASE
5640      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5641      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5642     public static String toTitleCase(ULocale locale, String str,
5643             BreakIterator titleIter, int options) {
5644         if (titleIter == null && locale == null) {
5645             locale = ULocale.getDefault();
5646         }
5647         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
5648         titleIter.setText(str);
5649         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
5650     }
5651 
5652     /**
5653      * {@icu} <p>Returns the titlecase version of the argument string.
     * <p>Position for titlecasing is determined by the argument break
     * iterator, hence the user can customize their break iterator for
     * a specialized titlecasing. In this case only the forward iteration
     * needs to be implemented.
     * If the break iterator passed in is null, the default Unicode algorithm
     * will be used to determine the titlecase positions.
     *
     * <p>Only positions returned by the break iterator will be title cased,
     * characters in between the positions will all be in lower case.
5663      * <p>Casing is dependent on the argument locale and context-sensitive
5664      * @param locale which string is to be converted in
5665      * @param str source string to be performed on
5666      * @param titleIter break iterator to determine the positions in which
5667      *        the character should be title cased.
5668      * @param options bit set to modify the titlecasing operation
5669      * @return titlecase version of the argument string
5670      * @see #TITLECASE_NO_LOWERCASE
5671      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5672      * @stable ICU 54
5673      */
toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5674     public static String toTitleCase(Locale locale, String str,
5675             BreakIterator titleIter,
5676             int options) {
5677         if (titleIter == null && locale == null) {
5678             locale = Locale.getDefault();
5679         }
5680         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
5681         titleIter.setText(str);
5682         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
5683     }
5684 
5685     /**
5686      * {@icu} The given character is mapped to its case folding equivalent according
5687      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5688      * folding equivalent, the character itself is returned.
5689      *
5690      * <p>This function only returns the simple, single-code point case mapping.
5691      * Full case mappings should be used whenever possible because they produce
5692      * better results by working on whole strings.
5693      * They can map to a result string with a different length as appropriate.
5694      * Full case mappings are applied by the case mapping functions
5695      * that take String parameters rather than code points (int).
5696      * See also the User Guide chapter on C/POSIX migration:
5697      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
5698      *
5699      * @param ch             the character to be converted
5700      * @param defaultmapping Indicates whether the default mappings defined in
5701      *                       CaseFolding.txt are to be used, otherwise the
5702      *                       mappings for dotted I and dotless i marked with
5703      *                       'T' in CaseFolding.txt are included.
5704      * @return               the case folding equivalent of the character, if
5705      *                       any; otherwise the character itself.
5706      * @see                  #foldCase(String, boolean)
5707      * @stable ICU 2.1
5708      */
foldCase(int ch, boolean defaultmapping)5709     public static int foldCase(int ch, boolean defaultmapping) {
5710         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5711     }
5712 
5713     /**
5714      * {@icu} The given string is mapped to its case folding equivalent according to
5715      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5716      * folding equivalent, the character itself is returned.
5717      * "Full", multiple-code point case folding mappings are returned here.
5718      * For "simple" single-code point mappings use the API
5719      * foldCase(int ch, boolean defaultmapping).
5720      * @param str            the String to be converted
5721      * @param defaultmapping Indicates whether the default mappings defined in
5722      *                       CaseFolding.txt are to be used, otherwise the
5723      *                       mappings for dotted I and dotless i marked with
5724      *                       'T' in CaseFolding.txt are included.
     * @return               the case folding equivalent of the string, if
     *                       any; otherwise the string itself.
5727      * @see                  #foldCase(int, boolean)
5728      * @stable ICU 2.1
5729      */
foldCase(String str, boolean defaultmapping)5730     public static String foldCase(String str, boolean defaultmapping) {
5731         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5732     }
5733 
5734     /**
5735      * {@icu} Option value for case folding: use default mappings defined in
5736      * CaseFolding.txt.
5737      * @stable ICU 2.6
5738      */
5739     public static final int FOLD_CASE_DEFAULT    =      0x0000;
5740     /**
5741      * {@icu} Option value for case folding:
5742      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
5743      * and dotless i appropriately for Turkic languages (tr, az).
5744      *
5745      * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
5746      * are to be included for default mappings and
5747      * excluded for the Turkic-specific mappings.
5748      *
5749      * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
5750      * are to be excluded for default mappings and
5751      * included for the Turkic-specific mappings.
5752      *
5753      * @stable ICU 2.6
5754      */
5755     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
5756 
5757     /**
5758      * {@icu} The given character is mapped to its case folding equivalent according
5759      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5760      * folding equivalent, the character itself is returned.
5761      *
5762      * <p>This function only returns the simple, single-code point case mapping.
5763      * Full case mappings should be used whenever possible because they produce
5764      * better results by working on whole strings.
5765      * They can map to a result string with a different length as appropriate.
5766      * Full case mappings are applied by the case mapping functions
5767      * that take String parameters rather than code points (int).
5768      * See also the User Guide chapter on C/POSIX migration:
5769      * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
5770      *
5771      * @param ch the character to be converted
5772      * @param options A bit set for special processing. Currently the recognised options
5773      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5774      * @return the case folding equivalent of the character, if any; otherwise the
5775      * character itself.
5776      * @see #foldCase(String, boolean)
5777      * @stable ICU 2.6
5778      */
foldCase(int ch, int options)5779     public static int foldCase(int ch, int options) {
5780         return UCaseProps.INSTANCE.fold(ch, options);
5781     }
5782 
5783     /**
5784      * {@icu} The given string is mapped to its case folding equivalent according to
5785      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5786      * folding equivalent, the character itself is returned.
5787      * "Full", multiple-code point case folding mappings are returned here.
5788      * For "simple" single-code point mappings use the API
5789      * foldCase(int ch, boolean defaultmapping).
5790      * @param str the String to be converted
5791      * @param options A bit set for special processing. Currently the recognised options
5792      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
     * @return the case folding equivalent of the string, if any; otherwise the
     *         string itself.
5795      * @see #foldCase(int, boolean)
5796      * @stable ICU 2.6
5797      */
foldCase(String str, int options)5798     public static final String foldCase(String str, int options) {
5799         return CaseMapImpl.fold(options, str);
5800     }
5801 
5802     /**
5803      * {@icu} Returns the numeric value of a Han character.
5804      *
5805      * <p>This returns the value of Han 'numeric' code points,
5806      * including those for zero, ten, hundred, thousand, ten thousand,
5807      * and hundred million.
5808      * This includes both the standard and 'checkwriting'
5809      * characters, the 'big circle' zero character, and the standard
5810      * zero character.
5811      *
5812      * <p>Note: The Unicode Standard has numeric values for more
     * Han characters than those recognized by this method
5814      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
5815      * and a {@link com.ibm.icu.text.NumberFormat} can be used with
5816      * a Chinese {@link com.ibm.icu.text.NumberingSystem}.
5817      *
5818      * @param ch code point to query
5819      * @return value if it is a Han 'numeric character,' otherwise return -1.
5820      * @stable ICU 2.4
5821      */
getHanNumericValue(int ch)5822     public static int getHanNumericValue(int ch)
5823     {
5824         switch(ch)
5825         {
5826         case IDEOGRAPHIC_NUMBER_ZERO_ :
5827         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
5828             return 0; // Han Zero
5829         case CJK_IDEOGRAPH_FIRST_ :
5830         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
5831             return 1; // Han One
5832         case CJK_IDEOGRAPH_SECOND_ :
5833         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
5834             return 2; // Han Two
5835         case CJK_IDEOGRAPH_THIRD_ :
5836         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
5837             return 3; // Han Three
5838         case CJK_IDEOGRAPH_FOURTH_ :
5839         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
5840             return 4; // Han Four
5841         case CJK_IDEOGRAPH_FIFTH_ :
5842         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
5843             return 5; // Han Five
5844         case CJK_IDEOGRAPH_SIXTH_ :
5845         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
5846             return 6; // Han Six
5847         case CJK_IDEOGRAPH_SEVENTH_ :
5848         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
5849             return 7; // Han Seven
5850         case CJK_IDEOGRAPH_EIGHTH_ :
5851         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
5852             return 8; // Han Eight
5853         case CJK_IDEOGRAPH_NINETH_ :
5854         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
5855             return 9; // Han Nine
5856         case CJK_IDEOGRAPH_TEN_ :
5857         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
5858             return 10;
5859         case CJK_IDEOGRAPH_HUNDRED_ :
5860         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
5861             return 100;
5862         case CJK_IDEOGRAPH_THOUSAND_ :
5863         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
5864             return 1000;
5865         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
5866             return 10000;
5867         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
5868             return 100000000;
5869         }
5870         return -1; // no value
5871     }
5872 
5873     /**
5874      * {@icu} <p>Returns an iterator for character types, iterating over codepoints.
5875      * <p>Example of use:<br>
5876      * <pre>
5877      * RangeValueIterator iterator = UCharacter.getTypeIterator();
5878      * RangeValueIterator.Element element = new RangeValueIterator.Element();
5879      * while (iterator.next(element)) {
5880      *     System.out.println("Codepoint \\u" +
5881      *                        Integer.toHexString(element.start) +
5882      *                        " to codepoint \\u" +
5883      *                        Integer.toHexString(element.limit - 1) +
5884      *                        " has the character type " +
5885      *                        element.value);
5886      * }
5887      * </pre>
5888      * @return an iterator
5889      * @stable ICU 2.6
5890      */
getTypeIterator()5891     public static RangeValueIterator getTypeIterator()
5892     {
5893         return new UCharacterTypeIterator();
5894     }
5895 
5896     private static final class UCharacterTypeIterator implements RangeValueIterator {
UCharacterTypeIterator()5897         UCharacterTypeIterator() {
5898             reset();
5899         }
5900 
5901         // implements RangeValueIterator
5902         @Override
next(Element element)5903         public boolean next(Element element) {
5904             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5905                 element.start=range.startCodePoint;
5906                 element.limit=range.endCodePoint+1;
5907                 element.value=range.value;
5908                 return true;
5909             } else {
5910                 return false;
5911             }
5912         }
5913 
5914         // implements RangeValueIterator
5915         @Override
reset()5916         public void reset() {
5917             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
5918         }
5919 
5920         private Iterator<Trie2.Range> trieIterator;
5921         private Trie2.Range range;
5922 
5923         private static final class MaskType implements Trie2.ValueMapper {
5924             // Extracts the general category ("character type") from the trie value.
5925             @Override
map(int value)5926             public int map(int value) {
5927                 return value & UCharacterProperty.TYPE_MASK;
5928             }
5929         }
5930         private static final MaskType MASK_TYPE=new MaskType();
5931     }
5932 
5933     /**
5934      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
5935      * <p>This API only gets the iterator for the modern, most up-to-date
     * Unicode names. For older 1.0 Unicode names use getName1_0Iterator() or
5937      * for extended names use getExtendedNameIterator().
5938      * <p>Example of use:<br>
5939      * <pre>
5940      * ValueIterator iterator = UCharacter.getNameIterator();
5941      * ValueIterator.Element element = new ValueIterator.Element();
5942      * while (iterator.next(element)) {
5943      *     System.out.println("Codepoint \\u" +
5944      *                        Integer.toHexString(element.codepoint) +
5945      *                        " has the name " + (String)element.value);
5946      * }
5947      * </pre>
5948      * <p>The maximal range which the name iterator iterates is from
5949      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
5950      * @return an iterator
5951      * @stable ICU 2.6
5952      */
getNameIterator()5953     public static ValueIterator getNameIterator(){
5954         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5955                 UCharacterNameChoice.UNICODE_CHAR_NAME);
5956     }
5957 
5958     /**
5959      * {@icu} Returns an empty iterator.
5960      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
5961      * @return an empty iterator
5962      * @deprecated ICU 49
5963      * @see #getName1_0(int)
5964      */
5965     @Deprecated
getName1_0Iterator()5966     public static ValueIterator getName1_0Iterator(){
5967         return new DummyValueIterator();
5968     }
5969 
5970     private static final class DummyValueIterator implements ValueIterator {
5971         @Override
next(Element element)5972         public boolean next(Element element) { return false; }
5973         @Override
reset()5974         public void reset() {}
5975         @Override
setRange(int start, int limit)5976         public void setRange(int start, int limit) {}
5977     }
5978 
5979     /**
5980      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
5981      * <p>This API only gets the iterator for the extended names.
5982      * For modern, most up-to-date Unicode names use getNameIterator() or
5983      * for older 1.0 Unicode names use get1_0NameIterator().
5984      * <p>Example of use:<br>
5985      * <pre>
5986      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
5987      * ValueIterator.Element element = new ValueIterator.Element();
5988      * while (iterator.next(element)) {
5989      *     System.out.println("Codepoint \\u" +
5990      *                        Integer.toHexString(element.codepoint) +
5991      *                        " has the name " + (String)element.value);
5992      * }
5993      * </pre>
5994      * <p>The maximal range which the name iterator iterates is from
5995      * @return an iterator
5996      * @stable ICU 2.6
5997      */
getExtendedNameIterator()5998     public static ValueIterator getExtendedNameIterator(){
5999         return new UCharacterNameIterator(UCharacterName.INSTANCE,
6000                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
6001     }
6002 
6003     /**
6004      * {@icu} Returns the "age" of the code point.
6005      * <p>The "age" is the Unicode version when the code point was first
6006      * designated (as a non-character or for Private Use) or assigned a
6007      * character.
6008      * <p>This can be useful to avoid emitting code points to receiving
6009      * processes that do not accept newer characters.
6010      * <p>The data is from the UCD file DerivedAge.txt.
6011      * @param ch The code point.
6012      * @return the Unicode version number
6013      * @stable ICU 2.6
6014      */
getAge(int ch)6015     public static VersionInfo getAge(int ch)
6016     {
6017         if (ch < MIN_VALUE || ch > MAX_VALUE) {
6018             throw new IllegalArgumentException("Codepoint out of bounds");
6019         }
6020         return UCharacterProperty.INSTANCE.getAge(ch);
6021     }
6022 
6023     /**
6024      * {@icu} Check a binary Unicode property for a code point.
6025      * <p>Unicode, especially in version 3.2, defines many more properties
6026      * than the original set in UnicodeData.txt.
6027      * <p>This API is intended to reflect Unicode properties as defined in
6028      * the Unicode Character Database (UCD) and Unicode Technical Reports
6029      * (UTR).
6030      * <p>For details about the properties see
6031      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
6032      * <p>For names of Unicode properties see the UCD file
6033      * PropertyAliases.txt.
6034      * <p>This API does not check the validity of the codepoint.
6035      * <p>Important: If ICU is built with UCD files from Unicode versions
6036      * below 3.2, then properties marked with "new" are not or
6037      * not fully available.
6038      * @param ch code point to test.
6039      * @param property selector constant from com.ibm.icu.lang.UProperty,
6040      *        identifies which binary property to check.
6041      * @return true or false according to the binary Unicode property value
6042      *         for ch. Also false if property is out of bounds or if the
6043      *         Unicode version does not have data for the property at all, or
6044      *         not for this code point.
6045      * @see com.ibm.icu.lang.UProperty
6046      * @see CharacterProperties#getBinaryPropertySet(int)
6047      * @stable ICU 2.6
6048      */
hasBinaryProperty(int ch, int property)6049     public static boolean hasBinaryProperty(int ch, int property)
6050     {
6051         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
6052     }
6053 
6054     /**
6055      * {@icu} Returns true if the property is true for the string.
6056      * Same as {@link #hasBinaryProperty(int, int)}
6057      * if the string contains exactly one code point.
6058      *
6059      * <p>Most properties apply only to single code points.
6060      * <a href="https://www.unicode.org/reports/tr51/#Emoji_Sets">UTS #51 Unicode Emoji</a>
6061      * defines several properties of strings.
6062      *
6063      * @param s String to test.
6064      * @param property UProperty selector constant, identifies which binary property to check.
6065      *        Must be BINARY_START&lt;=which&lt;BINARY_LIMIT.
6066      * @return true or false according to the binary Unicode property value for the string.
6067      *         Also false if <code>property</code> is out of bounds or if the Unicode version
6068      *         does not have data for the property at all.
6069      *
6070      * @see com.ibm.icu.lang.UProperty
6071      * @see CharacterProperties#getBinaryPropertySet(int)
6072      * @stable ICU 70
6073      */
hasBinaryProperty(CharSequence s, int property)6074     public static boolean hasBinaryProperty(CharSequence s, int property) {
6075         int length = s.length();
6076         if (length == 1) {
6077             return hasBinaryProperty(s.charAt(0), property);  // single code point
6078         } else if (length == 2) {
6079             // first code point
6080             int c = Character.codePointAt(s, 0);
6081             if (Character.charCount(c) == length) {
6082                 return hasBinaryProperty(c, property);  // single code point
6083             }
6084         }
6085         // Only call into EmojiProps for a relevant property,
6086         // so that we not unnecessarily try to load its data file.
6087         return UProperty.BASIC_EMOJI <= property && property <= UProperty.RGI_EMOJI &&
6088             EmojiProps.INSTANCE.hasBinaryProperty(s, property);
6089     }
6090 
6091     /**
6092      * {@icu} <p>Check if a code point has the Alphabetic Unicode property.
6093      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
6094      * <p>Different from UCharacter.isLetter(ch)!
6095      * @stable ICU 2.6
6096      * @param ch codepoint to be tested
6097      */
isUAlphabetic(int ch)6098     public static boolean isUAlphabetic(int ch)
6099     {
6100         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
6101     }
6102 
6103     /**
6104      * {@icu} <p>Check if a code point has the Lowercase Unicode property.
6105      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
6106      * <p>This is different from UCharacter.isLowerCase(ch)!
6107      * @param ch codepoint to be tested
6108      * @stable ICU 2.6
6109      */
isULowercase(int ch)6110     public static boolean isULowercase(int ch)
6111     {
6112         return hasBinaryProperty(ch, UProperty.LOWERCASE);
6113     }
6114 
6115     /**
6116      * {@icu} <p>Check if a code point has the Uppercase Unicode property.
6117      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
6118      * <p>This is different from UCharacter.isUpperCase(ch)!
6119      * @param ch codepoint to be tested
6120      * @stable ICU 2.6
6121      */
isUUppercase(int ch)6122     public static boolean isUUppercase(int ch)
6123     {
6124         return hasBinaryProperty(ch, UProperty.UPPERCASE);
6125     }
6126 
6127     /**
6128      * {@icu} <p>Check if a code point has the White_Space Unicode property.
6129      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
6130      * <p>This is different from both UCharacter.isSpace(ch) and
6131      * UCharacter.isWhitespace(ch)!
6132      * @param ch codepoint to be tested
6133      * @stable ICU 2.6
6134      */
isUWhiteSpace(int ch)6135     public static boolean isUWhiteSpace(int ch)
6136     {
6137         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
6138     }
6139 
6140     /**
6141      * {@icu} Returns the property value for a Unicode property type of a code point.
6142      * Also returns binary and mask property values.
6143      * <p>Unicode, especially in version 3.2, defines many more properties than
6144      * the original set in UnicodeData.txt.
6145      * <p>The properties APIs are intended to reflect Unicode properties as
6146      * defined in the Unicode Character Database (UCD) and Unicode Technical
6147      * Reports (UTR). For details about the properties see
6148      * http://www.unicode.org/.
6149      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
6150      *
6151      * <pre>
6152      * Sample usage:
6153      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
6154      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
6155      * boolean b = (ideo == 1) ? true : false;
6156      * </pre>
6157      * @param ch code point to test.
6158      * @param type UProperty selector constant, identifies which binary
6159      *        property to check. Must be
6160      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
6161      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
6162      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
6163      * @return numeric value that is directly the property value or,
6164      *         for enumerated properties, corresponds to the numeric value of
6165      *         the enumerated constant of the respective property value type
6166      *         ({@link ECharacterCategory}, {@link ECharacterDirection},
6167      *         {@link DecompositionType}, etc.).
6168      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
6169      *         Returns a bit-mask for mask properties.
6170      *         Returns 0 if 'type' is out of bounds or if the Unicode version
6171      *         does not have data for the property at all, or not for this code
6172      *         point.
6173      * @see UProperty
6174      * @see #hasBinaryProperty
6175      * @see #getIntPropertyMinValue
6176      * @see #getIntPropertyMaxValue
6177      * @see CharacterProperties#getIntPropertyMap(int)
6178      * @see #getUnicodeVersion
6179      * @stable ICU 2.4
6180      */
getIntPropertyValue(int ch, int type)6181     public static int getIntPropertyValue(int ch, int type)
6182     {
6183         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
6184     }
6185     /**
6186      * {@icu} Returns a string version of the property value.
6187      * @param propertyEnum The property enum value.
6188      * @param codepoint The codepoint value.
6189      * @param nameChoice The choice of the name.
6190      * @return value as string
6191      * @internal
6192      * @deprecated This API is ICU internal only.
6193      */
6194     @Deprecated
6195     ///CLOVER:OFF
getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)6196     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
6197         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
6198                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
6199             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
6200                     nameChoice);
6201         }
6202         if (propertyEnum == UProperty.NUMERIC_VALUE) {
6203             return String.valueOf(getUnicodeNumericValue(codepoint));
6204         }
6205         // otherwise must be string property
6206         switch (propertyEnum) {
6207         case UProperty.AGE: return getAge(codepoint).toString();
6208         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
6209         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
6210         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
6211         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
6212         case UProperty.NAME: return getName(codepoint);
6213         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
6214         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
6215         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
6216         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
6217         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
6218         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
6219         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
6220         }
6221         throw new IllegalArgumentException("Illegal Property Enum");
6222     }
6223     ///CLOVER:ON
6224 
6225     /**
6226      * {@icu} Returns the minimum value for an integer/binary Unicode property type.
6227      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
6228      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
6229      * @param type UProperty selector constant, identifies which binary
6230      *        property to check. Must be
6231      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
6232      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
6233      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
6234      *         for a Unicode property. 0 if the property
6235      *         selector 'type' is out of range.
6236      * @see UProperty
6237      * @see #hasBinaryProperty
6238      * @see #getUnicodeVersion
6239      * @see #getIntPropertyMaxValue
6240      * @see #getIntPropertyValue
6241      * @stable ICU 2.4
6242      */
getIntPropertyMinValue(int type)6243     public static int getIntPropertyMinValue(int type){
6244 
6245         return 0; // undefined; and: all other properties have a minimum value of 0
6246     }
6247 
6248 
6249     /**
6250      * {@icu} Returns the maximum value for an integer/binary Unicode property.
6251      * Can be used together with UCharacter.getIntPropertyMinValue(int)
6252      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
6253      * Examples for min/max values (for Unicode 3.2):
6254      * <ul>
6255      * <li> UProperty.BIDI_CLASS:    0/18
6256      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
6257      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
6258      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
6259      * </ul>
6260      * For undefined UProperty constant values, min/max values will be 0/-1.
6261      * @param type UProperty selector constant, identifies which binary
6262      *        property to check. Must be
6263      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
6264      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
6265      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
6266      *         property. &lt;= 0 if the property selector 'type' is out of range.
6267      * @see UProperty
6268      * @see #hasBinaryProperty
6269      * @see #getUnicodeVersion
6270      * @see #getIntPropertyMaxValue
6271      * @see #getIntPropertyValue
6272      * @stable ICU 2.4
6273      */
getIntPropertyMaxValue(int type)6274     public static int getIntPropertyMaxValue(int type)
6275     {
6276         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
6277     }
6278 
6279     /**
6280      * Provide the java.lang.Character forDigit API, for convenience.
6281      * @stable ICU 3.0
6282      */
forDigit(int digit, int radix)6283     public static char forDigit(int digit, int radix) {
6284         return java.lang.Character.forDigit(digit, radix);
6285     }
6286 
6287     // JDK 1.5 API coverage
6288 
    /**
     * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}:
     * the lower bound of the high (lead) surrogate range.
     *
     * @stable ICU 3.0
     */
    public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;

    /**
     * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}:
     * the upper bound of the high (lead) surrogate range.
     *
     * @stable ICU 3.0
     */
    public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;

    /**
     * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}:
     * the lower bound of the low (trail) surrogate range.
     *
     * @stable ICU 3.0
     */
    public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;

    /**
     * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}:
     * the upper bound of the low (trail) surrogate range.
     *
     * @stable ICU 3.0
     */
    public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;

    /**
     * Constant U+D800, same as {@link Character#MIN_SURROGATE}:
     * the lower bound of the whole surrogate range (equal to MIN_HIGH_SURROGATE).
     *
     * @stable ICU 3.0
     */
    public static final char MIN_SURROGATE = Character.MIN_SURROGATE;

    /**
     * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}:
     * the upper bound of the whole surrogate range (equal to MAX_LOW_SURROGATE).
     *
     * @stable ICU 3.0
     */
    public static final char MAX_SURROGATE = Character.MAX_SURROGATE;

    /**
     * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}:
     * the first code point that needs two chars (a surrogate pair).
     *
     * @stable ICU 3.0
     */
    public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;

    /**
     * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}:
     * the largest code point value.
     *
     * @stable ICU 3.0
     */
    public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;

    /**
     * Constant U+0000, same as {@link Character#MIN_CODE_POINT}:
     * the smallest code point value.
     *
     * @stable ICU 3.0
     */
    public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
6351 
6352     /**
6353      * Equivalent to {@link Character#isValidCodePoint}.
6354      *
6355      * @param cp the code point to check
6356      * @return true if cp is a valid code point
6357      * @stable ICU 3.0
6358      */
isValidCodePoint(int cp)6359     public static final boolean isValidCodePoint(int cp) {
6360         return cp >= 0 && cp <= MAX_CODE_POINT;
6361     }
6362 
6363     /**
6364      * Same as {@link Character#isSupplementaryCodePoint}.
6365      *
6366      * @param cp the code point to check
6367      * @return true if cp is a supplementary code point
6368      * @stable ICU 3.0
6369      */
isSupplementaryCodePoint(int cp)6370     public static final boolean isSupplementaryCodePoint(int cp) {
6371         return Character.isSupplementaryCodePoint(cp);
6372     }
6373 
6374     /**
6375      * Same as {@link Character#isHighSurrogate},
6376      * except that the ICU version accepts <code>int</code> for code points.
6377      *
6378      * @param codePoint the code point to check
6379      *        (In ICU 3.0-69 the type of this parameter was <code>char</code>.)
6380      * @return true if codePoint is a high (lead) surrogate
6381      * @stable ICU 70
6382      */
isHighSurrogate(int codePoint)6383     public static boolean isHighSurrogate(int codePoint) {
6384         return (codePoint & LEAD_SURROGATE_BITMASK) == LEAD_SURROGATE_BITS;
6385     }
6386 
6387     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
6388     /**
6389      * Same as {@link Character#isHighSurrogate},
6390      *
6391      * @param ch the char to check
6392      * @return true if ch is a high (lead) surrogate
6393      * @stable ICU 3.0
6394      */
isHighSurrogate(char ch)6395     public static boolean isHighSurrogate(char ch) {
6396         return isHighSurrogate((int) ch);
6397     }
6398     // END Android patch: Keep the `char` version on Android. See ICU-21655
6399 
6400     /**
6401      * Same as {@link Character#isLowSurrogate},
6402      * except that the ICU version accepts <code>int</code> for code points.
6403      *
6404      * @param codePoint the code point to check
6405      *        (In ICU 3.0-69 the type of this parameter was <code>char</code>.)
6406      * @return true if codePoint is a low (trail) surrogate
6407      * @stable ICU 70
6408      */
isLowSurrogate(int codePoint)6409     public static boolean isLowSurrogate(int codePoint) {
6410         return (codePoint & TRAIL_SURROGATE_BITMASK) == TRAIL_SURROGATE_BITS;
6411     }
6412 
6413     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
6414     /**
6415      * Same as {@link Character#isLowSurrogate},
6416      *
6417      * @param ch the char to check
6418      * @return true if ch is a low (trail) surrogate
6419      * @stable ICU 3.0
6420      */
isLowSurrogate(char ch)6421     public static boolean isLowSurrogate(char ch) {
6422         return isLowSurrogate((int) ch);
6423     }
6424     // END Android patch: Keep the `char` version on Android. See ICU-21655
6425 
6426     /**
6427      * Same as {@link Character#isSurrogatePair},
6428      * except that the ICU version accepts <code>int</code> for code points.
6429      *
6430      * @param high the high (lead) unit
6431      *        (In ICU 3.0-69 the type of both parameters was <code>char</code>.)
6432      * @param low the low (trail) unit
6433      * @return true if high, low form a surrogate pair
6434      * @stable ICU 70
6435      */
isSurrogatePair(int high, int low)6436     public static final boolean isSurrogatePair(int high, int low) {
6437         return isHighSurrogate(high) && isLowSurrogate(low);
6438     }
6439 
6440     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
6441     /**
6442      * Same as {@link Character#isSurrogatePair}.
6443      *
6444      * @param high the high (lead) char
6445      * @param low the low (trail) char
6446      * @return true if high, low form a surrogate pair
6447      * @stable ICU 3.0
6448      */
isSurrogatePair(char high, char low)6449     public static final boolean isSurrogatePair(char high, char low) {
6450         return isSurrogatePair((int) high, (int) low);
6451     }
6452     // END Android patch: Keep the `char` version on Android. See ICU-21655
6453 
6454     /**
6455      * Same as {@link Character#charCount}.
6456      * Returns the number of chars needed to represent the code point (1 or 2).
6457      * This does not check the code point for validity.
6458      *
6459      * @param cp the code point to check
6460      * @return the number of chars needed to represent the code point
6461      * @stable ICU 3.0
6462      */
charCount(int cp)6463     public static int charCount(int cp) {
6464         return Character.charCount(cp);
6465     }
6466 
6467     /**
6468      * Same as {@link Character#toCodePoint},
6469      * except that the ICU version accepts <code>int</code> for code points.
6470      * Returns the code point represented by the two surrogate code units.
6471      * This does not check the surrogate pair for validity.
6472      *
6473      * @param high the high (lead) surrogate
6474      *        (In ICU 3.0-69 the type of both parameters was <code>char</code>.)
6475      * @param low the low (trail) surrogate
6476      * @return the code point formed by the surrogate pair
6477      * @stable ICU 70
6478      * @see #getCodePoint(int, int)
6479      */
toCodePoint(int high, int low)6480     public static final int toCodePoint(int high, int low) {
6481         // see ICU4C U16_GET_SUPPLEMENTARY()
6482         return (high << 10) + low - U16_SURROGATE_OFFSET;
6483     }
6484 
6485     // BEGIN Android patch: Keep the `char` version on Android. See ICU-21655
6486     /**
6487      * Same as {@link Character#toCodePoint}.
6488      * Returns the code point represented by the two surrogate code units.
6489      * This does not check the surrogate pair for validity.
6490      *
6491      * @param high the high (lead) surrogate
6492      * @param low the low (trail) surrogate
6493      * @return the code point formed by the surrogate pair
6494      * @stable ICU 3.0
6495      */
toCodePoint(char high, char low)6496     public static final int toCodePoint(char high, char low) {
6497         return toCodePoint((int) high, (int) low);
6498     }
6499     // END Android patch: Keep the `char` version on Android. See ICU-21655
6500 
6501     /**
6502      * Same as {@link Character#codePointAt(CharSequence, int)}.
6503      * Returns the code point at index.
6504      * This examines only the characters at index and index+1.
6505      *
6506      * @param seq the characters to check
6507      * @param index the index of the first or only char forming the code point
6508      * @return the code point at the index
6509      * @stable ICU 3.0
6510      */
codePointAt(CharSequence seq, int index)6511     public static final int codePointAt(CharSequence seq, int index) {
6512         char c1 = seq.charAt(index++);
6513         if (isHighSurrogate(c1)) {
6514             if (index < seq.length()) {
6515                 char c2 = seq.charAt(index);
6516                 if (isLowSurrogate(c2)) {
6517                     return toCodePoint(c1, c2);
6518                 }
6519             }
6520         }
6521         return c1;
6522     }
6523 
6524     /**
6525      * Same as {@link Character#codePointAt(char[], int)}.
6526      * Returns the code point at index.
6527      * This examines only the characters at index and index+1.
6528      *
6529      * @param text the characters to check
6530      * @param index the index of the first or only char forming the code point
6531      * @return the code point at the index
6532      * @stable ICU 3.0
6533      */
codePointAt(char[] text, int index)6534     public static final int codePointAt(char[] text, int index) {
6535         char c1 = text[index++];
6536         if (isHighSurrogate(c1)) {
6537             if (index < text.length) {
6538                 char c2 = text[index];
6539                 if (isLowSurrogate(c2)) {
6540                     return toCodePoint(c1, c2);
6541                 }
6542             }
6543         }
6544         return c1;
6545     }
6546 
6547     /**
6548      * Same as {@link Character#codePointAt(char[], int, int)}.
6549      * Returns the code point at index.
6550      * This examines only the characters at index and index+1.
6551      *
6552      * @param text the characters to check
6553      * @param index the index of the first or only char forming the code point
6554      * @param limit the limit of the valid text
6555      * @return the code point at the index
6556      * @stable ICU 3.0
6557      */
codePointAt(char[] text, int index, int limit)6558     public static final int codePointAt(char[] text, int index, int limit) {
6559         if (index >= limit || limit > text.length) {
6560             throw new IndexOutOfBoundsException();
6561         }
6562         char c1 = text[index++];
6563         if (isHighSurrogate(c1)) {
6564             if (index < limit) {
6565                 char c2 = text[index];
6566                 if (isLowSurrogate(c2)) {
6567                     return toCodePoint(c1, c2);
6568                 }
6569             }
6570         }
6571         return c1;
6572     }
6573 
6574     /**
6575      * Same as {@link Character#codePointBefore(CharSequence, int)}.
6576      * Return the code point before index.
6577      * This examines only the characters at index-1 and index-2.
6578      *
6579      * @param seq the characters to check
6580      * @param index the index after the last or only char forming the code point
6581      * @return the code point before the index
6582      * @stable ICU 3.0
6583      */
codePointBefore(CharSequence seq, int index)6584     public static final int codePointBefore(CharSequence seq, int index) {
6585         char c2 = seq.charAt(--index);
6586         if (isLowSurrogate(c2)) {
6587             if (index > 0) {
6588                 char c1 = seq.charAt(--index);
6589                 if (isHighSurrogate(c1)) {
6590                     return toCodePoint(c1, c2);
6591                 }
6592             }
6593         }
6594         return c2;
6595     }
6596 
6597     /**
6598      * Same as {@link Character#codePointBefore(char[], int)}.
6599      * Returns the code point before index.
6600      * This examines only the characters at index-1 and index-2.
6601      *
6602      * @param text the characters to check
6603      * @param index the index after the last or only char forming the code point
6604      * @return the code point before the index
6605      * @stable ICU 3.0
6606      */
codePointBefore(char[] text, int index)6607     public static final int codePointBefore(char[] text, int index) {
6608         char c2 = text[--index];
6609         if (isLowSurrogate(c2)) {
6610             if (index > 0) {
6611                 char c1 = text[--index];
6612                 if (isHighSurrogate(c1)) {
6613                     return toCodePoint(c1, c2);
6614                 }
6615             }
6616         }
6617         return c2;
6618     }
6619 
6620     /**
6621      * Same as {@link Character#codePointBefore(char[], int, int)}.
6622      * Return the code point before index.
6623      * This examines only the characters at index-1 and index-2.
6624      *
6625      * @param text the characters to check
6626      * @param index the index after the last or only char forming the code point
6627      * @param limit the start of the valid text
6628      * @return the code point before the index
6629      * @stable ICU 3.0
6630      */
codePointBefore(char[] text, int index, int limit)6631     public static final int codePointBefore(char[] text, int index, int limit) {
6632         if (index <= limit || limit < 0) {
6633             throw new IndexOutOfBoundsException();
6634         }
6635         char c2 = text[--index];
6636         if (isLowSurrogate(c2)) {
6637             if (index > limit) {
6638                 char c1 = text[--index];
6639                 if (isHighSurrogate(c1)) {
6640                     return toCodePoint(c1, c2);
6641                 }
6642             }
6643         }
6644         return c2;
6645     }
6646 
6647     /**
6648      * Same as {@link Character#toChars(int, char[], int)}.
6649      * Writes the chars representing the
6650      * code point into the destination at the given index.
6651      *
6652      * @param cp the code point to convert
6653      * @param dst the destination array into which to put the char(s) representing the code point
6654      * @param dstIndex the index at which to put the first (or only) char
6655      * @return the count of the number of chars written (1 or 2)
6656      * @throws IllegalArgumentException if cp is not a valid code point
6657      * @stable ICU 3.0
6658      */
toChars(int cp, char[] dst, int dstIndex)6659     public static final int toChars(int cp, char[] dst, int dstIndex) {
6660         return Character.toChars(cp, dst, dstIndex);
6661     }
6662 
6663     /**
6664      * Same as {@link Character#toChars(int)}.
6665      * Returns a char array representing the code point.
6666      *
6667      * @param cp the code point to convert
6668      * @return an array containing the char(s) representing the code point
6669      * @throws IllegalArgumentException if cp is not a valid code point
6670      * @stable ICU 3.0
6671      */
toChars(int cp)6672     public static final char[] toChars(int cp) {
6673         return Character.toChars(cp);
6674     }
6675 
6676     /**
6677      * Equivalent to the {@link Character#getDirectionality(char)} method, for
6678      * convenience. Returns a byte representing the directionality of the
6679      * character.
6680      *
6681      * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns
6682      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
6683      *
6684      * {@icunote} The return value must be tested using the constants defined in {@link
6685      * UCharacterDirection} and its interface {@link
6686      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
6687      * defined by <code>java.lang.Character</code>.
6688      * @param cp the code point to check
6689      * @return the directionality of the code point
6690      * @see #getDirection
6691      * @stable ICU 3.0
6692      */
getDirectionality(int cp)6693     public static byte getDirectionality(int cp)
6694     {
6695         return (byte)getDirection(cp);
6696     }
6697 
6698     /**
6699      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
6700      * method, for convenience.  Counts the number of code points in the range
6701      * of text.
6702      * @param text the characters to check
6703      * @param start the start of the range
6704      * @param limit the limit of the range
6705      * @return the number of code points in the range
6706      * @stable ICU 3.0
6707      */
codePointCount(CharSequence text, int start, int limit)6708     public static int codePointCount(CharSequence text, int start, int limit) {
6709         if (start < 0 || limit < start || limit > text.length()) {
6710             throw new IndexOutOfBoundsException("start (" + start +
6711                     ") or limit (" + limit +
6712                     ") invalid or out of range 0, " + text.length());
6713         }
6714 
6715         int len = limit - start;
6716         while (limit > start) {
6717             char ch = text.charAt(--limit);
6718             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6719                 ch = text.charAt(--limit);
6720                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6721                     --len;
6722                     break;
6723                 }
6724             }
6725         }
6726         return len;
6727     }
6728 
6729     /**
6730      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
6731      * convenience. Counts the number of code points in the range of text.
6732      * @param text the characters to check
6733      * @param start the start of the range
6734      * @param limit the limit of the range
6735      * @return the number of code points in the range
6736      * @stable ICU 3.0
6737      */
codePointCount(char[] text, int start, int limit)6738     public static int codePointCount(char[] text, int start, int limit) {
6739         if (start < 0 || limit < start || limit > text.length) {
6740             throw new IndexOutOfBoundsException("start (" + start +
6741                     ") or limit (" + limit +
6742                     ") invalid or out of range 0, " + text.length);
6743         }
6744 
6745         int len = limit - start;
6746         while (limit > start) {
6747             char ch = text[--limit];
6748             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6749                 ch = text[--limit];
6750                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6751                     --len;
6752                     break;
6753                 }
6754             }
6755         }
6756         return len;
6757     }
6758 
6759     /**
6760      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
6761      * method, for convenience.  Adjusts the char index by a code point offset.
6762      * @param text the characters to check
6763      * @param index the index to adjust
6764      * @param codePointOffset the number of code points by which to offset the index
6765      * @return the adjusted index
6766      * @stable ICU 3.0
6767      */
offsetByCodePoints(CharSequence text, int index, int codePointOffset)6768     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
6769         if (index < 0 || index > text.length()) {
6770             throw new IndexOutOfBoundsException("index ( " + index +
6771                     ") out of range 0, " + text.length());
6772         }
6773 
6774         if (codePointOffset < 0) {
6775             while (++codePointOffset <= 0) {
6776                 char ch = text.charAt(--index);
6777                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
6778                     ch = text.charAt(--index);
6779                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6780                         if (++codePointOffset > 0) {
6781                             return index+1;
6782                         }
6783                     }
6784                 }
6785             }
6786         } else {
6787             int limit = text.length();
6788             while (--codePointOffset >= 0) {
6789                 char ch = text.charAt(index++);
6790                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6791                     ch = text.charAt(index++);
6792                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6793                         if (--codePointOffset < 0) {
6794                             return index-1;
6795                         }
6796                     }
6797                 }
6798             }
6799         }
6800 
6801         return index;
6802     }
6803 
6804     /**
6805      * Equivalent to the
6806      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
6807      * method, for convenience.  Adjusts the char index by a code point offset.
6808      * @param text the characters to check
6809      * @param start the start of the range to check
6810      * @param count the length of the range to check
6811      * @param index the index to adjust
6812      * @param codePointOffset the number of code points by which to offset the index
6813      * @return the adjusted index
6814      * @stable ICU 3.0
6815      */
offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6816     public static int offsetByCodePoints(char[] text, int start, int count, int index,
6817             int codePointOffset) {
6818         int limit = start + count;
6819         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
6820             throw new IndexOutOfBoundsException("index ( " + index +
6821                     ") out of range " + start +
6822                     ", " + limit +
6823                     " in array 0, " + text.length);
6824         }
6825 
6826         if (codePointOffset < 0) {
6827             while (++codePointOffset <= 0) {
6828                 char ch = text[--index];
6829                 if (index < start) {
6830                     throw new IndexOutOfBoundsException("index ( " + index +
6831                             ") < start (" + start +
6832                             ")");
6833                 }
6834                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
6835                     ch = text[--index];
6836                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6837                         if (++codePointOffset > 0) {
6838                             return index+1;
6839                         }
6840                     }
6841                 }
6842             }
6843         } else {
6844             while (--codePointOffset >= 0) {
6845                 char ch = text[index++];
6846                 if (index > limit) {
6847                     throw new IndexOutOfBoundsException("index ( " + index +
6848                             ") > limit (" + limit +
6849                             ")");
6850                 }
6851                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6852                     ch = text[index++];
6853                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6854                         if (--codePointOffset < 0) {
6855                             return index-1;
6856                         }
6857                     }
6858                 }
6859             }
6860         }
6861 
6862         return index;
6863     }
6864 
6865     // private variables -------------------------------------------------
6866 
6867     /**
6868      * To get the last character out from a data type
6869      */
6870     private static final int LAST_CHAR_MASK_ = 0xFFFF;
6871 
6872     //    /**
6873     //     * To get the last byte out from a data type
6874     //     */
6875     //    private static final int LAST_BYTE_MASK_ = 0xFF;
6876     //
6877     //    /**
6878     //     * Shift 16 bits
6879     //     */
6880     //    private static final int SHIFT_16_ = 16;
6881     //
6882     //    /**
6883     //     * Shift 24 bits
6884     //     */
6885     //    private static final int SHIFT_24_ = 24;
6886     //
6887     //    /**
6888     //     * Decimal radix
6889     //     */
6890     //    private static final int DECIMAL_RADIX_ = 10;
6891 
6892     /**
6893      * No break space code point
6894      */
6895     private static final int NO_BREAK_SPACE_ = 0xA0;
6896 
6897     /**
6898      * Figure space code point
6899      */
6900     private static final int FIGURE_SPACE_ = 0x2007;
6901 
6902     /**
6903      * Narrow no break space code point
6904      */
6905     private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
6906 
6907     /**
6908      * Ideographic number zero code point
6909      */
6910     private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
6911 
6912     /**
6913      * CJK Ideograph, First code point
6914      */
6915     private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
6916 
6917     /**
6918      * CJK Ideograph, Second code point
6919      */
6920     private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
6921 
6922     /**
6923      * CJK Ideograph, Third code point
6924      */
6925     private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
6926 
6927     /**
6928      * CJK Ideograph, Fourth code point
6929      */
6930     private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;
6931 
6932     /**
6933      * CJK Ideograph, FIFTH code point
6934      */
6935     private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
6936 
6937     /**
6938      * CJK Ideograph, Sixth code point
6939      */
6940     private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
6941 
6942     /**
6943      * CJK Ideograph, Seventh code point
6944      */
6945     private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
6946 
6947     /**
6948      * CJK Ideograph, Eighth code point
6949      */
6950     private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
6951 
6952     /**
6953      * CJK Ideograph, Nineth code point
6954      */
6955     private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
6956 
6957     /**
6958      * Application Program command code point
6959      */
6960     private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;
6961 
6962     /**
6963      * Unit separator code point
6964      */
6965     private static final int UNIT_SEPARATOR_ = 0x001F;
6966 
6967     /**
6968      * Delete code point
6969      */
6970     private static final int DELETE_ = 0x007F;
6971 
6972     /**
6973      * Han digit characters
6974      */
6975     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
6976     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
6977     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
6978     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
6979     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
6980     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
6981     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
6982     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
6983     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
6984     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
6985     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
6986     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
6987     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
6988     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
6989     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
6990     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
6991     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
6992     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6993 
6994     // private constructor -----------------------------------------------
6995     ///CLOVER:OFF
6996     /**
6997      * Private constructor to prevent instantiation
6998      */
UCharacter()6999     private UCharacter()
7000     {
7001     }
7002     ///CLOVER:ON
7003 }
7004