• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /**
4  *******************************************************************************
5  * Copyright (C) 1996-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.lang;
11 
12 import java.lang.ref.SoftReference;
13 import java.util.HashMap;
14 import java.util.Iterator;
15 import java.util.Locale;
16 import java.util.Map;
17 
18 import com.ibm.icu.impl.CaseMapImpl;
19 import com.ibm.icu.impl.IllegalIcuArgumentException;
20 import com.ibm.icu.impl.Trie2;
21 import com.ibm.icu.impl.UBiDiProps;
22 import com.ibm.icu.impl.UCaseProps;
23 import com.ibm.icu.impl.UCharacterName;
24 import com.ibm.icu.impl.UCharacterNameChoice;
25 import com.ibm.icu.impl.UCharacterProperty;
26 import com.ibm.icu.impl.UCharacterUtility;
27 import com.ibm.icu.impl.UPropertyAliases;
28 import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
29 import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
30 import com.ibm.icu.text.BreakIterator;
31 import com.ibm.icu.text.Normalizer2;
32 import com.ibm.icu.util.RangeValueIterator;
33 import com.ibm.icu.util.ULocale;
34 import com.ibm.icu.util.ValueIterator;
35 import com.ibm.icu.util.VersionInfo;
36 
37 /**
38  * {@icuenhanced java.lang.Character}.{@icu _usage_}
39  *
40  * <p>The UCharacter class provides extensions to the {@link java.lang.Character} class.
41  * These extensions provide support for more Unicode properties.
42  * Each ICU release supports the latest version of Unicode available at that time.
43  *
44  * <p>For some time before Java 5 added support for supplementary Unicode code points,
45  * the ICU UCharacter class and many other ICU classes already supported them.
46  * Some UCharacter methods and constants were widened slightly differently than
47  * how the Character class methods and constants were widened later.
48  * In particular, {@link Character#MAX_VALUE} is still a char with the value U+FFFF,
49  * while the {@link UCharacter#MAX_VALUE} is an int with the value U+10FFFF.
50  *
51  * <p>Code points are represented in these API using ints. While it would be
52  * more convenient in Java to have a separate primitive datatype for them,
53  * ints suffice in the meantime.
54  *
55  * <p>To use this class please add the jar file name icu4j.jar to the
56  * class path, since it contains data files which supply the information used
57  * by this file.<br>
58  * E.g. In Windows <br>
59  * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/icu4j.jar</code>.<br>
60  * Otherwise, another method would be to copy the files uprops.dat and
61  * unames.icu from the icu4j source subdirectory
62  * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory
63  * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>.
64  *
65  * <p>Aside from the additions for UTF-16 support, and the updated Unicode
66  * properties, the main differences between UCharacter and Character are:
67  * <ul>
68  * <li> UCharacter is not designed to be a char wrapper and does not have
69  *      APIs that involve management of that single char.<br>
70  *      These include:
71  *      <ul>
72  *        <li> char charValue(),
73  *        <li> int compareTo(java.lang.Character, java.lang.Character), etc.
74  *      </ul>
75  * <li> UCharacter does not include Character APIs that are deprecated, nor
76  *      does it include the Java-specific character information, such as
77  *      boolean isJavaIdentifierPart(char ch).
78  * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric
79  *      values '10' - '35'. UCharacter also does this in digit and
80  *      getNumericValue, to adhere to the java semantics of these
81  *      methods.  New methods unicodeDigit, and
82  *      getUnicodeNumericValue do not treat the above code points
83  *      as having numeric values.  This is a semantic change from ICU4J 1.3.1.
84  * </ul>
85  * <p>
86  * Further detail on differences can be determined using the program
87  *        <a href=
88  * "http://source.icu-project.org/repos/icu/icu4j/trunk/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java">
89  *        com.ibm.icu.dev.test.lang.UCharacterCompare</a>
90  * <p>
91  * In addition to Java compatibility functions, which calculate derived properties,
92  * this API provides low-level access to the Unicode Character Database.
93  * <p>
94  * Unicode assigns each code point (not just assigned characters) values for
95  * many properties.
96  * Most of them are simple boolean flags, or constants from a small enumerated list.
97  * For some properties, values are strings or other relatively more complex types.
98  * <p>
99  * For more information see
100  * <a href="http://www.unicode.org/ucd/">"About the Unicode Character Database"</a>
101  * (http://www.unicode.org/ucd/)
102  * and the <a href="http://www.icu-project.org/userguide/properties.html">ICU
103  * User Guide chapter on Properties</a>
104  * (http://www.icu-project.org/userguide/properties.html).
105  * <p>
106  * There are also functions that provide easy migration from C/POSIX functions
107  * like isblank(). Their use is generally discouraged because the C/POSIX
108  * standards do not define their semantics beyond the ASCII range, which means
109  * that different implementations exhibit very different behavior.
110  * Instead, Unicode properties should be used directly.
111  * <p>
112  * There are also only a few, broad C/POSIX character classes, and they tend
113  * to be used for conflicting purposes. For example, the "isalpha()" class
114  * is sometimes used to determine word boundaries, while a more sophisticated
115  * approach would at least distinguish initial letters from continuation
116  * characters (the latter including combining marks).
117  * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
118  * Another example: There is no "istitle()" class for titlecase characters.
119  * <p>
120  * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
121  * ICU implements them according to the Standard Recommendations in
122  * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
123  * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
124  * <p>
125  * API access for C/POSIX character classes is as follows:
126  * <pre>{@code
127  * - alpha:     isUAlphabetic(c) or hasBinaryProperty(c, UProperty.ALPHABETIC)
128  * - lower:     isULowercase(c) or hasBinaryProperty(c, UProperty.LOWERCASE)
129  * - upper:     isUUppercase(c) or hasBinaryProperty(c, UProperty.UPPERCASE)
130  * - punct:     ((1<<getType(c)) & ((1<<DASH_PUNCTUATION)|(1<<START_PUNCTUATION)|
131  *               (1<<END_PUNCTUATION)|(1<<CONNECTOR_PUNCTUATION)|(1<<OTHER_PUNCTUATION)|
132  *               (1<<INITIAL_PUNCTUATION)|(1<<FINAL_PUNCTUATION)))!=0
133  * - digit:     isDigit(c) or getType(c)==DECIMAL_DIGIT_NUMBER
134  * - xdigit:    hasBinaryProperty(c, UProperty.POSIX_XDIGIT)
135  * - alnum:     hasBinaryProperty(c, UProperty.POSIX_ALNUM)
136  * - space:     isUWhiteSpace(c) or hasBinaryProperty(c, UProperty.WHITE_SPACE)
137  * - blank:     hasBinaryProperty(c, UProperty.POSIX_BLANK)
138  * - cntrl:     getType(c)==CONTROL
139  * - graph:     hasBinaryProperty(c, UProperty.POSIX_GRAPH)
140  * - print:     hasBinaryProperty(c, UProperty.POSIX_PRINT)}</pre>
141  * <p>
142  * The C/POSIX character classes are also available in UnicodeSet patterns,
143  * using patterns like [:graph:] or \p{graph}.
144  *
145  * <p>{@icunote} There are several ICU (and Java) whitespace functions.
146  * Comparison:<ul>
147  * <li> isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
148  *       most of general categories "Z" (separators) + most whitespace ISO controls
149  *       (including no-break spaces, but excluding IS1..IS4)
150  * <li> isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
151  * <li> isSpaceChar: just Z (including no-break spaces)</ul>
152  *
153  * <p>
154  * This class is not subclassable.
155  *
156  * @author Syn Wee Quek
157  * @stable ICU 2.1
158  * @see com.ibm.icu.lang.UCharacterEnums
159  */
160 
161 public final class UCharacter implements ECharacterCategory, ECharacterDirection
162 {
163     // public inner classes ----------------------------------------------
164 
165     /**
166      * {@icuenhanced java.lang.Character.UnicodeBlock}.{@icu _usage_}
167      *
168      * A family of character subsets representing the character blocks in the
169      * Unicode specification, generated from Unicode Data file Blocks.txt.
170      * Character blocks generally define characters used for a specific script
171      * or purpose. A character is contained by at most one Unicode block.
172      *
173      * {@icunote} All fields named XXX_ID are specific to ICU.
174      *
175      * @stable ICU 2.4
176      */
177     public static final class UnicodeBlock extends Character.Subset
178     {
179         // block id corresponding to icu4c -----------------------------------
180 
181         /**
182          * @stable ICU 2.4
183          */
184         public static final int INVALID_CODE_ID = -1;
185         /**
186          * @stable ICU 2.4
187          */
188         public static final int BASIC_LATIN_ID = 1;
189         /**
190          * @stable ICU 2.4
191          */
192         public static final int LATIN_1_SUPPLEMENT_ID = 2;
193         /**
194          * @stable ICU 2.4
195          */
196         public static final int LATIN_EXTENDED_A_ID = 3;
197         /**
198          * @stable ICU 2.4
199          */
200         public static final int LATIN_EXTENDED_B_ID = 4;
201         /**
202          * @stable ICU 2.4
203          */
204         public static final int IPA_EXTENSIONS_ID = 5;
205         /**
206          * @stable ICU 2.4
207          */
208         public static final int SPACING_MODIFIER_LETTERS_ID = 6;
209         /**
210          * @stable ICU 2.4
211          */
212         public static final int COMBINING_DIACRITICAL_MARKS_ID = 7;
213         /**
214          * Unicode 3.2 renames this block to "Greek and Coptic".
215          * @stable ICU 2.4
216          */
217         public static final int GREEK_ID = 8;
218         /**
219          * @stable ICU 2.4
220          */
221         public static final int CYRILLIC_ID = 9;
222         /**
223          * @stable ICU 2.4
224          */
225         public static final int ARMENIAN_ID = 10;
226         /**
227          * @stable ICU 2.4
228          */
229         public static final int HEBREW_ID = 11;
230         /**
231          * @stable ICU 2.4
232          */
233         public static final int ARABIC_ID = 12;
234         /**
235          * @stable ICU 2.4
236          */
237         public static final int SYRIAC_ID = 13;
238         /**
239          * @stable ICU 2.4
240          */
241         public static final int THAANA_ID = 14;
242         /**
243          * @stable ICU 2.4
244          */
245         public static final int DEVANAGARI_ID = 15;
246         /**
247          * @stable ICU 2.4
248          */
249         public static final int BENGALI_ID = 16;
250         /**
251          * @stable ICU 2.4
252          */
253         public static final int GURMUKHI_ID = 17;
254         /**
255          * @stable ICU 2.4
256          */
257         public static final int GUJARATI_ID = 18;
258         /**
259          * @stable ICU 2.4
260          */
261         public static final int ORIYA_ID = 19;
262         /**
263          * @stable ICU 2.4
264          */
265         public static final int TAMIL_ID = 20;
266         /**
267          * @stable ICU 2.4
268          */
269         public static final int TELUGU_ID = 21;
270         /**
271          * @stable ICU 2.4
272          */
273         public static final int KANNADA_ID = 22;
274         /**
275          * @stable ICU 2.4
276          */
277         public static final int MALAYALAM_ID = 23;
278         /**
279          * @stable ICU 2.4
280          */
281         public static final int SINHALA_ID = 24;
282         /**
283          * @stable ICU 2.4
284          */
285         public static final int THAI_ID = 25;
286         /**
287          * @stable ICU 2.4
288          */
289         public static final int LAO_ID = 26;
290         /**
291          * @stable ICU 2.4
292          */
293         public static final int TIBETAN_ID = 27;
294         /**
295          * @stable ICU 2.4
296          */
297         public static final int MYANMAR_ID = 28;
298         /**
299          * @stable ICU 2.4
300          */
301         public static final int GEORGIAN_ID = 29;
302         /**
303          * @stable ICU 2.4
304          */
305         public static final int HANGUL_JAMO_ID = 30;
306         /**
307          * @stable ICU 2.4
308          */
309         public static final int ETHIOPIC_ID = 31;
310         /**
311          * @stable ICU 2.4
312          */
313         public static final int CHEROKEE_ID = 32;
314         /**
315          * @stable ICU 2.4
316          */
317         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID = 33;
318         /**
319          * @stable ICU 2.4
320          */
321         public static final int OGHAM_ID = 34;
322         /**
323          * @stable ICU 2.4
324          */
325         public static final int RUNIC_ID = 35;
326         /**
327          * @stable ICU 2.4
328          */
329         public static final int KHMER_ID = 36;
330         /**
331          * @stable ICU 2.4
332          */
333         public static final int MONGOLIAN_ID = 37;
334         /**
335          * @stable ICU 2.4
336          */
337         public static final int LATIN_EXTENDED_ADDITIONAL_ID = 38;
338         /**
339          * @stable ICU 2.4
340          */
341         public static final int GREEK_EXTENDED_ID = 39;
342         /**
343          * @stable ICU 2.4
344          */
345         public static final int GENERAL_PUNCTUATION_ID = 40;
346         /**
347          * @stable ICU 2.4
348          */
349         public static final int SUPERSCRIPTS_AND_SUBSCRIPTS_ID = 41;
350         /**
351          * @stable ICU 2.4
352          */
353         public static final int CURRENCY_SYMBOLS_ID = 42;
354         /**
355          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
356          * Symbols".
357          * @stable ICU 2.4
358          */
359         public static final int COMBINING_MARKS_FOR_SYMBOLS_ID = 43;
360         /**
361          * @stable ICU 2.4
362          */
363         public static final int LETTERLIKE_SYMBOLS_ID = 44;
364         /**
365          * @stable ICU 2.4
366          */
367         public static final int NUMBER_FORMS_ID = 45;
368         /**
369          * @stable ICU 2.4
370          */
371         public static final int ARROWS_ID = 46;
372         /**
373          * @stable ICU 2.4
374          */
375         public static final int MATHEMATICAL_OPERATORS_ID = 47;
376         /**
377          * @stable ICU 2.4
378          */
379         public static final int MISCELLANEOUS_TECHNICAL_ID = 48;
380         /**
381          * @stable ICU 2.4
382          */
383         public static final int CONTROL_PICTURES_ID = 49;
384         /**
385          * @stable ICU 2.4
386          */
387         public static final int OPTICAL_CHARACTER_RECOGNITION_ID = 50;
388         /**
389          * @stable ICU 2.4
390          */
391         public static final int ENCLOSED_ALPHANUMERICS_ID = 51;
392         /**
393          * @stable ICU 2.4
394          */
395         public static final int BOX_DRAWING_ID = 52;
396         /**
397          * @stable ICU 2.4
398          */
399         public static final int BLOCK_ELEMENTS_ID = 53;
400         /**
401          * @stable ICU 2.4
402          */
403         public static final int GEOMETRIC_SHAPES_ID = 54;
404         /**
405          * @stable ICU 2.4
406          */
407         public static final int MISCELLANEOUS_SYMBOLS_ID = 55;
408         /**
409          * @stable ICU 2.4
410          */
411         public static final int DINGBATS_ID = 56;
412         /**
413          * @stable ICU 2.4
414          */
415         public static final int BRAILLE_PATTERNS_ID = 57;
416         /**
417          * @stable ICU 2.4
418          */
419         public static final int CJK_RADICALS_SUPPLEMENT_ID = 58;
420         /**
421          * @stable ICU 2.4
422          */
423         public static final int KANGXI_RADICALS_ID = 59;
424         /**
425          * @stable ICU 2.4
426          */
427         public static final int IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID = 60;
428         /**
429          * @stable ICU 2.4
430          */
431         public static final int CJK_SYMBOLS_AND_PUNCTUATION_ID = 61;
432         /**
433          * @stable ICU 2.4
434          */
435         public static final int HIRAGANA_ID = 62;
436         /**
437          * @stable ICU 2.4
438          */
439         public static final int KATAKANA_ID = 63;
440         /**
441          * @stable ICU 2.4
442          */
443         public static final int BOPOMOFO_ID = 64;
444         /**
445          * @stable ICU 2.4
446          */
447         public static final int HANGUL_COMPATIBILITY_JAMO_ID = 65;
448         /**
449          * @stable ICU 2.4
450          */
451         public static final int KANBUN_ID = 66;
452         /**
453          * @stable ICU 2.4
454          */
455         public static final int BOPOMOFO_EXTENDED_ID = 67;
456         /**
457          * @stable ICU 2.4
458          */
459         public static final int ENCLOSED_CJK_LETTERS_AND_MONTHS_ID = 68;
460         /**
461          * @stable ICU 2.4
462          */
463         public static final int CJK_COMPATIBILITY_ID = 69;
464         /**
465          * @stable ICU 2.4
466          */
467         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID = 70;
468         /**
469          * @stable ICU 2.4
470          */
471         public static final int CJK_UNIFIED_IDEOGRAPHS_ID = 71;
472         /**
473          * @stable ICU 2.4
474          */
475         public static final int YI_SYLLABLES_ID = 72;
476         /**
477          * @stable ICU 2.4
478          */
479         public static final int YI_RADICALS_ID = 73;
480         /**
481          * @stable ICU 2.4
482          */
483         public static final int HANGUL_SYLLABLES_ID = 74;
484         /**
485          * @stable ICU 2.4
486          */
487         public static final int HIGH_SURROGATES_ID = 75;
488         /**
489          * @stable ICU 2.4
490          */
491         public static final int HIGH_PRIVATE_USE_SURROGATES_ID = 76;
492         /**
493          * @stable ICU 2.4
494          */
495         public static final int LOW_SURROGATES_ID = 77;
496         /**
497          * Same as public static final int PRIVATE_USE_ID.
498          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
499          * and multiple code point ranges had this block.
500          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
501          * and adds separate blocks for the supplementary PUAs.
502          * @stable ICU 2.4
503          */
504         public static final int PRIVATE_USE_AREA_ID = 78;
505         /**
506          * Same as public static final int PRIVATE_USE_AREA_ID.
507          * Until Unicode 3.1.1; the corresponding block name was "Private Use";
508          * and multiple code point ranges had this block.
509          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
510          * and adds separate blocks for the supplementary PUAs.
511          * @stable ICU 2.4
512          */
513         public static final int PRIVATE_USE_ID = PRIVATE_USE_AREA_ID;
514         /**
515          * @stable ICU 2.4
516          */
517         public static final int CJK_COMPATIBILITY_IDEOGRAPHS_ID = 79;
518         /**
519          * @stable ICU 2.4
520          */
521         public static final int ALPHABETIC_PRESENTATION_FORMS_ID = 80;
522         /**
523          * @stable ICU 2.4
524          */
525         public static final int ARABIC_PRESENTATION_FORMS_A_ID = 81;
526         /**
527          * @stable ICU 2.4
528          */
529         public static final int COMBINING_HALF_MARKS_ID = 82;
530         /**
531          * @stable ICU 2.4
532          */
533         public static final int CJK_COMPATIBILITY_FORMS_ID = 83;
534         /**
535          * @stable ICU 2.4
536          */
537         public static final int SMALL_FORM_VARIANTS_ID = 84;
538         /**
539          * @stable ICU 2.4
540          */
541         public static final int ARABIC_PRESENTATION_FORMS_B_ID = 85;
542         /**
543          * @stable ICU 2.4
544          */
545         public static final int SPECIALS_ID = 86;
546         /**
547          * @stable ICU 2.4
548          */
549         public static final int HALFWIDTH_AND_FULLWIDTH_FORMS_ID = 87;
550         /**
551          * @stable ICU 2.4
552          */
553         public static final int OLD_ITALIC_ID = 88;
554         /**
555          * @stable ICU 2.4
556          */
557         public static final int GOTHIC_ID = 89;
558         /**
559          * @stable ICU 2.4
560          */
561         public static final int DESERET_ID = 90;
562         /**
563          * @stable ICU 2.4
564          */
565         public static final int BYZANTINE_MUSICAL_SYMBOLS_ID = 91;
566         /**
567          * @stable ICU 2.4
568          */
569         public static final int MUSICAL_SYMBOLS_ID = 92;
570         /**
571          * @stable ICU 2.4
572          */
573         public static final int MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID = 93;
574         /**
575          * @stable ICU 2.4
576          */
577         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID = 94;
578         /**
579          * @stable ICU 2.4
580          */
581         public static final int
582         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID = 95;
583         /**
584          * @stable ICU 2.4
585          */
586         public static final int TAGS_ID = 96;
587 
588         // New blocks in Unicode 3.2
589 
590         /**
591          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
592          * @stable ICU 2.4
593          */
594         public static final int CYRILLIC_SUPPLEMENTARY_ID = 97;
595         /**
596          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
597          * @stable ICU 3.0
598          */
599 
600         public static final int CYRILLIC_SUPPLEMENT_ID = 97;
601         /**
602          * @stable ICU 2.4
603          */
604         public static final int TAGALOG_ID = 98;
605         /**
606          * @stable ICU 2.4
607          */
608         public static final int HANUNOO_ID = 99;
609         /**
610          * @stable ICU 2.4
611          */
612         public static final int BUHID_ID = 100;
613         /**
614          * @stable ICU 2.4
615          */
616         public static final int TAGBANWA_ID = 101;
617         /**
618          * @stable ICU 2.4
619          */
620         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID = 102;
621         /**
622          * @stable ICU 2.4
623          */
624         public static final int SUPPLEMENTAL_ARROWS_A_ID = 103;
625         /**
626          * @stable ICU 2.4
627          */
628         public static final int SUPPLEMENTAL_ARROWS_B_ID = 104;
629         /**
630          * @stable ICU 2.4
631          */
632         public static final int MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID = 105;
633         /**
634          * @stable ICU 2.4
635          */
636         public static final int SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID = 106;
637         /**
638          * @stable ICU 2.4
639          */
640         public static final int KATAKANA_PHONETIC_EXTENSIONS_ID = 107;
641         /**
642          * @stable ICU 2.4
643          */
644         public static final int VARIATION_SELECTORS_ID = 108;
645         /**
646          * @stable ICU 2.4
647          */
648         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID = 109;
649         /**
650          * @stable ICU 2.4
651          */
652         public static final int SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID = 110;
653 
654         /**
655          * @stable ICU 2.6
656          */
657         public static final int LIMBU_ID = 111; /*[1900]*/
658         /**
659          * @stable ICU 2.6
660          */
661         public static final int TAI_LE_ID = 112; /*[1950]*/
662         /**
663          * @stable ICU 2.6
664          */
665         public static final int KHMER_SYMBOLS_ID = 113; /*[19E0]*/
666         /**
667          * @stable ICU 2.6
668          */
669         public static final int PHONETIC_EXTENSIONS_ID = 114; /*[1D00]*/
670         /**
671          * @stable ICU 2.6
672          */
673         public static final int MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID = 115; /*[2B00]*/
674         /**
675          * @stable ICU 2.6
676          */
677         public static final int YIJING_HEXAGRAM_SYMBOLS_ID = 116; /*[4DC0]*/
678         /**
679          * @stable ICU 2.6
680          */
681         public static final int LINEAR_B_SYLLABARY_ID = 117; /*[10000]*/
682         /**
683          * @stable ICU 2.6
684          */
685         public static final int LINEAR_B_IDEOGRAMS_ID = 118; /*[10080]*/
686         /**
687          * @stable ICU 2.6
688          */
689         public static final int AEGEAN_NUMBERS_ID = 119; /*[10100]*/
690         /**
691          * @stable ICU 2.6
692          */
693         public static final int UGARITIC_ID = 120; /*[10380]*/
694         /**
695          * @stable ICU 2.6
696          */
697         public static final int SHAVIAN_ID = 121; /*[10450]*/
698         /**
699          * @stable ICU 2.6
700          */
701         public static final int OSMANYA_ID = 122; /*[10480]*/
702         /**
703          * @stable ICU 2.6
704          */
705         public static final int CYPRIOT_SYLLABARY_ID = 123; /*[10800]*/
706         /**
707          * @stable ICU 2.6
708          */
709         public static final int TAI_XUAN_JING_SYMBOLS_ID = 124; /*[1D300]*/
710         /**
711          * @stable ICU 2.6
712          */
713         public static final int VARIATION_SELECTORS_SUPPLEMENT_ID = 125; /*[E0100]*/
714 
715         /* New blocks in Unicode 4.1 */
716 
717         /**
718          * @stable ICU 3.4
719          */
720         public static final int ANCIENT_GREEK_MUSICAL_NOTATION_ID = 126; /*[1D200]*/
721 
722         /**
723          * @stable ICU 3.4
724          */
725         public static final int ANCIENT_GREEK_NUMBERS_ID = 127; /*[10140]*/
726 
727         /**
728          * @stable ICU 3.4
729          */
730         public static final int ARABIC_SUPPLEMENT_ID = 128; /*[0750]*/
731 
732         /**
733          * @stable ICU 3.4
734          */
735         public static final int BUGINESE_ID = 129; /*[1A00]*/
736 
737         /**
738          * @stable ICU 3.4
739          */
740         public static final int CJK_STROKES_ID = 130; /*[31C0]*/
741 
742         /**
743          * @stable ICU 3.4
744          */
745         public static final int COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID = 131; /*[1DC0]*/
746 
747         /**
748          * @stable ICU 3.4
749          */
750         public static final int COPTIC_ID = 132; /*[2C80]*/
751 
752         /**
753          * @stable ICU 3.4
754          */
755         public static final int ETHIOPIC_EXTENDED_ID = 133; /*[2D80]*/
756 
757         /**
758          * @stable ICU 3.4
759          */
760         public static final int ETHIOPIC_SUPPLEMENT_ID = 134; /*[1380]*/
761 
762         /**
763          * @stable ICU 3.4
764          */
765         public static final int GEORGIAN_SUPPLEMENT_ID = 135; /*[2D00]*/
766 
767         /**
768          * @stable ICU 3.4
769          */
770         public static final int GLAGOLITIC_ID = 136; /*[2C00]*/
771 
772         /**
773          * @stable ICU 3.4
774          */
775         public static final int KHAROSHTHI_ID = 137; /*[10A00]*/
776 
777         /**
778          * @stable ICU 3.4
779          */
780         public static final int MODIFIER_TONE_LETTERS_ID = 138; /*[A700]*/
781 
782         /**
783          * @stable ICU 3.4
784          */
785         public static final int NEW_TAI_LUE_ID = 139; /*[1980]*/
786 
787         /**
788          * @stable ICU 3.4
789          */
790         public static final int OLD_PERSIAN_ID = 140; /*[103A0]*/
791 
792         /**
793          * @stable ICU 3.4
794          */
795         public static final int PHONETIC_EXTENSIONS_SUPPLEMENT_ID = 141; /*[1D80]*/
796 
797         /**
798          * @stable ICU 3.4
799          */
800         public static final int SUPPLEMENTAL_PUNCTUATION_ID = 142; /*[2E00]*/
801 
802         /**
803          * @stable ICU 3.4
804          */
805         public static final int SYLOTI_NAGRI_ID = 143; /*[A800]*/
806 
807         /**
808          * @stable ICU 3.4
809          */
810         public static final int TIFINAGH_ID = 144; /*[2D30]*/
811 
812         /**
813          * @stable ICU 3.4
814          */
815         public static final int VERTICAL_FORMS_ID = 145; /*[FE10]*/
816 
817         /* New blocks in Unicode 5.0 */
818 
819         /**
820          * @stable ICU 3.6
821          */
822         public static final int NKO_ID = 146; /*[07C0]*/
823         /**
824          * @stable ICU 3.6
825          */
826         public static final int BALINESE_ID = 147; /*[1B00]*/
827         /**
828          * @stable ICU 3.6
829          */
830         public static final int LATIN_EXTENDED_C_ID = 148; /*[2C60]*/
831         /**
832          * @stable ICU 3.6
833          */
834         public static final int LATIN_EXTENDED_D_ID = 149; /*[A720]*/
835         /**
836          * @stable ICU 3.6
837          */
838         public static final int PHAGS_PA_ID = 150; /*[A840]*/
839         /**
840          * @stable ICU 3.6
841          */
842         public static final int PHOENICIAN_ID = 151; /*[10900]*/
843         /**
844          * @stable ICU 3.6
845          */
846         public static final int CUNEIFORM_ID = 152; /*[12000]*/
847         /**
848          * @stable ICU 3.6
849          */
850         public static final int CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID = 153; /*[12400]*/
851         /**
852          * @stable ICU 3.6
853          */
854         public static final int COUNTING_ROD_NUMERALS_ID = 154; /*[1D360]*/
855 
856         /**
857          * @stable ICU 4.0
858          */
859         public static final int SUNDANESE_ID = 155; /* [1B80] */
860 
861         /**
862          * @stable ICU 4.0
863          */
864         public static final int LEPCHA_ID = 156; /* [1C00] */
865 
866         /**
867          * @stable ICU 4.0
868          */
869         public static final int OL_CHIKI_ID = 157; /* [1C50] */
870 
871         /**
872          * @stable ICU 4.0
873          */
874         public static final int CYRILLIC_EXTENDED_A_ID = 158; /* [2DE0] */
875 
876         /**
877          * @stable ICU 4.0
878          */
879         public static final int VAI_ID = 159; /* [A500] */
880 
881         /**
882          * @stable ICU 4.0
883          */
884         public static final int CYRILLIC_EXTENDED_B_ID = 160; /* [A640] */
885 
886         /**
887          * @stable ICU 4.0
888          */
889         public static final int SAURASHTRA_ID = 161; /* [A880] */
890 
891         /**
892          * @stable ICU 4.0
893          */
894         public static final int KAYAH_LI_ID = 162; /* [A900] */
895 
896         /**
897          * @stable ICU 4.0
898          */
899         public static final int REJANG_ID = 163; /* [A930] */
900 
901         /**
902          * @stable ICU 4.0
903          */
904         public static final int CHAM_ID = 164; /* [AA00] */
905 
906         /**
907          * @stable ICU 4.0
908          */
909         public static final int ANCIENT_SYMBOLS_ID = 165; /* [10190] */
910 
911         /**
912          * @stable ICU 4.0
913          */
914         public static final int PHAISTOS_DISC_ID = 166; /* [101D0] */
915 
916         /**
917          * @stable ICU 4.0
918          */
919         public static final int LYCIAN_ID = 167; /* [10280] */
920 
921         /**
922          * @stable ICU 4.0
923          */
924         public static final int CARIAN_ID = 168; /* [102A0] */
925 
926         /**
927          * @stable ICU 4.0
928          */
929         public static final int LYDIAN_ID = 169; /* [10920] */
930 
931         /**
932          * @stable ICU 4.0
933          */
934         public static final int MAHJONG_TILES_ID = 170; /* [1F000] */
935 
936         /**
937          * @stable ICU 4.0
938          */
939         public static final int DOMINO_TILES_ID = 171; /* [1F030] */
940 
941         /* New blocks in Unicode 5.2 */
942 
943         /** @stable ICU 4.4 */
944         public static final int SAMARITAN_ID = 172; /*[0800]*/
945         /** @stable ICU 4.4 */
946         public static final int UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID = 173; /*[18B0]*/
947         /** @stable ICU 4.4 */
948         public static final int TAI_THAM_ID = 174; /*[1A20]*/
949         /** @stable ICU 4.4 */
950         public static final int VEDIC_EXTENSIONS_ID = 175; /*[1CD0]*/
951         /** @stable ICU 4.4 */
952         public static final int LISU_ID = 176; /*[A4D0]*/
953         /** @stable ICU 4.4 */
954         public static final int BAMUM_ID = 177; /*[A6A0]*/
955         /** @stable ICU 4.4 */
956         public static final int COMMON_INDIC_NUMBER_FORMS_ID = 178; /*[A830]*/
957         /** @stable ICU 4.4 */
958         public static final int DEVANAGARI_EXTENDED_ID = 179; /*[A8E0]*/
959         /** @stable ICU 4.4 */
960         public static final int HANGUL_JAMO_EXTENDED_A_ID = 180; /*[A960]*/
961         /** @stable ICU 4.4 */
962         public static final int JAVANESE_ID = 181; /*[A980]*/
963         /** @stable ICU 4.4 */
964         public static final int MYANMAR_EXTENDED_A_ID = 182; /*[AA60]*/
965         /** @stable ICU 4.4 */
966         public static final int TAI_VIET_ID = 183; /*[AA80]*/
967         /** @stable ICU 4.4 */
968         public static final int MEETEI_MAYEK_ID = 184; /*[ABC0]*/
969         /** @stable ICU 4.4 */
970         public static final int HANGUL_JAMO_EXTENDED_B_ID = 185; /*[D7B0]*/
971         /** @stable ICU 4.4 */
972         public static final int IMPERIAL_ARAMAIC_ID = 186; /*[10840]*/
973         /** @stable ICU 4.4 */
974         public static final int OLD_SOUTH_ARABIAN_ID = 187; /*[10A60]*/
975         /** @stable ICU 4.4 */
976         public static final int AVESTAN_ID = 188; /*[10B00]*/
977         /** @stable ICU 4.4 */
978         public static final int INSCRIPTIONAL_PARTHIAN_ID = 189; /*[10B40]*/
979         /** @stable ICU 4.4 */
980         public static final int INSCRIPTIONAL_PAHLAVI_ID = 190; /*[10B60]*/
981         /** @stable ICU 4.4 */
982         public static final int OLD_TURKIC_ID = 191; /*[10C00]*/
983         /** @stable ICU 4.4 */
984         public static final int RUMI_NUMERAL_SYMBOLS_ID = 192; /*[10E60]*/
985         /** @stable ICU 4.4 */
986         public static final int KAITHI_ID = 193; /*[11080]*/
987         /** @stable ICU 4.4 */
988         public static final int EGYPTIAN_HIEROGLYPHS_ID = 194; /*[13000]*/
989         /** @stable ICU 4.4 */
990         public static final int ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID = 195; /*[1F100]*/
991         /** @stable ICU 4.4 */
992         public static final int ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID = 196; /*[1F200]*/
993         /** @stable ICU 4.4 */
994         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID = 197; /*[2A700]*/
995 
996         /* New blocks in Unicode 6.0 */
997 
998         /** @stable ICU 4.6 */
999         public static final int MANDAIC_ID = 198; /*[0840]*/
1000         /** @stable ICU 4.6 */
1001         public static final int BATAK_ID = 199; /*[1BC0]*/
1002         /** @stable ICU 4.6 */
1003         public static final int ETHIOPIC_EXTENDED_A_ID = 200; /*[AB00]*/
1004         /** @stable ICU 4.6 */
1005         public static final int BRAHMI_ID = 201; /*[11000]*/
1006         /** @stable ICU 4.6 */
1007         public static final int BAMUM_SUPPLEMENT_ID = 202; /*[16800]*/
1008         /** @stable ICU 4.6 */
1009         public static final int KANA_SUPPLEMENT_ID = 203; /*[1B000]*/
1010         /** @stable ICU 4.6 */
1011         public static final int PLAYING_CARDS_ID = 204; /*[1F0A0]*/
1012         /** @stable ICU 4.6 */
1013         public static final int MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID = 205; /*[1F300]*/
1014         /** @stable ICU 4.6 */
1015         public static final int EMOTICONS_ID = 206; /*[1F600]*/
1016         /** @stable ICU 4.6 */
1017         public static final int TRANSPORT_AND_MAP_SYMBOLS_ID = 207; /*[1F680]*/
1018         /** @stable ICU 4.6 */
1019         public static final int ALCHEMICAL_SYMBOLS_ID = 208; /*[1F700]*/
1020         /** @stable ICU 4.6 */
1021         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID = 209; /*[2B740]*/
1022 
1023         /* New blocks in Unicode 6.1 */
1024 
1025         /** @stable ICU 49 */
1026         public static final int ARABIC_EXTENDED_A_ID = 210; /*[08A0]*/
1027         /** @stable ICU 49 */
1028         public static final int ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID = 211; /*[1EE00]*/
1029         /** @stable ICU 49 */
1030         public static final int CHAKMA_ID = 212; /*[11100]*/
1031         /** @stable ICU 49 */
1032         public static final int MEETEI_MAYEK_EXTENSIONS_ID = 213; /*[AAE0]*/
1033         /** @stable ICU 49 */
1034         public static final int MEROITIC_CURSIVE_ID = 214; /*[109A0]*/
1035         /** @stable ICU 49 */
1036         public static final int MEROITIC_HIEROGLYPHS_ID = 215; /*[10980]*/
1037         /** @stable ICU 49 */
1038         public static final int MIAO_ID = 216; /*[16F00]*/
1039         /** @stable ICU 49 */
1040         public static final int SHARADA_ID = 217; /*[11180]*/
1041         /** @stable ICU 49 */
1042         public static final int SORA_SOMPENG_ID = 218; /*[110D0]*/
1043         /** @stable ICU 49 */
1044         public static final int SUNDANESE_SUPPLEMENT_ID = 219; /*[1CC0]*/
1045         /** @stable ICU 49 */
1046         public static final int TAKRI_ID = 220; /*[11680]*/
1047 
1048         /* New blocks in Unicode 7.0 */
1049 
1050         /** @stable ICU 54 */
1051         public static final int BASSA_VAH_ID = 221; /*[16AD0]*/
1052         /** @stable ICU 54 */
1053         public static final int CAUCASIAN_ALBANIAN_ID = 222; /*[10530]*/
1054         /** @stable ICU 54 */
1055         public static final int COPTIC_EPACT_NUMBERS_ID = 223; /*[102E0]*/
1056         /** @stable ICU 54 */
1057         public static final int COMBINING_DIACRITICAL_MARKS_EXTENDED_ID = 224; /*[1AB0]*/
1058         /** @stable ICU 54 */
1059         public static final int DUPLOYAN_ID = 225; /*[1BC00]*/
1060         /** @stable ICU 54 */
1061         public static final int ELBASAN_ID = 226; /*[10500]*/
1062         /** @stable ICU 54 */
1063         public static final int GEOMETRIC_SHAPES_EXTENDED_ID = 227; /*[1F780]*/
1064         /** @stable ICU 54 */
1065         public static final int GRANTHA_ID = 228; /*[11300]*/
1066         /** @stable ICU 54 */
1067         public static final int KHOJKI_ID = 229; /*[11200]*/
1068         /** @stable ICU 54 */
1069         public static final int KHUDAWADI_ID = 230; /*[112B0]*/
1070         /** @stable ICU 54 */
1071         public static final int LATIN_EXTENDED_E_ID = 231; /*[AB30]*/
1072         /** @stable ICU 54 */
1073         public static final int LINEAR_A_ID = 232; /*[10600]*/
1074         /** @stable ICU 54 */
1075         public static final int MAHAJANI_ID = 233; /*[11150]*/
1076         /** @stable ICU 54 */
1077         public static final int MANICHAEAN_ID = 234; /*[10AC0]*/
1078         /** @stable ICU 54 */
1079         public static final int MENDE_KIKAKUI_ID = 235; /*[1E800]*/
1080         /** @stable ICU 54 */
1081         public static final int MODI_ID = 236; /*[11600]*/
1082         /** @stable ICU 54 */
1083         public static final int MRO_ID = 237; /*[16A40]*/
1084         /** @stable ICU 54 */
1085         public static final int MYANMAR_EXTENDED_B_ID = 238; /*[A9E0]*/
1086         /** @stable ICU 54 */
1087         public static final int NABATAEAN_ID = 239; /*[10880]*/
1088         /** @stable ICU 54 */
1089         public static final int OLD_NORTH_ARABIAN_ID = 240; /*[10A80]*/
1090         /** @stable ICU 54 */
1091         public static final int OLD_PERMIC_ID = 241; /*[10350]*/
1092         /** @stable ICU 54 */
1093         public static final int ORNAMENTAL_DINGBATS_ID = 242; /*[1F650]*/
1094         /** @stable ICU 54 */
1095         public static final int PAHAWH_HMONG_ID = 243; /*[16B00]*/
1096         /** @stable ICU 54 */
1097         public static final int PALMYRENE_ID = 244; /*[10860]*/
1098         /** @stable ICU 54 */
1099         public static final int PAU_CIN_HAU_ID = 245; /*[11AC0]*/
1100         /** @stable ICU 54 */
1101         public static final int PSALTER_PAHLAVI_ID = 246; /*[10B80]*/
1102         /** @stable ICU 54 */
1103         public static final int SHORTHAND_FORMAT_CONTROLS_ID = 247; /*[1BCA0]*/
1104         /** @stable ICU 54 */
1105         public static final int SIDDHAM_ID = 248; /*[11580]*/
1106         /** @stable ICU 54 */
1107         public static final int SINHALA_ARCHAIC_NUMBERS_ID = 249; /*[111E0]*/
1108         /** @stable ICU 54 */
1109         public static final int SUPPLEMENTAL_ARROWS_C_ID = 250; /*[1F800]*/
1110         /** @stable ICU 54 */
1111         public static final int TIRHUTA_ID = 251; /*[11480]*/
1112         /** @stable ICU 54 */
1113         public static final int WARANG_CITI_ID = 252; /*[118A0]*/
1114 
1115         /* New blocks in Unicode 8.0 */
1116 
1117         /** @stable ICU 56 */
1118         public static final int AHOM_ID = 253; /*[11700]*/
1119         /** @stable ICU 56 */
1120         public static final int ANATOLIAN_HIEROGLYPHS_ID = 254; /*[14400]*/
1121         /** @stable ICU 56 */
1122         public static final int CHEROKEE_SUPPLEMENT_ID = 255; /*[AB70]*/
1123         /** @stable ICU 56 */
1124         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID = 256; /*[2B820]*/
1125         /** @stable ICU 56 */
1126         public static final int EARLY_DYNASTIC_CUNEIFORM_ID = 257; /*[12480]*/
1127         /** @stable ICU 56 */
1128         public static final int HATRAN_ID = 258; /*[108E0]*/
1129         /** @stable ICU 56 */
1130         public static final int MULTANI_ID = 259; /*[11280]*/
1131         /** @stable ICU 56 */
1132         public static final int OLD_HUNGARIAN_ID = 260; /*[10C80]*/
1133         /** @stable ICU 56 */
1134         public static final int SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID = 261; /*[1F900]*/
1135         /** @stable ICU 56 */
1136         public static final int SUTTON_SIGNWRITING_ID = 262; /*[1D800]*/
1137 
1138         /* New blocks in Unicode 9.0 */
1139 
1140         /** @stable ICU 58 */
1141         public static final int ADLAM_ID = 263; /*[1E900]*/
1142         /** @stable ICU 58 */
1143         public static final int BHAIKSUKI_ID = 264; /*[11C00]*/
1144         /** @stable ICU 58 */
1145         public static final int CYRILLIC_EXTENDED_C_ID = 265; /*[1C80]*/
1146         /** @stable ICU 58 */
1147         public static final int GLAGOLITIC_SUPPLEMENT_ID = 266; /*[1E000]*/
1148         /** @stable ICU 58 */
1149         public static final int IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID = 267; /*[16FE0]*/
1150         /** @stable ICU 58 */
1151         public static final int MARCHEN_ID = 268; /*[11C70]*/
1152         /** @stable ICU 58 */
1153         public static final int MONGOLIAN_SUPPLEMENT_ID = 269; /*[11660]*/
1154         /** @stable ICU 58 */
1155         public static final int NEWA_ID = 270; /*[11400]*/
1156         /** @stable ICU 58 */
1157         public static final int OSAGE_ID = 271; /*[104B0]*/
1158         /** @stable ICU 58 */
1159         public static final int TANGUT_ID = 272; /*[17000]*/
1160         /** @stable ICU 58 */
1161         public static final int TANGUT_COMPONENTS_ID = 273; /*[18800]*/
1162 
1163         // New blocks in Unicode 10.0
1164 
1165         /** @stable ICU 60 */
1166         public static final int CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID = 274; /*[2CEB0]*/
1167         /** @stable ICU 60 */
1168         public static final int KANA_EXTENDED_A_ID = 275; /*[1B100]*/
1169         /** @stable ICU 60 */
1170         public static final int MASARAM_GONDI_ID = 276; /*[11D00]*/
1171         /** @stable ICU 60 */
1172         public static final int NUSHU_ID = 277; /*[1B170]*/
1173         /** @stable ICU 60 */
1174         public static final int SOYOMBO_ID = 278; /*[11A50]*/
1175         /** @stable ICU 60 */
1176         public static final int SYRIAC_SUPPLEMENT_ID = 279; /*[0860]*/
1177         /** @stable ICU 60 */
1178         public static final int ZANABAZAR_SQUARE_ID = 280; /*[11A00]*/
1179 
1180         // New blocks in Unicode 11.0
1181 
1182         /** @stable ICU 62 */
1183         public static final int CHESS_SYMBOLS_ID = 281; /*[1FA00]*/
1184         /** @stable ICU 62 */
1185         public static final int DOGRA_ID = 282; /*[11800]*/
1186         /** @stable ICU 62 */
1187         public static final int GEORGIAN_EXTENDED_ID = 283; /*[1C90]*/
1188         /** @stable ICU 62 */
1189         public static final int GUNJALA_GONDI_ID = 284; /*[11D60]*/
1190         /** @stable ICU 62 */
1191         public static final int HANIFI_ROHINGYA_ID = 285; /*[10D00]*/
1192         /** @stable ICU 62 */
1193         public static final int INDIC_SIYAQ_NUMBERS_ID = 286; /*[1EC70]*/
1194         /** @stable ICU 62 */
1195         public static final int MAKASAR_ID = 287; /*[11EE0]*/
1196         /** @stable ICU 62 */
1197         public static final int MAYAN_NUMERALS_ID = 288; /*[1D2E0]*/
1198         /** @stable ICU 62 */
1199         public static final int MEDEFAIDRIN_ID = 289; /*[16E40]*/
1200         /** @stable ICU 62 */
1201         public static final int OLD_SOGDIAN_ID = 290; /*[10F00]*/
1202         /** @stable ICU 62 */
1203         public static final int SOGDIAN_ID = 291; /*[10F30]*/
1204 
1205         /**
1206          * One more than the highest normal UnicodeBlock value.
1207          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BLOCK).
1208          * Keep in sync when new block IDs are added: it is also the length of the BLOCKS_ array.
1209          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1210          */
1211         @Deprecated
1212         public static final int COUNT = 292;
1213 
1214         // block objects ----------------------------------------------------
1215 
1216         /**
1217          * Array of UnicodeBlocks with {@code COUNT} slots, for easy access in getInstance(int).
1218          */
1219         private final static UnicodeBlock BLOCKS_[] = new UnicodeBlock[COUNT];
1220 
1221         /**
1222          * @stable ICU 2.6
1223          */
1224         public static final UnicodeBlock NO_BLOCK
1225         = new UnicodeBlock("NO_BLOCK", 0);
1226 
1227         /**
1228          * @stable ICU 2.4
1229          */
1230         public static final UnicodeBlock BASIC_LATIN
1231         = new UnicodeBlock("BASIC_LATIN", BASIC_LATIN_ID);
1232         /**
1233          * @stable ICU 2.4
1234          */
1235         public static final UnicodeBlock LATIN_1_SUPPLEMENT
1236         = new UnicodeBlock("LATIN_1_SUPPLEMENT", LATIN_1_SUPPLEMENT_ID);
1237         /**
1238          * @stable ICU 2.4
1239          */
1240         public static final UnicodeBlock LATIN_EXTENDED_A
1241         = new UnicodeBlock("LATIN_EXTENDED_A", LATIN_EXTENDED_A_ID);
1242         /**
1243          * @stable ICU 2.4
1244          */
1245         public static final UnicodeBlock LATIN_EXTENDED_B
1246         = new UnicodeBlock("LATIN_EXTENDED_B", LATIN_EXTENDED_B_ID);
1247         /**
1248          * @stable ICU 2.4
1249          */
1250         public static final UnicodeBlock IPA_EXTENSIONS
1251         = new UnicodeBlock("IPA_EXTENSIONS", IPA_EXTENSIONS_ID);
1252         /**
1253          * @stable ICU 2.4
1254          */
1255         public static final UnicodeBlock SPACING_MODIFIER_LETTERS
1256         = new UnicodeBlock("SPACING_MODIFIER_LETTERS", SPACING_MODIFIER_LETTERS_ID);
1257         /**
1258          * @stable ICU 2.4
1259          */
1260         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS
1261         = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS", COMBINING_DIACRITICAL_MARKS_ID);
1262         /**
1263          * Unicode 3.2 renames this block to "Greek and Coptic".
1264          * @stable ICU 2.4
1265          */
1266         public static final UnicodeBlock GREEK
1267         = new UnicodeBlock("GREEK", GREEK_ID);
1268         /**
1269          * @stable ICU 2.4
1270          */
1271         public static final UnicodeBlock CYRILLIC
1272         = new UnicodeBlock("CYRILLIC", CYRILLIC_ID);
1273         /**
1274          * @stable ICU 2.4
1275          */
1276         public static final UnicodeBlock ARMENIAN
1277         = new UnicodeBlock("ARMENIAN", ARMENIAN_ID);
1278         /**
1279          * @stable ICU 2.4
1280          */
1281         public static final UnicodeBlock HEBREW
1282         = new UnicodeBlock("HEBREW", HEBREW_ID);
1283         /**
1284          * @stable ICU 2.4
1285          */
1286         public static final UnicodeBlock ARABIC
1287         = new UnicodeBlock("ARABIC", ARABIC_ID);
1288         /**
1289          * @stable ICU 2.4
1290          */
1291         public static final UnicodeBlock SYRIAC
1292         = new UnicodeBlock("SYRIAC", SYRIAC_ID);
1293         /**
1294          * @stable ICU 2.4
1295          */
1296         public static final UnicodeBlock THAANA
1297         = new UnicodeBlock("THAANA", THAANA_ID);
1298         /**
1299          * @stable ICU 2.4
1300          */
1301         public static final UnicodeBlock DEVANAGARI
1302         = new UnicodeBlock("DEVANAGARI", DEVANAGARI_ID);
1303         /**
1304          * @stable ICU 2.4
1305          */
1306         public static final UnicodeBlock BENGALI
1307         = new UnicodeBlock("BENGALI", BENGALI_ID);
1308         /**
1309          * @stable ICU 2.4
1310          */
1311         public static final UnicodeBlock GURMUKHI
1312         = new UnicodeBlock("GURMUKHI", GURMUKHI_ID);
1313         /**
1314          * @stable ICU 2.4
1315          */
1316         public static final UnicodeBlock GUJARATI
1317         = new UnicodeBlock("GUJARATI", GUJARATI_ID);
1318         /**
1319          * @stable ICU 2.4
1320          */
1321         public static final UnicodeBlock ORIYA
1322         = new UnicodeBlock("ORIYA", ORIYA_ID);
1323         /**
1324          * @stable ICU 2.4
1325          */
1326         public static final UnicodeBlock TAMIL
1327         = new UnicodeBlock("TAMIL", TAMIL_ID);
1328         /**
1329          * @stable ICU 2.4
1330          */
1331         public static final UnicodeBlock TELUGU
1332         = new UnicodeBlock("TELUGU", TELUGU_ID);
1333         /**
1334          * @stable ICU 2.4
1335          */
1336         public static final UnicodeBlock KANNADA
1337         = new UnicodeBlock("KANNADA", KANNADA_ID);
1338         /**
1339          * @stable ICU 2.4
1340          */
1341         public static final UnicodeBlock MALAYALAM
1342         = new UnicodeBlock("MALAYALAM", MALAYALAM_ID);
1343         /**
1344          * @stable ICU 2.4
1345          */
1346         public static final UnicodeBlock SINHALA
1347         = new UnicodeBlock("SINHALA", SINHALA_ID);
1348         /**
1349          * @stable ICU 2.4
1350          */
1351         public static final UnicodeBlock THAI
1352         = new UnicodeBlock("THAI", THAI_ID);
1353         /**
1354          * @stable ICU 2.4
1355          */
1356         public static final UnicodeBlock LAO
1357         = new UnicodeBlock("LAO", LAO_ID);
1358         /**
1359          * @stable ICU 2.4
1360          */
1361         public static final UnicodeBlock TIBETAN
1362         = new UnicodeBlock("TIBETAN", TIBETAN_ID);
1363         /**
1364          * @stable ICU 2.4
1365          */
1366         public static final UnicodeBlock MYANMAR
1367         = new UnicodeBlock("MYANMAR", MYANMAR_ID);
1368         /**
1369          * @stable ICU 2.4
1370          */
1371         public static final UnicodeBlock GEORGIAN
1372         = new UnicodeBlock("GEORGIAN", GEORGIAN_ID);
1373         /**
1374          * @stable ICU 2.4
1375          */
1376         public static final UnicodeBlock HANGUL_JAMO
1377         = new UnicodeBlock("HANGUL_JAMO", HANGUL_JAMO_ID);
1378         /**
1379          * @stable ICU 2.4
1380          */
1381         public static final UnicodeBlock ETHIOPIC
1382         = new UnicodeBlock("ETHIOPIC", ETHIOPIC_ID);
1383         /**
1384          * @stable ICU 2.4
1385          */
1386         public static final UnicodeBlock CHEROKEE
1387         = new UnicodeBlock("CHEROKEE", CHEROKEE_ID);
1388         /**
1389          * @stable ICU 2.4
1390          */
1391         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
1392         = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS",
1393                 UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_ID);
1394         /**
1395          * @stable ICU 2.4
1396          */
1397         public static final UnicodeBlock OGHAM
1398         = new UnicodeBlock("OGHAM", OGHAM_ID);
1399         /**
1400          * @stable ICU 2.4
1401          */
1402         public static final UnicodeBlock RUNIC
1403         = new UnicodeBlock("RUNIC", RUNIC_ID);
1404         /**
1405          * @stable ICU 2.4
1406          */
1407         public static final UnicodeBlock KHMER
1408         = new UnicodeBlock("KHMER", KHMER_ID);
1409         /**
1410          * @stable ICU 2.4
1411          */
1412         public static final UnicodeBlock MONGOLIAN
1413         = new UnicodeBlock("MONGOLIAN", MONGOLIAN_ID);
1414         /**
1415          * @stable ICU 2.4
1416          */
1417         public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL
1418         = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL", LATIN_EXTENDED_ADDITIONAL_ID);
1419         /**
1420          * @stable ICU 2.4
1421          */
1422         public static final UnicodeBlock GREEK_EXTENDED
1423         = new UnicodeBlock("GREEK_EXTENDED", GREEK_EXTENDED_ID);
1424         /**
1425          * @stable ICU 2.4
1426          */
1427         public static final UnicodeBlock GENERAL_PUNCTUATION
1428         = new UnicodeBlock("GENERAL_PUNCTUATION", GENERAL_PUNCTUATION_ID);
1429         /**
1430          * @stable ICU 2.4
1431          */
1432         public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS
1433         = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS", SUPERSCRIPTS_AND_SUBSCRIPTS_ID);
1434         /**
1435          * @stable ICU 2.4
1436          */
1437         public static final UnicodeBlock CURRENCY_SYMBOLS
1438         = new UnicodeBlock("CURRENCY_SYMBOLS", CURRENCY_SYMBOLS_ID);
1439         /**
1440          * Unicode 3.2 renames this block to "Combining Diacritical Marks for
1441          * Symbols".
1442          * @stable ICU 2.4
1443          */
1444         public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS
1445         = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS", COMBINING_MARKS_FOR_SYMBOLS_ID);
1446         /**
1447          * @stable ICU 2.4
1448          */
1449         public static final UnicodeBlock LETTERLIKE_SYMBOLS
1450         = new UnicodeBlock("LETTERLIKE_SYMBOLS", LETTERLIKE_SYMBOLS_ID);
1451         /**
1452          * @stable ICU 2.4
1453          */
1454         public static final UnicodeBlock NUMBER_FORMS
1455         = new UnicodeBlock("NUMBER_FORMS", NUMBER_FORMS_ID);
1456         /**
1457          * @stable ICU 2.4
1458          */
1459         public static final UnicodeBlock ARROWS
1460         = new UnicodeBlock("ARROWS", ARROWS_ID);
1461         /**
1462          * @stable ICU 2.4
1463          */
1464         public static final UnicodeBlock MATHEMATICAL_OPERATORS
1465         = new UnicodeBlock("MATHEMATICAL_OPERATORS", MATHEMATICAL_OPERATORS_ID);
1466         /**
1467          * @stable ICU 2.4
1468          */
1469         public static final UnicodeBlock MISCELLANEOUS_TECHNICAL
1470         = new UnicodeBlock("MISCELLANEOUS_TECHNICAL", MISCELLANEOUS_TECHNICAL_ID);
1471         /**
1472          * @stable ICU 2.4
1473          */
1474         public static final UnicodeBlock CONTROL_PICTURES
1475         = new UnicodeBlock("CONTROL_PICTURES", CONTROL_PICTURES_ID);
1476         /**
1477          * @stable ICU 2.4
1478          */
1479         public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION
1480         = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION", OPTICAL_CHARACTER_RECOGNITION_ID);
1481         /**
1482          * @stable ICU 2.4
1483          */
1484         public static final UnicodeBlock ENCLOSED_ALPHANUMERICS
1485         = new UnicodeBlock("ENCLOSED_ALPHANUMERICS", ENCLOSED_ALPHANUMERICS_ID);
1486         /**
1487          * @stable ICU 2.4
1488          */
1489         public static final UnicodeBlock BOX_DRAWING
1490         = new UnicodeBlock("BOX_DRAWING", BOX_DRAWING_ID);
1491         /**
1492          * @stable ICU 2.4
1493          */
1494         public static final UnicodeBlock BLOCK_ELEMENTS
1495         = new UnicodeBlock("BLOCK_ELEMENTS", BLOCK_ELEMENTS_ID);
1496         /**
1497          * @stable ICU 2.4
1498          */
1499         public static final UnicodeBlock GEOMETRIC_SHAPES
1500         = new UnicodeBlock("GEOMETRIC_SHAPES", GEOMETRIC_SHAPES_ID);
1501         /**
1502          * @stable ICU 2.4
1503          */
1504         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS
1505         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS", MISCELLANEOUS_SYMBOLS_ID);
1506         /**
1507          * @stable ICU 2.4
1508          */
1509         public static final UnicodeBlock DINGBATS
1510         = new UnicodeBlock("DINGBATS", DINGBATS_ID);
1511         /**
1512          * @stable ICU 2.4
1513          */
1514         public static final UnicodeBlock BRAILLE_PATTERNS
1515         = new UnicodeBlock("BRAILLE_PATTERNS", BRAILLE_PATTERNS_ID);
1516         /**
1517          * @stable ICU 2.4
1518          */
1519         public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT
1520         = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT", CJK_RADICALS_SUPPLEMENT_ID);
1521         /**
1522          * @stable ICU 2.4
1523          */
1524         public static final UnicodeBlock KANGXI_RADICALS
1525         = new UnicodeBlock("KANGXI_RADICALS", KANGXI_RADICALS_ID);
1526         /**
1527          * @stable ICU 2.4
1528          */
1529         public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS
1530         = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS",
1531                 IDEOGRAPHIC_DESCRIPTION_CHARACTERS_ID);
1532         /**
1533          * @stable ICU 2.4
1534          */
1535         public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION
1536         = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION", CJK_SYMBOLS_AND_PUNCTUATION_ID);
1537         /**
1538          * @stable ICU 2.4
1539          */
1540         public static final UnicodeBlock HIRAGANA
1541         = new UnicodeBlock("HIRAGANA", HIRAGANA_ID);
1542         /**
1543          * @stable ICU 2.4
1544          */
1545         public static final UnicodeBlock KATAKANA
1546         = new UnicodeBlock("KATAKANA", KATAKANA_ID);
1547         /**
1548          * @stable ICU 2.4
1549          */
1550         public static final UnicodeBlock BOPOMOFO
1551         = new UnicodeBlock("BOPOMOFO", BOPOMOFO_ID);
1552         /**
1553          * @stable ICU 2.4
1554          */
1555         public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO
1556         = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO", HANGUL_COMPATIBILITY_JAMO_ID);
1557         /**
1558          * @stable ICU 2.4
1559          */
1560         public static final UnicodeBlock KANBUN
1561         = new UnicodeBlock("KANBUN", KANBUN_ID);
1562         /**
1563          * @stable ICU 2.4
1564          */
1565         public static final UnicodeBlock BOPOMOFO_EXTENDED
1566         = new UnicodeBlock("BOPOMOFO_EXTENDED", BOPOMOFO_EXTENDED_ID);
1567         /**
1568          * @stable ICU 2.4
1569          */
1570         public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS
1571         = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS",
1572                 ENCLOSED_CJK_LETTERS_AND_MONTHS_ID);
1573         /**
1574          * @stable ICU 2.4
1575          */
1576         public static final UnicodeBlock CJK_COMPATIBILITY
1577         = new UnicodeBlock("CJK_COMPATIBILITY", CJK_COMPATIBILITY_ID);
1578         /**
1579          * @stable ICU 2.4
1580          */
1581         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
1582         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A",
1583                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A_ID);
1584         /**
1585          * @stable ICU 2.4
1586          */
1587         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS
1588         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS", CJK_UNIFIED_IDEOGRAPHS_ID);
1589         /**
1590          * @stable ICU 2.4
1591          */
1592         public static final UnicodeBlock YI_SYLLABLES
1593         = new UnicodeBlock("YI_SYLLABLES", YI_SYLLABLES_ID);
1594         /**
1595          * @stable ICU 2.4
1596          */
1597         public static final UnicodeBlock YI_RADICALS
1598         = new UnicodeBlock("YI_RADICALS", YI_RADICALS_ID);
1599         /**
1600          * @stable ICU 2.4
1601          */
1602         public static final UnicodeBlock HANGUL_SYLLABLES
1603         = new UnicodeBlock("HANGUL_SYLLABLES", HANGUL_SYLLABLES_ID);
1604         /**
1605          * @stable ICU 2.4
1606          */
1607         public static final UnicodeBlock HIGH_SURROGATES
1608         = new UnicodeBlock("HIGH_SURROGATES", HIGH_SURROGATES_ID);
1609         /**
1610          * @stable ICU 2.4
1611          */
1612         public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES
1613         = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES", HIGH_PRIVATE_USE_SURROGATES_ID);
1614         /**
1615          * @stable ICU 2.4
1616          */
1617         public static final UnicodeBlock LOW_SURROGATES
1618         = new UnicodeBlock("LOW_SURROGATES", LOW_SURROGATES_ID);
1619         /**
1620          * Same object as {@link #PRIVATE_USE}, which is declared as an alias of this constant.
1621          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1622          * and multiple code point ranges had this block.
1623          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1624          * and adds separate blocks for the supplementary PUAs.
1625          * @stable ICU 2.4
1626          */
1627         public static final UnicodeBlock PRIVATE_USE_AREA
1628         = new UnicodeBlock("PRIVATE_USE_AREA",  78); // NOTE(review): literal ID where sibling blocks pass an *_ID constant; presumably equals PRIVATE_USE_AREA_ID -- confirm.
1629         /**
1630          * Alias for {@link #PRIVATE_USE_AREA}: both constants refer to the same UnicodeBlock object.
1631          * Until Unicode 3.1.1, the corresponding block name was "Private Use",
1632          * and multiple code point ranges had this block.
1633          * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area"
1634          * and adds separate blocks for the supplementary PUAs.
1635          * @stable ICU 2.4
1636          */
1637         public static final UnicodeBlock PRIVATE_USE
1638         = PRIVATE_USE_AREA;
1639         /**
1640          * @stable ICU 2.4
1641          */
1642         public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS
1643         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS", CJK_COMPATIBILITY_IDEOGRAPHS_ID);
1644         /**
1645          * @stable ICU 2.4
1646          */
1647         public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS
1648         = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS", ALPHABETIC_PRESENTATION_FORMS_ID);
1649         /**
1650          * @stable ICU 2.4
1651          */
1652         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A
1653         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A", ARABIC_PRESENTATION_FORMS_A_ID);
1654         /**
1655          * @stable ICU 2.4
1656          */
1657         public static final UnicodeBlock COMBINING_HALF_MARKS
1658         = new UnicodeBlock("COMBINING_HALF_MARKS", COMBINING_HALF_MARKS_ID);
1659         /**
1660          * @stable ICU 2.4
1661          */
1662         public static final UnicodeBlock CJK_COMPATIBILITY_FORMS
1663         = new UnicodeBlock("CJK_COMPATIBILITY_FORMS", CJK_COMPATIBILITY_FORMS_ID);
1664         /**
1665          * @stable ICU 2.4
1666          */
1667         public static final UnicodeBlock SMALL_FORM_VARIANTS
1668         = new UnicodeBlock("SMALL_FORM_VARIANTS", SMALL_FORM_VARIANTS_ID);
1669         /**
1670          * @stable ICU 2.4
1671          */
1672         public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B
1673         = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B", ARABIC_PRESENTATION_FORMS_B_ID);
1674         /**
1675          * @stable ICU 2.4
1676          */
1677         public static final UnicodeBlock SPECIALS
1678         = new UnicodeBlock("SPECIALS", SPECIALS_ID);
1679         /**
1680          * @stable ICU 2.4
1681          */
1682         public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS
1683         = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS", HALFWIDTH_AND_FULLWIDTH_FORMS_ID);
1684         /**
1685          * @stable ICU 2.4
1686          */
1687         public static final UnicodeBlock OLD_ITALIC
1688         = new UnicodeBlock("OLD_ITALIC", OLD_ITALIC_ID);
1689         /**
1690          * @stable ICU 2.4
1691          */
1692         public static final UnicodeBlock GOTHIC
1693         = new UnicodeBlock("GOTHIC", GOTHIC_ID);
1694         /**
1695          * @stable ICU 2.4
1696          */
1697         public static final UnicodeBlock DESERET
1698         = new UnicodeBlock("DESERET", DESERET_ID);
1699         /**
1700          * @stable ICU 2.4
1701          */
1702         public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS
1703         = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS", BYZANTINE_MUSICAL_SYMBOLS_ID);
1704         /**
1705          * @stable ICU 2.4
1706          */
1707         public static final UnicodeBlock MUSICAL_SYMBOLS
1708         = new UnicodeBlock("MUSICAL_SYMBOLS", MUSICAL_SYMBOLS_ID);
1709         /**
1710          * @stable ICU 2.4
1711          */
1712         public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS
1713         = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS",
1714                 MATHEMATICAL_ALPHANUMERIC_SYMBOLS_ID);
1715         /**
1716          * @stable ICU 2.4
1717          */
1718         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
1719         = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B",
1720                 CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B_ID);
1721         /**
1722          * @stable ICU 2.4
1723          */
1724         public static final UnicodeBlock
1725         CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
1726         = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT",
1727                 CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT_ID);
1728         /**
1729          * @stable ICU 2.4
1730          */
1731         public static final UnicodeBlock TAGS
1732         = new UnicodeBlock("TAGS", TAGS_ID);
1733 
1734         // New blocks in Unicode 3.2
1735 
1736         /**
1737          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1738          * @stable ICU 2.4
1739          */
1740         public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY
1741         = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY", CYRILLIC_SUPPLEMENTARY_ID);
1742         /**
1743          * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
1744          * @stable ICU 3.0
1745          */
1746         public static final UnicodeBlock CYRILLIC_SUPPLEMENT
1747         = new UnicodeBlock("CYRILLIC_SUPPLEMENT", CYRILLIC_SUPPLEMENT_ID);
1748         /**
1749          * @stable ICU 2.4
1750          */
1751         public static final UnicodeBlock TAGALOG
1752         = new UnicodeBlock("TAGALOG", TAGALOG_ID);
1753         /**
1754          * @stable ICU 2.4
1755          */
1756         public static final UnicodeBlock HANUNOO
1757         = new UnicodeBlock("HANUNOO", HANUNOO_ID);
1758         /**
1759          * @stable ICU 2.4
1760          */
1761         public static final UnicodeBlock BUHID
1762         = new UnicodeBlock("BUHID", BUHID_ID);
1763         /**
1764          * @stable ICU 2.4
1765          */
1766         public static final UnicodeBlock TAGBANWA
1767         = new UnicodeBlock("TAGBANWA", TAGBANWA_ID);
1768         /**
1769          * @stable ICU 2.4
1770          */
1771         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
1772         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A",
1773                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_ID);
1774         /**
1775          * @stable ICU 2.4
1776          */
1777         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A
1778         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A", SUPPLEMENTAL_ARROWS_A_ID);
1779         /**
1780          * @stable ICU 2.4
1781          */
1782         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B
1783         = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B", SUPPLEMENTAL_ARROWS_B_ID);
1784         /**
1785          * @stable ICU 2.4
1786          */
1787         public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
1788         = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B",
1789                 MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_ID);
1790         /**
1791          * @stable ICU 2.4
1792          */
1793         public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS
1794         = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS",
1795                 SUPPLEMENTAL_MATHEMATICAL_OPERATORS_ID);
1796         /**
1797          * @stable ICU 2.4
1798          */
1799         public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS
1800         = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS", KATAKANA_PHONETIC_EXTENSIONS_ID);
1801         /**
1802          * @stable ICU 2.4
1803          */
1804         public static final UnicodeBlock VARIATION_SELECTORS
1805         = new UnicodeBlock("VARIATION_SELECTORS", VARIATION_SELECTORS_ID);
1806         /**
1807          * @stable ICU 2.4
1808          */
1809         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A
1810         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A",
1811                 SUPPLEMENTARY_PRIVATE_USE_AREA_A_ID);
1812         /**
1813          * @stable ICU 2.4
1814          */
1815         public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B
1816         = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B",
1817                 SUPPLEMENTARY_PRIVATE_USE_AREA_B_ID);
1818 
1819         /**
1820          * @stable ICU 2.6
1821          */
1822         public static final UnicodeBlock LIMBU
1823         = new UnicodeBlock("LIMBU", LIMBU_ID);
1824         /**
1825          * @stable ICU 2.6
1826          */
1827         public static final UnicodeBlock TAI_LE
1828         = new UnicodeBlock("TAI_LE", TAI_LE_ID);
1829         /**
1830          * @stable ICU 2.6
1831          */
1832         public static final UnicodeBlock KHMER_SYMBOLS
1833         = new UnicodeBlock("KHMER_SYMBOLS", KHMER_SYMBOLS_ID);
1834 
1835         /**
1836          * @stable ICU 2.6
1837          */
1838         public static final UnicodeBlock PHONETIC_EXTENSIONS
1839         = new UnicodeBlock("PHONETIC_EXTENSIONS", PHONETIC_EXTENSIONS_ID);
1840 
1841         /**
1842          * @stable ICU 2.6
1843          */
1844         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS
1845         = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS",
1846                 MISCELLANEOUS_SYMBOLS_AND_ARROWS_ID);
1847         /**
1848          * @stable ICU 2.6
1849          */
1850         public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS
1851         = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS", YIJING_HEXAGRAM_SYMBOLS_ID);
1852         /**
1853          * @stable ICU 2.6
1854          */
1855         public static final UnicodeBlock LINEAR_B_SYLLABARY
1856         = new UnicodeBlock("LINEAR_B_SYLLABARY", LINEAR_B_SYLLABARY_ID);
1857         /**
1858          * @stable ICU 2.6
1859          */
1860         public static final UnicodeBlock LINEAR_B_IDEOGRAMS
1861         = new UnicodeBlock("LINEAR_B_IDEOGRAMS", LINEAR_B_IDEOGRAMS_ID);
1862         /**
1863          * @stable ICU 2.6
1864          */
1865         public static final UnicodeBlock AEGEAN_NUMBERS
1866         = new UnicodeBlock("AEGEAN_NUMBERS", AEGEAN_NUMBERS_ID);
1867         /**
1868          * @stable ICU 2.6
1869          */
1870         public static final UnicodeBlock UGARITIC
1871         = new UnicodeBlock("UGARITIC", UGARITIC_ID);
1872         /**
1873          * @stable ICU 2.6
1874          */
1875         public static final UnicodeBlock SHAVIAN
1876         = new UnicodeBlock("SHAVIAN", SHAVIAN_ID);
1877         /**
1878          * @stable ICU 2.6
1879          */
1880         public static final UnicodeBlock OSMANYA
1881         = new UnicodeBlock("OSMANYA", OSMANYA_ID);
1882         /**
1883          * @stable ICU 2.6
1884          */
1885         public static final UnicodeBlock CYPRIOT_SYLLABARY
1886         = new UnicodeBlock("CYPRIOT_SYLLABARY", CYPRIOT_SYLLABARY_ID);
1887         /**
1888          * @stable ICU 2.6
1889          */
1890         public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS
1891         = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS", TAI_XUAN_JING_SYMBOLS_ID);
1892 
1893         /**
1894          * @stable ICU 2.6
1895          */
1896         public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT
1897         = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT", VARIATION_SELECTORS_SUPPLEMENT_ID);
1898 
1899         /* New blocks in Unicode 4.1 */
1900 
1901         /**
1902          * @stable ICU 3.4
1903          */
1904         public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION =
1905                 new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION",
1906                         ANCIENT_GREEK_MUSICAL_NOTATION_ID); /*[1D200]*/
1907 
1908         /**
1909          * @stable ICU 3.4
1910          */
1911         public static final UnicodeBlock ANCIENT_GREEK_NUMBERS =
1912                 new UnicodeBlock("ANCIENT_GREEK_NUMBERS", ANCIENT_GREEK_NUMBERS_ID); /*[10140]*/
1913 
1914         /**
1915          * @stable ICU 3.4
1916          */
1917         public static final UnicodeBlock ARABIC_SUPPLEMENT =
1918                 new UnicodeBlock("ARABIC_SUPPLEMENT", ARABIC_SUPPLEMENT_ID); /*[0750]*/
1919 
1920         /**
1921          * @stable ICU 3.4
1922          */
1923         public static final UnicodeBlock BUGINESE =
1924                 new UnicodeBlock("BUGINESE", BUGINESE_ID); /*[1A00]*/
1925 
1926         /**
1927          * @stable ICU 3.4
1928          */
1929         public static final UnicodeBlock CJK_STROKES =
1930                 new UnicodeBlock("CJK_STROKES", CJK_STROKES_ID); /*[31C0]*/
1931 
1932         /**
1933          * @stable ICU 3.4
1934          */
1935         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT =
1936                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT",
1937                         COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_ID); /*[1DC0]*/
1938 
1939         /**
1940          * @stable ICU 3.4
1941          */
1942         public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC", COPTIC_ID); /*[2C80]*/
1943 
1944         /**
1945          * @stable ICU 3.4
1946          */
1947         public static final UnicodeBlock ETHIOPIC_EXTENDED =
1948                 new UnicodeBlock("ETHIOPIC_EXTENDED", ETHIOPIC_EXTENDED_ID); /*[2D80]*/
1949 
1950         /**
1951          * @stable ICU 3.4
1952          */
1953         public static final UnicodeBlock ETHIOPIC_SUPPLEMENT =
1954                 new UnicodeBlock("ETHIOPIC_SUPPLEMENT", ETHIOPIC_SUPPLEMENT_ID); /*[1380]*/
1955 
1956         /**
1957          * @stable ICU 3.4
1958          */
1959         public static final UnicodeBlock GEORGIAN_SUPPLEMENT =
1960                 new UnicodeBlock("GEORGIAN_SUPPLEMENT", GEORGIAN_SUPPLEMENT_ID); /*[2D00]*/
1961 
1962         /**
1963          * @stable ICU 3.4
1964          */
1965         public static final UnicodeBlock GLAGOLITIC =
1966                 new UnicodeBlock("GLAGOLITIC", GLAGOLITIC_ID); /*[2C00]*/
1967 
1968         /**
1969          * @stable ICU 3.4
1970          */
1971         public static final UnicodeBlock KHAROSHTHI =
1972                 new UnicodeBlock("KHAROSHTHI", KHAROSHTHI_ID); /*[10A00]*/
1973 
1974         /**
1975          * @stable ICU 3.4
1976          */
1977         public static final UnicodeBlock MODIFIER_TONE_LETTERS =
1978                 new UnicodeBlock("MODIFIER_TONE_LETTERS", MODIFIER_TONE_LETTERS_ID); /*[A700]*/
1979 
1980         /**
1981          * @stable ICU 3.4
1982          */
1983         public static final UnicodeBlock NEW_TAI_LUE =
1984                 new UnicodeBlock("NEW_TAI_LUE", NEW_TAI_LUE_ID); /*[1980]*/
1985 
1986         /**
1987          * @stable ICU 3.4
1988          */
1989         public static final UnicodeBlock OLD_PERSIAN =
1990                 new UnicodeBlock("OLD_PERSIAN", OLD_PERSIAN_ID); /*[103A0]*/
1991 
1992         /**
1993          * @stable ICU 3.4
1994          */
1995         public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT =
1996                 new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT",
1997                         PHONETIC_EXTENSIONS_SUPPLEMENT_ID); /*[1D80]*/
1998 
1999         /**
2000          * @stable ICU 3.4
2001          */
2002         public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION =
2003                 new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION", SUPPLEMENTAL_PUNCTUATION_ID); /*[2E00]*/
2004 
2005         /**
2006          * @stable ICU 3.4
2007          */
2008         public static final UnicodeBlock SYLOTI_NAGRI =
2009                 new UnicodeBlock("SYLOTI_NAGRI", SYLOTI_NAGRI_ID); /*[A800]*/
2010 
2011         /**
2012          * @stable ICU 3.4
2013          */
2014         public static final UnicodeBlock TIFINAGH =
2015                 new UnicodeBlock("TIFINAGH", TIFINAGH_ID); /*[2D30]*/
2016 
2017         /**
2018          * @stable ICU 3.4
2019          */
2020         public static final UnicodeBlock VERTICAL_FORMS =
2021                 new UnicodeBlock("VERTICAL_FORMS", VERTICAL_FORMS_ID); /*[FE10]*/
2022 
2023         /**
2024          * @stable ICU 3.6
2025          */
2026         public static final UnicodeBlock NKO = new UnicodeBlock("NKO", NKO_ID); /*[07C0]*/
2027         /**
2028          * @stable ICU 3.6
2029          */
2030         public static final UnicodeBlock BALINESE =
2031                 new UnicodeBlock("BALINESE", BALINESE_ID); /*[1B00]*/
2032         /**
2033          * @stable ICU 3.6
2034          */
2035         public static final UnicodeBlock LATIN_EXTENDED_C =
2036                 new UnicodeBlock("LATIN_EXTENDED_C", LATIN_EXTENDED_C_ID); /*[2C60]*/
2037         /**
2038          * @stable ICU 3.6
2039          */
2040         public static final UnicodeBlock LATIN_EXTENDED_D =
2041                 new UnicodeBlock("LATIN_EXTENDED_D", LATIN_EXTENDED_D_ID); /*[A720]*/
2042         /**
2043          * @stable ICU 3.6
2044          */
2045         public static final UnicodeBlock PHAGS_PA =
2046                 new UnicodeBlock("PHAGS_PA", PHAGS_PA_ID); /*[A840]*/
2047         /**
2048          * @stable ICU 3.6
2049          */
2050         public static final UnicodeBlock PHOENICIAN =
2051                 new UnicodeBlock("PHOENICIAN", PHOENICIAN_ID); /*[10900]*/
2052         /**
2053          * @stable ICU 3.6
2054          */
2055         public static final UnicodeBlock CUNEIFORM =
2056                 new UnicodeBlock("CUNEIFORM", CUNEIFORM_ID); /*[12000]*/
2057         /**
2058          * @stable ICU 3.6
2059          */
2060         public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION =
2061                 new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION",
2062                         CUNEIFORM_NUMBERS_AND_PUNCTUATION_ID); /*[12400]*/
2063         /**
2064          * @stable ICU 3.6
2065          */
2066         public static final UnicodeBlock COUNTING_ROD_NUMERALS =
2067                 new UnicodeBlock("COUNTING_ROD_NUMERALS", COUNTING_ROD_NUMERALS_ID); /*[1D360]*/
2068 
2069         /**
2070          * @stable ICU 4.0
2071          */
2072         public static final UnicodeBlock SUNDANESE =
2073                 new UnicodeBlock("SUNDANESE", SUNDANESE_ID); /* [1B80] */
2074 
2075         /**
2076          * @stable ICU 4.0
2077          */
2078         public static final UnicodeBlock LEPCHA =
2079                 new UnicodeBlock("LEPCHA", LEPCHA_ID); /* [1C00] */
2080 
2081         /**
2082          * @stable ICU 4.0
2083          */
2084         public static final UnicodeBlock OL_CHIKI =
2085                 new UnicodeBlock("OL_CHIKI", OL_CHIKI_ID); /* [1C50] */
2086 
2087         /**
2088          * @stable ICU 4.0
2089          */
2090         public static final UnicodeBlock CYRILLIC_EXTENDED_A =
2091                 new UnicodeBlock("CYRILLIC_EXTENDED_A", CYRILLIC_EXTENDED_A_ID); /* [2DE0] */
2092 
2093         /**
2094          * @stable ICU 4.0
2095          */
2096         public static final UnicodeBlock VAI = new UnicodeBlock("VAI", VAI_ID); /* [A500] */
2097 
2098         /**
2099          * @stable ICU 4.0
2100          */
2101         public static final UnicodeBlock CYRILLIC_EXTENDED_B =
2102                 new UnicodeBlock("CYRILLIC_EXTENDED_B", CYRILLIC_EXTENDED_B_ID); /* [A640] */
2103 
2104         /**
2105          * @stable ICU 4.0
2106          */
2107         public static final UnicodeBlock SAURASHTRA =
2108                 new UnicodeBlock("SAURASHTRA", SAURASHTRA_ID); /* [A880] */
2109 
2110         /**
2111          * @stable ICU 4.0
2112          */
2113         public static final UnicodeBlock KAYAH_LI =
2114                 new UnicodeBlock("KAYAH_LI", KAYAH_LI_ID); /* [A900] */
2115 
2116         /**
2117          * @stable ICU 4.0
2118          */
2119         public static final UnicodeBlock REJANG =
2120                 new UnicodeBlock("REJANG", REJANG_ID); /* [A930] */
2121 
2122         /**
2123          * @stable ICU 4.0
2124          */
2125         public static final UnicodeBlock CHAM =
2126                 new UnicodeBlock("CHAM", CHAM_ID); /* [AA00] */
2127 
2128         /**
2129          * @stable ICU 4.0
2130          */
2131         public static final UnicodeBlock ANCIENT_SYMBOLS =
2132                 new UnicodeBlock("ANCIENT_SYMBOLS", ANCIENT_SYMBOLS_ID); /* [10190] */
2133 
2134         /**
2135          * @stable ICU 4.0
2136          */
2137         public static final UnicodeBlock PHAISTOS_DISC =
2138                 new UnicodeBlock("PHAISTOS_DISC", PHAISTOS_DISC_ID); /* [101D0] */
2139 
2140         /**
2141          * @stable ICU 4.0
2142          */
2143         public static final UnicodeBlock LYCIAN =
2144                 new UnicodeBlock("LYCIAN", LYCIAN_ID); /* [10280] */
2145 
2146         /**
2147          * @stable ICU 4.0
2148          */
2149         public static final UnicodeBlock CARIAN =
2150                 new UnicodeBlock("CARIAN", CARIAN_ID); /* [102A0] */
2151 
2152         /**
2153          * @stable ICU 4.0
2154          */
2155         public static final UnicodeBlock LYDIAN =
2156                 new UnicodeBlock("LYDIAN", LYDIAN_ID); /* [10920] */
2157 
2158         /**
2159          * @stable ICU 4.0
2160          */
2161         public static final UnicodeBlock MAHJONG_TILES =
2162                 new UnicodeBlock("MAHJONG_TILES", MAHJONG_TILES_ID); /* [1F000] */
2163 
2164         /**
2165          * @stable ICU 4.0
2166          */
2167         public static final UnicodeBlock DOMINO_TILES =
2168                 new UnicodeBlock("DOMINO_TILES", DOMINO_TILES_ID); /* [1F030] */
2169 
2170         /* New blocks in Unicode 5.2 */
2171 
2172         /** @stable ICU 4.4 */
2173         public static final UnicodeBlock SAMARITAN =
2174                 new UnicodeBlock("SAMARITAN", SAMARITAN_ID); /*[0800]*/
2175         /** @stable ICU 4.4 */
2176         public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED =
2177                 new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED",
2178                         UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_ID); /*[18B0]*/
2179         /** @stable ICU 4.4 */
2180         public static final UnicodeBlock TAI_THAM =
2181                 new UnicodeBlock("TAI_THAM", TAI_THAM_ID); /*[1A20]*/
2182         /** @stable ICU 4.4 */
2183         public static final UnicodeBlock VEDIC_EXTENSIONS =
2184                 new UnicodeBlock("VEDIC_EXTENSIONS", VEDIC_EXTENSIONS_ID); /*[1CD0]*/
2185         /** @stable ICU 4.4 */
2186         public static final UnicodeBlock LISU =
2187                 new UnicodeBlock("LISU", LISU_ID); /*[A4D0]*/
2188         /** @stable ICU 4.4 */
2189         public static final UnicodeBlock BAMUM =
2190                 new UnicodeBlock("BAMUM", BAMUM_ID); /*[A6A0]*/
2191         /** @stable ICU 4.4 */
2192         public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS =
2193                 new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS", COMMON_INDIC_NUMBER_FORMS_ID); /*[A830]*/
2194         /** @stable ICU 4.4 */
2195         public static final UnicodeBlock DEVANAGARI_EXTENDED =
2196                 new UnicodeBlock("DEVANAGARI_EXTENDED", DEVANAGARI_EXTENDED_ID); /*[A8E0]*/
2197         /** @stable ICU 4.4 */
2198         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A =
2199                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_A", HANGUL_JAMO_EXTENDED_A_ID); /*[A960]*/
2200         /** @stable ICU 4.4 */
2201         public static final UnicodeBlock JAVANESE =
2202                 new UnicodeBlock("JAVANESE", JAVANESE_ID); /*[A980]*/
2203         /** @stable ICU 4.4 */
2204         public static final UnicodeBlock MYANMAR_EXTENDED_A =
2205                 new UnicodeBlock("MYANMAR_EXTENDED_A", MYANMAR_EXTENDED_A_ID); /*[AA60]*/
2206         /** @stable ICU 4.4 */
2207         public static final UnicodeBlock TAI_VIET =
2208                 new UnicodeBlock("TAI_VIET", TAI_VIET_ID); /*[AA80]*/
2209         /** @stable ICU 4.4 */
2210         public static final UnicodeBlock MEETEI_MAYEK =
2211                 new UnicodeBlock("MEETEI_MAYEK", MEETEI_MAYEK_ID); /*[ABC0]*/
2212         /** @stable ICU 4.4 */
2213         public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B =
2214                 new UnicodeBlock("HANGUL_JAMO_EXTENDED_B", HANGUL_JAMO_EXTENDED_B_ID); /*[D7B0]*/
2215         /** @stable ICU 4.4 */
2216         public static final UnicodeBlock IMPERIAL_ARAMAIC =
2217                 new UnicodeBlock("IMPERIAL_ARAMAIC", IMPERIAL_ARAMAIC_ID); /*[10840]*/
2218         /** @stable ICU 4.4 */
2219         public static final UnicodeBlock OLD_SOUTH_ARABIAN =
2220                 new UnicodeBlock("OLD_SOUTH_ARABIAN", OLD_SOUTH_ARABIAN_ID); /*[10A60]*/
2221         /** @stable ICU 4.4 */
2222         public static final UnicodeBlock AVESTAN =
2223                 new UnicodeBlock("AVESTAN", AVESTAN_ID); /*[10B00]*/
2224         /** @stable ICU 4.4 */
2225         public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN =
2226                 new UnicodeBlock("INSCRIPTIONAL_PARTHIAN", INSCRIPTIONAL_PARTHIAN_ID); /*[10B40]*/
2227         /** @stable ICU 4.4 */
2228         public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI =
2229                 new UnicodeBlock("INSCRIPTIONAL_PAHLAVI", INSCRIPTIONAL_PAHLAVI_ID); /*[10B60]*/
2230         /** @stable ICU 4.4 */
2231         public static final UnicodeBlock OLD_TURKIC =
2232                 new UnicodeBlock("OLD_TURKIC", OLD_TURKIC_ID); /*[10C00]*/
2233         /** @stable ICU 4.4 */
2234         public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS =
2235                 new UnicodeBlock("RUMI_NUMERAL_SYMBOLS", RUMI_NUMERAL_SYMBOLS_ID); /*[10E60]*/
2236         /** @stable ICU 4.4 */
2237         public static final UnicodeBlock KAITHI =
2238                 new UnicodeBlock("KAITHI", KAITHI_ID); /*[11080]*/
2239         /** @stable ICU 4.4 */
2240         public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS =
2241                 new UnicodeBlock("EGYPTIAN_HIEROGLYPHS", EGYPTIAN_HIEROGLYPHS_ID); /*[13000]*/
2242         /** @stable ICU 4.4 */
2243         public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT =
2244                 new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT",
2245                         ENCLOSED_ALPHANUMERIC_SUPPLEMENT_ID); /*[1F100]*/
2246         /** @stable ICU 4.4 */
2247         public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT =
2248                 new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT",
2249                         ENCLOSED_IDEOGRAPHIC_SUPPLEMENT_ID); /*[1F200]*/
2250         /** @stable ICU 4.4 */
2251         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C =
2252                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C",
2253                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C_ID); /*[2A700]*/
2254 
2255         /* New blocks in Unicode 6.0 */
2256 
2257         /** @stable ICU 4.6 */
2258         public static final UnicodeBlock MANDAIC =
2259                 new UnicodeBlock("MANDAIC", MANDAIC_ID); /*[0840]*/
2260         /** @stable ICU 4.6 */
2261         public static final UnicodeBlock BATAK =
2262                 new UnicodeBlock("BATAK", BATAK_ID); /*[1BC0]*/
2263         /** @stable ICU 4.6 */
2264         public static final UnicodeBlock ETHIOPIC_EXTENDED_A =
2265                 new UnicodeBlock("ETHIOPIC_EXTENDED_A", ETHIOPIC_EXTENDED_A_ID); /*[AB00]*/
2266         /** @stable ICU 4.6 */
2267         public static final UnicodeBlock BRAHMI =
2268                 new UnicodeBlock("BRAHMI", BRAHMI_ID); /*[11000]*/
2269         /** @stable ICU 4.6 */
2270         public static final UnicodeBlock BAMUM_SUPPLEMENT =
2271                 new UnicodeBlock("BAMUM_SUPPLEMENT", BAMUM_SUPPLEMENT_ID); /*[16800]*/
2272         /** @stable ICU 4.6 */
2273         public static final UnicodeBlock KANA_SUPPLEMENT =
2274                 new UnicodeBlock("KANA_SUPPLEMENT", KANA_SUPPLEMENT_ID); /*[1B000]*/
2275         /** @stable ICU 4.6 */
2276         public static final UnicodeBlock PLAYING_CARDS =
2277                 new UnicodeBlock("PLAYING_CARDS", PLAYING_CARDS_ID); /*[1F0A0]*/
2278         /** @stable ICU 4.6 */
2279         public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS =
2280                 new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS",
2281                         MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F300]*/
2282         /** @stable ICU 4.6 */
2283         public static final UnicodeBlock EMOTICONS =
2284                 new UnicodeBlock("EMOTICONS", EMOTICONS_ID); /*[1F600]*/
2285         /** @stable ICU 4.6 */
2286         public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS =
2287                 new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS", TRANSPORT_AND_MAP_SYMBOLS_ID); /*[1F680]*/
2288         /** @stable ICU 4.6 */
2289         public static final UnicodeBlock ALCHEMICAL_SYMBOLS =
2290                 new UnicodeBlock("ALCHEMICAL_SYMBOLS", ALCHEMICAL_SYMBOLS_ID); /*[1F700]*/
2291         /** @stable ICU 4.6 */
2292         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D =
2293                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D",
2294                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D_ID); /*[2B740]*/
2295 
2296         /* New blocks in Unicode 6.1 */
2297 
2298         /** @stable ICU 49 */
2299         public static final UnicodeBlock ARABIC_EXTENDED_A =
2300                 new UnicodeBlock("ARABIC_EXTENDED_A", ARABIC_EXTENDED_A_ID); /*[08A0]*/
2301         /** @stable ICU 49 */
2302         public static final UnicodeBlock ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS =
2303                 new UnicodeBlock("ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS", ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS_ID); /*[1EE00]*/
2304         /** @stable ICU 49 */
2305         public static final UnicodeBlock CHAKMA = new UnicodeBlock("CHAKMA", CHAKMA_ID); /*[11100]*/
2306         /** @stable ICU 49 */
2307         public static final UnicodeBlock MEETEI_MAYEK_EXTENSIONS =
2308                 new UnicodeBlock("MEETEI_MAYEK_EXTENSIONS", MEETEI_MAYEK_EXTENSIONS_ID); /*[AAE0]*/
2309         /** @stable ICU 49 */
2310         public static final UnicodeBlock MEROITIC_CURSIVE =
2311                 new UnicodeBlock("MEROITIC_CURSIVE", MEROITIC_CURSIVE_ID); /*[109A0]*/
2312         /** @stable ICU 49 */
2313         public static final UnicodeBlock MEROITIC_HIEROGLYPHS =
2314                 new UnicodeBlock("MEROITIC_HIEROGLYPHS", MEROITIC_HIEROGLYPHS_ID); /*[10980]*/
2315         /** @stable ICU 49 */
2316         public static final UnicodeBlock MIAO = new UnicodeBlock("MIAO", MIAO_ID); /*[16F00]*/
2317         /** @stable ICU 49 */
2318         public static final UnicodeBlock SHARADA = new UnicodeBlock("SHARADA", SHARADA_ID); /*[11180]*/
2319         /** @stable ICU 49 */
2320         public static final UnicodeBlock SORA_SOMPENG =
2321                 new UnicodeBlock("SORA_SOMPENG", SORA_SOMPENG_ID); /*[110D0]*/
2322         /** @stable ICU 49 */
2323         public static final UnicodeBlock SUNDANESE_SUPPLEMENT =
2324                 new UnicodeBlock("SUNDANESE_SUPPLEMENT", SUNDANESE_SUPPLEMENT_ID); /*[1CC0]*/
2325         /** @stable ICU 49 */
2326         public static final UnicodeBlock TAKRI = new UnicodeBlock("TAKRI", TAKRI_ID); /*[11680]*/
2327 
2328         /* New blocks in Unicode 7.0 */
2329 
2330         /** @stable ICU 54 */
2331         public static final UnicodeBlock BASSA_VAH = new UnicodeBlock("BASSA_VAH", BASSA_VAH_ID); /*[16AD0]*/
2332         /** @stable ICU 54 */
2333         public static final UnicodeBlock CAUCASIAN_ALBANIAN =
2334                 new UnicodeBlock("CAUCASIAN_ALBANIAN", CAUCASIAN_ALBANIAN_ID); /*[10530]*/
2335         /** @stable ICU 54 */
2336         public static final UnicodeBlock COPTIC_EPACT_NUMBERS =
2337                 new UnicodeBlock("COPTIC_EPACT_NUMBERS", COPTIC_EPACT_NUMBERS_ID); /*[102E0]*/
2338         /** @stable ICU 54 */
2339         public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_EXTENDED =
2340                 new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_EXTENDED", COMBINING_DIACRITICAL_MARKS_EXTENDED_ID); /*[1AB0]*/
2341         /** @stable ICU 54 */
2342         public static final UnicodeBlock DUPLOYAN = new UnicodeBlock("DUPLOYAN", DUPLOYAN_ID); /*[1BC00]*/
2343         /** @stable ICU 54 */
2344         public static final UnicodeBlock ELBASAN = new UnicodeBlock("ELBASAN", ELBASAN_ID); /*[10500]*/
2345         /** @stable ICU 54 */
2346         public static final UnicodeBlock GEOMETRIC_SHAPES_EXTENDED =
2347                 new UnicodeBlock("GEOMETRIC_SHAPES_EXTENDED", GEOMETRIC_SHAPES_EXTENDED_ID); /*[1F780]*/
2348         /** @stable ICU 54 */
2349         public static final UnicodeBlock GRANTHA = new UnicodeBlock("GRANTHA", GRANTHA_ID); /*[11300]*/
2350         /** @stable ICU 54 */
2351         public static final UnicodeBlock KHOJKI = new UnicodeBlock("KHOJKI", KHOJKI_ID); /*[11200]*/
2352         /** @stable ICU 54 */
2353         public static final UnicodeBlock KHUDAWADI = new UnicodeBlock("KHUDAWADI", KHUDAWADI_ID); /*[112B0]*/
2354         /** @stable ICU 54 */
2355         public static final UnicodeBlock LATIN_EXTENDED_E =
2356                 new UnicodeBlock("LATIN_EXTENDED_E", LATIN_EXTENDED_E_ID); /*[AB30]*/
2357         /** @stable ICU 54 */
2358         public static final UnicodeBlock LINEAR_A = new UnicodeBlock("LINEAR_A", LINEAR_A_ID); /*[10600]*/
2359         /** @stable ICU 54 */
2360         public static final UnicodeBlock MAHAJANI = new UnicodeBlock("MAHAJANI", MAHAJANI_ID); /*[11150]*/
2361         /** @stable ICU 54 */
2362         public static final UnicodeBlock MANICHAEAN = new UnicodeBlock("MANICHAEAN", MANICHAEAN_ID); /*[10AC0]*/
2363         /** @stable ICU 54 */
2364         public static final UnicodeBlock MENDE_KIKAKUI =
2365                 new UnicodeBlock("MENDE_KIKAKUI", MENDE_KIKAKUI_ID); /*[1E800]*/
2366         /** @stable ICU 54 */
2367         public static final UnicodeBlock MODI = new UnicodeBlock("MODI", MODI_ID); /*[11600]*/
2368         /** @stable ICU 54 */
2369         public static final UnicodeBlock MRO = new UnicodeBlock("MRO", MRO_ID); /*[16A40]*/
2370         /** @stable ICU 54 */
2371         public static final UnicodeBlock MYANMAR_EXTENDED_B =
2372                 new UnicodeBlock("MYANMAR_EXTENDED_B", MYANMAR_EXTENDED_B_ID); /*[A9E0]*/
2373         /** @stable ICU 54 */
2374         public static final UnicodeBlock NABATAEAN = new UnicodeBlock("NABATAEAN", NABATAEAN_ID); /*[10880]*/
2375         /** @stable ICU 54 */
2376         public static final UnicodeBlock OLD_NORTH_ARABIAN =
2377                 new UnicodeBlock("OLD_NORTH_ARABIAN", OLD_NORTH_ARABIAN_ID); /*[10A80]*/
2378         /** @stable ICU 54 */
2379         public static final UnicodeBlock OLD_PERMIC = new UnicodeBlock("OLD_PERMIC", OLD_PERMIC_ID); /*[10350]*/
2380         /** @stable ICU 54 */
2381         public static final UnicodeBlock ORNAMENTAL_DINGBATS =
2382                 new UnicodeBlock("ORNAMENTAL_DINGBATS", ORNAMENTAL_DINGBATS_ID); /*[1F650]*/
2383         /** @stable ICU 54 */
2384         public static final UnicodeBlock PAHAWH_HMONG = new UnicodeBlock("PAHAWH_HMONG", PAHAWH_HMONG_ID); /*[16B00]*/
2385         /** @stable ICU 54 */
2386         public static final UnicodeBlock PALMYRENE = new UnicodeBlock("PALMYRENE", PALMYRENE_ID); /*[10860]*/
2387         /** @stable ICU 54 */
2388         public static final UnicodeBlock PAU_CIN_HAU = new UnicodeBlock("PAU_CIN_HAU", PAU_CIN_HAU_ID); /*[11AC0]*/
2389         /** @stable ICU 54 */
2390         public static final UnicodeBlock PSALTER_PAHLAVI =
2391                 new UnicodeBlock("PSALTER_PAHLAVI", PSALTER_PAHLAVI_ID); /*[10B80]*/
2392         /** @stable ICU 54 */
2393         public static final UnicodeBlock SHORTHAND_FORMAT_CONTROLS =
2394                 new UnicodeBlock("SHORTHAND_FORMAT_CONTROLS", SHORTHAND_FORMAT_CONTROLS_ID); /*[1BCA0]*/
2395         /** @stable ICU 54 */
2396         public static final UnicodeBlock SIDDHAM = new UnicodeBlock("SIDDHAM", SIDDHAM_ID); /*[11580]*/
2397         /** @stable ICU 54 */
2398         public static final UnicodeBlock SINHALA_ARCHAIC_NUMBERS =
2399                 new UnicodeBlock("SINHALA_ARCHAIC_NUMBERS", SINHALA_ARCHAIC_NUMBERS_ID); /*[111E0]*/
2400         /** @stable ICU 54 */
2401         public static final UnicodeBlock SUPPLEMENTAL_ARROWS_C =
2402                 new UnicodeBlock("SUPPLEMENTAL_ARROWS_C", SUPPLEMENTAL_ARROWS_C_ID); /*[1F800]*/
2403         /** @stable ICU 54 */
2404         public static final UnicodeBlock TIRHUTA = new UnicodeBlock("TIRHUTA", TIRHUTA_ID); /*[11480]*/
2405         /** @stable ICU 54 */
2406         public static final UnicodeBlock WARANG_CITI = new UnicodeBlock("WARANG_CITI", WARANG_CITI_ID); /*[118A0]*/
2407 
2408         /* New blocks in Unicode 8.0 */
2409 
2410         /** @stable ICU 56 */
2411         public static final UnicodeBlock AHOM = new UnicodeBlock("AHOM", AHOM_ID); /*[11700]*/
2412         /** @stable ICU 56 */
2413         public static final UnicodeBlock ANATOLIAN_HIEROGLYPHS =
2414                 new UnicodeBlock("ANATOLIAN_HIEROGLYPHS", ANATOLIAN_HIEROGLYPHS_ID); /*[14400]*/
2415         /** @stable ICU 56 */
2416         public static final UnicodeBlock CHEROKEE_SUPPLEMENT =
2417                 new UnicodeBlock("CHEROKEE_SUPPLEMENT", CHEROKEE_SUPPLEMENT_ID); /*[AB70]*/
2418         /** @stable ICU 56 */
2419         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E =
2420                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E",
2421                         CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E_ID); /*[2B820]*/
2422         /** @stable ICU 56 */
2423         public static final UnicodeBlock EARLY_DYNASTIC_CUNEIFORM =
2424                 new UnicodeBlock("EARLY_DYNASTIC_CUNEIFORM", EARLY_DYNASTIC_CUNEIFORM_ID); /*[12480]*/
2425         /** @stable ICU 56 */
2426         public static final UnicodeBlock HATRAN = new UnicodeBlock("HATRAN", HATRAN_ID); /*[108E0]*/
2427         /** @stable ICU 56 */
2428         public static final UnicodeBlock MULTANI = new UnicodeBlock("MULTANI", MULTANI_ID); /*[11280]*/
2429         /** @stable ICU 56 */
2430         public static final UnicodeBlock OLD_HUNGARIAN =
2431                 new UnicodeBlock("OLD_HUNGARIAN", OLD_HUNGARIAN_ID); /*[10C80]*/
2432         /** @stable ICU 56 */
2433         public static final UnicodeBlock SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS =
2434                 new UnicodeBlock("SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS",
2435                         SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS_ID); /*[1F900]*/
2436         /** @stable ICU 56 */
2437         public static final UnicodeBlock SUTTON_SIGNWRITING =
2438                 new UnicodeBlock("SUTTON_SIGNWRITING", SUTTON_SIGNWRITING_ID); /*[1D800]*/
2439 
2440         /* New blocks in Unicode 9.0 */
2441 
2442         /** @stable ICU 58 */
2443         public static final UnicodeBlock ADLAM = new UnicodeBlock("ADLAM", ADLAM_ID); /*[1E900]*/
2444         /** @stable ICU 58 */
2445         public static final UnicodeBlock BHAIKSUKI = new UnicodeBlock("BHAIKSUKI", BHAIKSUKI_ID); /*[11C00]*/
2446         /** @stable ICU 58 */
2447         public static final UnicodeBlock CYRILLIC_EXTENDED_C =
2448                 new UnicodeBlock("CYRILLIC_EXTENDED_C", CYRILLIC_EXTENDED_C_ID); /*[1C80]*/
2449         /** @stable ICU 58 */
2450         public static final UnicodeBlock GLAGOLITIC_SUPPLEMENT =
2451                 new UnicodeBlock("GLAGOLITIC_SUPPLEMENT", GLAGOLITIC_SUPPLEMENT_ID); /*[1E000]*/
2452         /** @stable ICU 58 */
2453         public static final UnicodeBlock IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION =
2454                 new UnicodeBlock("IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION", IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION_ID); /*[16FE0]*/
2455         /** @stable ICU 58 */
2456         public static final UnicodeBlock MARCHEN = new UnicodeBlock("MARCHEN", MARCHEN_ID); /*[11C70]*/
2457         /** @stable ICU 58 */
2458         public static final UnicodeBlock MONGOLIAN_SUPPLEMENT =
2459                 new UnicodeBlock("MONGOLIAN_SUPPLEMENT", MONGOLIAN_SUPPLEMENT_ID); /*[11660]*/
2460         /** @stable ICU 58 */
2461         public static final UnicodeBlock NEWA = new UnicodeBlock("NEWA", NEWA_ID); /*[11400]*/
2462         /** @stable ICU 58 */
2463         public static final UnicodeBlock OSAGE = new UnicodeBlock("OSAGE", OSAGE_ID); /*[104B0]*/
2464         /** @stable ICU 58 */
2465         public static final UnicodeBlock TANGUT = new UnicodeBlock("TANGUT", TANGUT_ID); /*[17000]*/
2466         /** @stable ICU 58 */
2467         public static final UnicodeBlock TANGUT_COMPONENTS =
2468                 new UnicodeBlock("TANGUT_COMPONENTS", TANGUT_COMPONENTS_ID); /*[18800]*/
2469 
2470         // New blocks in Unicode 10.0
2471 
2472         /** @stable ICU 60 */
2473         public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F =
2474                 new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F", CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F_ID); /*[2CEB0]*/
2475         /** @stable ICU 60 */
2476         public static final UnicodeBlock KANA_EXTENDED_A =
2477                 new UnicodeBlock("KANA_EXTENDED_A", KANA_EXTENDED_A_ID); /*[1B100]*/
2478         /** @stable ICU 60 */
2479         public static final UnicodeBlock MASARAM_GONDI =
2480                 new UnicodeBlock("MASARAM_GONDI", MASARAM_GONDI_ID); /*[11D00]*/
2481         /** @stable ICU 60 */
2482         public static final UnicodeBlock NUSHU = new UnicodeBlock("NUSHU", NUSHU_ID); /*[1B170]*/
2483         /** @stable ICU 60 */
2484         public static final UnicodeBlock SOYOMBO = new UnicodeBlock("SOYOMBO", SOYOMBO_ID); /*[11A50]*/
2485         /** @stable ICU 60 */
2486         public static final UnicodeBlock SYRIAC_SUPPLEMENT =
2487                 new UnicodeBlock("SYRIAC_SUPPLEMENT", SYRIAC_SUPPLEMENT_ID); /*[0860]*/
2488         /** @stable ICU 60 */
2489         public static final UnicodeBlock ZANABAZAR_SQUARE =
2490                 new UnicodeBlock("ZANABAZAR_SQUARE", ZANABAZAR_SQUARE_ID); /*[11A00]*/
2491 
2492         // New blocks in Unicode 11.0
2493 
2494         /** @stable ICU 62 */
2495         public static final UnicodeBlock CHESS_SYMBOLS =
2496                 new UnicodeBlock("CHESS_SYMBOLS", CHESS_SYMBOLS_ID); /*[1FA00]*/
2497         /** @stable ICU 62 */
2498         public static final UnicodeBlock DOGRA = new UnicodeBlock("DOGRA", DOGRA_ID); /*[11800]*/
2499         /** @stable ICU 62 */
2500         public static final UnicodeBlock GEORGIAN_EXTENDED =
2501                 new UnicodeBlock("GEORGIAN_EXTENDED", GEORGIAN_EXTENDED_ID); /*[1C90]*/
2502         /** @stable ICU 62 */
2503         public static final UnicodeBlock GUNJALA_GONDI =
2504                 new UnicodeBlock("GUNJALA_GONDI", GUNJALA_GONDI_ID); /*[11D60]*/
2505         /** @stable ICU 62 */
2506         public static final UnicodeBlock HANIFI_ROHINGYA =
2507                 new UnicodeBlock("HANIFI_ROHINGYA", HANIFI_ROHINGYA_ID); /*[10D00]*/
2508         /** @stable ICU 62 */
2509         public static final UnicodeBlock INDIC_SIYAQ_NUMBERS =
2510                 new UnicodeBlock("INDIC_SIYAQ_NUMBERS", INDIC_SIYAQ_NUMBERS_ID); /*[1EC70]*/
2511         /** @stable ICU 62 */
2512         public static final UnicodeBlock MAKASAR = new UnicodeBlock("MAKASAR", MAKASAR_ID); /*[11EE0]*/
2513         /** @stable ICU 62 */
2514         public static final UnicodeBlock MAYAN_NUMERALS =
2515                 new UnicodeBlock("MAYAN_NUMERALS", MAYAN_NUMERALS_ID); /*[1D2E0]*/
2516         /** @stable ICU 62 */
2517         public static final UnicodeBlock MEDEFAIDRIN =
2518                 new UnicodeBlock("MEDEFAIDRIN", MEDEFAIDRIN_ID); /*[16E40]*/
2519         /** @stable ICU 62 */
2520         public static final UnicodeBlock OLD_SOGDIAN =
2521                 new UnicodeBlock("OLD_SOGDIAN", OLD_SOGDIAN_ID); /*[10F00]*/
2522         /** @stable ICU 62 */
2523         public static final UnicodeBlock SOGDIAN = new UnicodeBlock("SOGDIAN", SOGDIAN_ID); /*[10F30]*/
2524 
2525         /**
2526          * @stable ICU 2.4
2527          */
2528         public static final UnicodeBlock INVALID_CODE
2529         = new UnicodeBlock("INVALID_CODE", INVALID_CODE_ID);
2530 
2531         static {
2532             for (int blockId = 0; blockId < COUNT; ++blockId) {
2533                 if (BLOCKS_[blockId] == null) {
2534                     throw new java.lang.IllegalStateException(
2535                             "UnicodeBlock.BLOCKS_[" + blockId + "] not initialized");
2536                 }
2537             }
2538         }
2539 
2540         // public methods --------------------------------------------------
2541 
2542         /**
         * {@icu} Returns the only instance of the UnicodeBlock with the argument ID.
         * If no such ID exists, an INVALID_CODE UnicodeBlock will be returned.
         * @param id UnicodeBlock ID
         * @return the only instance of the UnicodeBlock with the argument ID
         *         if it exists, otherwise an INVALID_CODE UnicodeBlock will be
         *         returned.
2549          * @stable ICU 2.4
2550          */
getInstance(int id)2551         public static UnicodeBlock getInstance(int id)
2552         {
2553             if (id >= 0 && id < BLOCKS_.length) {
2554                 return BLOCKS_[id];
2555             }
2556             return INVALID_CODE;
2557         }
2558 
2559         /**
2560          * Returns the Unicode allocation block that contains the code point,
2561          * or null if the code point is not a member of a defined block.
2562          * @param ch code point to be tested
2563          * @return the Unicode allocation block that contains the code point
2564          * @stable ICU 2.4
2565          */
of(int ch)2566         public static UnicodeBlock of(int ch)
2567         {
2568             if (ch > MAX_VALUE) {
2569                 return INVALID_CODE;
2570             }
2571 
2572             return UnicodeBlock.getInstance(
2573                     UCharacterProperty.INSTANCE.getIntPropertyValue(ch, UProperty.BLOCK));
2574         }
2575 
2576         /**
2577          * Alternative to the {@link java.lang.Character.UnicodeBlock#forName(String)} method.
2578          * Returns the Unicode block with the given name. {@icunote} Unlike
2579          * {@link java.lang.Character.UnicodeBlock#forName(String)}, this only matches
2580          * against the official UCD name and the Java block name
2581          * (ignoring case).
2582          * @param blockName the name of the block to match
2583          * @return the UnicodeBlock with that name
2584          * @throws IllegalArgumentException if the blockName could not be matched
2585          * @stable ICU 3.0
2586          */
forName(String blockName)2587         public static final UnicodeBlock forName(String blockName) {
2588             Map<String, UnicodeBlock> m = null;
2589             if (mref != null) {
2590                 m = mref.get();
2591             }
2592             if (m == null) {
2593                 m = new HashMap<>(BLOCKS_.length);
2594                 for (int i = 0; i < BLOCKS_.length; ++i) {
2595                     UnicodeBlock b = BLOCKS_[i];
2596                     String name = trimBlockName(
2597                             getPropertyValueName(UProperty.BLOCK, b.getID(),
2598                                     UProperty.NameChoice.LONG));
2599                     m.put(name, b);
2600                 }
2601                 mref = new SoftReference<>(m);
2602             }
2603             UnicodeBlock b = m.get(trimBlockName(blockName));
2604             if (b == null) {
2605                 throw new IllegalArgumentException();
2606             }
2607             return b;
2608         }
2609         private static SoftReference<Map<String, UnicodeBlock>> mref;
2610 
trimBlockName(String name)2611         private static String trimBlockName(String name) {
2612             String upper = name.toUpperCase(Locale.ENGLISH);
2613             StringBuilder result = new StringBuilder(upper.length());
2614             for (int i = 0; i < upper.length(); i++) {
2615                 char c = upper.charAt(i);
2616                 if (c != ' ' && c != '_' && c != '-') {
2617                     result.append(c);
2618                 }
2619             }
2620             return result.toString();
2621         }
2622 
2623         /**
         * {@icu} Returns the type ID of this Unicode block
2625          * @return integer type ID of this Unicode block
2626          * @stable ICU 2.4
2627          */
getID()2628         public int getID()
2629         {
2630             return m_id_;
2631         }
2632 
2633         // private data members ---------------------------------------------
2634 
2635         /**
2636          * Identification code for this UnicodeBlock
2637          */
2638         private int m_id_;
2639 
2640         // private constructor ----------------------------------------------
2641 
2642         /**
2643          * UnicodeBlock constructor
2644          * @param name name of this UnicodeBlock
2645          * @param id unique id of this UnicodeBlock
2646          * @exception NullPointerException if name is <code>null</code>
2647          */
UnicodeBlock(String name, int id)2648         private UnicodeBlock(String name, int id)
2649         {
2650             super(name);
2651             m_id_ = id;
2652             if (id >= 0) {
2653                 BLOCKS_[id] = this;
2654             }
2655         }
2656     }
2657 
2658     /**
2659      * East Asian Width constants.
2660      * @see UProperty#EAST_ASIAN_WIDTH
2661      * @see UCharacter#getIntPropertyValue
2662      * @stable ICU 2.4
2663      */
2664     public static interface EastAsianWidth
2665     {
2666         /**
2667          * @stable ICU 2.4
2668          */
2669         public static final int NEUTRAL = 0;
2670         /**
2671          * @stable ICU 2.4
2672          */
2673         public static final int AMBIGUOUS = 1;
2674         /**
2675          * @stable ICU 2.4
2676          */
2677         public static final int HALFWIDTH = 2;
2678         /**
2679          * @stable ICU 2.4
2680          */
2681         public static final int FULLWIDTH = 3;
2682         /**
2683          * @stable ICU 2.4
2684          */
2685         public static final int NARROW = 4;
2686         /**
2687          * @stable ICU 2.4
2688          */
2689         public static final int WIDE = 5;
2690         /**
2691          * One more than the highest normal EastAsianWidth value.
2692          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.EAST_ASIAN_WIDTH).
2693          *
2694          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2695          */
2696         @Deprecated
2697         public static final int COUNT = 6;
2698     }
2699 
2700     /**
2701      * Decomposition Type constants.
2702      * @see UProperty#DECOMPOSITION_TYPE
2703      * @stable ICU 2.4
2704      */
2705     public static interface DecompositionType
2706     {
2707         /**
2708          * @stable ICU 2.4
2709          */
2710         public static final int NONE = 0;
2711         /**
2712          * @stable ICU 2.4
2713          */
2714         public static final int CANONICAL = 1;
2715         /**
2716          * @stable ICU 2.4
2717          */
2718         public static final int COMPAT = 2;
2719         /**
2720          * @stable ICU 2.4
2721          */
2722         public static final int CIRCLE = 3;
2723         /**
2724          * @stable ICU 2.4
2725          */
2726         public static final int FINAL = 4;
2727         /**
2728          * @stable ICU 2.4
2729          */
2730         public static final int FONT = 5;
2731         /**
2732          * @stable ICU 2.4
2733          */
2734         public static final int FRACTION = 6;
2735         /**
2736          * @stable ICU 2.4
2737          */
2738         public static final int INITIAL = 7;
2739         /**
2740          * @stable ICU 2.4
2741          */
2742         public static final int ISOLATED = 8;
2743         /**
2744          * @stable ICU 2.4
2745          */
2746         public static final int MEDIAL = 9;
2747         /**
2748          * @stable ICU 2.4
2749          */
2750         public static final int NARROW = 10;
2751         /**
2752          * @stable ICU 2.4
2753          */
2754         public static final int NOBREAK = 11;
2755         /**
2756          * @stable ICU 2.4
2757          */
2758         public static final int SMALL = 12;
2759         /**
2760          * @stable ICU 2.4
2761          */
2762         public static final int SQUARE = 13;
2763         /**
2764          * @stable ICU 2.4
2765          */
2766         public static final int SUB = 14;
2767         /**
2768          * @stable ICU 2.4
2769          */
2770         public static final int SUPER = 15;
2771         /**
2772          * @stable ICU 2.4
2773          */
2774         public static final int VERTICAL = 16;
2775         /**
2776          * @stable ICU 2.4
2777          */
2778         public static final int WIDE = 17;
2779         /**
2780          * One more than the highest normal DecompositionType value.
2781          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.DECOMPOSITION_TYPE).
2782          *
2783          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2784          */
2785         @Deprecated
2786         public static final int COUNT = 18;
2787     }
2788 
2789     /**
2790      * Joining Type constants.
2791      * @see UProperty#JOINING_TYPE
2792      * @stable ICU 2.4
2793      */
2794     public static interface JoiningType
2795     {
2796         /**
2797          * @stable ICU 2.4
2798          */
2799         public static final int NON_JOINING = 0;
2800         /**
2801          * @stable ICU 2.4
2802          */
2803         public static final int JOIN_CAUSING = 1;
2804         /**
2805          * @stable ICU 2.4
2806          */
2807         public static final int DUAL_JOINING = 2;
2808         /**
2809          * @stable ICU 2.4
2810          */
2811         public static final int LEFT_JOINING = 3;
2812         /**
2813          * @stable ICU 2.4
2814          */
2815         public static final int RIGHT_JOINING = 4;
2816         /**
2817          * @stable ICU 2.4
2818          */
2819         public static final int TRANSPARENT = 5;
2820         /**
2821          * One more than the highest normal JoiningType value.
2822          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_TYPE).
2823          *
2824          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
2825          */
2826         @Deprecated
2827         public static final int COUNT = 6;
2828     }
2829 
2830     /**
2831      * Joining Group constants.
2832      * @see UProperty#JOINING_GROUP
2833      * @stable ICU 2.4
2834      */
2835     public static interface JoiningGroup
2836     {
2837         /**
2838          * @stable ICU 2.4
2839          */
2840         public static final int NO_JOINING_GROUP = 0;
2841         /**
2842          * @stable ICU 2.4
2843          */
2844         public static final int AIN = 1;
2845         /**
2846          * @stable ICU 2.4
2847          */
2848         public static final int ALAPH = 2;
2849         /**
2850          * @stable ICU 2.4
2851          */
2852         public static final int ALEF = 3;
2853         /**
2854          * @stable ICU 2.4
2855          */
2856         public static final int BEH = 4;
2857         /**
2858          * @stable ICU 2.4
2859          */
2860         public static final int BETH = 5;
2861         /**
2862          * @stable ICU 2.4
2863          */
2864         public static final int DAL = 6;
2865         /**
2866          * @stable ICU 2.4
2867          */
2868         public static final int DALATH_RISH = 7;
2869         /**
2870          * @stable ICU 2.4
2871          */
2872         public static final int E = 8;
2873         /**
2874          * @stable ICU 2.4
2875          */
2876         public static final int FEH = 9;
2877         /**
2878          * @stable ICU 2.4
2879          */
2880         public static final int FINAL_SEMKATH = 10;
2881         /**
2882          * @stable ICU 2.4
2883          */
2884         public static final int GAF = 11;
2885         /**
2886          * @stable ICU 2.4
2887          */
2888         public static final int GAMAL = 12;
2889         /**
2890          * @stable ICU 2.4
2891          */
2892         public static final int HAH = 13;
2893         /** @stable ICU 4.6 */
2894         public static final int TEH_MARBUTA_GOAL = 14;
2895         /**
2896          * @stable ICU 2.4
2897          */
2898         public static final int HAMZA_ON_HEH_GOAL = TEH_MARBUTA_GOAL;
2899         /**
2900          * @stable ICU 2.4
2901          */
2902         public static final int HE = 15;
2903         /**
2904          * @stable ICU 2.4
2905          */
2906         public static final int HEH = 16;
2907         /**
2908          * @stable ICU 2.4
2909          */
2910         public static final int HEH_GOAL = 17;
2911         /**
2912          * @stable ICU 2.4
2913          */
2914         public static final int HETH = 18;
2915         /**
2916          * @stable ICU 2.4
2917          */
2918         public static final int KAF = 19;
2919         /**
2920          * @stable ICU 2.4
2921          */
2922         public static final int KAPH = 20;
2923         /**
2924          * @stable ICU 2.4
2925          */
2926         public static final int KNOTTED_HEH = 21;
2927         /**
2928          * @stable ICU 2.4
2929          */
2930         public static final int LAM = 22;
2931         /**
2932          * @stable ICU 2.4
2933          */
2934         public static final int LAMADH = 23;
2935         /**
2936          * @stable ICU 2.4
2937          */
2938         public static final int MEEM = 24;
2939         /**
2940          * @stable ICU 2.4
2941          */
2942         public static final int MIM = 25;
2943         /**
2944          * @stable ICU 2.4
2945          */
2946         public static final int NOON = 26;
2947         /**
2948          * @stable ICU 2.4
2949          */
2950         public static final int NUN = 27;
2951         /**
2952          * @stable ICU 2.4
2953          */
2954         public static final int PE = 28;
2955         /**
2956          * @stable ICU 2.4
2957          */
2958         public static final int QAF = 29;
2959         /**
2960          * @stable ICU 2.4
2961          */
2962         public static final int QAPH = 30;
2963         /**
2964          * @stable ICU 2.4
2965          */
2966         public static final int REH = 31;
2967         /**
2968          * @stable ICU 2.4
2969          */
2970         public static final int REVERSED_PE = 32;
2971         /**
2972          * @stable ICU 2.4
2973          */
2974         public static final int SAD = 33;
2975         /**
2976          * @stable ICU 2.4
2977          */
2978         public static final int SADHE = 34;
2979         /**
2980          * @stable ICU 2.4
2981          */
2982         public static final int SEEN = 35;
2983         /**
2984          * @stable ICU 2.4
2985          */
2986         public static final int SEMKATH = 36;
2987         /**
2988          * @stable ICU 2.4
2989          */
2990         public static final int SHIN = 37;
2991         /**
2992          * @stable ICU 2.4
2993          */
2994         public static final int SWASH_KAF = 38;
2995         /**
2996          * @stable ICU 2.4
2997          */
2998         public static final int SYRIAC_WAW = 39;
2999         /**
3000          * @stable ICU 2.4
3001          */
3002         public static final int TAH = 40;
3003         /**
3004          * @stable ICU 2.4
3005          */
3006         public static final int TAW = 41;
3007         /**
3008          * @stable ICU 2.4
3009          */
3010         public static final int TEH_MARBUTA = 42;
3011         /**
3012          * @stable ICU 2.4
3013          */
3014         public static final int TETH = 43;
3015         /**
3016          * @stable ICU 2.4
3017          */
3018         public static final int WAW = 44;
3019         /**
3020          * @stable ICU 2.4
3021          */
3022         public static final int YEH = 45;
3023         /**
3024          * @stable ICU 2.4
3025          */
3026         public static final int YEH_BARREE = 46;
3027         /**
3028          * @stable ICU 2.4
3029          */
3030         public static final int YEH_WITH_TAIL = 47;
3031         /**
3032          * @stable ICU 2.4
3033          */
3034         public static final int YUDH = 48;
3035         /**
3036          * @stable ICU 2.4
3037          */
3038         public static final int YUDH_HE = 49;
3039         /**
3040          * @stable ICU 2.4
3041          */
3042         public static final int ZAIN = 50;
3043         /**
3044          * @stable ICU 2.6
3045          */
3046         public static final int FE = 51;
3047         /**
3048          * @stable ICU 2.6
3049          */
3050         public static final int KHAPH = 52;
3051         /**
3052          * @stable ICU 2.6
3053          */
3054         public static final int ZHAIN = 53;
3055         /**
3056          * @stable ICU 4.0
3057          */
3058         public static final int BURUSHASKI_YEH_BARREE = 54;
3059         /** @stable ICU 4.4 */
3060         public static final int FARSI_YEH = 55;
3061         /** @stable ICU 4.4 */
3062         public static final int NYA = 56;
3063         /** @stable ICU 49 */
3064         public static final int ROHINGYA_YEH = 57;
3065 
3066         /** @stable ICU 54 */
3067         public static final int MANICHAEAN_ALEPH = 58;
3068         /** @stable ICU 54 */
3069         public static final int MANICHAEAN_AYIN = 59;
3070         /** @stable ICU 54 */
3071         public static final int MANICHAEAN_BETH = 60;
3072         /** @stable ICU 54 */
3073         public static final int MANICHAEAN_DALETH = 61;
3074         /** @stable ICU 54 */
3075         public static final int MANICHAEAN_DHAMEDH = 62;
3076         /** @stable ICU 54 */
3077         public static final int MANICHAEAN_FIVE = 63;
3078         /** @stable ICU 54 */
3079         public static final int MANICHAEAN_GIMEL = 64;
3080         /** @stable ICU 54 */
3081         public static final int MANICHAEAN_HETH = 65;
3082         /** @stable ICU 54 */
3083         public static final int MANICHAEAN_HUNDRED = 66;
3084         /** @stable ICU 54 */
3085         public static final int MANICHAEAN_KAPH = 67;
3086         /** @stable ICU 54 */
3087         public static final int MANICHAEAN_LAMEDH = 68;
3088         /** @stable ICU 54 */
3089         public static final int MANICHAEAN_MEM = 69;
3090         /** @stable ICU 54 */
3091         public static final int MANICHAEAN_NUN = 70;
3092         /** @stable ICU 54 */
3093         public static final int MANICHAEAN_ONE = 71;
3094         /** @stable ICU 54 */
3095         public static final int MANICHAEAN_PE = 72;
3096         /** @stable ICU 54 */
3097         public static final int MANICHAEAN_QOPH = 73;
3098         /** @stable ICU 54 */
3099         public static final int MANICHAEAN_RESH = 74;
3100         /** @stable ICU 54 */
3101         public static final int MANICHAEAN_SADHE = 75;
3102         /** @stable ICU 54 */
3103         public static final int MANICHAEAN_SAMEKH = 76;
3104         /** @stable ICU 54 */
3105         public static final int MANICHAEAN_TAW = 77;
3106         /** @stable ICU 54 */
3107         public static final int MANICHAEAN_TEN = 78;
3108         /** @stable ICU 54 */
3109         public static final int MANICHAEAN_TETH = 79;
3110         /** @stable ICU 54 */
3111         public static final int MANICHAEAN_THAMEDH = 80;
3112         /** @stable ICU 54 */
3113         public static final int MANICHAEAN_TWENTY = 81;
3114         /** @stable ICU 54 */
3115         public static final int MANICHAEAN_WAW = 82;
3116         /** @stable ICU 54 */
3117         public static final int MANICHAEAN_YODH = 83;
3118         /** @stable ICU 54 */
3119         public static final int MANICHAEAN_ZAYIN = 84;
3120         /** @stable ICU 54 */
3121         public static final int STRAIGHT_WAW = 85;
3122 
3123         /** @stable ICU 58 */
3124         public static final int AFRICAN_FEH = 86;
3125         /** @stable ICU 58 */
3126         public static final int AFRICAN_NOON = 87;
3127         /** @stable ICU 58 */
3128         public static final int AFRICAN_QAF = 88;
3129 
3130         /** @stable ICU 60 */
3131         public static final int MALAYALAM_BHA = 89;
3132         /** @stable ICU 60 */
3133         public static final int MALAYALAM_JA = 90;
3134         /** @stable ICU 60 */
3135         public static final int MALAYALAM_LLA = 91;
3136         /** @stable ICU 60 */
3137         public static final int MALAYALAM_LLLA = 92;
3138         /** @stable ICU 60 */
3139         public static final int MALAYALAM_NGA = 93;
3140         /** @stable ICU 60 */
3141         public static final int MALAYALAM_NNA = 94;
3142         /** @stable ICU 60 */
3143         public static final int MALAYALAM_NNNA = 95;
3144         /** @stable ICU 60 */
3145         public static final int MALAYALAM_NYA = 96;
3146         /** @stable ICU 60 */
3147         public static final int MALAYALAM_RA = 97;
3148         /** @stable ICU 60 */
3149         public static final int MALAYALAM_SSA = 98;
3150         /** @stable ICU 60 */
3151         public static final int MALAYALAM_TTA = 99;
3152 
3153         /** @stable ICU 62 */
3154         public static final int HANIFI_ROHINGYA_KINNA_YA = 100;
3155         /** @stable ICU 62 */
3156         public static final int HANIFI_ROHINGYA_PA = 101;
3157 
3158         /**
3159          * One more than the highest normal JoiningGroup value.
3160          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.JOINING_GROUP).
3161          *
3162          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3163          */
3164         @Deprecated
3165         public static final int COUNT = 102;
3166     }
3167 
3168     /**
3169      * Grapheme Cluster Break constants.
3170      * @see UProperty#GRAPHEME_CLUSTER_BREAK
3171      * @stable ICU 3.4
3172      */
3173     public static interface GraphemeClusterBreak {
3174         /**
3175          * @stable ICU 3.4
3176          */
3177         public static final int OTHER = 0;
3178         /**
3179          * @stable ICU 3.4
3180          */
3181         public static final int CONTROL = 1;
3182         /**
3183          * @stable ICU 3.4
3184          */
3185         public static final int CR = 2;
3186         /**
3187          * @stable ICU 3.4
3188          */
3189         public static final int EXTEND = 3;
3190         /**
3191          * @stable ICU 3.4
3192          */
3193         public static final int L = 4;
3194         /**
3195          * @stable ICU 3.4
3196          */
3197         public static final int LF = 5;
3198         /**
3199          * @stable ICU 3.4
3200          */
3201         public static final int LV = 6;
3202         /**
3203          * @stable ICU 3.4
3204          */
3205         public static final int LVT = 7;
3206         /**
3207          * @stable ICU 3.4
3208          */
3209         public static final int T = 8;
3210         /**
3211          * @stable ICU 3.4
3212          */
3213         public static final int V = 9;
3214         /**
3215          * @stable ICU 4.0
3216          */
3217         public static final int SPACING_MARK = 10;
3218         /**
3219          * @stable ICU 4.0
3220          */
3221         public static final int PREPEND = 11;
3222         /** @stable ICU 50 */
3223         public static final int REGIONAL_INDICATOR = 12;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3224         /** @stable ICU 58 */
3225         public static final int E_BASE = 13;          /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3226         /** @stable ICU 58 */
3227         public static final int E_BASE_GAZ = 14;      /*[EBG]*/
3228         /** @stable ICU 58 */
3229         public static final int E_MODIFIER = 15;      /*[EM]*/
3230         /** @stable ICU 58 */
3231         public static final int GLUE_AFTER_ZWJ = 16;  /*[GAZ]*/
3232         /** @stable ICU 58 */
3233         public static final int ZWJ = 17;             /*[ZWJ]*/
3234 
3235         /**
3236          * One more than the highest normal GraphemeClusterBreak value.
3237          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.GRAPHEME_CLUSTER_BREAK).
3238          *
3239          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3240          */
3241         @Deprecated
3242         public static final int COUNT = 18;
3243     }
3244 
3245     /**
3246      * Word Break constants.
3247      * @see UProperty#WORD_BREAK
3248      * @stable ICU 3.4
3249      */
3250     public static interface WordBreak {
3251         /**
3252          * @stable ICU 3.8
3253          */
3254         public static final int OTHER = 0;
3255         /**
3256          * @stable ICU 3.8
3257          */
3258         public static final int ALETTER = 1;
3259         /**
3260          * @stable ICU 3.8
3261          */
3262         public static final int FORMAT = 2;
3263         /**
3264          * @stable ICU 3.8
3265          */
3266         public static final int KATAKANA = 3;
3267         /**
3268          * @stable ICU 3.8
3269          */
3270         public static final int MIDLETTER = 4;
3271         /**
3272          * @stable ICU 3.8
3273          */
3274         public static final int MIDNUM = 5;
3275         /**
3276          * @stable ICU 3.8
3277          */
3278         public static final int NUMERIC = 6;
3279         /**
3280          * @stable ICU 3.8
3281          */
3282         public static final int EXTENDNUMLET = 7;
3283         /**
3284          * @stable ICU 4.0
3285          */
3286         public static final int CR = 8;
3287         /**
3288          * @stable ICU 4.0
3289          */
3290         public static final int EXTEND = 9;
3291         /**
3292          * @stable ICU 4.0
3293          */
3294         public static final int LF = 10;
3295         /**
3296          * @stable ICU 4.0
3297          */
3298         public static final int MIDNUMLET = 11;
3299         /**
3300          * @stable ICU 4.0
3301          */
3302         public static final int NEWLINE = 12;
3303         /** @stable ICU 50 */
3304         public static final int REGIONAL_INDICATOR = 13;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3305         /** @stable ICU 52 */
3306         public static final int HEBREW_LETTER = 14;    /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
3307         /** @stable ICU 52 */
3308         public static final int SINGLE_QUOTE = 15;     /*[SQ]*/
3309         /** @stable ICU 52 */
3310         public static final int DOUBLE_QUOTE = 16;     /*[DQ]*/
3311         /** @stable ICU 58 */
3312         public static final int E_BASE = 17;           /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3313         /** @stable ICU 58 */
3314         public static final int E_BASE_GAZ = 18;       /*[EBG]*/
3315         /** @stable ICU 58 */
3316         public static final int E_MODIFIER = 19;       /*[EM]*/
3317         /** @stable ICU 58 */
3318         public static final int GLUE_AFTER_ZWJ = 20;   /*[GAZ]*/
3319         /** @stable ICU 58 */
3320         public static final int ZWJ = 21;              /*[ZWJ]*/
3321         /** @stable ICU 62 */
3322         public static final int WSEGSPACE = 22;        /*[WSEGSPACE]*/
3323         /**
3324          * One more than the highest normal WordBreak value.
3325          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.WORD_BREAK).
3326          *
3327          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3328          */
3329         @Deprecated
3330         public static final int COUNT = 23;
3331     }
3332 
3333     /**
3334      * Sentence Break constants.
3335      * @see UProperty#SENTENCE_BREAK
3336      * @stable ICU 3.4
3337      */
3338     public static interface SentenceBreak {
3339         /**
3340          * @stable ICU 3.8
3341          */
3342         public static final int OTHER = 0;
3343         /**
3344          * @stable ICU 3.8
3345          */
3346         public static final int ATERM = 1;
3347         /**
3348          * @stable ICU 3.8
3349          */
3350         public static final int CLOSE = 2;
3351         /**
3352          * @stable ICU 3.8
3353          */
3354         public static final int FORMAT = 3;
3355         /**
3356          * @stable ICU 3.8
3357          */
3358         public static final int LOWER = 4;
3359         /**
3360          * @stable ICU 3.8
3361          */
3362         public static final int NUMERIC = 5;
3363         /**
3364          * @stable ICU 3.8
3365          */
3366         public static final int OLETTER = 6;
3367         /**
3368          * @stable ICU 3.8
3369          */
3370         public static final int SEP = 7;
3371         /**
3372          * @stable ICU 3.8
3373          */
3374         public static final int SP = 8;
3375         /**
3376          * @stable ICU 3.8
3377          */
3378         public static final int STERM = 9;
3379         /**
3380          * @stable ICU 3.8
3381          */
3382         public static final int UPPER = 10;
3383         /**
3384          * @stable ICU 4.0
3385          */
3386         public static final int CR = 11;
3387         /**
3388          * @stable ICU 4.0
3389          */
3390         public static final int EXTEND = 12;
3391         /**
3392          * @stable ICU 4.0
3393          */
3394         public static final int LF = 13;
3395         /**
3396          * @stable ICU 4.0
3397          */
3398         public static final int SCONTINUE = 14;
3399         /**
3400          * One more than the highest normal SentenceBreak value.
3401          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SENTENCE_BREAK).
3402          *
3403          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3404          */
3405         @Deprecated
3406         public static final int COUNT = 15;
3407     }
3408 
3409     /**
3410      * Line Break constants.
3411      * @see UProperty#LINE_BREAK
3412      * @stable ICU 2.4
3413      */
3414     public static interface LineBreak
3415     {
3416         /**
3417          * @stable ICU 2.4
3418          */
3419         public static final int UNKNOWN = 0;
3420         /**
3421          * @stable ICU 2.4
3422          */
3423         public static final int AMBIGUOUS = 1;
3424         /**
3425          * @stable ICU 2.4
3426          */
3427         public static final int ALPHABETIC = 2;
3428         /**
3429          * @stable ICU 2.4
3430          */
3431         public static final int BREAK_BOTH = 3;
3432         /**
3433          * @stable ICU 2.4
3434          */
3435         public static final int BREAK_AFTER = 4;
3436         /**
3437          * @stable ICU 2.4
3438          */
3439         public static final int BREAK_BEFORE = 5;
3440         /**
3441          * @stable ICU 2.4
3442          */
3443         public static final int MANDATORY_BREAK = 6;
3444         /**
3445          * @stable ICU 2.4
3446          */
3447         public static final int CONTINGENT_BREAK = 7;
3448         /**
3449          * @stable ICU 2.4
3450          */
3451         public static final int CLOSE_PUNCTUATION = 8;
3452         /**
3453          * @stable ICU 2.4
3454          */
3455         public static final int COMBINING_MARK = 9;
3456         /**
3457          * @stable ICU 2.4
3458          */
3459         public static final int CARRIAGE_RETURN = 10;
3460         /**
3461          * @stable ICU 2.4
3462          */
3463         public static final int EXCLAMATION = 11;
3464         /**
3465          * @stable ICU 2.4
3466          */
3467         public static final int GLUE = 12;
3468         /**
3469          * @stable ICU 2.4
3470          */
3471         public static final int HYPHEN = 13;
3472         /**
3473          * @stable ICU 2.4
3474          */
3475         public static final int IDEOGRAPHIC = 14;
3476         /**
3477          * @see #INSEPARABLE
3478          * @stable ICU 2.4
3479          */
3480         public static final int INSEPERABLE = 15;
3481         /**
3482          * Renamed from the misspelled "inseperable" in Unicode 4.0.1.
3483          * @stable ICU 3.0
3484          */
3485         public static final int INSEPARABLE = 15;
3486         /**
3487          * @stable ICU 2.4
3488          */
3489         public static final int INFIX_NUMERIC = 16;
3490         /**
3491          * @stable ICU 2.4
3492          */
3493         public static final int LINE_FEED = 17;
3494         /**
3495          * @stable ICU 2.4
3496          */
3497         public static final int NONSTARTER = 18;
3498         /**
3499          * @stable ICU 2.4
3500          */
3501         public static final int NUMERIC = 19;
3502         /**
3503          * @stable ICU 2.4
3504          */
3505         public static final int OPEN_PUNCTUATION = 20;
3506         /**
3507          * @stable ICU 2.4
3508          */
3509         public static final int POSTFIX_NUMERIC = 21;
3510         /**
3511          * @stable ICU 2.4
3512          */
3513         public static final int PREFIX_NUMERIC = 22;
3514         /**
3515          * @stable ICU 2.4
3516          */
3517         public static final int QUOTATION = 23;
3518         /**
3519          * @stable ICU 2.4
3520          */
3521         public static final int COMPLEX_CONTEXT = 24;
3522         /**
3523          * @stable ICU 2.4
3524          */
3525         public static final int SURROGATE = 25;
3526         /**
3527          * @stable ICU 2.4
3528          */
3529         public static final int SPACE = 26;
3530         /**
3531          * @stable ICU 2.4
3532          */
3533         public static final int BREAK_SYMBOLS = 27;
3534         /**
3535          * @stable ICU 2.4
3536          */
3537         public static final int ZWSPACE = 28;
3538         /**
3539          * @stable ICU 2.6
3540          */
3541         public static final int NEXT_LINE = 29;  /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
3542         /**
3543          * @stable ICU 2.6
3544          */
3545         public static final int WORD_JOINER = 30;      /*[WJ]*/
3546         /**
3547          * @stable ICU 3.4
3548          */
3549         public static final int H2 = 31;  /* from here on: new in Unicode 4.1/ICU 3.4 */
3550         /**
3551          * @stable ICU 3.4
3552          */
3553         public static final int H3 = 32;
3554         /**
3555          * @stable ICU 3.4
3556          */
3557         public static final int JL = 33;
3558         /**
3559          * @stable ICU 3.4
3560          */
3561         public static final int JT = 34;
3562         /**
3563          * @stable ICU 3.4
3564          */
3565         public static final int JV = 35;
3566         /** @stable ICU 4.4 */
3567         public static final int CLOSE_PARENTHESIS = 36; /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
3568         /** @stable ICU 49 */
3569         public static final int CONDITIONAL_JAPANESE_STARTER = 37;  /*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
3570         /** @stable ICU 49 */
3571         public static final int HEBREW_LETTER = 38;  /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
3572         /** @stable ICU 50 */
3573         public static final int REGIONAL_INDICATOR = 39;  /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
3574         /** @stable ICU 58 */
3575         public static final int E_BASE = 40;  /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
3576         /** @stable ICU 58 */
3577         public static final int E_MODIFIER = 41;  /*[EM]*/
3578         /** @stable ICU 58 */
3579         public static final int ZWJ = 42;  /*[ZWJ]*/
3580         /**
3581          * One more than the highest normal LineBreak value.
3582          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.LINE_BREAK).
3583          *
3584          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3585          */
3586         @Deprecated
3587         public static final int COUNT = 43;
3588     }
3589 
3590     /**
3591      * Numeric Type constants.
3592      * @see UProperty#NUMERIC_TYPE
3593      * @stable ICU 2.4
3594      */
3595     public static interface NumericType
3596     {
3597         /**
3598          * @stable ICU 2.4
3599          */
3600         public static final int NONE = 0;
3601         /**
3602          * @stable ICU 2.4
3603          */
3604         public static final int DECIMAL = 1;
3605         /**
3606          * @stable ICU 2.4
3607          */
3608         public static final int DIGIT = 2;
3609         /**
3610          * @stable ICU 2.4
3611          */
3612         public static final int NUMERIC = 3;
3613         /**
3614          * One more than the highest normal NumericType value.
3615          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.NUMERIC_TYPE).
3616          *
3617          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3618          */
3619         @Deprecated
3620         public static final int COUNT = 4;
3621     }
3622 
3623     /**
3624      * Hangul Syllable Type constants.
3625      *
3626      * @see UProperty#HANGUL_SYLLABLE_TYPE
3627      * @stable ICU 2.6
3628      */
3629     public static interface HangulSyllableType
3630     {
3631         /**
3632          * @stable ICU 2.6
3633          */
3634         public static final int NOT_APPLICABLE      = 0;   /*[NA]*/ /*See note !!*/
3635         /**
3636          * @stable ICU 2.6
3637          */
3638         public static final int LEADING_JAMO        = 1;   /*[L]*/
3639         /**
3640          * @stable ICU 2.6
3641          */
3642         public static final int VOWEL_JAMO          = 2;   /*[V]*/
3643         /**
3644          * @stable ICU 2.6
3645          */
3646         public static final int TRAILING_JAMO       = 3;   /*[T]*/
3647         /**
3648          * @stable ICU 2.6
3649          */
3650         public static final int LV_SYLLABLE         = 4;   /*[LV]*/
3651         /**
3652          * @stable ICU 2.6
3653          */
3654         public static final int LVT_SYLLABLE        = 5;   /*[LVT]*/
3655         /**
3656          * One more than the highest normal HangulSyllableType value.
3657          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.HANGUL_SYLLABLE_TYPE).
3658          *
3659          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3660          */
3661         @Deprecated
3662         public static final int COUNT               = 6;
3663     }
3664 
3665     /**
3666      * Bidi Paired Bracket Type constants.
3667      *
3668      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
3669      * @stable ICU 52
3670      */
3671     public static interface BidiPairedBracketType {
3672         /**
3673          * Not a paired bracket.
3674          * @stable ICU 52
3675          */
3676         public static final int NONE = 0;
3677         /**
3678          * Open paired bracket.
3679          * @stable ICU 52
3680          */
3681         public static final int OPEN = 1;
3682         /**
3683          * Close paired bracket.
3684          * @stable ICU 52
3685          */
3686         public static final int CLOSE = 2;
3687         /**
3688          * One more than the highest normal BidiPairedBracketType value.
3689          * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.BIDI_PAIRED_BRACKET_TYPE).
3690          *
3691          * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
3692          */
3693         @Deprecated
3694         public static final int COUNT = 3;
3695     }
3696 
3697     /**
3698      * Indic Positional Category constants.
3699      *
3700      * @see UProperty#INDIC_POSITIONAL_CATEGORY
3701      * @stable ICU 63
3702      */
3703     public static interface IndicPositionalCategory {
3704         /** @stable ICU 63 */
3705         public static final int NA = 0;
3706         /** @stable ICU 63 */
3707         public static final int BOTTOM = 1;
3708         /** @stable ICU 63 */
3709         public static final int BOTTOM_AND_LEFT = 2;
3710         /** @stable ICU 63 */
3711         public static final int BOTTOM_AND_RIGHT = 3;
3712         /** @stable ICU 63 */
3713         public static final int LEFT = 4;
3714         /** @stable ICU 63 */
3715         public static final int LEFT_AND_RIGHT = 5;
3716         /** @stable ICU 63 */
3717         public static final int OVERSTRUCK = 6;
3718         /** @stable ICU 63 */
3719         public static final int RIGHT = 7;
3720         /** @stable ICU 63 */
3721         public static final int TOP = 8;
3722         /** @stable ICU 63 */
3723         public static final int TOP_AND_BOTTOM = 9;
3724         /** @stable ICU 63 */
3725         public static final int TOP_AND_BOTTOM_AND_RIGHT = 10;
3726         /** @stable ICU 63 */
3727         public static final int TOP_AND_LEFT = 11;
3728         /** @stable ICU 63 */
3729         public static final int TOP_AND_LEFT_AND_RIGHT = 12;
3730         /** @stable ICU 63 */
3731         public static final int TOP_AND_RIGHT = 13;
3732         /** @stable ICU 63 */
3733         public static final int VISUAL_ORDER_LEFT = 14;
3734     }
3735 
3736     /**
3737      * Indic Syllabic Category constants.
3738      *
3739      * @see UProperty#INDIC_SYLLABIC_CATEGORY
3740      * @stable ICU 63
3741      */
3742     public static interface IndicSyllabicCategory {
3743         /** @stable ICU 63 */
3744         public static final int OTHER = 0;
3745         /** @stable ICU 63 */
3746         public static final int AVAGRAHA = 1;
3747         /** @stable ICU 63 */
3748         public static final int BINDU = 2;
3749         /** @stable ICU 63 */
3750         public static final int BRAHMI_JOINING_NUMBER = 3;
3751         /** @stable ICU 63 */
3752         public static final int CANTILLATION_MARK = 4;
3753         /** @stable ICU 63 */
3754         public static final int CONSONANT = 5;
3755         /** @stable ICU 63 */
3756         public static final int CONSONANT_DEAD = 6;
3757         /** @stable ICU 63 */
3758         public static final int CONSONANT_FINAL = 7;
3759         /** @stable ICU 63 */
3760         public static final int CONSONANT_HEAD_LETTER = 8;
3761         /** @stable ICU 63 */
3762         public static final int CONSONANT_INITIAL_POSTFIXED = 9;
3763         /** @stable ICU 63 */
3764         public static final int CONSONANT_KILLER = 10;
3765         /** @stable ICU 63 */
3766         public static final int CONSONANT_MEDIAL = 11;
3767         /** @stable ICU 63 */
3768         public static final int CONSONANT_PLACEHOLDER = 12;
3769         /** @stable ICU 63 */
3770         public static final int CONSONANT_PRECEDING_REPHA = 13;
3771         /** @stable ICU 63 */
3772         public static final int CONSONANT_PREFIXED = 14;
3773         /** @stable ICU 63 */
3774         public static final int CONSONANT_SUBJOINED = 15;
3775         /** @stable ICU 63 */
3776         public static final int CONSONANT_SUCCEEDING_REPHA = 16;
3777         /** @stable ICU 63 */
3778         public static final int CONSONANT_WITH_STACKER = 17;
3779         /** @stable ICU 63 */
3780         public static final int GEMINATION_MARK = 18;
3781         /** @stable ICU 63 */
3782         public static final int INVISIBLE_STACKER = 19;
3783         /** @stable ICU 63 */
3784         public static final int JOINER = 20;
3785         /** @stable ICU 63 */
3786         public static final int MODIFYING_LETTER = 21;
3787         /** @stable ICU 63 */
3788         public static final int NON_JOINER = 22;
3789         /** @stable ICU 63 */
3790         public static final int NUKTA = 23;
3791         /** @stable ICU 63 */
3792         public static final int NUMBER = 24;
3793         /** @stable ICU 63 */
3794         public static final int NUMBER_JOINER = 25;
3795         /** @stable ICU 63 */
3796         public static final int PURE_KILLER = 26;
3797         /** @stable ICU 63 */
3798         public static final int REGISTER_SHIFTER = 27;
3799         /** @stable ICU 63 */
3800         public static final int SYLLABLE_MODIFIER = 28;
3801         /** @stable ICU 63 */
3802         public static final int TONE_LETTER = 29;
3803         /** @stable ICU 63 */
3804         public static final int TONE_MARK = 30;
3805         /** @stable ICU 63 */
3806         public static final int VIRAMA = 31;
3807         /** @stable ICU 63 */
3808         public static final int VISARGA = 32;
3809         /** @stable ICU 63 */
3810         public static final int VOWEL = 33;
3811         /** @stable ICU 63 */
3812         public static final int VOWEL_DEPENDENT = 34;
3813         /** @stable ICU 63 */
3814         public static final int VOWEL_INDEPENDENT = 35;
3815     }
3816 
3817     /**
3818      * Vertical Orientation constants.
3819      *
3820      * @see UProperty#VERTICAL_ORIENTATION
3821      * @stable ICU 63
3822      */
3823     public static interface VerticalOrientation {
3824         /** @stable ICU 63 */
3825         public static final int ROTATED = 0;
3826         /** @stable ICU 63 */
3827         public static final int TRANSFORMED_ROTATED = 1;
3828         /** @stable ICU 63 */
3829         public static final int TRANSFORMED_UPRIGHT = 2;
3830         /** @stable ICU 63 */
3831         public static final int UPRIGHT = 3;
3832     }
3833 
3834     // public data members -----------------------------------------------
3835 
3836     /**
3837      * The lowest Unicode code point value, constant 0.
3838      * Same as {@link Character#MIN_CODE_POINT}, same integer value as {@link Character#MIN_VALUE}.
3839      *
3840      * @stable ICU 2.1
3841      */
3842     public static final int MIN_VALUE = Character.MIN_CODE_POINT;
3843 
3844     /**
3845      * The highest Unicode code point value (scalar value), constant U+10FFFF (uses 21 bits).
3846      * Same as {@link Character#MAX_CODE_POINT}.
3847      *
3848      * <p>Up-to-date Unicode implementation of {@link Character#MAX_VALUE}
3849      * which is still a char with the value U+FFFF.
3850      *
3851      * @stable ICU 2.1
3852      */
3853     public static final int MAX_VALUE = Character.MAX_CODE_POINT;
3854 
3855     /**
3856      * The minimum value for Supplementary code points, constant U+10000.
3857      * Same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
3858      *
3859      * @stable ICU 2.1
3860      */
3861     public static final int SUPPLEMENTARY_MIN_VALUE = Character.MIN_SUPPLEMENTARY_CODE_POINT;
3862 
3863     /**
3864      * Unicode value used when translating into Unicode encoding form and there
3865      * is no existing character.
3866      * @stable ICU 2.1
3867      */
3868     public static final int REPLACEMENT_CHAR = '\uFFFD';
3869 
3870     /**
3871      * Special value that is returned by getUnicodeNumericValue(int) when no
3872      * numeric value is defined for a code point.
3873      * @stable ICU 2.4
3874      * @see #getUnicodeNumericValue
3875      */
3876     public static final double NO_NUMERIC_VALUE = -123456789;
3877 
3878     /**
3879      * Compatibility constant for Java Character's MIN_RADIX.
3880      * @stable ICU 3.4
3881      */
3882     public static final int MIN_RADIX = java.lang.Character.MIN_RADIX;
3883 
3884     /**
3885      * Compatibility constant for Java Character's MAX_RADIX.
3886      * @stable ICU 3.4
3887      */
3888     public static final int MAX_RADIX = java.lang.Character.MAX_RADIX;
3889 
3890     /**
3891      * Do not lowercase non-initial parts of words when titlecasing.
3892      * Option bit for titlecasing APIs that take an options bit set.
3893      *
3894      * By default, titlecasing will titlecase the first cased character
3895      * of a word and lowercase all other characters.
3896      * With this option, the other characters will not be modified.
3897      *
3898      * @see #toTitleCase
3899      * @stable ICU 3.8
3900      */
3901     public static final int TITLECASE_NO_LOWERCASE = 0x100;
3902 
3903     /**
3904      * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
3905      * titlecase exactly the characters at breaks from the iterator.
3906      * Option bit for titlecasing APIs that take an options bit set.
3907      *
3908      * By default, titlecasing will take each break iterator index,
3909      * adjust it by looking for the next cased character, and titlecase that one.
3910      * Other characters are lowercased.
3911      *
3912      * This follows Unicode 4 &amp; 5 section 3.13 Default Case Operations:
3913      *
3914      * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
3915      * #29, "Text Boundaries." Between each pair of word boundaries, find the first
3916      * cased character F. If F exists, map F to default_title(F); then map each
3917      * subsequent character C to default_lower(C).
3918      *
3919      * @see #toTitleCase
3920      * @see #TITLECASE_NO_LOWERCASE
3921      * @stable ICU 3.8
3922      */
3923     public static final int TITLECASE_NO_BREAK_ADJUSTMENT = 0x200;
3924 
3925     // public methods ----------------------------------------------------
3926 
3927     /**
3928      * Returnss the numeric value of a decimal digit code point.
3929      * <br>This method observes the semantics of
3930      * <code>java.lang.Character.digit()</code>.  Note that this
3931      * will return positive values for code points for which isDigit
3932      * returns false, just like java.lang.Character.
3933      * <br><em>Semantic Change:</em> In release 1.3.1 and
3934      * prior, this did not treat the European letters as having a
3935      * digit value, and also treated numeric letters and other numbers as
3936      * digits.
3937      * This has been changed to conform to the java semantics.
3938      * <br>A code point is a valid digit if and only if:
3939      * <ul>
3940      *   <li>ch is a decimal digit or one of the european letters, and
3941      *   <li>the value of ch is less than the specified radix.
3942      * </ul>
3943      * @param ch the code point to query
3944      * @param radix the radix
3945      * @return the numeric value represented by the code point in the
3946      * specified radix, or -1 if the code point is not a decimal digit
3947      * or if its value is too large for the radix
3948      * @stable ICU 2.1
3949      */
digit(int ch, int radix)3950     public static int digit(int ch, int radix)
3951     {
3952         if (2 <= radix && radix <= 36) {
3953             int value = digit(ch);
3954             if (value < 0) {
3955                 // ch is not a decimal digit, try latin letters
3956                 value = UCharacterProperty.getEuropeanDigit(ch);
3957             }
3958             return (value < radix) ? value : -1;
3959         } else {
3960             return -1;  // invalid radix
3961         }
3962     }
3963 
3964     /**
3965      * Returnss the numeric value of a decimal digit code point.
3966      * <br>This is a convenience overload of <code>digit(int, int)</code>
3967      * that provides a decimal radix.
3968      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this
3969      * treated numeric letters and other numbers as digits.  This has
3970      * been changed to conform to the java semantics.
3971      * @param ch the code point to query
3972      * @return the numeric value represented by the code point,
3973      * or -1 if the code point is not a decimal digit or if its
3974      * value is too large for a decimal radix
3975      * @stable ICU 2.1
3976      */
digit(int ch)3977     public static int digit(int ch)
3978     {
3979         return UCharacterProperty.INSTANCE.digit(ch);
3980     }
3981 
3982     /**
3983      * Returns the numeric value of the code point as a nonnegative
3984      * integer.
3985      * <br>If the code point does not have a numeric value, then -1 is returned.
3986      * <br>
3987      * If the code point has a numeric value that cannot be represented as a
3988      * nonnegative integer (for example, a fractional value), then -2 is
3989      * returned.
3990      * @param ch the code point to query
3991      * @return the numeric value of the code point, or -1 if it has no numeric
3992      * value, or -2 if it has a numeric value that cannot be represented as a
3993      * nonnegative integer
3994      * @stable ICU 2.1
3995      */
getNumericValue(int ch)3996     public static int getNumericValue(int ch)
3997     {
3998         return UCharacterProperty.INSTANCE.getNumericValue(ch);
3999     }
4000 
4001     /**
4002      * {@icu} Returns the numeric value for a Unicode code point as defined in the
4003      * Unicode Character Database.
4004      * <p>A "double" return type is necessary because some numeric values are
4005      * fractions, negative, or too large for int.
4006      * <p>For characters without any numeric values in the Unicode Character
4007      * Database, this function will return NO_NUMERIC_VALUE.
4008      * Note: This is different from the Unicode Standard which specifies NaN as the default value.
4009      * <p><em>API Change:</em> In release 2.2 and prior, this API has a
4010      * return type int and returns -1 when the argument ch does not have a
4011      * corresponding numeric value. This has been changed to synch with ICU4C
4012      *
4013      * This corresponds to the ICU4C function u_getNumericValue.
4014      * @param ch Code point to get the numeric value for.
4015      * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined.
4016      * @stable ICU 2.4
4017      */
getUnicodeNumericValue(int ch)4018     public static double getUnicodeNumericValue(int ch)
4019     {
4020         return UCharacterProperty.INSTANCE.getUnicodeNumericValue(ch);
4021     }
4022 
4023     /**
4024      * Compatibility override of Java deprecated method.  This
4025      * method will always remain deprecated.
4026      * Same as java.lang.Character.isSpace().
4027      * @param ch the code point
4028      * @return true if the code point is a space character as
4029      * defined by java.lang.Character.isSpace.
4030      * @deprecated ICU 3.4 (Java)
4031      */
4032     @Deprecated
isSpace(int ch)4033     public static boolean isSpace(int ch) {
4034         return ch <= 0x20 &&
4035                 (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0c || ch == 0x0d);
4036     }
4037 
4038     /**
4039      * Returns a value indicating a code point's Unicode category.
4040      * Up-to-date Unicode implementation of java.lang.Character.getType()
4041      * except for the above mentioned code points that had their category
4042      * changed.<br>
4043      * Return results are constants from the interface
4044      * <a href=UCharacterCategory.html>UCharacterCategory</a><br>
4045      * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with
4046      * those returned by java.lang.Character.getType.  UCharacterCategory values
4047      * match the ones used in ICU4C, while java.lang.Character type
4048      * values, though similar, skip the value 17.
4049      * @param ch code point whose type is to be determined
4050      * @return category which is a value of UCharacterCategory
4051      * @stable ICU 2.1
4052      */
getType(int ch)4053     public static int getType(int ch)
4054     {
4055         return UCharacterProperty.INSTANCE.getType(ch);
4056     }
4057 
4058     /**
4059      * Determines if a code point has a defined meaning in the up-to-date
4060      * Unicode standard.
4061      * E.g. supplementary code points though allocated space are not defined in
4062      * Unicode yet.<br>
4063      * Up-to-date Unicode implementation of java.lang.Character.isDefined()
4064      * @param ch code point to be determined if it is defined in the most
4065      *        current version of Unicode
4066      * @return true if this code point is defined in unicode
4067      * @stable ICU 2.1
4068      */
isDefined(int ch)4069     public static boolean isDefined(int ch)
4070     {
4071         return getType(ch) != 0;
4072     }
4073 
4074     /**
4075      * Determines if a code point is a Java digit.
4076      * <br>This method observes the semantics of
4077      * <code>java.lang.Character.isDigit()</code>. It returns true for decimal
4078      * digits only.
4079      * <br><em>Semantic Change:</em> In release 1.3.1 and prior, this treated
4080      * numeric letters and other numbers as digits.
4081      * This has been changed to conform to the java semantics.
4082      * @param ch code point to query
4083      * @return true if this code point is a digit
4084      * @stable ICU 2.1
4085      */
isDigit(int ch)4086     public static boolean isDigit(int ch)
4087     {
4088         return getType(ch) == UCharacterCategory.DECIMAL_DIGIT_NUMBER;
4089     }
4090 
4091     /**
4092      * Determines if the specified code point is an ISO control character.
4093      * A code point is considered to be an ISO control character if it is in
4094      * the range &#92;u0000 through &#92;u001F or in the range &#92;u007F through
4095      * &#92;u009F.<br>
4096      * Up-to-date Unicode implementation of java.lang.Character.isISOControl()
4097      * @param ch code point to determine if it is an ISO control character
4098      * @return true if code point is a ISO control character
4099      * @stable ICU 2.1
4100      */
isISOControl(int ch)4101     public static boolean isISOControl(int ch)
4102     {
4103         return ch >= 0 && ch <= APPLICATION_PROGRAM_COMMAND_ &&
4104                 ((ch <= UNIT_SEPARATOR_) || (ch >= DELETE_));
4105     }
4106 
4107     /**
4108      * Determines if the specified code point is a letter.
4109      * Up-to-date Unicode implementation of java.lang.Character.isLetter()
4110      * @param ch code point to determine if it is a letter
4111      * @return true if code point is a letter
4112      * @stable ICU 2.1
4113      */
isLetter(int ch)4114     public static boolean isLetter(int ch)
4115     {
4116         // if props == 0, it will just fall through and return false
4117         return ((1 << getType(ch))
4118                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4119                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4120                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4121                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4122                         | (1 << UCharacterCategory.OTHER_LETTER))) != 0;
4123     }
4124 
4125     /**
4126      * Determines if the specified code point is a letter or digit.
4127      * {@icunote} This method, unlike java.lang.Character does not regard the ascii
4128      * characters 'A' - 'Z' and 'a' - 'z' as digits.
4129      * @param ch code point to determine if it is a letter or a digit
4130      * @return true if code point is a letter or a digit
4131      * @stable ICU 2.1
4132      */
isLetterOrDigit(int ch)4133     public static boolean isLetterOrDigit(int ch)
4134     {
4135         return ((1 << getType(ch))
4136                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4137                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4138                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4139                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4140                         | (1 << UCharacterCategory.OTHER_LETTER)
4141                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER))) != 0;
4142     }
4143 
4144     /**
4145      * Compatibility override of Java deprecated method.  This
4146      * method will always remain deprecated.  Delegates to
4147      * java.lang.Character.isJavaIdentifierStart.
4148      * @param cp the code point
4149      * @return true if the code point can start a java identifier.
4150      * @deprecated ICU 3.4 (Java)
4151      */
4152     @Deprecated
isJavaLetter(int cp)4153     public static boolean isJavaLetter(int cp) {
4154         return isJavaIdentifierStart(cp);
4155     }
4156 
4157     /**
4158      * Compatibility override of Java deprecated method.  This
4159      * method will always remain deprecated.  Delegates to
4160      * java.lang.Character.isJavaIdentifierPart.
4161      * @param cp the code point
4162      * @return true if the code point can continue a java identifier.
4163      * @deprecated ICU 3.4 (Java)
4164      */
4165     @Deprecated
isJavaLetterOrDigit(int cp)4166     public static boolean isJavaLetterOrDigit(int cp) {
4167         return isJavaIdentifierPart(cp);
4168     }
4169 
4170     /**
4171      * Compatibility override of Java method, delegates to
4172      * java.lang.Character.isJavaIdentifierStart.
4173      * @param cp the code point
4174      * @return true if the code point can start a java identifier.
4175      * @stable ICU 3.4
4176      */
isJavaIdentifierStart(int cp)4177     public static boolean isJavaIdentifierStart(int cp) {
4178         // note, downcast to char for jdk 1.4 compatibility
4179         return java.lang.Character.isJavaIdentifierStart((char)cp);
4180     }
4181 
4182     /**
4183      * Compatibility override of Java method, delegates to
4184      * java.lang.Character.isJavaIdentifierPart.
4185      * @param cp the code point
4186      * @return true if the code point can continue a java identifier.
4187      * @stable ICU 3.4
4188      */
isJavaIdentifierPart(int cp)4189     public static boolean isJavaIdentifierPart(int cp) {
4190         // note, downcast to char for jdk 1.4 compatibility
4191         return java.lang.Character.isJavaIdentifierPart((char)cp);
4192     }
4193 
4194     /**
4195      * Determines if the specified code point is a lowercase character.
4196      * UnicodeData only contains case mappings for code points where they are
4197      * one-to-one mappings; it also omits information about context-sensitive
4198      * case mappings.<br> For more information about Unicode case mapping
4199      * please refer to the
4200      * <a href=http://www.unicode.org/unicode/reports/tr21/>Technical report
4201      * #21</a>.<br>
4202      * Up-to-date Unicode implementation of java.lang.Character.isLowerCase()
4203      * @param ch code point to determine if it is in lowercase
4204      * @return true if code point is a lowercase character
4205      * @stable ICU 2.1
4206      */
isLowerCase(int ch)4207     public static boolean isLowerCase(int ch)
4208     {
4209         // if props == 0, it will just fall through and return false
4210         return getType(ch) == UCharacterCategory.LOWERCASE_LETTER;
4211     }
4212 
4213     /**
4214      * Determines if the specified code point is a white space character.
4215      * A code point is considered to be an whitespace character if and only
4216      * if it satisfies one of the following criteria:
4217      * <ul>
4218      * <li> It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
4219      *      also a non-breaking space (&#92;u00A0 or &#92;u2007 or &#92;u202F).
4220      * <li> It is &#92;u0009, HORIZONTAL TABULATION.
4221      * <li> It is &#92;u000A, LINE FEED.
4222      * <li> It is &#92;u000B, VERTICAL TABULATION.
4223      * <li> It is &#92;u000C, FORM FEED.
4224      * <li> It is &#92;u000D, CARRIAGE RETURN.
4225      * <li> It is &#92;u001C, FILE SEPARATOR.
4226      * <li> It is &#92;u001D, GROUP SEPARATOR.
4227      * <li> It is &#92;u001E, RECORD SEPARATOR.
4228      * <li> It is &#92;u001F, UNIT SEPARATOR.
4229      * </ul>
4230      *
4231      * This API tries to sync with the semantics of Java's
4232      * java.lang.Character.isWhitespace(), but it may not return
4233      * the exact same results because of the Unicode version
4234      * difference.
4235      * <p>Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
4236      * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
4237      * See http://www.unicode.org/versions/Unicode4.0.1/
4238      * @param ch code point to determine if it is a white space
4239      * @return true if the specified code point is a white space character
4240      * @stable ICU 2.1
4241      */
isWhitespace(int ch)4242     public static boolean isWhitespace(int ch)
4243     {
4244         // exclude no-break spaces
4245         // if props == 0, it will just fall through and return false
4246         return ((1 << getType(ch)) &
4247                 ((1 << UCharacterCategory.SPACE_SEPARATOR)
4248                         | (1 << UCharacterCategory.LINE_SEPARATOR)
4249                         | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR))) != 0
4250                         && (ch != NO_BREAK_SPACE_) && (ch != FIGURE_SPACE_) && (ch != NARROW_NO_BREAK_SPACE_)
4251                         // TAB VT LF FF CR FS GS RS US NL are all control characters
4252                         // that are white spaces.
4253                         || (ch >= 0x9 && ch <= 0xd) || (ch >= 0x1c && ch <= 0x1f);
4254     }
4255 
4256     /**
4257      * Determines if the specified code point is a Unicode specified space
4258      * character, i.e. if code point is in the category Zs, Zl and Zp.
4259      * Up-to-date Unicode implementation of java.lang.Character.isSpaceChar().
4260      * @param ch code point to determine if it is a space
4261      * @return true if the specified code point is a space character
4262      * @stable ICU 2.1
4263      */
isSpaceChar(int ch)4264     public static boolean isSpaceChar(int ch)
4265     {
4266         // if props == 0, it will just fall through and return false
4267         return ((1 << getType(ch)) & ((1 << UCharacterCategory.SPACE_SEPARATOR)
4268                 | (1 << UCharacterCategory.LINE_SEPARATOR)
4269                 | (1 << UCharacterCategory.PARAGRAPH_SEPARATOR)))
4270                 != 0;
4271     }
4272 
4273     /**
4274      * Determines if the specified code point is a titlecase character.
4275      * UnicodeData only contains case mappings for code points where they are
4276      * one-to-one mappings; it also omits information about context-sensitive
4277      * case mappings.<br>
4278      * For more information about Unicode case mapping please refer to the
4279      * <a href=http://www.unicode.org/unicode/reports/tr21/>
4280      * Technical report #21</a>.<br>
4281      * Up-to-date Unicode implementation of java.lang.Character.isTitleCase().
4282      * @param ch code point to determine if it is in title case
4283      * @return true if the specified code point is a titlecase character
4284      * @stable ICU 2.1
4285      */
isTitleCase(int ch)4286     public static boolean isTitleCase(int ch)
4287     {
4288         // if props == 0, it will just fall through and return false
4289         return getType(ch) == UCharacterCategory.TITLECASE_LETTER;
4290     }
4291 
4292     /**
4293      * Determines if the specified code point may be any part of a Unicode
4294      * identifier other than the starting character.
4295      * A code point may be part of a Unicode identifier if and only if it is
4296      * one of the following:
4297      * <ul>
4298      * <li> Lu Uppercase letter
4299      * <li> Ll Lowercase letter
4300      * <li> Lt Titlecase letter
4301      * <li> Lm Modifier letter
4302      * <li> Lo Other letter
4303      * <li> Nl Letter number
4304      * <li> Pc Connecting punctuation character
4305      * <li> Nd decimal number
4306      * <li> Mc Spacing combining mark
4307      * <li> Mn Non-spacing mark
4308      * <li> Cf formatting code
4309      * </ul>
4310      * Up-to-date Unicode implementation of
4311      * java.lang.Character.isUnicodeIdentifierPart().<br>
4312      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
4313      * @param ch code point to determine if is can be part of a Unicode
4314      *        identifier
4315      * @return true if code point is any character belonging a unicode
4316      *         identifier suffix after the first character
4317      * @stable ICU 2.1
4318      */
isUnicodeIdentifierPart(int ch)4319     public static boolean isUnicodeIdentifierPart(int ch)
4320     {
4321         // if props == 0, it will just fall through and return false
4322         // cat == format
4323         return ((1 << getType(ch))
4324                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4325                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4326                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4327                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4328                         | (1 << UCharacterCategory.OTHER_LETTER)
4329                         | (1 << UCharacterCategory.LETTER_NUMBER)
4330                         | (1 << UCharacterCategory.CONNECTOR_PUNCTUATION)
4331                         | (1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER)
4332                         | (1 << UCharacterCategory.COMBINING_SPACING_MARK)
4333                         | (1 << UCharacterCategory.NON_SPACING_MARK))) != 0
4334                         || isIdentifierIgnorable(ch);
4335     }
4336 
4337     /**
4338      * Determines if the specified code point is permissible as the first
4339      * character in a Unicode identifier.
4340      * A code point may start a Unicode identifier if it is of type either
4341      * <ul>
4342      * <li> Lu Uppercase letter
4343      * <li> Ll Lowercase letter
4344      * <li> Lt Titlecase letter
4345      * <li> Lm Modifier letter
4346      * <li> Lo Other letter
4347      * <li> Nl Letter number
4348      * </ul>
4349      * Up-to-date Unicode implementation of
4350      * java.lang.Character.isUnicodeIdentifierStart().<br>
4351      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
4352      * @param ch code point to determine if it can start a Unicode identifier
4353      * @return true if code point is the first character belonging a unicode
4354      *              identifier
4355      * @stable ICU 2.1
4356      */
isUnicodeIdentifierStart(int ch)4357     public static boolean isUnicodeIdentifierStart(int ch)
4358     {
4359         /*int cat = getType(ch);*/
4360         // if props == 0, it will just fall through and return false
4361         return ((1 << getType(ch))
4362                 & ((1 << UCharacterCategory.UPPERCASE_LETTER)
4363                         | (1 << UCharacterCategory.LOWERCASE_LETTER)
4364                         | (1 << UCharacterCategory.TITLECASE_LETTER)
4365                         | (1 << UCharacterCategory.MODIFIER_LETTER)
4366                         | (1 << UCharacterCategory.OTHER_LETTER)
4367                         | (1 << UCharacterCategory.LETTER_NUMBER))) != 0;
4368     }
4369 
4370     /**
4371      * Determines if the specified code point should be regarded as an
4372      * ignorable character in a Java identifier.
4373      * A character is Java-identifier-ignorable if it has the general category
4374      * Cf Formatting Control, or it is a non-Java-whitespace ISO control:
4375      * U+0000..U+0008, U+000E..U+001B, U+007F..U+009F.<br>
4376      * Up-to-date Unicode implementation of
4377      * java.lang.Character.isIdentifierIgnorable().<br>
4378      * See <a href=http://www.unicode.org/unicode/reports/tr8/>UTR #8</a>.
4379      * <p>Note that Unicode just recommends to ignore Cf (format controls).
4380      * @param ch code point to be determined if it can be ignored in a Unicode
4381      *        identifier.
4382      * @return true if the code point is ignorable
4383      * @stable ICU 2.1
4384      */
isIdentifierIgnorable(int ch)4385     public static boolean isIdentifierIgnorable(int ch)
4386     {
4387         // see java.lang.Character.isIdentifierIgnorable() on range of
4388         // ignorable characters.
4389         if (ch <= 0x9f) {
4390             return isISOControl(ch)
4391                     && !((ch >= 0x9 && ch <= 0xd)
4392                             || (ch >= 0x1c && ch <= 0x1f));
4393         }
4394         return getType(ch) == UCharacterCategory.FORMAT;
4395     }
4396 
4397     /**
4398      * Determines if the specified code point is an uppercase character.
4399      * UnicodeData only contains case mappings for code point where they are
4400      * one-to-one mappings; it also omits information about context-sensitive
4401      * case mappings.<br>
4402      * For language specific case conversion behavior, use
4403      * toUpperCase(locale, str). <br>
4404      * For example, the case conversion for dot-less i and dotted I in Turkish,
4405      * or for final sigma in Greek.
4406      * For more information about Unicode case mapping please refer to the
4407      * <a href=http://www.unicode.org/unicode/reports/tr21/>
4408      * Technical report #21</a>.<br>
4409      * Up-to-date Unicode implementation of java.lang.Character.isUpperCase().
4410      * @param ch code point to determine if it is in uppercase
4411      * @return true if the code point is an uppercase character
4412      * @stable ICU 2.1
4413      */
isUpperCase(int ch)4414     public static boolean isUpperCase(int ch)
4415     {
4416         // if props == 0, it will just fall through and return false
4417         return getType(ch) == UCharacterCategory.UPPERCASE_LETTER;
4418     }
4419 
4420     /**
4421      * The given code point is mapped to its lowercase equivalent; if the code
4422      * point has no lowercase equivalent, the code point itself is returned.
4423      * Up-to-date Unicode implementation of java.lang.Character.toLowerCase()
4424      *
4425      * <p>This function only returns the simple, single-code point case mapping.
4426      * Full case mappings should be used whenever possible because they produce
4427      * better results by working on whole strings.
4428      * They take into account the string context and the language and can map
4429      * to a result string with a different length as appropriate.
4430      * Full case mappings are applied by the case mapping functions
4431      * that take String parameters rather than code points (int).
4432      * See also the User Guide chapter on C/POSIX migration:
4433      * http://www.icu-project.org/userguide/posix.html#case_mappings
4434      *
4435      * @param ch code point whose lowercase equivalent is to be retrieved
4436      * @return the lowercase equivalent code point
4437      * @stable ICU 2.1
4438      */
toLowerCase(int ch)4439     public static int toLowerCase(int ch) {
4440         return UCaseProps.INSTANCE.tolower(ch);
4441     }
4442 
4443     /**
4444      * Converts argument code point and returns a String object representing
4445      * the code point's value in UTF-16 format.
4446      * The result is a string whose length is 1 for BMP code points, 2 for supplementary ones.
4447      *
4448      * <p>Up-to-date Unicode implementation of java.lang.Character.toString().
4449      *
4450      * @param ch code point
4451      * @return string representation of the code point, null if code point is not
4452      *         defined in unicode
4453      * @stable ICU 2.1
4454      */
toString(int ch)4455     public static String toString(int ch)
4456     {
4457         if (ch < MIN_VALUE || ch > MAX_VALUE) {
4458             return null;
4459         }
4460 
4461         if (ch < SUPPLEMENTARY_MIN_VALUE) {
4462             return String.valueOf((char)ch);
4463         }
4464 
4465         return new String(Character.toChars(ch));
4466     }
4467 
4468     /**
4469      * Converts the code point argument to titlecase.
4470      * If no titlecase is available, the uppercase is returned. If no uppercase
4471      * is available, the code point itself is returned.
4472      * Up-to-date Unicode implementation of java.lang.Character.toTitleCase()
4473      *
4474      * <p>This function only returns the simple, single-code point case mapping.
4475      * Full case mappings should be used whenever possible because they produce
4476      * better results by working on whole strings.
4477      * They take into account the string context and the language and can map
4478      * to a result string with a different length as appropriate.
4479      * Full case mappings are applied by the case mapping functions
4480      * that take String parameters rather than code points (int).
4481      * See also the User Guide chapter on C/POSIX migration:
4482      * http://www.icu-project.org/userguide/posix.html#case_mappings
4483      *
4484      * @param ch code point  whose title case is to be retrieved
4485      * @return titlecase code point
4486      * @stable ICU 2.1
4487      */
toTitleCase(int ch)4488     public static int toTitleCase(int ch) {
4489         return UCaseProps.INSTANCE.totitle(ch);
4490     }
4491 
4492     /**
4493      * Converts the character argument to uppercase.
4494      * If no uppercase is available, the character itself is returned.
4495      * Up-to-date Unicode implementation of java.lang.Character.toUpperCase()
4496      *
4497      * <p>This function only returns the simple, single-code point case mapping.
4498      * Full case mappings should be used whenever possible because they produce
4499      * better results by working on whole strings.
4500      * They take into account the string context and the language and can map
4501      * to a result string with a different length as appropriate.
4502      * Full case mappings are applied by the case mapping functions
4503      * that take String parameters rather than code points (int).
4504      * See also the User Guide chapter on C/POSIX migration:
4505      * http://www.icu-project.org/userguide/posix.html#case_mappings
4506      *
4507      * @param ch code point whose uppercase is to be retrieved
4508      * @return uppercase code point
4509      * @stable ICU 2.1
4510      */
toUpperCase(int ch)4511     public static int toUpperCase(int ch) {
4512         return UCaseProps.INSTANCE.toupper(ch);
4513     }
4514 
4515     // extra methods not in java.lang.Character --------------------------
4516 
4517     /**
4518      * {@icu} Determines if the code point is a supplementary character.
4519      * A code point is a supplementary character if and only if it is greater
4520      * than <a href=#SUPPLEMENTARY_MIN_VALUE>SUPPLEMENTARY_MIN_VALUE</a>
4521      * @param ch code point to be determined if it is in the supplementary
4522      *        plane
4523      * @return true if code point is a supplementary character
4524      * @stable ICU 2.1
4525      */
isSupplementary(int ch)4526     public static boolean isSupplementary(int ch)
4527     {
4528         return ch >= UCharacter.SUPPLEMENTARY_MIN_VALUE &&
4529                 ch <= UCharacter.MAX_VALUE;
4530     }
4531 
4532     /**
4533      * {@icu} Determines if the code point is in the BMP plane.
4534      * @param ch code point to be determined if it is not a supplementary
4535      *        character
4536      * @return true if code point is not a supplementary character
4537      * @stable ICU 2.1
4538      */
isBMP(int ch)4539     public static boolean isBMP(int ch)
4540     {
4541         return (ch >= 0 && ch <= LAST_CHAR_MASK_);
4542     }
4543 
4544     /**
4545      * {@icu} Determines whether the specified code point is a printable character
4546      * according to the Unicode standard.
4547      * @param ch code point to be determined if it is printable
4548      * @return true if the code point is a printable character
4549      * @stable ICU 2.1
4550      */
isPrintable(int ch)4551     public static boolean isPrintable(int ch)
4552     {
4553         int cat = getType(ch);
4554         // if props == 0, it will just fall through and return false
4555         return (cat != UCharacterCategory.UNASSIGNED &&
4556                 cat != UCharacterCategory.CONTROL &&
4557                 cat != UCharacterCategory.FORMAT &&
4558                 cat != UCharacterCategory.PRIVATE_USE &&
4559                 cat != UCharacterCategory.SURROGATE &&
4560                 cat != UCharacterCategory.GENERAL_OTHER_TYPES);
4561     }
4562 
4563     /**
4564      * {@icu} Determines whether the specified code point is of base form.
4565      * A code point of base form does not graphically combine with preceding
4566      * characters, and is neither a control nor a format character.
4567      * @param ch code point to be determined if it is of base form
4568      * @return true if the code point is of base form
4569      * @stable ICU 2.1
4570      */
isBaseForm(int ch)4571     public static boolean isBaseForm(int ch)
4572     {
4573         int cat = getType(ch);
4574         // if props == 0, it will just fall through and return false
4575         return cat == UCharacterCategory.DECIMAL_DIGIT_NUMBER ||
4576                 cat == UCharacterCategory.OTHER_NUMBER ||
4577                 cat == UCharacterCategory.LETTER_NUMBER ||
4578                 cat == UCharacterCategory.UPPERCASE_LETTER ||
4579                 cat == UCharacterCategory.LOWERCASE_LETTER ||
4580                 cat == UCharacterCategory.TITLECASE_LETTER ||
4581                 cat == UCharacterCategory.MODIFIER_LETTER ||
4582                 cat == UCharacterCategory.OTHER_LETTER ||
4583                 cat == UCharacterCategory.NON_SPACING_MARK ||
4584                 cat == UCharacterCategory.ENCLOSING_MARK ||
4585                 cat == UCharacterCategory.COMBINING_SPACING_MARK;
4586     }
4587 
4588     /**
4589      * {@icu} Returns the Bidirection property of a code point.
4590      * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional
4591      * property.<br>
4592      * Result returned belongs to the interface
4593      * <a href=UCharacterDirection.html>UCharacterDirection</a>
4594      * @param ch the code point to be determined its direction
4595      * @return direction constant from UCharacterDirection.
4596      * @stable ICU 2.1
4597      */
getDirection(int ch)4598     public static int getDirection(int ch)
4599     {
4600         return UBiDiProps.INSTANCE.getClass(ch);
4601     }
4602 
4603     /**
4604      * Determines whether the code point has the "mirrored" property.
4605      * This property is set for characters that are commonly used in
4606      * Right-To-Left contexts and need to be displayed with a "mirrored"
4607      * glyph.
4608      * @param ch code point whose mirror is to be determined
4609      * @return true if the code point has the "mirrored" property
4610      * @stable ICU 2.1
4611      */
isMirrored(int ch)4612     public static boolean isMirrored(int ch)
4613     {
4614         return UBiDiProps.INSTANCE.isMirrored(ch);
4615     }
4616 
4617     /**
4618      * {@icu} Maps the specified code point to a "mirror-image" code point.
4619      * For code points with the "mirrored" property, implementations sometimes
4620      * need a "poor man's" mapping to another code point such that the default
4621      * glyph may serve as the mirror-image of the default glyph of the
4622      * specified code point.<br>
4623      * This is useful for text conversion to and from codepages with visual
4624      * order, and for displays without glyph selection capabilities.
4625      * @param ch code point whose mirror is to be retrieved
4626      * @return another code point that may serve as a mirror-image substitute,
4627      *         or ch itself if there is no such mapping or ch does not have the
4628      *         "mirrored" property
4629      * @stable ICU 2.1
4630      */
getMirror(int ch)4631     public static int getMirror(int ch)
4632     {
4633         return UBiDiProps.INSTANCE.getMirror(ch);
4634     }
4635 
4636     /**
4637      * {@icu} Maps the specified character to its paired bracket character.
4638      * For Bidi_Paired_Bracket_Type!=None, this is the same as getMirror(int).
4639      * Otherwise c itself is returned.
4640      * See http://www.unicode.org/reports/tr9/
4641      *
4642      * @param c the code point to be mapped
4643      * @return the paired bracket code point,
4644      *         or c itself if there is no such mapping
4645      *         (Bidi_Paired_Bracket_Type=None)
4646      *
4647      * @see UProperty#BIDI_PAIRED_BRACKET
4648      * @see UProperty#BIDI_PAIRED_BRACKET_TYPE
4649      * @see #getMirror(int)
4650      * @stable ICU 52
4651      */
getBidiPairedBracket(int c)4652     public static int getBidiPairedBracket(int c) {
4653         return UBiDiProps.INSTANCE.getPairedBracket(c);
4654     }
4655 
4656     /**
4657      * {@icu} Returns the combining class of the argument codepoint
4658      * @param ch code point whose combining is to be retrieved
4659      * @return the combining class of the codepoint
4660      * @stable ICU 2.1
4661      */
getCombiningClass(int ch)4662     public static int getCombiningClass(int ch)
4663     {
4664         return Normalizer2.getNFDInstance().getCombiningClass(ch);
4665     }
4666 
4667     /**
4668      * {@icu} A code point is illegal if and only if
4669      * <ul>
4670      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4671      * <li> A surrogate value, 0xD800 to 0xDFFF
4672      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4673      * </ul>
4674      * Note: legal does not mean that it is assigned in this version of Unicode.
4675      * @param ch code point to determine if it is a legal code point by itself
4676      * @return true if and only if legal.
4677      * @stable ICU 2.1
4678      */
isLegal(int ch)4679     public static boolean isLegal(int ch)
4680     {
4681         if (ch < MIN_VALUE) {
4682             return false;
4683         }
4684         if (ch < Character.MIN_SURROGATE) {
4685             return true;
4686         }
4687         if (ch <= Character.MAX_SURROGATE) {
4688             return false;
4689         }
4690         if (UCharacterUtility.isNonCharacter(ch)) {
4691             return false;
4692         }
4693         return (ch <= MAX_VALUE);
4694     }
4695 
4696     /**
4697      * {@icu} A string is legal iff all its code points are legal.
4698      * A code point is illegal if and only if
4699      * <ul>
4700      * <li> Out of bounds, less than 0 or greater than UCharacter.MAX_VALUE
4701      * <li> A surrogate value, 0xD800 to 0xDFFF
4702      * <li> Not-a-character, having the form 0x xxFFFF or 0x xxFFFE
4703      * </ul>
4704      * Note: legal does not mean that it is assigned in this version of Unicode.
4705      * @param str containing code points to examin
4706      * @return true if and only if legal.
4707      * @stable ICU 2.1
4708      */
isLegal(String str)4709     public static boolean isLegal(String str)
4710     {
4711         int size = str.length();
4712         int codepoint;
4713         for (int i = 0; i < size; i += Character.charCount(codepoint))
4714         {
4715             codepoint = str.codePointAt(i);
4716             if (!isLegal(codepoint)) {
4717                 return false;
4718             }
4719         }
4720         return true;
4721     }
4722 
4723     /**
4724      * {@icu} Returns the version of Unicode data used.
4725      * @return the unicode version number used
4726      * @stable ICU 2.1
4727      */
getUnicodeVersion()4728     public static VersionInfo getUnicodeVersion()
4729     {
4730         return UCharacterProperty.INSTANCE.m_unicodeVersion_;
4731     }
4732 
4733     /**
4734      * {@icu} Returns the most current Unicode name of the argument code point, or
4735      * null if the character is unassigned or outside the range
4736      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4737      * <br>
4738      * Note calling any methods related to code point names, e.g. get*Name*()
4739      * incurs a one-time initialisation cost to construct the name tables.
4740      * @param ch the code point for which to get the name
4741      * @return most current Unicode name
4742      * @stable ICU 2.1
4743      */
getName(int ch)4744     public static String getName(int ch)
4745     {
4746         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.UNICODE_CHAR_NAME);
4747     }
4748 
4749     /**
4750      * {@icu} Returns the names for each of the characters in a string
4751      * @param s string to format
4752      * @param separator string to go between names
4753      * @return string of names
4754      * @stable ICU 3.8
4755      */
getName(String s, String separator)4756     public static String getName(String s, String separator) {
4757         if (s.length() == 1) { // handle common case
4758             return getName(s.charAt(0));
4759         }
4760         int cp;
4761         StringBuilder sb = new StringBuilder();
4762         for (int i = 0; i < s.length(); i += Character.charCount(cp)) {
4763             cp = s.codePointAt(i);
4764             if (i != 0) sb.append(separator);
4765             sb.append(UCharacter.getName(cp));
4766         }
4767         return sb.toString();
4768     }
4769 
4770     /**
4771      * {@icu} Returns null.
4772      * Used to return the Unicode_1_Name property value which was of little practical value.
4773      * @param ch the code point for which to get the name
4774      * @return null
4775      * @deprecated ICU 49
4776      */
4777     @Deprecated
getName1_0(int ch)4778     public static String getName1_0(int ch)
4779     {
4780         return null;
4781     }
4782 
4783     /**
4784      * {@icu} Returns a name for a valid codepoint. Unlike, getName(int) and
4785      * getName1_0(int), this method will return a name even for codepoints that
4786      * are not assigned a name in UnicodeData.txt.
4787      *
4788      * <p>The names are returned in the following order.
4789      * <ul>
4790      * <li> Most current Unicode name if there is any
4791      * <li> Unicode 1.0 name if there is any
4792      * <li> Extended name in the form of
4793      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g., &lt;noncharacter-fffe&gt;
4794      * </ul>
4795      * Note calling any methods related to code point names, e.g. get*Name*()
4796      * incurs a one-time initialisation cost to construct the name tables.
4797      * @param ch the code point for which to get the name
4798      * @return a name for the argument codepoint
4799      * @stable ICU 2.6
4800      */
getExtendedName(int ch)4801     public static String getExtendedName(int ch) {
4802         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.EXTENDED_CHAR_NAME);
4803     }
4804 
4805     /**
4806      * {@icu} Returns the corrected name from NameAliases.txt if there is one.
4807      * Returns null if the character is unassigned or outside the range
4808      * UCharacter.MIN_VALUE and UCharacter.MAX_VALUE or does not have a name.
4809      * <br>
4810      * Note calling any methods related to code point names, e.g. get*Name*()
4811      * incurs a one-time initialisation cost to construct the name tables.
4812      * @param ch the code point for which to get the name alias
4813      * @return Unicode name alias, or null
4814      * @stable ICU 4.4
4815      */
getNameAlias(int ch)4816     public static String getNameAlias(int ch)
4817     {
4818         return UCharacterName.INSTANCE.getName(ch, UCharacterNameChoice.CHAR_NAME_ALIAS);
4819     }
4820 
4821     /**
4822      * {@icu} Returns null.
4823      * Used to return the ISO 10646 comment for a character.
4824      * The Unicode ISO_Comment property is deprecated and has no values.
4825      *
4826      * @param ch The code point for which to get the ISO comment.
4827      *           It must be the case that {@code 0 <= ch <= 0x10ffff}.
4828      * @return null
4829      * @deprecated ICU 49
4830      */
4831     @Deprecated
getISOComment(int ch)4832     public static String getISOComment(int ch)
4833     {
4834         return null;
4835     }
4836 
4837     /**
4838      * {@icu} <p>Finds a Unicode code point by its most current Unicode name and
4839      * return its code point value. All Unicode names are in uppercase.
4840      * Note calling any methods related to code point names, e.g. get*Name*()
4841      * incurs a one-time initialisation cost to construct the name tables.
4842      * @param name most current Unicode character name whose code point is to
4843      *        be returned
4844      * @return code point or -1 if name is not found
4845      * @stable ICU 2.1
4846      */
getCharFromName(String name)4847     public static int getCharFromName(String name){
4848         return UCharacterName.INSTANCE.getCharFromName(
4849                 UCharacterNameChoice.UNICODE_CHAR_NAME, name);
4850     }
4851 
4852     /**
4853      * {@icu} Returns -1.
4854      * <p>Used to find a Unicode character by its version 1.0 Unicode name and return
4855      * its code point value.
4856      * @param name Unicode 1.0 code point name whose code point is to be
4857      *             returned
4858      * @return -1
4859      * @deprecated ICU 49
4860      * @see #getName1_0(int)
4861      */
4862     @Deprecated
getCharFromName1_0(String name)4863     public static int getCharFromName1_0(String name){
4864         return -1;
4865     }
4866 
4867     /**
4868      * {@icu} <p>Find a Unicode character by either its name and return its code
4869      * point value. All Unicode names are in uppercase.
4870      * Extended names are all lowercase except for numbers and are contained
4871      * within angle brackets.
4872      * The names are searched in the following order
4873      * <ul>
4874      * <li> Most current Unicode name if there is any
4875      * <li> Unicode 1.0 name if there is any
4876      * <li> Extended name in the form of
4877      *      "&lt;codepoint_type-codepoint_hex_digits&gt;". E.g. &lt;noncharacter-FFFE&gt;
4878      * </ul>
4879      * Note calling any methods related to code point names, e.g. get*Name*()
4880      * incurs a one-time initialisation cost to construct the name tables.
4881      * @param name codepoint name
4882      * @return code point associated with the name or -1 if the name is not
4883      *         found.
4884      * @stable ICU 2.6
4885      */
getCharFromExtendedName(String name)4886     public static int getCharFromExtendedName(String name){
4887         return UCharacterName.INSTANCE.getCharFromName(
4888                 UCharacterNameChoice.EXTENDED_CHAR_NAME, name);
4889     }
4890 
4891     /**
4892      * {@icu} <p>Find a Unicode character by its corrected name alias and return
4893      * its code point value. All Unicode names are in uppercase.
4894      * Note calling any methods related to code point names, e.g. get*Name*()
4895      * incurs a one-time initialisation cost to construct the name tables.
4896      * @param name Unicode name alias whose code point is to be returned
4897      * @return code point or -1 if name is not found
4898      * @stable ICU 4.4
4899      */
getCharFromNameAlias(String name)4900     public static int getCharFromNameAlias(String name){
4901         return UCharacterName.INSTANCE.getCharFromName(UCharacterNameChoice.CHAR_NAME_ALIAS, name);
4902     }
4903 
4904     /**
4905      * {@icu} Return the Unicode name for a given property, as given in the
4906      * Unicode database file PropertyAliases.txt.  Most properties
4907      * have more than one name.  The nameChoice determines which one
4908      * is returned.
4909      *
4910      * In addition, this function maps the property
4911      * UProperty.GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
4912      * "General_Category_Mask".  These names are not in
4913      * PropertyAliases.txt.
4914      *
4915      * @param property UProperty selector.
4916      *
4917      * @param nameChoice UProperty.NameChoice selector for which name
4918      * to get.  All properties have a long name.  Most have a short
4919      * name, but some do not.  Unicode allows for additional names; if
4920      * present these will be returned by UProperty.NameChoice.LONG + i,
4921      * where i=1, 2,...
4922      *
4923      * @return a name, or null if Unicode explicitly defines no name
4924      * ("n/a") for a given property/nameChoice.  If a given nameChoice
4925      * throws an exception, then all larger values of nameChoice will
4926      * throw an exception.  If null is returned for a given
4927      * nameChoice, then other nameChoice values may return non-null
4928      * results.
4929      *
4930      * @exception IllegalArgumentException thrown if property or
4931      * nameChoice are invalid.
4932      *
4933      * @see UProperty
4934      * @see UProperty.NameChoice
4935      * @stable ICU 2.4
4936      */
getPropertyName(int property, int nameChoice)4937     public static String getPropertyName(int property,
4938             int nameChoice) {
4939         return UPropertyAliases.INSTANCE.getPropertyName(property, nameChoice);
4940     }
4941 
4942     /**
4943      * {@icu} Return the UProperty selector for a given property name, as
4944      * specified in the Unicode database file PropertyAliases.txt.
4945      * Short, long, and any other variants are recognized.
4946      *
4947      * In addition, this function maps the synthetic names "gcm" /
4948      * "General_Category_Mask" to the property
4949      * UProperty.GENERAL_CATEGORY_MASK.  These names are not in
4950      * PropertyAliases.txt.
4951      *
4952      * @param propertyAlias the property name to be matched.  The name
4953      * is compared using "loose matching" as described in
4954      * PropertyAliases.txt.
4955      *
4956      * @return a UProperty enum.
4957      *
4958      * @exception IllegalArgumentException thrown if propertyAlias
4959      * is not recognized.
4960      *
4961      * @see UProperty
4962      * @stable ICU 2.4
4963      */
getPropertyEnum(CharSequence propertyAlias)4964     public static int getPropertyEnum(CharSequence propertyAlias) {
4965         int propEnum = UPropertyAliases.INSTANCE.getPropertyEnum(propertyAlias);
4966         if (propEnum == UProperty.UNDEFINED) {
4967             throw new IllegalIcuArgumentException("Invalid name: " + propertyAlias);
4968         }
4969         return propEnum;
4970     }
4971 
4972     /**
4973      * {@icu} Return the Unicode name for a given property value, as given in
4974      * the Unicode database file PropertyValueAliases.txt.  Most
4975      * values have more than one name.  The nameChoice determines
4976      * which one is returned.
4977      *
4978      * Note: Some of the names in PropertyValueAliases.txt can only be
4979      * retrieved using UProperty.GENERAL_CATEGORY_MASK, not
4980      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
4981      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
4982      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
4983      *
4984      * @param property UProperty selector constant.
4985      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
4986      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
4987      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
4988      * If out of range, null is returned.
4989      *
4990      * @param value selector for a value for the given property.  In
4991      * general, valid values range from 0 up to some maximum.  There
4992      * are a few exceptions: (1.) UProperty.BLOCK values begin at the
4993      * non-zero value BASIC_LATIN.getID().  (2.)
4994      * UProperty.CANONICAL_COMBINING_CLASS values are not contiguous
4995      * and range from 0..240.  (3.)  UProperty.GENERAL_CATEGORY_MASK values
4996      * are mask values produced by left-shifting 1 by
4997      * UCharacter.getType().  This allows grouped categories such as
4998      * [:L:] to be represented.  Mask values are non-contiguous.
4999      *
5000      * @param nameChoice UProperty.NameChoice selector for which name
5001      * to get.  All values have a long name.  Most have a short name,
5002      * but some do not.  Unicode allows for additional names; if
5003      * present these will be returned by UProperty.NameChoice.LONG + i,
5004      * where i=1, 2,...
5005      *
5006      * @return a name, or null if Unicode explicitly defines no name
5007      * ("n/a") for a given property/value/nameChoice.  If a given
5008      * nameChoice throws an exception, then all larger values of
5009      * nameChoice will throw an exception.  If null is returned for a
5010      * given nameChoice, then other nameChoice values may return
5011      * non-null results.
5012      *
5013      * @exception IllegalArgumentException thrown if property, value,
5014      * or nameChoice are invalid.
5015      *
5016      * @see UProperty
5017      * @see UProperty.NameChoice
5018      * @stable ICU 2.4
5019      */
getPropertyValueName(int property, int value, int nameChoice)5020     public static String getPropertyValueName(int property,
5021             int value,
5022             int nameChoice)
5023     {
5024         if ((property == UProperty.CANONICAL_COMBINING_CLASS
5025                 || property == UProperty.LEAD_CANONICAL_COMBINING_CLASS
5026                 || property == UProperty.TRAIL_CANONICAL_COMBINING_CLASS)
5027                 && value >= UCharacter.getIntPropertyMinValue(
5028                         UProperty.CANONICAL_COMBINING_CLASS)
5029                         && value <= UCharacter.getIntPropertyMaxValue(
5030                                 UProperty.CANONICAL_COMBINING_CLASS)
5031                                 && nameChoice >= 0 && nameChoice < UProperty.NameChoice.COUNT) {
5032             // this is hard coded for the valid cc
5033             // because PropertyValueAliases.txt does not contain all of them
5034             try {
5035                 return UPropertyAliases.INSTANCE.getPropertyValueName(property, value,
5036                         nameChoice);
5037             }
5038             catch (IllegalArgumentException e) {
5039                 return null;
5040             }
5041         }
5042         return UPropertyAliases.INSTANCE.getPropertyValueName(property, value, nameChoice);
5043     }
5044 
5045     /**
5046      * {@icu} Return the property value integer for a given value name, as
5047      * specified in the Unicode database file PropertyValueAliases.txt.
5048      * Short, long, and any other variants are recognized.
5049      *
5050      * Note: Some of the names in PropertyValueAliases.txt will only be
5051      * recognized with UProperty.GENERAL_CATEGORY_MASK, not
5052      * UProperty.GENERAL_CATEGORY.  These include: "C" / "Other", "L" /
5053      * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
5054      * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
5055      *
5056      * @param property UProperty selector constant.
5057      * UProperty.INT_START &lt;= property &lt; UProperty.INT_LIMIT or
5058      * UProperty.BINARY_START &lt;= property &lt; UProperty.BINARY_LIMIT or
5059      * UProperty.MASK_START &lt; = property &lt; UProperty.MASK_LIMIT.
5060      * Only these properties can be enumerated.
5061      *
5062      * @param valueAlias the value name to be matched.  The name is
5063      * compared using "loose matching" as described in
5064      * PropertyValueAliases.txt.
5065      *
5066      * @return a value integer.  Note: UProperty.GENERAL_CATEGORY
5067      * values are mask values produced by left-shifting 1 by
5068      * UCharacter.getType().  This allows grouped categories such as
5069      * [:L:] to be represented.
5070      *
5071      * @see UProperty
5072      * @throws IllegalArgumentException if property is not a valid UProperty
5073      *         selector or valueAlias is not a value of this property
5074      * @stable ICU 2.4
5075      */
getPropertyValueEnum(int property, CharSequence valueAlias)5076     public static int getPropertyValueEnum(int property, CharSequence valueAlias) {
5077         int propEnum = UPropertyAliases.INSTANCE.getPropertyValueEnum(property, valueAlias);
5078         if (propEnum == UProperty.UNDEFINED) {
5079             throw new IllegalIcuArgumentException("Invalid name: " + valueAlias);
5080         }
5081         return propEnum;
5082     }
5083 
5084     /**
5085      * Same as {@link #getPropertyValueEnum(int, CharSequence)}, except doesn't throw exception. Instead, returns UProperty.UNDEFINED.
5086      * @param property  Same as {@link #getPropertyValueEnum(int, CharSequence)}
5087      * @param valueAlias    Same as {@link #getPropertyValueEnum(int, CharSequence)}
5088      * @return returns UProperty.UNDEFINED if the value is not valid, otherwise the value.
5089      * @internal
5090      * @deprecated This API is ICU internal only.
5091      */
5092     @Deprecated
getPropertyValueEnumNoThrow(int property, CharSequence valueAlias)5093     public static int getPropertyValueEnumNoThrow(int property, CharSequence valueAlias) {
5094         return UPropertyAliases.INSTANCE.getPropertyValueEnumNoThrow(property, valueAlias);
5095     }
5096 
5097 
5098     /**
5099      * {@icu} Returns a code point corresponding to the two surrogate code units.
5100      *
5101      * @param lead the lead char
5102      * @param trail the trail char
5103      * @return code point if surrogate characters are valid.
5104      * @exception IllegalArgumentException thrown when the code units do
5105      *            not form a valid code point
5106      * @stable ICU 2.1
5107      */
getCodePoint(char lead, char trail)5108     public static int getCodePoint(char lead, char trail)
5109     {
5110         if (Character.isSurrogatePair(lead, trail)) {
5111             return Character.toCodePoint(lead, trail);
5112         }
5113         throw new IllegalArgumentException("Illegal surrogate characters");
5114     }
5115 
5116     /**
5117      * {@icu} Returns the code point corresponding to the BMP code point.
5118      *
5119      * @param char16 the BMP code point
5120      * @return code point if argument is a valid character.
5121      * @exception IllegalArgumentException thrown when char16 is not a valid
5122      *            code point
5123      * @stable ICU 2.1
5124      */
getCodePoint(char char16)5125     public static int getCodePoint(char char16)
5126     {
5127         if (UCharacter.isLegal(char16)) {
5128             return char16;
5129         }
5130         throw new IllegalArgumentException("Illegal codepoint");
5131     }
5132 
5133     /**
5134      * Returns the uppercase version of the argument string.
5135      * Casing is dependent on the default locale and context-sensitive.
5136      * @param str source string to be performed on
5137      * @return uppercase version of the argument string
5138      * @stable ICU 2.1
5139      */
toUpperCase(String str)5140     public static String toUpperCase(String str)
5141     {
5142         return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str);
5143     }
5144 
5145     /**
5146      * Returns the lowercase version of the argument string.
5147      * Casing is dependent on the default locale and context-sensitive
5148      * @param str source string to be performed on
5149      * @return lowercase version of the argument string
5150      * @stable ICU 2.1
5151      */
toLowerCase(String str)5152     public static String toLowerCase(String str)
5153     {
5154         return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str);
5155     }
5156 
5157     /**
5158      * <p>Returns the titlecase version of the argument string.
5159      * <p>Position for titlecasing is determined by the argument break
5160      * iterator, hence the user can customize his break iterator for
5161      * a specialized titlecasing. In this case only the forward iteration
5162      * needs to be implemented.
5163      * If the break iterator passed in is null, the default Unicode algorithm
5164      * will be used to determine the titlecase positions.
5165      *
5166      * <p>Only positions returned by the break iterator will be title cased,
5167      * character in between the positions will all be in lower case.
5168      * <p>Casing is dependent on the default locale and context-sensitive
5169      * @param str source string to be performed on
5170      * @param breakiter break iterator to determine the positions in which
5171      *        the character should be title cased.
5172      * @return titlecase version of the argument string
5173      * @stable ICU 2.6
5174      */
toTitleCase(String str, BreakIterator breakiter)5175     public static String toTitleCase(String str, BreakIterator breakiter)
5176     {
5177         return toTitleCase(Locale.getDefault(), str, breakiter, 0);
5178     }
5179 
getDefaultCaseLocale()5180     private static int getDefaultCaseLocale() {
5181         return UCaseProps.getCaseLocale(Locale.getDefault());
5182     }
5183 
getCaseLocale(Locale locale)5184     private static int getCaseLocale(Locale locale) {
5185         if (locale == null) {
5186             locale = Locale.getDefault();
5187         }
5188         return UCaseProps.getCaseLocale(locale);
5189     }
5190 
getCaseLocale(ULocale locale)5191     private static int getCaseLocale(ULocale locale) {
5192         if (locale == null) {
5193             locale = ULocale.getDefault();
5194         }
5195         return UCaseProps.getCaseLocale(locale);
5196     }
5197 
5198     /**
5199      * Returns the uppercase version of the argument string.
5200      * Casing is dependent on the argument locale and context-sensitive.
5201      * @param locale which string is to be converted in
5202      * @param str source string to be performed on
5203      * @return uppercase version of the argument string
5204      * @stable ICU 2.1
5205      */
toUpperCase(Locale locale, String str)5206     public static String toUpperCase(Locale locale, String str)
5207     {
5208         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
5209     }
5210 
5211     /**
5212      * Returns the uppercase version of the argument string.
5213      * Casing is dependent on the argument locale and context-sensitive.
5214      * @param locale which string is to be converted in
5215      * @param str source string to be performed on
5216      * @return uppercase version of the argument string
5217      * @stable ICU 3.2
5218      */
toUpperCase(ULocale locale, String str)5219     public static String toUpperCase(ULocale locale, String str) {
5220         return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str);
5221     }
5222 
5223     /**
5224      * Returns the lowercase version of the argument string.
5225      * Casing is dependent on the argument locale and context-sensitive
5226      * @param locale which string is to be converted in
5227      * @param str source string to be performed on
5228      * @return lowercase version of the argument string
5229      * @stable ICU 2.1
5230      */
toLowerCase(Locale locale, String str)5231     public static String toLowerCase(Locale locale, String str)
5232     {
5233         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
5234     }
5235 
5236     /**
5237      * Returns the lowercase version of the argument string.
5238      * Casing is dependent on the argument locale and context-sensitive
5239      * @param locale which string is to be converted in
5240      * @param str source string to be performed on
5241      * @return lowercase version of the argument string
5242      * @stable ICU 3.2
5243      */
toLowerCase(ULocale locale, String str)5244     public static String toLowerCase(ULocale locale, String str) {
5245         return CaseMapImpl.toLower(getCaseLocale(locale), 0, str);
5246     }
5247 
5248     /**
5249      * <p>Returns the titlecase version of the argument string.
5250      * <p>Position for titlecasing is determined by the argument break
5251      * iterator, hence the user can customize his break iterator for
5252      * a specialized titlecasing. In this case only the forward iteration
5253      * needs to be implemented.
5254      * If the break iterator passed in is null, the default Unicode algorithm
5255      * will be used to determine the titlecase positions.
5256      *
5257      * <p>Only positions returned by the break iterator will be title cased,
5258      * character in between the positions will all be in lower case.
5259      * <p>Casing is dependent on the argument locale and context-sensitive
5260      * @param locale which string is to be converted in
5261      * @param str source string to be performed on
5262      * @param breakiter break iterator to determine the positions in which
5263      *        the character should be title cased.
5264      * @return titlecase version of the argument string
5265      * @stable ICU 2.6
5266      */
toTitleCase(Locale locale, String str, BreakIterator breakiter)5267     public static String toTitleCase(Locale locale, String str,
5268             BreakIterator breakiter)
5269     {
5270         return toTitleCase(locale, str, breakiter, 0);
5271     }
5272 
5273     /**
5274      * <p>Returns the titlecase version of the argument string.
5275      * <p>Position for titlecasing is determined by the argument break
5276      * iterator, hence the user can customize his break iterator for
5277      * a specialized titlecasing. In this case only the forward iteration
5278      * needs to be implemented.
5279      * If the break iterator passed in is null, the default Unicode algorithm
5280      * will be used to determine the titlecase positions.
5281      *
5282      * <p>Only positions returned by the break iterator will be title cased,
5283      * character in between the positions will all be in lower case.
5284      * <p>Casing is dependent on the argument locale and context-sensitive
5285      * @param locale which string is to be converted in
5286      * @param str source string to be performed on
5287      * @param titleIter break iterator to determine the positions in which
5288      *        the character should be title cased.
5289      * @return titlecase version of the argument string
5290      * @stable ICU 3.2
5291      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter)5292     public static String toTitleCase(ULocale locale, String str,
5293             BreakIterator titleIter) {
5294         return toTitleCase(locale, str, titleIter, 0);
5295     }
5296 
5297     /**
5298      * <p>Returns the titlecase version of the argument string.
5299      * <p>Position for titlecasing is determined by the argument break
5300      * iterator, hence the user can customize his break iterator for
5301      * a specialized titlecasing. In this case only the forward iteration
5302      * needs to be implemented.
5303      * If the break iterator passed in is null, the default Unicode algorithm
5304      * will be used to determine the titlecase positions.
5305      *
5306      * <p>Only positions returned by the break iterator will be title cased,
5307      * character in between the positions will all be in lower case.
5308      * <p>Casing is dependent on the argument locale and context-sensitive
5309      * @param locale which string is to be converted in
5310      * @param str source string to be performed on
5311      * @param titleIter break iterator to determine the positions in which
5312      *        the character should be title cased.
5313      * @param options bit set to modify the titlecasing operation
5314      * @return titlecase version of the argument string
5315      * @stable ICU 3.8
5316      * @see #TITLECASE_NO_LOWERCASE
5317      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5318      */
toTitleCase(ULocale locale, String str, BreakIterator titleIter, int options)5319     public static String toTitleCase(ULocale locale, String str,
5320             BreakIterator titleIter, int options) {
5321         if (titleIter == null && locale == null) {
5322             locale = ULocale.getDefault();
5323         }
5324         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
5325         titleIter.setText(str);
5326         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
5327     }
5328 
5329     /**
5330      * {@icu} <p>Returns the titlecase version of the argument string.
5331      * <p>Position for titlecasing is determined by the argument break
5332      * iterator, hence the user can customize his break iterator for
5333      * a specialized titlecasing. In this case only the forward iteration
5334      * needs to be implemented.
5335      * If the break iterator passed in is null, the default Unicode algorithm
5336      * will be used to determine the titlecase positions.
5337      *
5338      * <p>Only positions returned by the break iterator will be title cased,
5339      * character in between the positions will all be in lower case.
5340      * <p>Casing is dependent on the argument locale and context-sensitive
5341      * @param locale which string is to be converted in
5342      * @param str source string to be performed on
5343      * @param titleIter break iterator to determine the positions in which
5344      *        the character should be title cased.
5345      * @param options bit set to modify the titlecasing operation
5346      * @return titlecase version of the argument string
5347      * @see #TITLECASE_NO_LOWERCASE
5348      * @see #TITLECASE_NO_BREAK_ADJUSTMENT
5349      * @stable ICU 54
5350      */
toTitleCase(Locale locale, String str, BreakIterator titleIter, int options)5351     public static String toTitleCase(Locale locale, String str,
5352             BreakIterator titleIter,
5353             int options) {
5354         if (titleIter == null && locale == null) {
5355             locale = Locale.getDefault();
5356         }
5357         titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter);
5358         titleIter.setText(str);
5359         return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str);
5360     }
5361 
5362     /**
5363      * {@icu} The given character is mapped to its case folding equivalent according
5364      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5365      * folding equivalent, the character itself is returned.
5366      *
5367      * <p>This function only returns the simple, single-code point case mapping.
5368      * Full case mappings should be used whenever possible because they produce
5369      * better results by working on whole strings.
5370      * They can map to a result string with a different length as appropriate.
5371      * Full case mappings are applied by the case mapping functions
5372      * that take String parameters rather than code points (int).
5373      * See also the User Guide chapter on C/POSIX migration:
5374      * http://www.icu-project.org/userguide/posix.html#case_mappings
5375      *
5376      * @param ch             the character to be converted
5377      * @param defaultmapping Indicates whether the default mappings defined in
5378      *                       CaseFolding.txt are to be used, otherwise the
5379      *                       mappings for dotted I and dotless i marked with
5380      *                       'T' in CaseFolding.txt are included.
5381      * @return               the case folding equivalent of the character, if
5382      *                       any; otherwise the character itself.
5383      * @see                  #foldCase(String, boolean)
5384      * @stable ICU 2.1
5385      */
foldCase(int ch, boolean defaultmapping)5386     public static int foldCase(int ch, boolean defaultmapping) {
5387         return foldCase(ch, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5388     }
5389 
5390     /**
5391      * {@icu} The given string is mapped to its case folding equivalent according to
5392      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5393      * folding equivalent, the character itself is returned.
5394      * "Full", multiple-code point case folding mappings are returned here.
5395      * For "simple" single-code point mappings use the API
5396      * foldCase(int ch, boolean defaultmapping).
5397      * @param str            the String to be converted
5398      * @param defaultmapping Indicates whether the default mappings defined in
5399      *                       CaseFolding.txt are to be used, otherwise the
5400      *                       mappings for dotted I and dotless i marked with
5401      *                       'T' in CaseFolding.txt are included.
5402      * @return               the case folding equivalent of the character, if
5403      *                       any; otherwise the character itself.
5404      * @see                  #foldCase(int, boolean)
5405      * @stable ICU 2.1
5406      */
foldCase(String str, boolean defaultmapping)5407     public static String foldCase(String str, boolean defaultmapping) {
5408         return foldCase(str, defaultmapping ? FOLD_CASE_DEFAULT : FOLD_CASE_EXCLUDE_SPECIAL_I);
5409     }
5410 
5411     /**
5412      * {@icu} Option value for case folding: use default mappings defined in
5413      * CaseFolding.txt.
5414      * @stable ICU 2.6
5415      */
5416     public static final int FOLD_CASE_DEFAULT    =      0x0000;
5417     /**
5418      * {@icu} Option value for case folding:
5419      * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
5420      * and dotless i appropriately for Turkic languages (tr, az).
5421      *
5422      * <p>Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
5423      * are to be included for default mappings and
5424      * excluded for the Turkic-specific mappings.
5425      *
5426      * <p>Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
5427      * are to be excluded for default mappings and
5428      * included for the Turkic-specific mappings.
5429      *
5430      * @stable ICU 2.6
5431      */
5432     public static final int FOLD_CASE_EXCLUDE_SPECIAL_I = 0x0001;
5433 
5434     /**
5435      * {@icu} The given character is mapped to its case folding equivalent according
5436      * to UnicodeData.txt and CaseFolding.txt; if the character has no case
5437      * folding equivalent, the character itself is returned.
5438      *
5439      * <p>This function only returns the simple, single-code point case mapping.
5440      * Full case mappings should be used whenever possible because they produce
5441      * better results by working on whole strings.
5442      * They can map to a result string with a different length as appropriate.
5443      * Full case mappings are applied by the case mapping functions
5444      * that take String parameters rather than code points (int).
5445      * See also the User Guide chapter on C/POSIX migration:
5446      * http://www.icu-project.org/userguide/posix.html#case_mappings
5447      *
5448      * @param ch the character to be converted
5449      * @param options A bit set for special processing. Currently the recognised options
5450      * are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5451      * @return the case folding equivalent of the character, if any; otherwise the
5452      * character itself.
5453      * @see #foldCase(String, boolean)
5454      * @stable ICU 2.6
5455      */
foldCase(int ch, int options)5456     public static int foldCase(int ch, int options) {
5457         return UCaseProps.INSTANCE.fold(ch, options);
5458     }
5459 
5460     /**
5461      * {@icu} The given string is mapped to its case folding equivalent according to
5462      * UnicodeData.txt and CaseFolding.txt; if any character has no case
5463      * folding equivalent, the character itself is returned.
5464      * "Full", multiple-code point case folding mappings are returned here.
5465      * For "simple" single-code point mappings use the API
5466      * foldCase(int ch, boolean defaultmapping).
5467      * @param str the String to be converted
5468      * @param options A bit set for special processing. Currently the recognised options
5469      *                are FOLD_CASE_EXCLUDE_SPECIAL_I and FOLD_CASE_DEFAULT
5470      * @return the case folding equivalent of the character, if any; otherwise the
5471      *         character itself.
5472      * @see #foldCase(int, boolean)
5473      * @stable ICU 2.6
5474      */
foldCase(String str, int options)5475     public static final String foldCase(String str, int options) {
5476         return CaseMapImpl.fold(options, str);
5477     }
5478 
5479     /**
5480      * {@icu} Returns the numeric value of a Han character.
5481      *
5482      * <p>This returns the value of Han 'numeric' code points,
5483      * including those for zero, ten, hundred, thousand, ten thousand,
5484      * and hundred million.
5485      * This includes both the standard and 'checkwriting'
5486      * characters, the 'big circle' zero character, and the standard
5487      * zero character.
5488      *
5489      * <p>Note: The Unicode Standard has numeric values for more
5490      * Han characters recognized by this method
5491      * (see {@link #getNumericValue(int)} and the UCD file DerivedNumericValues.txt),
5492      * and a {@link com.ibm.icu.text.NumberFormat} can be used with
5493      * a Chinese {@link com.ibm.icu.text.NumberingSystem}.
5494      *
5495      * @param ch code point to query
5496      * @return value if it is a Han 'numeric character,' otherwise return -1.
5497      * @stable ICU 2.4
5498      */
getHanNumericValue(int ch)5499     public static int getHanNumericValue(int ch)
5500     {
5501         switch(ch)
5502         {
5503         case IDEOGRAPHIC_NUMBER_ZERO_ :
5504         case CJK_IDEOGRAPH_COMPLEX_ZERO_ :
5505             return 0; // Han Zero
5506         case CJK_IDEOGRAPH_FIRST_ :
5507         case CJK_IDEOGRAPH_COMPLEX_ONE_ :
5508             return 1; // Han One
5509         case CJK_IDEOGRAPH_SECOND_ :
5510         case CJK_IDEOGRAPH_COMPLEX_TWO_ :
5511             return 2; // Han Two
5512         case CJK_IDEOGRAPH_THIRD_ :
5513         case CJK_IDEOGRAPH_COMPLEX_THREE_ :
5514             return 3; // Han Three
5515         case CJK_IDEOGRAPH_FOURTH_ :
5516         case CJK_IDEOGRAPH_COMPLEX_FOUR_ :
5517             return 4; // Han Four
5518         case CJK_IDEOGRAPH_FIFTH_ :
5519         case CJK_IDEOGRAPH_COMPLEX_FIVE_ :
5520             return 5; // Han Five
5521         case CJK_IDEOGRAPH_SIXTH_ :
5522         case CJK_IDEOGRAPH_COMPLEX_SIX_ :
5523             return 6; // Han Six
5524         case CJK_IDEOGRAPH_SEVENTH_ :
5525         case CJK_IDEOGRAPH_COMPLEX_SEVEN_ :
5526             return 7; // Han Seven
5527         case CJK_IDEOGRAPH_EIGHTH_ :
5528         case CJK_IDEOGRAPH_COMPLEX_EIGHT_ :
5529             return 8; // Han Eight
5530         case CJK_IDEOGRAPH_NINETH_ :
5531         case CJK_IDEOGRAPH_COMPLEX_NINE_ :
5532             return 9; // Han Nine
5533         case CJK_IDEOGRAPH_TEN_ :
5534         case CJK_IDEOGRAPH_COMPLEX_TEN_ :
5535             return 10;
5536         case CJK_IDEOGRAPH_HUNDRED_ :
5537         case CJK_IDEOGRAPH_COMPLEX_HUNDRED_ :
5538             return 100;
5539         case CJK_IDEOGRAPH_THOUSAND_ :
5540         case CJK_IDEOGRAPH_COMPLEX_THOUSAND_ :
5541             return 1000;
5542         case CJK_IDEOGRAPH_TEN_THOUSAND_ :
5543             return 10000;
5544         case CJK_IDEOGRAPH_HUNDRED_MILLION_ :
5545             return 100000000;
5546         }
5547         return -1; // no value
5548     }
5549 
5550     /**
5551      * {@icu} <p>Returns an iterator for character types, iterating over codepoints.
5552      * <p>Example of use:<br>
5553      * <pre>
5554      * RangeValueIterator iterator = UCharacter.getTypeIterator();
5555      * RangeValueIterator.Element element = new RangeValueIterator.Element();
5556      * while (iterator.next(element)) {
5557      *     System.out.println("Codepoint \\u" +
5558      *                        Integer.toHexString(element.start) +
5559      *                        " to codepoint \\u" +
5560      *                        Integer.toHexString(element.limit - 1) +
5561      *                        " has the character type " +
5562      *                        element.value);
5563      * }
5564      * </pre>
5565      * @return an iterator
5566      * @stable ICU 2.6
5567      */
getTypeIterator()5568     public static RangeValueIterator getTypeIterator()
5569     {
5570         return new UCharacterTypeIterator();
5571     }
5572 
5573     private static final class UCharacterTypeIterator implements RangeValueIterator {
UCharacterTypeIterator()5574         UCharacterTypeIterator() {
5575             reset();
5576         }
5577 
5578         // implements RangeValueIterator
5579         @Override
next(Element element)5580         public boolean next(Element element) {
5581             if(trieIterator.hasNext() && !(range=trieIterator.next()).leadSurrogate) {
5582                 element.start=range.startCodePoint;
5583                 element.limit=range.endCodePoint+1;
5584                 element.value=range.value;
5585                 return true;
5586             } else {
5587                 return false;
5588             }
5589         }
5590 
5591         // implements RangeValueIterator
5592         @Override
reset()5593         public void reset() {
5594             trieIterator=UCharacterProperty.INSTANCE.m_trie_.iterator(MASK_TYPE);
5595         }
5596 
5597         private Iterator<Trie2.Range> trieIterator;
5598         private Trie2.Range range;
5599 
5600         private static final class MaskType implements Trie2.ValueMapper {
5601             // Extracts the general category ("character type") from the trie value.
5602             @Override
map(int value)5603             public int map(int value) {
5604                 return value & UCharacterProperty.TYPE_MASK;
5605             }
5606         }
5607         private static final MaskType MASK_TYPE=new MaskType();
5608     }
5609 
5610     /**
5611      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
5612      * <p>This API only gets the iterator for the modern, most up-to-date
5613      * Unicode names. For older 1.0 Unicode names use get1_0NameIterator() or
5614      * for extended names use getExtendedNameIterator().
5615      * <p>Example of use:<br>
5616      * <pre>
5617      * ValueIterator iterator = UCharacter.getNameIterator();
5618      * ValueIterator.Element element = new ValueIterator.Element();
5619      * while (iterator.next(element)) {
5620      *     System.out.println("Codepoint \\u" +
5621      *                        Integer.toHexString(element.codepoint) +
5622      *                        " has the name " + (String)element.value);
5623      * }
5624      * </pre>
5625      * <p>The maximal range which the name iterator iterates is from
5626      * UCharacter.MIN_VALUE to UCharacter.MAX_VALUE.
5627      * @return an iterator
5628      * @stable ICU 2.6
5629      */
getNameIterator()5630     public static ValueIterator getNameIterator(){
5631         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5632                 UCharacterNameChoice.UNICODE_CHAR_NAME);
5633     }
5634 
5635     /**
5636      * {@icu} Returns an empty iterator.
5637      * <p>Used to return an iterator for the older 1.0 Unicode character names, iterating over codepoints.
5638      * @return an empty iterator
5639      * @deprecated ICU 49
5640      * @see #getName1_0(int)
5641      */
5642     @Deprecated
getName1_0Iterator()5643     public static ValueIterator getName1_0Iterator(){
5644         return new DummyValueIterator();
5645     }
5646 
5647     private static final class DummyValueIterator implements ValueIterator {
5648         @Override
next(Element element)5649         public boolean next(Element element) { return false; }
5650         @Override
reset()5651         public void reset() {}
5652         @Override
setRange(int start, int limit)5653         public void setRange(int start, int limit) {}
5654     }
5655 
5656     /**
5657      * {@icu} <p>Returns an iterator for character names, iterating over codepoints.
5658      * <p>This API only gets the iterator for the extended names.
5659      * For modern, most up-to-date Unicode names use getNameIterator() or
5660      * for older 1.0 Unicode names use get1_0NameIterator().
5661      * <p>Example of use:<br>
5662      * <pre>
5663      * ValueIterator iterator = UCharacter.getExtendedNameIterator();
5664      * ValueIterator.Element element = new ValueIterator.Element();
5665      * while (iterator.next(element)) {
5666      *     System.out.println("Codepoint \\u" +
5667      *                        Integer.toHexString(element.codepoint) +
5668      *                        " has the name " + (String)element.value);
5669      * }
5670      * </pre>
5671      * <p>The maximal range which the name iterator iterates is from
5672      * @return an iterator
5673      * @stable ICU 2.6
5674      */
getExtendedNameIterator()5675     public static ValueIterator getExtendedNameIterator(){
5676         return new UCharacterNameIterator(UCharacterName.INSTANCE,
5677                 UCharacterNameChoice.EXTENDED_CHAR_NAME);
5678     }
5679 
5680     /**
5681      * {@icu} Returns the "age" of the code point.
5682      * <p>The "age" is the Unicode version when the code point was first
5683      * designated (as a non-character or for Private Use) or assigned a
5684      * character.
5685      * <p>This can be useful to avoid emitting code points to receiving
5686      * processes that do not accept newer characters.
5687      * <p>The data is from the UCD file DerivedAge.txt.
5688      * @param ch The code point.
5689      * @return the Unicode version number
5690      * @stable ICU 2.6
5691      */
getAge(int ch)5692     public static VersionInfo getAge(int ch)
5693     {
5694         if (ch < MIN_VALUE || ch > MAX_VALUE) {
5695             throw new IllegalArgumentException("Codepoint out of bounds");
5696         }
5697         return UCharacterProperty.INSTANCE.getAge(ch);
5698     }
5699 
5700     /**
5701      * {@icu} Check a binary Unicode property for a code point.
5702      * <p>Unicode, especially in version 3.2, defines many more properties
5703      * than the original set in UnicodeData.txt.
5704      * <p>This API is intended to reflect Unicode properties as defined in
5705      * the Unicode Character Database (UCD) and Unicode Technical Reports
5706      * (UTR).
5707      * <p>For details about the properties see
5708      * <a href=http://www.unicode.org/>http://www.unicode.org/</a>.
5709      * <p>For names of Unicode properties see the UCD file
5710      * PropertyAliases.txt.
5711      * <p>This API does not check the validity of the codepoint.
5712      * <p>Important: If ICU is built with UCD files from Unicode versions
5713      * below 3.2, then properties marked with "new" are not or
5714      * not fully available.
5715      * @param ch code point to test.
5716      * @param property selector constant from com.ibm.icu.lang.UProperty,
5717      *        identifies which binary property to check.
5718      * @return true or false according to the binary Unicode property value
5719      *         for ch. Also false if property is out of bounds or if the
5720      *         Unicode version does not have data for the property at all, or
5721      *         not for this code point.
5722      * @see com.ibm.icu.lang.UProperty
5723      * @see CharacterProperties#getBinaryPropertySet(int)
5724      * @stable ICU 2.6
5725      */
hasBinaryProperty(int ch, int property)5726     public static boolean hasBinaryProperty(int ch, int property)
5727     {
5728         return UCharacterProperty.INSTANCE.hasBinaryProperty(ch, property);
5729     }
5730 
5731     /**
5732      * {@icu} <p>Check if a code point has the Alphabetic Unicode property.
5733      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.ALPHABETIC).
5734      * <p>Different from UCharacter.isLetter(ch)!
5735      * @stable ICU 2.6
5736      * @param ch codepoint to be tested
5737      */
isUAlphabetic(int ch)5738     public static boolean isUAlphabetic(int ch)
5739     {
5740         return hasBinaryProperty(ch, UProperty.ALPHABETIC);
5741     }
5742 
5743     /**
5744      * {@icu} <p>Check if a code point has the Lowercase Unicode property.
5745      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.LOWERCASE).
5746      * <p>This is different from UCharacter.isLowerCase(ch)!
5747      * @param ch codepoint to be tested
5748      * @stable ICU 2.6
5749      */
isULowercase(int ch)5750     public static boolean isULowercase(int ch)
5751     {
5752         return hasBinaryProperty(ch, UProperty.LOWERCASE);
5753     }
5754 
5755     /**
5756      * {@icu} <p>Check if a code point has the Uppercase Unicode property.
5757      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.UPPERCASE).
5758      * <p>This is different from UCharacter.isUpperCase(ch)!
5759      * @param ch codepoint to be tested
5760      * @stable ICU 2.6
5761      */
isUUppercase(int ch)5762     public static boolean isUUppercase(int ch)
5763     {
5764         return hasBinaryProperty(ch, UProperty.UPPERCASE);
5765     }
5766 
5767     /**
5768      * {@icu} <p>Check if a code point has the White_Space Unicode property.
5769      * <p>Same as UCharacter.hasBinaryProperty(ch, UProperty.WHITE_SPACE).
5770      * <p>This is different from both UCharacter.isSpace(ch) and
5771      * UCharacter.isWhitespace(ch)!
5772      * @param ch codepoint to be tested
5773      * @stable ICU 2.6
5774      */
isUWhiteSpace(int ch)5775     public static boolean isUWhiteSpace(int ch)
5776     {
5777         return hasBinaryProperty(ch, UProperty.WHITE_SPACE);
5778     }
5779 
5780     /**
5781      * {@icu} Returns the property value for a Unicode property type of a code point.
5782      * Also returns binary and mask property values.
5783      * <p>Unicode, especially in version 3.2, defines many more properties than
5784      * the original set in UnicodeData.txt.
5785      * <p>The properties APIs are intended to reflect Unicode properties as
5786      * defined in the Unicode Character Database (UCD) and Unicode Technical
5787      * Reports (UTR). For details about the properties see
5788      * http://www.unicode.org/.
5789      * <p>For names of Unicode properties see the UCD file PropertyAliases.txt.
5790      *
5791      * <pre>
5792      * Sample usage:
5793      * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH);
5794      * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC);
5795      * boolean b = (ideo == 1) ? true : false;
5796      * </pre>
5797      * @param ch code point to test.
5798      * @param type UProperty selector constant, identifies which binary
5799      *        property to check. Must be
5800      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5801      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT or
5802      *        UProperty.MASK_START &lt;= type &lt; UProperty.MASK_LIMIT.
5803      * @return numeric value that is directly the property value or,
5804      *         for enumerated properties, corresponds to the numeric value of
5805      *         the enumerated constant of the respective property value type
5806      *         ({@link ECharacterCategory}, {@link ECharacterDirection},
5807      *         {@link DecompositionType}, etc.).
5808      *         Returns 0 or 1 (for false / true) for binary Unicode properties.
5809      *         Returns a bit-mask for mask properties.
5810      *         Returns 0 if 'type' is out of bounds or if the Unicode version
5811      *         does not have data for the property at all, or not for this code
5812      *         point.
5813      * @see UProperty
5814      * @see #hasBinaryProperty
5815      * @see #getIntPropertyMinValue
5816      * @see #getIntPropertyMaxValue
5817      * @see CharacterProperties#getIntPropertyMap(int)
5818      * @see #getUnicodeVersion
5819      * @stable ICU 2.4
5820      */
getIntPropertyValue(int ch, int type)5821     public static int getIntPropertyValue(int ch, int type)
5822     {
5823         return UCharacterProperty.INSTANCE.getIntPropertyValue(ch, type);
5824     }
5825     /**
5826      * {@icu} Returns a string version of the property value.
5827      * @param propertyEnum The property enum value.
5828      * @param codepoint The codepoint value.
5829      * @param nameChoice The choice of the name.
5830      * @return value as string
5831      * @internal
5832      * @deprecated This API is ICU internal only.
5833      */
5834     @Deprecated
5835     ///CLOVER:OFF
getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice)5836     public static String getStringPropertyValue(int propertyEnum, int codepoint, int nameChoice) {
5837         if ((propertyEnum >= UProperty.BINARY_START && propertyEnum < UProperty.BINARY_LIMIT) ||
5838                 (propertyEnum >= UProperty.INT_START && propertyEnum < UProperty.INT_LIMIT)) {
5839             return getPropertyValueName(propertyEnum, getIntPropertyValue(codepoint, propertyEnum),
5840                     nameChoice);
5841         }
5842         if (propertyEnum == UProperty.NUMERIC_VALUE) {
5843             return String.valueOf(getUnicodeNumericValue(codepoint));
5844         }
5845         // otherwise must be string property
5846         switch (propertyEnum) {
5847         case UProperty.AGE: return getAge(codepoint).toString();
5848         case UProperty.ISO_COMMENT: return getISOComment(codepoint);
5849         case UProperty.BIDI_MIRRORING_GLYPH: return toString(getMirror(codepoint));
5850         case UProperty.CASE_FOLDING: return toString(foldCase(codepoint, true));
5851         case UProperty.LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5852         case UProperty.NAME: return getName(codepoint);
5853         case UProperty.SIMPLE_CASE_FOLDING: return toString(foldCase(codepoint, true));
5854         case UProperty.SIMPLE_LOWERCASE_MAPPING: return toString(toLowerCase(codepoint));
5855         case UProperty.SIMPLE_TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5856         case UProperty.SIMPLE_UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5857         case UProperty.TITLECASE_MAPPING: return toString(toTitleCase(codepoint));
5858         case UProperty.UNICODE_1_NAME: return getName1_0(codepoint);
5859         case UProperty.UPPERCASE_MAPPING: return toString(toUpperCase(codepoint));
5860         }
5861         throw new IllegalArgumentException("Illegal Property Enum");
5862     }
5863     ///CLOVER:ON
5864 
5865     /**
5866      * {@icu} Returns the minimum value for an integer/binary Unicode property type.
5867      * Can be used together with UCharacter.getIntPropertyMaxValue(int)
5868      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5869      * @param type UProperty selector constant, identifies which binary
5870      *        property to check. Must be
5871      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5872      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5873      * @return Minimum value returned by UCharacter.getIntPropertyValue(int)
5874      *         for a Unicode property. 0 if the property
5875      *         selector 'type' is out of range.
5876      * @see UProperty
5877      * @see #hasBinaryProperty
5878      * @see #getUnicodeVersion
5879      * @see #getIntPropertyMaxValue
5880      * @see #getIntPropertyValue
5881      * @stable ICU 2.4
5882      */
getIntPropertyMinValue(int type)5883     public static int getIntPropertyMinValue(int type){
5884 
5885         return 0; // undefined; and: all other properties have a minimum value of 0
5886     }
5887 
5888 
5889     /**
5890      * {@icu} Returns the maximum value for an integer/binary Unicode property.
5891      * Can be used together with UCharacter.getIntPropertyMinValue(int)
5892      * to allocate arrays of com.ibm.icu.text.UnicodeSet or similar.
5893      * Examples for min/max values (for Unicode 3.2):
5894      * <ul>
5895      * <li> UProperty.BIDI_CLASS:    0/18
5896      * (UCharacterDirection.LEFT_TO_RIGHT/UCharacterDirection.BOUNDARY_NEUTRAL)
5897      * <li> UProperty.SCRIPT:        0/45 (UScript.COMMON/UScript.TAGBANWA)
5898      * <li> UProperty.IDEOGRAPHIC:   0/1  (false/true)
5899      * </ul>
5900      * For undefined UProperty constant values, min/max values will be 0/-1.
5901      * @param type UProperty selector constant, identifies which binary
5902      *        property to check. Must be
5903      *        UProperty.BINARY_START &lt;= type &lt; UProperty.BINARY_LIMIT or
5904      *        UProperty.INT_START &lt;= type &lt; UProperty.INT_LIMIT.
5905      * @return Maximum value returned by u_getIntPropertyValue for a Unicode
5906      *         property. &lt;= 0 if the property selector 'type' is out of range.
5907      * @see UProperty
5908      * @see #hasBinaryProperty
5909      * @see #getUnicodeVersion
5910      * @see #getIntPropertyMaxValue
5911      * @see #getIntPropertyValue
5912      * @stable ICU 2.4
5913      */
getIntPropertyMaxValue(int type)5914     public static int getIntPropertyMaxValue(int type)
5915     {
5916         return UCharacterProperty.INSTANCE.getIntPropertyMaxValue(type);
5917     }
5918 
5919     /**
5920      * Provide the java.lang.Character forDigit API, for convenience.
5921      * @stable ICU 3.0
5922      */
forDigit(int digit, int radix)5923     public static char forDigit(int digit, int radix) {
5924         return java.lang.Character.forDigit(digit, radix);
5925     }
5926 
5927     // JDK 1.5 API coverage
5928 
5929     /**
5930      * Constant U+D800, same as {@link Character#MIN_HIGH_SURROGATE}.
5931      *
5932      * @stable ICU 3.0
5933      */
5934     public static final char MIN_HIGH_SURROGATE = Character.MIN_HIGH_SURROGATE;
5935 
5936     /**
5937      * Constant U+DBFF, same as {@link Character#MAX_HIGH_SURROGATE}.
5938      *
5939      * @stable ICU 3.0
5940      */
5941     public static final char MAX_HIGH_SURROGATE = Character.MAX_HIGH_SURROGATE;
5942 
5943     /**
5944      * Constant U+DC00, same as {@link Character#MIN_LOW_SURROGATE}.
5945      *
5946      * @stable ICU 3.0
5947      */
5948     public static final char MIN_LOW_SURROGATE = Character.MIN_LOW_SURROGATE;
5949 
5950     /**
5951      * Constant U+DFFF, same as {@link Character#MAX_LOW_SURROGATE}.
5952      *
5953      * @stable ICU 3.0
5954      */
5955     public static final char MAX_LOW_SURROGATE = Character.MAX_LOW_SURROGATE;
5956 
5957     /**
5958      * Constant U+D800, same as {@link Character#MIN_SURROGATE}.
5959      *
5960      * @stable ICU 3.0
5961      */
5962     public static final char MIN_SURROGATE = Character.MIN_SURROGATE;
5963 
5964     /**
5965      * Constant U+DFFF, same as {@link Character#MAX_SURROGATE}.
5966      *
5967      * @stable ICU 3.0
5968      */
5969     public static final char MAX_SURROGATE = Character.MAX_SURROGATE;
5970 
5971     /**
5972      * Constant U+10000, same as {@link Character#MIN_SUPPLEMENTARY_CODE_POINT}.
5973      *
5974      * @stable ICU 3.0
5975      */
5976     public static final int MIN_SUPPLEMENTARY_CODE_POINT = Character.MIN_SUPPLEMENTARY_CODE_POINT;
5977 
5978     /**
5979      * Constant U+10FFFF, same as {@link Character#MAX_CODE_POINT}.
5980      *
5981      * @stable ICU 3.0
5982      */
5983     public static final int MAX_CODE_POINT = Character.MAX_CODE_POINT;
5984 
5985     /**
5986      * Constant U+0000, same as {@link Character#MIN_CODE_POINT}.
5987      *
5988      * @stable ICU 3.0
5989      */
5990     public static final int MIN_CODE_POINT = Character.MIN_CODE_POINT;
5991 
5992     /**
5993      * Equivalent to {@link Character#isValidCodePoint}.
5994      *
5995      * @param cp the code point to check
5996      * @return true if cp is a valid code point
5997      * @stable ICU 3.0
5998      */
isValidCodePoint(int cp)5999     public static final boolean isValidCodePoint(int cp) {
6000         return cp >= 0 && cp <= MAX_CODE_POINT;
6001     }
6002 
6003     /**
6004      * Same as {@link Character#isSupplementaryCodePoint}.
6005      *
6006      * @param cp the code point to check
6007      * @return true if cp is a supplementary code point
6008      * @stable ICU 3.0
6009      */
isSupplementaryCodePoint(int cp)6010     public static final boolean isSupplementaryCodePoint(int cp) {
6011         return Character.isSupplementaryCodePoint(cp);
6012     }
6013 
6014     /**
6015      * Same as {@link Character#isHighSurrogate}.
6016      *
6017      * @param ch the char to check
6018      * @return true if ch is a high (lead) surrogate
6019      * @stable ICU 3.0
6020      */
isHighSurrogate(char ch)6021     public static boolean isHighSurrogate(char ch) {
6022         return Character.isHighSurrogate(ch);
6023     }
6024 
6025     /**
6026      * Same as {@link Character#isLowSurrogate}.
6027      *
6028      * @param ch the char to check
6029      * @return true if ch is a low (trail) surrogate
6030      * @stable ICU 3.0
6031      */
isLowSurrogate(char ch)6032     public static boolean isLowSurrogate(char ch) {
6033         return Character.isLowSurrogate(ch);
6034     }
6035 
6036     /**
6037      * Same as {@link Character#isSurrogatePair}.
6038      *
6039      * @param high the high (lead) char
6040      * @param low the low (trail) char
6041      * @return true if high, low form a surrogate pair
6042      * @stable ICU 3.0
6043      */
isSurrogatePair(char high, char low)6044     public static final boolean isSurrogatePair(char high, char low) {
6045         return Character.isSurrogatePair(high, low);
6046     }
6047 
6048     /**
6049      * Same as {@link Character#charCount}.
6050      * Returns the number of chars needed to represent the code point (1 or 2).
6051      * This does not check the code point for validity.
6052      *
6053      * @param cp the code point to check
6054      * @return the number of chars needed to represent the code point
6055      * @stable ICU 3.0
6056      */
charCount(int cp)6057     public static int charCount(int cp) {
6058         return Character.charCount(cp);
6059     }
6060 
6061     /**
6062      * Same as {@link Character#toCodePoint}.
6063      * Returns the code point represented by the two surrogate code units.
6064      * This does not check the surrogate pair for validity.
6065      *
6066      * @param high the high (lead) surrogate
6067      * @param low the low (trail) surrogate
6068      * @return the code point formed by the surrogate pair
6069      * @stable ICU 3.0
6070      */
toCodePoint(char high, char low)6071     public static final int toCodePoint(char high, char low) {
6072         return Character.toCodePoint(high, low);
6073     }
6074 
6075     /**
6076      * Same as {@link Character#codePointAt(CharSequence, int)}.
6077      * Returns the code point at index.
6078      * This examines only the characters at index and index+1.
6079      *
6080      * @param seq the characters to check
6081      * @param index the index of the first or only char forming the code point
6082      * @return the code point at the index
6083      * @stable ICU 3.0
6084      */
codePointAt(CharSequence seq, int index)6085     public static final int codePointAt(CharSequence seq, int index) {
6086         char c1 = seq.charAt(index++);
6087         if (isHighSurrogate(c1)) {
6088             if (index < seq.length()) {
6089                 char c2 = seq.charAt(index);
6090                 if (isLowSurrogate(c2)) {
6091                     return toCodePoint(c1, c2);
6092                 }
6093             }
6094         }
6095         return c1;
6096     }
6097 
6098     /**
6099      * Same as {@link Character#codePointAt(char[], int)}.
6100      * Returns the code point at index.
6101      * This examines only the characters at index and index+1.
6102      *
6103      * @param text the characters to check
6104      * @param index the index of the first or only char forming the code point
6105      * @return the code point at the index
6106      * @stable ICU 3.0
6107      */
codePointAt(char[] text, int index)6108     public static final int codePointAt(char[] text, int index) {
6109         char c1 = text[index++];
6110         if (isHighSurrogate(c1)) {
6111             if (index < text.length) {
6112                 char c2 = text[index];
6113                 if (isLowSurrogate(c2)) {
6114                     return toCodePoint(c1, c2);
6115                 }
6116             }
6117         }
6118         return c1;
6119     }
6120 
6121     /**
6122      * Same as {@link Character#codePointAt(char[], int, int)}.
6123      * Returns the code point at index.
6124      * This examines only the characters at index and index+1.
6125      *
6126      * @param text the characters to check
6127      * @param index the index of the first or only char forming the code point
6128      * @param limit the limit of the valid text
6129      * @return the code point at the index
6130      * @stable ICU 3.0
6131      */
codePointAt(char[] text, int index, int limit)6132     public static final int codePointAt(char[] text, int index, int limit) {
6133         if (index >= limit || limit > text.length) {
6134             throw new IndexOutOfBoundsException();
6135         }
6136         char c1 = text[index++];
6137         if (isHighSurrogate(c1)) {
6138             if (index < limit) {
6139                 char c2 = text[index];
6140                 if (isLowSurrogate(c2)) {
6141                     return toCodePoint(c1, c2);
6142                 }
6143             }
6144         }
6145         return c1;
6146     }
6147 
6148     /**
6149      * Same as {@link Character#codePointBefore(CharSequence, int)}.
6150      * Return the code point before index.
6151      * This examines only the characters at index-1 and index-2.
6152      *
6153      * @param seq the characters to check
6154      * @param index the index after the last or only char forming the code point
6155      * @return the code point before the index
6156      * @stable ICU 3.0
6157      */
codePointBefore(CharSequence seq, int index)6158     public static final int codePointBefore(CharSequence seq, int index) {
6159         char c2 = seq.charAt(--index);
6160         if (isLowSurrogate(c2)) {
6161             if (index > 0) {
6162                 char c1 = seq.charAt(--index);
6163                 if (isHighSurrogate(c1)) {
6164                     return toCodePoint(c1, c2);
6165                 }
6166             }
6167         }
6168         return c2;
6169     }
6170 
6171     /**
6172      * Same as {@link Character#codePointBefore(char[], int)}.
6173      * Returns the code point before index.
6174      * This examines only the characters at index-1 and index-2.
6175      *
6176      * @param text the characters to check
6177      * @param index the index after the last or only char forming the code point
6178      * @return the code point before the index
6179      * @stable ICU 3.0
6180      */
codePointBefore(char[] text, int index)6181     public static final int codePointBefore(char[] text, int index) {
6182         char c2 = text[--index];
6183         if (isLowSurrogate(c2)) {
6184             if (index > 0) {
6185                 char c1 = text[--index];
6186                 if (isHighSurrogate(c1)) {
6187                     return toCodePoint(c1, c2);
6188                 }
6189             }
6190         }
6191         return c2;
6192     }
6193 
6194     /**
6195      * Same as {@link Character#codePointBefore(char[], int, int)}.
6196      * Return the code point before index.
6197      * This examines only the characters at index-1 and index-2.
6198      *
6199      * @param text the characters to check
6200      * @param index the index after the last or only char forming the code point
6201      * @param limit the start of the valid text
6202      * @return the code point before the index
6203      * @stable ICU 3.0
6204      */
codePointBefore(char[] text, int index, int limit)6205     public static final int codePointBefore(char[] text, int index, int limit) {
6206         if (index <= limit || limit < 0) {
6207             throw new IndexOutOfBoundsException();
6208         }
6209         char c2 = text[--index];
6210         if (isLowSurrogate(c2)) {
6211             if (index > limit) {
6212                 char c1 = text[--index];
6213                 if (isHighSurrogate(c1)) {
6214                     return toCodePoint(c1, c2);
6215                 }
6216             }
6217         }
6218         return c2;
6219     }
6220 
6221     /**
6222      * Same as {@link Character#toChars(int, char[], int)}.
6223      * Writes the chars representing the
6224      * code point into the destination at the given index.
6225      *
6226      * @param cp the code point to convert
6227      * @param dst the destination array into which to put the char(s) representing the code point
6228      * @param dstIndex the index at which to put the first (or only) char
6229      * @return the count of the number of chars written (1 or 2)
6230      * @throws IllegalArgumentException if cp is not a valid code point
6231      * @stable ICU 3.0
6232      */
toChars(int cp, char[] dst, int dstIndex)6233     public static final int toChars(int cp, char[] dst, int dstIndex) {
6234         return Character.toChars(cp, dst, dstIndex);
6235     }
6236 
6237     /**
6238      * Same as {@link Character#toChars(int)}.
6239      * Returns a char array representing the code point.
6240      *
6241      * @param cp the code point to convert
6242      * @return an array containing the char(s) representing the code point
6243      * @throws IllegalArgumentException if cp is not a valid code point
6244      * @stable ICU 3.0
6245      */
toChars(int cp)6246     public static final char[] toChars(int cp) {
6247         return Character.toChars(cp);
6248     }
6249 
6250     /**
6251      * Equivalent to the {@link Character#getDirectionality(char)} method, for
6252      * convenience. Returns a byte representing the directionality of the
6253      * character.
6254      *
6255      * {@icunote} Unlike {@link Character#getDirectionality(char)}, this returns
6256      * DIRECTIONALITY_LEFT_TO_RIGHT for undefined or out-of-bounds characters.
6257      *
6258      * {@icunote} The return value must be tested using the constants defined in {@link
6259      * UCharacterDirection} and its interface {@link
6260      * UCharacterEnums.ECharacterDirection} since the values are different from the ones
6261      * defined by <code>java.lang.Character</code>.
6262      * @param cp the code point to check
6263      * @return the directionality of the code point
6264      * @see #getDirection
6265      * @stable ICU 3.0
6266      */
getDirectionality(int cp)6267     public static byte getDirectionality(int cp)
6268     {
6269         return (byte)getDirection(cp);
6270     }
6271 
6272     /**
6273      * Equivalent to the {@link Character#codePointCount(CharSequence, int, int)}
6274      * method, for convenience.  Counts the number of code points in the range
6275      * of text.
6276      * @param text the characters to check
6277      * @param start the start of the range
6278      * @param limit the limit of the range
6279      * @return the number of code points in the range
6280      * @stable ICU 3.0
6281      */
codePointCount(CharSequence text, int start, int limit)6282     public static int codePointCount(CharSequence text, int start, int limit) {
6283         if (start < 0 || limit < start || limit > text.length()) {
6284             throw new IndexOutOfBoundsException("start (" + start +
6285                     ") or limit (" + limit +
6286                     ") invalid or out of range 0, " + text.length());
6287         }
6288 
6289         int len = limit - start;
6290         while (limit > start) {
6291             char ch = text.charAt(--limit);
6292             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6293                 ch = text.charAt(--limit);
6294                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6295                     --len;
6296                     break;
6297                 }
6298             }
6299         }
6300         return len;
6301     }
6302 
6303     /**
6304      * Equivalent to the {@link Character#codePointCount(char[], int, int)} method, for
6305      * convenience. Counts the number of code points in the range of text.
6306      * @param text the characters to check
6307      * @param start the start of the range
6308      * @param limit the limit of the range
6309      * @return the number of code points in the range
6310      * @stable ICU 3.0
6311      */
codePointCount(char[] text, int start, int limit)6312     public static int codePointCount(char[] text, int start, int limit) {
6313         if (start < 0 || limit < start || limit > text.length) {
6314             throw new IndexOutOfBoundsException("start (" + start +
6315                     ") or limit (" + limit +
6316                     ") invalid or out of range 0, " + text.length);
6317         }
6318 
6319         int len = limit - start;
6320         while (limit > start) {
6321             char ch = text[--limit];
6322             while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && limit > start) {
6323                 ch = text[--limit];
6324                 if (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE) {
6325                     --len;
6326                     break;
6327                 }
6328             }
6329         }
6330         return len;
6331     }
6332 
6333     /**
6334      * Equivalent to the {@link Character#offsetByCodePoints(CharSequence, int, int)}
6335      * method, for convenience.  Adjusts the char index by a code point offset.
6336      * @param text the characters to check
6337      * @param index the index to adjust
6338      * @param codePointOffset the number of code points by which to offset the index
6339      * @return the adjusted index
6340      * @stable ICU 3.0
6341      */
offsetByCodePoints(CharSequence text, int index, int codePointOffset)6342     public static int offsetByCodePoints(CharSequence text, int index, int codePointOffset) {
6343         if (index < 0 || index > text.length()) {
6344             throw new IndexOutOfBoundsException("index ( " + index +
6345                     ") out of range 0, " + text.length());
6346         }
6347 
6348         if (codePointOffset < 0) {
6349             while (++codePointOffset <= 0) {
6350                 char ch = text.charAt(--index);
6351                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > 0) {
6352                     ch = text.charAt(--index);
6353                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6354                         if (++codePointOffset > 0) {
6355                             return index+1;
6356                         }
6357                     }
6358                 }
6359             }
6360         } else {
6361             int limit = text.length();
6362             while (--codePointOffset >= 0) {
6363                 char ch = text.charAt(index++);
6364                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6365                     ch = text.charAt(index++);
6366                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6367                         if (--codePointOffset < 0) {
6368                             return index-1;
6369                         }
6370                     }
6371                 }
6372             }
6373         }
6374 
6375         return index;
6376     }
6377 
6378     /**
6379      * Equivalent to the
6380      * {@link Character#offsetByCodePoints(char[], int, int, int, int)}
6381      * method, for convenience.  Adjusts the char index by a code point offset.
6382      * @param text the characters to check
6383      * @param start the start of the range to check
6384      * @param count the length of the range to check
6385      * @param index the index to adjust
6386      * @param codePointOffset the number of code points by which to offset the index
6387      * @return the adjusted index
6388      * @stable ICU 3.0
6389      */
offsetByCodePoints(char[] text, int start, int count, int index, int codePointOffset)6390     public static int offsetByCodePoints(char[] text, int start, int count, int index,
6391             int codePointOffset) {
6392         int limit = start + count;
6393         if (start < 0 || limit < start || limit > text.length || index < start || index > limit) {
6394             throw new IndexOutOfBoundsException("index ( " + index +
6395                     ") out of range " + start +
6396                     ", " + limit +
6397                     " in array 0, " + text.length);
6398         }
6399 
6400         if (codePointOffset < 0) {
6401             while (++codePointOffset <= 0) {
6402                 char ch = text[--index];
6403                 if (index < start) {
6404                     throw new IndexOutOfBoundsException("index ( " + index +
6405                             ") < start (" + start +
6406                             ")");
6407                 }
6408                 while (ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE && index > start) {
6409                     ch = text[--index];
6410                     if (ch < MIN_HIGH_SURROGATE || ch > MAX_HIGH_SURROGATE) {
6411                         if (++codePointOffset > 0) {
6412                             return index+1;
6413                         }
6414                     }
6415                 }
6416             }
6417         } else {
6418             while (--codePointOffset >= 0) {
6419                 char ch = text[index++];
6420                 if (index > limit) {
6421                     throw new IndexOutOfBoundsException("index ( " + index +
6422                             ") > limit (" + limit +
6423                             ")");
6424                 }
6425                 while (ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE && index < limit) {
6426                     ch = text[index++];
6427                     if (ch < MIN_LOW_SURROGATE || ch > MAX_LOW_SURROGATE) {
6428                         if (--codePointOffset < 0) {
6429                             return index-1;
6430                         }
6431                     }
6432                 }
6433             }
6434         }
6435 
6436         return index;
6437     }
6438 
6439     // private variables -------------------------------------------------
6440 
6441     /**
6442      * To get the last character out from a data type
6443      */
6444     private static final int LAST_CHAR_MASK_ = 0xFFFF;
6445 
6446     //    /**
6447     //     * To get the last byte out from a data type
6448     //     */
6449     //    private static final int LAST_BYTE_MASK_ = 0xFF;
6450     //
6451     //    /**
6452     //     * Shift 16 bits
6453     //     */
6454     //    private static final int SHIFT_16_ = 16;
6455     //
6456     //    /**
6457     //     * Shift 24 bits
6458     //     */
6459     //    private static final int SHIFT_24_ = 24;
6460     //
6461     //    /**
6462     //     * Decimal radix
6463     //     */
6464     //    private static final int DECIMAL_RADIX_ = 10;
6465 
6466     /**
6467      * No break space code point
6468      */
6469     private static final int NO_BREAK_SPACE_ = 0xA0;
6470 
6471     /**
6472      * Figure space code point
6473      */
6474     private static final int FIGURE_SPACE_ = 0x2007;
6475 
6476     /**
6477      * Narrow no break space code point
6478      */
6479     private static final int NARROW_NO_BREAK_SPACE_ = 0x202F;
6480 
6481     /**
6482      * Ideographic number zero code point
6483      */
6484     private static final int IDEOGRAPHIC_NUMBER_ZERO_ = 0x3007;
6485 
6486     /**
6487      * CJK Ideograph, First code point
6488      */
6489     private static final int CJK_IDEOGRAPH_FIRST_ = 0x4e00;
6490 
6491     /**
6492      * CJK Ideograph, Second code point
6493      */
6494     private static final int CJK_IDEOGRAPH_SECOND_ = 0x4e8c;
6495 
6496     /**
6497      * CJK Ideograph, Third code point
6498      */
6499     private static final int CJK_IDEOGRAPH_THIRD_ = 0x4e09;
6500 
6501     /**
6502      * CJK Ideograph, Fourth code point
6503      */
6504     private static final int CJK_IDEOGRAPH_FOURTH_ = 0x56db;
6505 
6506     /**
6507      * CJK Ideograph, FIFTH code point
6508      */
6509     private static final int CJK_IDEOGRAPH_FIFTH_ = 0x4e94;
6510 
6511     /**
6512      * CJK Ideograph, Sixth code point
6513      */
6514     private static final int CJK_IDEOGRAPH_SIXTH_ = 0x516d;
6515 
6516     /**
6517      * CJK Ideograph, Seventh code point
6518      */
6519     private static final int CJK_IDEOGRAPH_SEVENTH_ = 0x4e03;
6520 
6521     /**
6522      * CJK Ideograph, Eighth code point
6523      */
6524     private static final int CJK_IDEOGRAPH_EIGHTH_ = 0x516b;
6525 
6526     /**
6527      * CJK Ideograph, Nineth code point
6528      */
6529     private static final int CJK_IDEOGRAPH_NINETH_ = 0x4e5d;
6530 
6531     /**
6532      * Application Program command code point
6533      */
6534     private static final int APPLICATION_PROGRAM_COMMAND_ = 0x009F;
6535 
6536     /**
6537      * Unit separator code point
6538      */
6539     private static final int UNIT_SEPARATOR_ = 0x001F;
6540 
6541     /**
6542      * Delete code point
6543      */
6544     private static final int DELETE_ = 0x007F;
6545 
6546     /**
6547      * Han digit characters
6548      */
6549     private static final int CJK_IDEOGRAPH_COMPLEX_ZERO_     = 0x96f6;
6550     private static final int CJK_IDEOGRAPH_COMPLEX_ONE_      = 0x58f9;
6551     private static final int CJK_IDEOGRAPH_COMPLEX_TWO_      = 0x8cb3;
6552     private static final int CJK_IDEOGRAPH_COMPLEX_THREE_    = 0x53c3;
6553     private static final int CJK_IDEOGRAPH_COMPLEX_FOUR_     = 0x8086;
6554     private static final int CJK_IDEOGRAPH_COMPLEX_FIVE_     = 0x4f0d;
6555     private static final int CJK_IDEOGRAPH_COMPLEX_SIX_      = 0x9678;
6556     private static final int CJK_IDEOGRAPH_COMPLEX_SEVEN_    = 0x67d2;
6557     private static final int CJK_IDEOGRAPH_COMPLEX_EIGHT_    = 0x634c;
6558     private static final int CJK_IDEOGRAPH_COMPLEX_NINE_     = 0x7396;
6559     private static final int CJK_IDEOGRAPH_TEN_              = 0x5341;
6560     private static final int CJK_IDEOGRAPH_COMPLEX_TEN_      = 0x62fe;
6561     private static final int CJK_IDEOGRAPH_HUNDRED_          = 0x767e;
6562     private static final int CJK_IDEOGRAPH_COMPLEX_HUNDRED_  = 0x4f70;
6563     private static final int CJK_IDEOGRAPH_THOUSAND_         = 0x5343;
6564     private static final int CJK_IDEOGRAPH_COMPLEX_THOUSAND_ = 0x4edf;
6565     private static final int CJK_IDEOGRAPH_TEN_THOUSAND_     = 0x824c;
6566     private static final int CJK_IDEOGRAPH_HUNDRED_MILLION_  = 0x5104;
6567 
6568     // private constructor -----------------------------------------------
6569     ///CLOVER:OFF
6570     /**
6571      * Private constructor to prevent instantiation
6572      */
UCharacter()6573     private UCharacter()
6574     {
6575     }
6576     ///CLOVER:ON
6577 }
6578