• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4  *******************************************************************************
5  * Copyright (C) 2001-2016 International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.lang;
11 
12 import java.util.BitSet;
13 import java.util.Locale;
14 
15 import com.ibm.icu.impl.UCharacterProperty;
16 import com.ibm.icu.util.ULocale;
17 
18 /**
19  * Constants for ISO 15924 script codes, and related functions.
20  *
21  * <p>The current set of script code constants supports at least all scripts
22  * that are encoded in the version of Unicode which ICU currently supports.
23  * The names of the constants are usually derived from the
24  * Unicode script property value aliases.
25  * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
26  * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
27  *
28  * <p>In addition, constants for many ISO 15924 script codes
29  * are included, for use with language tags, CLDR data, and similar.
30  * Some of those codes are not used in the Unicode Character Database (UCD).
31  * For example, there are no characters that have a UCD script property value of
32  * Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
33  *
34  * <p>Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
35  *
36  * <p>Starting with ICU 55, script codes are only added when their scripts
37  * have been or will certainly be encoded in Unicode,
38  * and have been assigned Unicode script property value aliases,
39  * to ensure that their script names are stable and match the names of the constants.
40  * Script codes like Latf and Aran that are not subject to separate encoding
41  * may be added at any time.
42  *
43  * @stable ICU 2.4
44  */
45 public final class UScript {
46     /**
47      * Invalid code
48      * @stable ICU 2.4
49      */
50     public static final int INVALID_CODE = -1;
51     /**
52      * Common
53      * @stable ICU 2.4
54      */
55     public static final int COMMON       =  0;  /* Zyyy */
56     /**
57      * Inherited
58      * @stable ICU 2.4
59      */
60     public static final int INHERITED    =  1;  /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
61     /**
62      * Arabic
63      * @stable ICU 2.4
64      */
65     public static final int ARABIC       =  2;  /* Arab */
66     /**
67      * Armenian
68      * @stable ICU 2.4
69      */
70     public static final int ARMENIAN     =  3;  /* Armn */
71     /**
72      * Bengali
73      * @stable ICU 2.4
74      */
75     public static final int BENGALI      =  4;  /* Beng */
76     /**
77      * Bopomofo
78      * @stable ICU 2.4
79      */
80     public static final int BOPOMOFO     =  5;  /* Bopo */
81     /**
82      * Cherokee
83      * @stable ICU 2.4
84      */
85     public static final int CHEROKEE     =  6;  /* Cher */
86     /**
87      * Coptic
88      * @stable ICU 2.4
89      */
90     public static final int COPTIC       =  7;  /* Qaac */
91     /**
92      * Cyrillic
93      * @stable ICU 2.4
94      */
95     public static final int CYRILLIC     =  8;  /* Cyrl (Cyrs) */
96     /**
97      * Deseret
98      * @stable ICU 2.4
99      */
100     public static final int DESERET      =  9;  /* Dsrt */
101     /**
102      * Devanagari
103      * @stable ICU 2.4
104      */
105     public static final int DEVANAGARI   = 10;  /* Deva */
106     /**
107      * Ethiopic
108      * @stable ICU 2.4
109      */
110     public static final int ETHIOPIC     = 11;  /* Ethi */
111     /**
112      * Georgian
113      * @stable ICU 2.4
114      */
115     public static final int GEORGIAN     = 12;  /* Geor (Geon; Geoa) */
116     /**
117      * Gothic
118      * @stable ICU 2.4
119      */
120     public static final int GOTHIC       = 13;  /* Goth */
121     /**
122      * Greek
123      * @stable ICU 2.4
124      */
125     public static final int GREEK        = 14;  /* Grek */
126     /**
127      * Gujarati
128      * @stable ICU 2.4
129      */
130     public static final int GUJARATI     = 15;  /* Gujr */
131     /**
132      * Gurmukhi
133      * @stable ICU 2.4
134      */
135     public static final int GURMUKHI     = 16;  /* Guru */
136     /**
137      * Han
138      * @stable ICU 2.4
139      */
140     public static final int HAN          = 17;  /* Hani */
141     /**
142      * Hangul
143      * @stable ICU 2.4
144      */
145     public static final int HANGUL       = 18;  /* Hang */
146     /**
147      * Hebrew
148      * @stable ICU 2.4
149      */
150     public static final int HEBREW       = 19;  /* Hebr */
151     /**
152      * Hiragana
153      * @stable ICU 2.4
154      */
155     public static final int HIRAGANA     = 20;  /* Hira */
156     /**
157      * Kannada
158      * @stable ICU 2.4
159      */
160     public static final int KANNADA      = 21;  /* Knda */
161     /**
162      * Katakana
163      * @stable ICU 2.4
164      */
165     public static final int KATAKANA     = 22;  /* Kana */
166     /**
167      * Khmer
168      * @stable ICU 2.4
169      */
170     public static final int KHMER        = 23;  /* Khmr */
171     /**
172      * Lao
173      * @stable ICU 2.4
174      */
175     public static final int LAO          = 24;  /* Laoo */
176     /**
177      * Latin
178      * @stable ICU 2.4
179      */
180     public static final int LATIN        = 25;  /* Latn (Latf; Latg) */
181     /**
182      * Malayalam
183      * @stable ICU 2.4
184      */
185     public static final int MALAYALAM    = 26;  /* Mlym */
186     /**
187      * Mongolian
188      * @stable ICU 2.4
189      */
190     public static final int MONGOLIAN    = 27;  /* Mong */
191     /**
192      * Myanmar
193      * @stable ICU 2.4
194      */
195     public static final int MYANMAR      = 28;  /* Mymr */
196     /**
197      * Ogham
198      * @stable ICU 2.4
199      */
200     public static final int OGHAM        = 29;  /* Ogam */
201     /**
202      * Old Italic
203      * @stable ICU 2.4
204      */
205     public static final int OLD_ITALIC   = 30;  /* Ital */
206     /**
207      * Oriya
208      * @stable ICU 2.4
209      */
210     public static final int ORIYA        = 31;  /* Orya */
211     /**
212      * Runic
213      * @stable ICU 2.4
214      */
215     public static final int RUNIC        = 32;  /* Runr */
216     /**
217      * Sinhala
218      * @stable ICU 2.4
219      */
220     public static final int SINHALA      = 33;  /* Sinh */
221     /**
222      * Syriac
223      * @stable ICU 2.4
224      */
225     public static final int SYRIAC       = 34;  /* Syrc (Syrj; Syrn; Syre) */
226     /**
227      * Tamil
228      * @stable ICU 2.4
229      */
230     public static final int TAMIL        = 35;  /* Taml */
231     /**
232      * Telugu
233      * @stable ICU 2.4
234      */
235     public static final int TELUGU       = 36;  /* Telu */
236     /**
237      * Thaana
238      * @stable ICU 2.4
239      */
240     public static final int THAANA       = 37;  /* Thaa */
241     /**
242      * Thai
243      * @stable ICU 2.4
244      */
245     public static final int THAI         = 38;  /* Thai */
246     /**
247      * Tibetan
248      * @stable ICU 2.4
249      */
250     public static final int TIBETAN      = 39;  /* Tibt */
251     /**
252      * Unified Canadian Aboriginal Syllabics
253      * @stable ICU 2.6
254      */
255     public static final int CANADIAN_ABORIGINAL = 40;  /* Cans */
256     /**
257      * Unified Canadian Aboriginal Syllabics (alias)
258      * @stable ICU 2.4
259      */
260     public static final int UCAS         = CANADIAN_ABORIGINAL;  /* Cans */
261     /**
262      * Yi syllables
263      * @stable ICU 2.4
264      */
265     public static final int YI           = 41;  /* Yiii */
266     /**
267      * Tagalog
268      * @stable ICU 2.4
269      */
270     public static final int TAGALOG      = 42;  /* Tglg */
271     /**
272      * Hanunoo
273      * @stable ICU 2.4
274      */
275     public static final int HANUNOO      = 43;  /* Hano */
276     /**
277      * Buhid
278      * @stable ICU 2.4
279      */
280     public static final int BUHID        = 44;  /* Buhd */
281     /**
282      * Tagbanwa
283      * @stable ICU 2.4
284      */
285     public static final int TAGBANWA     = 45;  /* Tagb */
286     /**
287      * Braille
288      * Script in Unicode 4
289      * @stable ICU 2.6
290      *
291      */
292     public static final int BRAILLE      = 46;  /* Brai */
293     /**
294      * Cypriot
295      * Script in Unicode 4
296      * @stable ICU 2.6
297      *
298      */
299     public static final int CYPRIOT              = 47;  /* Cprt */
300     /**
301      * Limbu
302      * Script in Unicode 4
303      * @stable ICU 2.6
304      *
305      */
306     public static final int LIMBU                = 48;  /* Limb */
307     /**
308      * Linear B
309      * Script in Unicode 4
310      * @stable ICU 2.6
311      *
312      */
313     public static final int LINEAR_B     = 49;  /* Linb */
314     /**
315      * Osmanya
316      * Script in Unicode 4
317      * @stable ICU 2.6
318      *
319      */
320     public static final int OSMANYA              = 50;  /* Osma */
321     /**
322      * Shavian
323      * Script in Unicode 4
324      * @stable ICU 2.6
325      *
326      */
327     public static final int SHAVIAN              = 51;  /* Shaw */
328     /**
329      * Tai Le
330      * Script in Unicode 4
331      * @stable ICU 2.6
332      *
333      */
334     public static final int TAI_LE               = 52;  /* Tale */
335     /**
336      * Ugaritic
337      * Script in Unicode 4
338      * @stable ICU 2.6
339      *
340      */
341     public static final int UGARITIC     = 53;  /* Ugar */
342     /**
343      * Script in Unicode 4.0.1
344      * @stable ICU 3.0
345      */
346     public static final int KATAKANA_OR_HIRAGANA = 54;  /*Hrkt */
347 
348     /**
349      * Script in Unicode 4.1
350      * @stable ICU 3.4
351      */
352     public static final int BUGINESE = 55;           /* Bugi */
353     /**
354      * Script in Unicode 4.1
355      * @stable ICU 3.4
356      */
357     public static final int GLAGOLITIC = 56;         /* Glag */
358     /**
359      * Script in Unicode 4.1
360      * @stable ICU 3.4
361      */
362     public static final int KHAROSHTHI = 57;         /* Khar */
363     /**
364      * Script in Unicode 4.1
365      * @stable ICU 3.4
366      */
367     public static final int SYLOTI_NAGRI = 58;       /* Sylo */
368     /**
369      * Script in Unicode 4.1
370      * @stable ICU 3.4
371      */
372     public static final int NEW_TAI_LUE = 59;        /* Talu */
373     /**
374      * Script in Unicode 4.1
375      * @stable ICU 3.4
376      */
377     public static final int TIFINAGH = 60;           /* Tfng */
378     /**
379      * Script in Unicode 4.1
380      * @stable ICU 3.4
381      */
382     public static final int OLD_PERSIAN = 61;        /* Xpeo */
383 
384 
385     /**
386      * ISO 15924 script code
387      * @stable ICU 3.6
388      */
389     public static final int BALINESE                      = 62; /* Bali */
390     /**
391      * ISO 15924 script code
392      * @stable ICU 3.6
393      */
394     public static final int BATAK                         = 63; /* Batk */
395     /**
396      * ISO 15924 script code
397      * @stable ICU 3.6
398      */
399     public static final int BLISSYMBOLS                   = 64; /* Blis */
400     /**
401      * ISO 15924 script code
402      * @stable ICU 3.6
403      */
404     public static final int BRAHMI                        = 65; /* Brah */
405     /**
406      * ISO 15924 script code
407      * @stable ICU 3.6
408      */
409     public static final int CHAM                          = 66; /* Cham */
410     /**
411      * ISO 15924 script code
412      * @stable ICU 3.6
413      */
414     public static final int CIRTH                         = 67; /* Cirt */
415     /**
416      * ISO 15924 script code
417      * @stable ICU 3.6
418      */
419     public static final int OLD_CHURCH_SLAVONIC_CYRILLIC  = 68; /* Cyrs */
420     /**
421      * ISO 15924 script code
422      * @stable ICU 3.6
423      */
424     public static final int DEMOTIC_EGYPTIAN              = 69; /* Egyd */
425     /**
426      * ISO 15924 script code
427      * @stable ICU 3.6
428      */
429     public static final int HIERATIC_EGYPTIAN             = 70; /* Egyh */
430     /**
431      * ISO 15924 script code
432      * @stable ICU 3.6
433      */
434     public static final int EGYPTIAN_HIEROGLYPHS          = 71; /* Egyp */
435     /**
436      * ISO 15924 script code
437      * @stable ICU 3.6
438      */
439     public static final int KHUTSURI                      = 72; /* Geok */
440     /**
441      * ISO 15924 script code
442      * @stable ICU 3.6
443      */
444     public static final int SIMPLIFIED_HAN                = 73; /* Hans */
445     /**
446      * ISO 15924 script code
447      * @stable ICU 3.6
448      */
449     public static final int TRADITIONAL_HAN               = 74; /* Hant */
450     /**
451      * ISO 15924 script code
452      * @stable ICU 3.6
453      */
454     public static final int PAHAWH_HMONG                  = 75; /* Hmng */
455     /**
456      * ISO 15924 script code
457      * @stable ICU 3.6
458      */
459     public static final int OLD_HUNGARIAN                 = 76; /* Hung */
460     /**
461      * ISO 15924 script code
462      * @stable ICU 3.6
463      */
464     public static final int HARAPPAN_INDUS                = 77; /* Inds */
465     /**
466      * ISO 15924 script code
467      * @stable ICU 3.6
468      */
469     public static final int JAVANESE                      = 78; /* Java */
470     /**
471      * ISO 15924 script code
472      * @stable ICU 3.6
473      */
474     public static final int KAYAH_LI                      = 79; /* Kali */
475     /**
476      * ISO 15924 script code
477      * @stable ICU 3.6
478      */
479     public static final int LATIN_FRAKTUR                 = 80; /* Latf */
480     /**
481      * ISO 15924 script code
482      * @stable ICU 3.6
483      */
484     public static final int LATIN_GAELIC                  = 81; /* Latg */
485     /**
486      * ISO 15924 script code
487      * @stable ICU 3.6
488      */
489     public static final int LEPCHA                        = 82; /* Lepc */
490     /**
491      * ISO 15924 script code
492      * @stable ICU 3.6
493      */
494     public static final int LINEAR_A                      = 83; /* Lina */
495     /**
496      * ISO 15924 script code
497      * @stable ICU 4.6
498      */
499     public static final int MANDAIC                       = 84; /* Mand */
500     /**
501      * ISO 15924 script code
502      * @stable ICU 3.6
503      */
504     public static final int MANDAEAN                      = MANDAIC;
505     /**
506      * ISO 15924 script code
507      * @stable ICU 3.6
508      */
509     public static final int MAYAN_HIEROGLYPHS             = 85; /* Maya */
510     /**
511      * ISO 15924 script code
512      * @stable ICU 4.6
513      */
514     public static final int MEROITIC_HIEROGLYPHS          = 86; /* Mero */
515     /**
516      * ISO 15924 script code
517      * @stable ICU 3.6
518      */
519     public static final int MEROITIC                      = MEROITIC_HIEROGLYPHS;
520     /**
521      * ISO 15924 script code
522      * @stable ICU 3.6
523      */
524     public static final int NKO                           = 87; /* Nkoo */
525     /**
526      * ISO 15924 script code
527      * @stable ICU 3.6
528      */
529     public static final int ORKHON                        = 88; /* Orkh */
530     /**
531      * ISO 15924 script code
532      * @stable ICU 3.6
533      */
534     public static final int OLD_PERMIC                    = 89; /* Perm */
535     /**
536      * ISO 15924 script code
537      * @stable ICU 3.6
538      */
539     public static final int PHAGS_PA                      = 90; /* Phag */
540     /**
541      * ISO 15924 script code
542      * @stable ICU 3.6
543      */
544     public static final int PHOENICIAN                    = 91; /* Phnx */
545     /**
546      * ISO 15924 script code
547      * @stable ICU 52
548      */
549     public static final int MIAO                          = 92; /* Plrd */
550     /**
551      * ISO 15924 script code
552      * @stable ICU 3.6
553      */
554     public static final int PHONETIC_POLLARD              = MIAO;
555     /**
556      * ISO 15924 script code
557      * @stable ICU 3.6
558      */
559     public static final int RONGORONGO                    = 93; /* Roro */
560     /**
561      * ISO 15924 script code
562      * @stable ICU 3.6
563      */
564     public static final int SARATI                        = 94; /* Sara */
565     /**
566      * ISO 15924 script code
567      * @stable ICU 3.6
568      */
569     public static final int ESTRANGELO_SYRIAC             = 95; /* Syre */
570     /**
571      * ISO 15924 script code
572      * @stable ICU 3.6
573      */
574     public static final int WESTERN_SYRIAC                = 96; /* Syrj */
575     /**
576      * ISO 15924 script code
577      * @stable ICU 3.6
578      */
579     public static final int EASTERN_SYRIAC                = 97; /* Syrn */
580     /**
581      * ISO 15924 script code
582      * @stable ICU 3.6
583      */
584     public static final int TENGWAR                       = 98; /* Teng */
585     /**
586      * ISO 15924 script code
587      * @stable ICU 3.6
588      */
589     public static final int VAI                           = 99; /* Vaii */
590     /**
591      * ISO 15924 script code
592      * @stable ICU 3.6
593      */
594     public static final int VISIBLE_SPEECH                = 100;/* Visp */
595     /**
596      * ISO 15924 script code
597      * @stable ICU 3.6
598      */
599     public static final int CUNEIFORM                     = 101;/* Xsux */
600     /**
601      * ISO 15924 script code
602      * @stable ICU 3.6
603      */
604     public static final int UNWRITTEN_LANGUAGES           = 102;/* Zxxx */
605     /**
606      * ISO 15924 script code
607      * @stable ICU 3.6
608      */
609     public static final int UNKNOWN                       = 103;/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
610 
611     /**
612      * ISO 15924 script code
613      * @stable ICU 3.8
614      */
615     public static final int CARIAN                        = 104;/* Cari */
616     /**
617      * ISO 15924 script code
618      * @stable ICU 3.8
619      */
620     public static final int JAPANESE                      = 105;/* Jpan */
621     /**
622      * ISO 15924 script code
623      * @stable ICU 3.8
624      */
625     public static final int LANNA                         = 106;/* Lana */
626     /**
627      * ISO 15924 script code
628      * @stable ICU 3.8
629      */
630     public static final int LYCIAN                        = 107;/* Lyci */
631     /**
632      * ISO 15924 script code
633      * @stable ICU 3.8
634      */
635     public static final int LYDIAN                        = 108;/* Lydi */
636     /**
637      * ISO 15924 script code
638      * @stable ICU 3.8
639      */
640     public static final int OL_CHIKI                      = 109;/* Olck */
641     /**
642      * ISO 15924 script code
643      * @stable ICU 3.8
644      */
645     public static final int REJANG                        = 110;/* Rjng */
646     /**
647      * ISO 15924 script code
648      * @stable ICU 3.8
649      */
650     public static final int SAURASHTRA                    = 111;/* Saur */
651     /**
652      * ISO 15924 script code for Sutton SignWriting
653      * @stable ICU 3.8
654      */
655     public static final int SIGN_WRITING                  = 112;/* Sgnw */
656     /**
657      * ISO 15924 script code
658      * @stable ICU 3.8
659      */
660     public static final int SUNDANESE                     = 113;/* Sund */
661     /**
662      * ISO 15924 script code
663      * @stable ICU 3.8
664      */
665     public static final int MOON                          = 114;/* Moon */
666     /**
667      * ISO 15924 script code
668      * @stable ICU 3.8
669      */
670     public static final int MEITEI_MAYEK                  = 115;/* Mtei */
671 
672     /**
673      * ISO 15924 script code
674      * @stable ICU 4.0
675      */
676     public static final int IMPERIAL_ARAMAIC              = 116;/* Armi */
677 
678     /**
679      * ISO 15924 script code
680      * @stable ICU 4.0
681      */
682     public static final int AVESTAN                       = 117;/* Avst */
683 
684     /**
685      * ISO 15924 script code
686      * @stable ICU 4.0
687      */
688     public static final int CHAKMA                        = 118;/* Cakm */
689 
690     /**
691      * ISO 15924 script code
692      * @stable ICU 4.0
693      */
694     public static final int KOREAN                        = 119;/* Kore */
695 
696     /**
697      * ISO 15924 script code
698      * @stable ICU 4.0
699      */
700     public static final int KAITHI                        = 120;/* Kthi */
701 
702     /**
703      * ISO 15924 script code
704      * @stable ICU 4.0
705      */
706     public static final int MANICHAEAN                    = 121;/* Mani */
707 
708     /**
709      * ISO 15924 script code
710      * @stable ICU 4.0
711      */
712     public static final int INSCRIPTIONAL_PAHLAVI         = 122;/* Phli */
713 
714     /**
715      * ISO 15924 script code
716      * @stable ICU 4.0
717      */
718     public static final int PSALTER_PAHLAVI               = 123;/* Phlp */
719 
720     /**
721      * ISO 15924 script code
722      * @stable ICU 4.0
723      */
724     public static final int BOOK_PAHLAVI                  = 124;/* Phlv */
725 
726     /**
727      * ISO 15924 script code
728      * @stable ICU 4.0
729      */
730     public static final int INSCRIPTIONAL_PARTHIAN        = 125;/* Prti */
731 
732     /**
733      * ISO 15924 script code
734      * @stable ICU 4.0
735      */
736     public static final int SAMARITAN                     = 126;/* Samr */
737 
738     /**
739      * ISO 15924 script code
740      * @stable ICU 4.0
741      */
742     public static final int TAI_VIET                      = 127;/* Tavt */
743 
744     /**
745      * ISO 15924 script code
746      * @stable ICU 4.0
747      */
748     public static final int MATHEMATICAL_NOTATION         = 128;/* Zmth */
749 
750     /**
751      * ISO 15924 script code
752      * @stable ICU 4.0
753      */
754     public static final int SYMBOLS                       = 129;/* Zsym */
755 
756     /**
757      * ISO 15924 script code
758      * @stable ICU 4.4
759      */
760     public static final int BAMUM                         = 130;/* Bamu */
761     /**
762      * ISO 15924 script code
763      * @stable ICU 4.4
764      */
765     public static final int LISU                          = 131;/* Lisu */
766     /**
767      * ISO 15924 script code
768      * @stable ICU 4.4
769      */
770     public static final int NAKHI_GEBA                    = 132;/* Nkgb */
771     /**
772      * ISO 15924 script code
773      * @stable ICU 4.4
774      */
775     public static final int OLD_SOUTH_ARABIAN             = 133;/* Sarb */
776 
777     /**
778      * ISO 15924 script code
779      * @stable ICU 4.6
780      */
781     public static final int BASSA_VAH                     = 134;/* Bass */
782     /**
783      * ISO 15924 script code
784      * @stable ICU 54
785      */
786     public static final int DUPLOYAN                      = 135;/* Dupl */
787     /**
788      * Typo, use DUPLOYAN
789      * @deprecated ICU 54
790      */
791     @Deprecated
792     public static final int DUPLOYAN_SHORTAND             = DUPLOYAN;
793     /**
794      * ISO 15924 script code
795      * @stable ICU 4.6
796      */
797     public static final int ELBASAN                       = 136;/* Elba */
798     /**
799      * ISO 15924 script code
800      * @stable ICU 4.6
801      */
802     public static final int GRANTHA                       = 137;/* Gran */
803     /**
804      * ISO 15924 script code
805      * @stable ICU 4.6
806      */
807     public static final int KPELLE                        = 138;/* Kpel */
808     /**
809      * ISO 15924 script code
810      * @stable ICU 4.6
811      */
812     public static final int LOMA                          = 139;/* Loma */
813     /**
814      * Mende Kikakui
815      * ISO 15924 script code
816      * @stable ICU 4.6
817      */
818     public static final int MENDE                         = 140;/* Mend */
819     /**
820      * ISO 15924 script code
821      * @stable ICU 4.6
822      */
823     public static final int MEROITIC_CURSIVE              = 141;/* Merc */
824     /**
825      * ISO 15924 script code
826      * @stable ICU 4.6
827      */
828     public static final int OLD_NORTH_ARABIAN             = 142;/* Narb */
829     /**
830      * ISO 15924 script code
831      * @stable ICU 4.6
832      */
833     public static final int NABATAEAN                     = 143;/* Nbat */
834     /**
835      * ISO 15924 script code
836      * @stable ICU 4.6
837      */
838     public static final int PALMYRENE                     = 144;/* Palm */
839     /**
840      * ISO 15924 script code
841      * @stable ICU 54
842      */
843     public static final int KHUDAWADI                     = 145;/* Sind */
844     /**
845      * ISO 15924 script code
846      * @stable ICU 4.6
847      */
848     public static final int SINDHI = KHUDAWADI;
849     /**
850      * ISO 15924 script code
851      * @stable ICU 4.6
852      */
853     public static final int WARANG_CITI                   = 146;/* Wara */
854 
855     /**
856      * ISO 15924 script code
857      * @stable ICU 4.8
858      */
859     public static final int AFAKA = 147;/* Afak */
860     /**
861      * ISO 15924 script code
862      * @stable ICU 4.8
863      */
864     public static final int JURCHEN = 148;/* Jurc */
865     /**
866      * ISO 15924 script code
867      * @stable ICU 4.8
868      */
869     public static final int MRO = 149;/* Mroo */
870     /**
871      * ISO 15924 script code
872      * @stable ICU 4.8
873      */
874     public static final int NUSHU = 150;/* Nshu */
875     /**
876      * ISO 15924 script code
877      * @stable ICU 4.8
878      */
879     public static final int SHARADA = 151;/* Shrd */
880     /**
881      * ISO 15924 script code
882      * @stable ICU 4.8
883      */
884     public static final int SORA_SOMPENG = 152;/* Sora */
885     /**
886      * ISO 15924 script code
887      * @stable ICU 4.8
888      */
889     public static final int TAKRI = 153;/* Takr */
890     /**
891      * ISO 15924 script code
892      * @stable ICU 4.8
893      */
894     public static final int TANGUT = 154;/* Tang */
895     /**
896      * ISO 15924 script code
897      * @stable ICU 4.8
898      */
899     public static final int WOLEAI = 155;/* Wole */
900 
901     /**
902      * ISO 15924 script code
903      * @stable ICU 49
904      */
905     public static final int ANATOLIAN_HIEROGLYPHS = 156;/* Hluw */
906     /**
907      * ISO 15924 script code
908      * @stable ICU 49
909      */
910     public static final int KHOJKI = 157;/* Khoj */
911     /**
912      * ISO 15924 script code
913      * @stable ICU 49
914      */
915     public static final int TIRHUTA = 158;/* Tirh */
916     /**
917      * ISO 15924 script code
918      * @stable ICU 52
919      */
920     public static final int CAUCASIAN_ALBANIAN = 159; /* Aghb */
921     /**
922      * ISO 15924 script code
923      * @stable ICU 52
924      */
925     public static final int MAHAJANI = 160; /* Mahj */
926 
927     /**
928      * ISO 15924 script code
929      * @stable ICU 54
930      */
931     public static final int AHOM = 161; /* Ahom */
932     /**
933      * ISO 15924 script code
934      * @stable ICU 54
935      */
936     public static final int HATRAN = 162; /* Hatr */
937     /**
938      * ISO 15924 script code
939      * @stable ICU 54
940      */
941     public static final int MODI = 163; /* Modi */
942     /**
943      * ISO 15924 script code
944      * @stable ICU 54
945      */
946     public static final int MULTANI = 164; /* Mult */
947     /**
948      * ISO 15924 script code
949      * @stable ICU 54
950      */
951     public static final int PAU_CIN_HAU = 165; /* Pauc */
952     /**
953      * ISO 15924 script code
954      * @stable ICU 54
955      */
956     public static final int SIDDHAM = 166; /* Sidd */
957 
958     /**
959      * ISO 15924 script code
960      * @stable ICU 58
961      */
962     public static final int ADLAM = 167; /* Adlm */
963     /**
964      * ISO 15924 script code
965      * @stable ICU 58
966      */
967     public static final int BHAIKSUKI = 168; /* Bhks */
968     /**
969      * ISO 15924 script code
970      * @stable ICU 58
971      */
972     public static final int MARCHEN = 169; /* Marc */
973     /**
974      * ISO 15924 script code
975      * @stable ICU 58
976      */
977     public static final int NEWA = 170; /* Newa */
978     /**
979      * ISO 15924 script code
980      * @stable ICU 58
981      */
982     public static final int OSAGE = 171; /* Osge */
983 
984     /**
985      * ISO 15924 script code
986      * @stable ICU 58
987      */
988     public static final int HAN_WITH_BOPOMOFO = 172; /* Hanb */
989     /**
990      * ISO 15924 script code
991      * @stable ICU 58
992      */
993     public static final int JAMO = 173; /* Jamo */
994     /**
995      * ISO 15924 script code
996      * @stable ICU 58
997      */
998     public static final int SYMBOLS_EMOJI = 174; /* Zsye */
999 
1000     /**
1001      * ISO 15924 script code
1002      * @stable ICU 60
1003      */
1004     public static final int MASARAM_GONDI = 175; /* Gonm */
1005     /**
1006      * ISO 15924 script code
1007      * @stable ICU 60
1008      */
1009     public static final int SOYOMBO = 176; /* Soyo */
1010     /**
1011      * ISO 15924 script code
1012      * @stable ICU 60
1013      */
1014     public static final int ZANABAZAR_SQUARE = 177; /* Zanb */
1015 
1016     /**
1017      * ISO 15924 script code
1018      * @stable ICU 62
1019      */
1020     public static final int DOGRA = 178; /* Dogr */
1021     /** @stable ICU 62 */
1022     public static final int GUNJALA_GONDI = 179; /* Gong */
1023     /** @stable ICU 62 */
1024     public static final int MAKASAR = 180; /* Maka */
1025     /** @stable ICU 62 */
1026     public static final int MEDEFAIDRIN = 181; /* Medf */
1027     /** @stable ICU 62 */
1028     public static final int HANIFI_ROHINGYA = 182; /* Rohg */
1029     /** @stable ICU 62 */
1030     public static final int SOGDIAN = 183; /* Sogd */
1031     /** @stable ICU 62 */
1032     public static final int OLD_SOGDIAN = 184; /* Sogo */
1033 
1034     /** @stable ICU 64 */
1035     public static final int ELYMAIC = 185; /* Elym */
1036     /** @stable ICU 64 */
1037     public static final int NYIAKENG_PUACHUE_HMONG = 186; /* Hmnp */
1038     /** @stable ICU 64 */
1039     public static final int NANDINAGARI = 187; /* Nand */
1040     /** @stable ICU 64 */
1041     public static final int WANCHO = 188; /* Wcho */
1042 
1043     /** @stable ICU 66 */
1044     public static final int CHORASMIAN = 189; /* Chrs */
1045     /** @stable ICU 66 */
1046     public static final int DIVES_AKURU = 190; /* Diak */
1047     /** @stable ICU 66 */
1048     public static final int KHITAN_SMALL_SCRIPT = 191; /* Kits */
1049     /** @stable ICU 66 */
1050     public static final int YEZIDI = 192; /* Yezi */
1051 
1052     /** @stable ICU 70 */
1053     public static final int CYPRO_MINOAN = 193; /* Cpmn */
1054     /** @stable ICU 70 */
1055     public static final int OLD_UYGHUR = 194; /* Ougr */
1056     /** @stable ICU 70 */
1057     public static final int TANGSA = 195; /* Tnsa */
1058     /** @stable ICU 70 */
1059     public static final int TOTO = 196; /* Toto */
1060     /** @stable ICU 70 */
1061     public static final int VITHKUQI = 197; /* Vith */
1062 
1063     /** @stable ICU 72 */
1064     public static final int KAWI = 198; /* Kawi */
1065     /** @stable ICU 72 */
1066     public static final int NAG_MUNDARI = 199; /* Nagm */
1067 
1068     /** @stable ICU 75 */
1069     public static final int ARABIC_NASTALIQ = 200; /* Aran */
1070 
1071     /**
1072      * One more than the highest normal UScript code.
1073      * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT).
1074      *
1075      * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1076      */
1077     @Deprecated
1078     public static final int CODE_LIMIT   = 201;
1079 
getCodesFromLocale(ULocale locale)1080     private static int[] getCodesFromLocale(ULocale locale) {
1081         // Multi-script languages, equivalent to the LocaleScript data
1082         // that we used to load from locale resource bundles.
1083         String lang = locale.getLanguage();
1084         if(lang.equals("ja")) {
1085             return new int[] { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN };
1086         }
1087         if(lang.equals("ko")) {
1088             return new int[] { UScript.HANGUL, UScript.HAN };
1089         }
1090         String script = locale.getScript();
1091         if(lang.equals("zh") && script.equals("Hant")) {
1092             return new int[] { UScript.HAN, UScript.BOPOMOFO };
1093         }
1094         // Explicit script code.
1095         if(script.length() != 0) {
1096             int scriptCode = UScript.getCodeFromName(script);
1097             if(scriptCode != UScript.INVALID_CODE) {
1098                 if(scriptCode == UScript.SIMPLIFIED_HAN || scriptCode == UScript.TRADITIONAL_HAN) {
1099                     scriptCode = UScript.HAN;
1100                 }
1101                 return new int[] { scriptCode };
1102             }
1103         }
1104         return null;
1105     }
1106 
1107     /**
1108      * Helper function to find the code from locale.
1109      * @param locale The locale.
1110      */
findCodeFromLocale(ULocale locale)1111     private static int[] findCodeFromLocale(ULocale locale) {
1112         int[] result = getCodesFromLocale(locale);
1113         if(result != null) {
1114             return result;
1115         }
1116         ULocale likely = ULocale.addLikelySubtags(locale);
1117         return getCodesFromLocale(likely);
1118     }
1119 
1120     /**
1121      * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
1122      * Returns MALAYALAM given "Malayalam" OR "Mlym".
1123      * Returns LATIN given "en" OR "en_US"
1124      * @param locale Locale
1125      * @return The script codes array. null if the code cannot be found.
1126      * @stable ICU 2.4
1127      */
getCode(Locale locale)1128     public static final int[] getCode(Locale locale){
1129         return findCodeFromLocale(ULocale.forLocale(locale));
1130     }
1131     /**
1132      * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
1133      * Returns MALAYALAM given "Malayalam" OR "Mlym".
1134      * Returns LATIN given "en" OR "en_US"
1135      * @param locale ULocale
1136      * @return The script codes array. null if the code cannot be found.
1137      * @stable ICU 3.0
1138      */
getCode(ULocale locale)1139     public static final int[] getCode(ULocale locale){
1140         return findCodeFromLocale(locale);
1141     }
1142     /**
1143      * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
1144      * Returns MALAYALAM given "Malayalam" OR "Mlym".
1145      * Returns LATIN given "en" OR "en_US"
1146      *
1147      * <p>Note: To search by short or long script alias only, use
1148      * {@link #getCodeFromName(String)} instead.
1149      * That does a fast lookup with no access of the locale data.
1150      *
1151      * @param nameOrAbbrOrLocale name of the script or ISO 15924 code or locale
1152      * @return The script codes array. null if the code cannot be found.
1153      * @stable ICU 2.4
1154      */
getCode(String nameOrAbbrOrLocale)1155     public static final int[] getCode(String nameOrAbbrOrLocale) {
1156         boolean triedCode = false;
1157         int lastSepPos = nameOrAbbrOrLocale.indexOf('_');
1158         if (lastSepPos < 0) {
1159             lastSepPos = nameOrAbbrOrLocale.indexOf('-');
1160         }
1161         // Favor interpretation of nameOrAbbrOrLocale as a script alias if either
1162         // 1. nameOrAbbrOrLocale does not contain -/_. Handles Han, Mro, Nko, etc.
1163         // 2. The last instance of -/_ is at offset 3, and the portion after that is
1164         //    longer than 4 characters (i.e. not a script or region code). This handles
1165         //    Old_Hungarian, Old_Italic, etc. ("old" is a valid language code)
1166         // 3. The last instance of -/_ is at offset 7, and the portion after that is
1167         //    3 characters. This handles New_Tai_Lue ("new" is a valid language code).
1168         if ( lastSepPos < 0
1169                 || (lastSepPos == 3 && nameOrAbbrOrLocale.length() > 8)
1170                 || (lastSepPos == 7 && nameOrAbbrOrLocale.length() == 11) ) {
1171             int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale);
1172             if (propNum != UProperty.UNDEFINED) {
1173                 return new int[] {propNum};
1174             }
1175             triedCode = true;
1176         }
1177         int[] scripts = findCodeFromLocale(new ULocale(nameOrAbbrOrLocale));
1178         if (scripts != null) {
1179             return scripts;
1180         }
1181         if (!triedCode) {
1182             int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale);
1183             if (propNum != UProperty.UNDEFINED) {
1184                 return new int[] {propNum};
1185             }
1186         }
1187         return null;
1188     }
1189 
1190     /**
1191      * Returns the script code associated with the given Unicode script property alias
1192      * (name or abbreviation).
1193      * Short aliases are ISO 15924 script codes.
1194      * Returns MALAYALAM given "Malayalam" OR "Mlym".
1195      *
1196      * @param nameOrAbbr name of the script or ISO 15924 code
1197      * @return The script code value, or INVALID_CODE if the code cannot be found.
1198      * @stable ICU 54
1199      */
getCodeFromName(String nameOrAbbr)1200     public static final int getCodeFromName(String nameOrAbbr) {
1201         int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbr);
1202         return propNum == UProperty.UNDEFINED ? INVALID_CODE : propNum;
1203     }
1204 
1205     /**
1206      * Gets the script code associated with the given codepoint.
1207      * Returns UScript.MALAYAM given 0x0D02
1208      * @param codepoint UChar32 codepoint
1209      * @return The script code
1210      * @stable ICU 2.4
1211      */
getScript(int codepoint)1212     public static final int getScript(int codepoint){
1213         if (codepoint >= UCharacter.MIN_VALUE & codepoint <= UCharacter.MAX_VALUE) {
1214             int scriptX=UCharacterProperty.INSTANCE.getAdditional(codepoint, 0)&UCharacterProperty.SCRIPT_X_MASK;
1215             int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX);
1216             if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
1217                 return codeOrIndex;
1218             } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_INHERITED) {
1219                 return UScript.COMMON;
1220             } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_OTHER) {
1221                 return UScript.INHERITED;
1222             } else {
1223                 return UCharacterProperty.INSTANCE.m_scriptExtensions_[codeOrIndex];
1224             }
1225         }else{
1226             throw new IllegalArgumentException(Integer.toString(codepoint));
1227         }
1228     }
1229 
1230     /**
1231      * Do the Script_Extensions of code point c contain script sc?
1232      * If c does not have explicit Script_Extensions, then this tests whether
1233      * c has the Script property value sc.
1234      *
1235      * <p>Some characters are commonly used in multiple scripts.
1236      * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
1237      *
1238      * @param c code point
1239      * @param sc script code
1240      * @return true if sc is in Script_Extensions(c)
1241      * @stable ICU 49
1242      */
hasScript(int c, int sc)1243     public static final boolean hasScript(int c, int sc) {
1244         int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK;
1245         int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX);
1246         if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
1247             return sc==codeOrIndex;
1248         }
1249 
1250         char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_;
1251         int scx=codeOrIndex;  // index into scriptExtensions
1252         if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) {
1253             scx=scriptExtensions[scx+1];
1254         }
1255         if(sc>0x7fff) {
1256             // Guard against bogus input that would
1257             // make us go past the Script_Extensions terminator.
1258             return false;
1259         }
1260         while(sc>scriptExtensions[scx]) {
1261             ++scx;
1262         }
1263         return sc==(scriptExtensions[scx]&0x7fff);
1264     }
1265 
1266     /**
1267      * Sets code point c's Script_Extensions as script code integers into the output BitSet.
1268      * <ul>
1269      * <li>If c does have Script_Extensions, then the return value is
1270      * the negative number of Script_Extensions codes (= -set.cardinality());
1271      * in this case, the Script property value
1272      * (normally Common or Inherited) is not included in the set.
1273      * <li>If c does not have Script_Extensions, then the one Script code is put into the set
1274      * and also returned.
1275      * <li>If c is not a valid code point, then the one {@link #UNKNOWN} code is put into the set
1276      * and also returned.
1277      * </ul>
1278      * In other words, if the return value is non-negative, it is c's single Script code
1279      * and the set contains exactly this Script code.
1280      * If the return value is -n, then the set contains c's n&gt;=2 Script_Extensions script codes.
1281      *
1282      * <p>Some characters are commonly used in multiple scripts.
1283      * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
1284      *
1285      * @param c code point
1286      * @param set set of script code integers; will be cleared, then bits are set
1287      *            corresponding to c's Script_Extensions
1288      * @return negative number of script codes in c's Script_Extensions,
1289      *         or the non-negative single Script value
1290      * @stable ICU 49
1291      */
getScriptExtensions(int c, BitSet set)1292     public static final int getScriptExtensions(int c, BitSet set) {
1293         set.clear();
1294         int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK;
1295         int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX);
1296         if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
1297             set.set(codeOrIndex);
1298             return codeOrIndex;
1299         }
1300 
1301         char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_;
1302         int scx=codeOrIndex;  // index into scriptExtensions
1303         if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) {
1304             scx=scriptExtensions[scx+1];
1305         }
1306         int length=0;
1307         int sx;
1308         do {
1309             sx=scriptExtensions[scx++];
1310             set.set(sx&0x7fff);
1311             ++length;
1312         } while(sx<0x8000);
1313         // length==set.cardinality()
1314         return -length;
1315     }
1316 
1317     /**
1318      * Returns the long Unicode script name, if there is one.
1319      * Otherwise returns the 4-letter ISO 15924 script code.
1320      * Returns "Malayalam" given MALAYALAM.
1321      *
1322      * @param scriptCode int script code
1323      * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code
1324      * @throws IllegalArgumentException if the script code is not valid
1325      * @stable ICU 2.4
1326      */
getName(int scriptCode)1327     public static final String getName(int scriptCode){
1328         return UCharacter.getPropertyValueName(UProperty.SCRIPT,
1329                 scriptCode,
1330                 UProperty.NameChoice.LONG);
1331     }
1332 
1333     /**
1334      * Returns the 4-letter ISO 15924 script code,
1335      * which is the same as the short Unicode script name if Unicode has names for the script.
1336      * Returns "Mlym" given MALAYALAM.
1337      *
1338      * @param scriptCode int script code
1339      * @return short script name (4-letter code)
1340      * @throws IllegalArgumentException if the script code is not valid
1341      * @stable ICU 2.4
1342      */
getShortName(int scriptCode)1343     public static final String getShortName(int scriptCode){
1344         return UCharacter.getPropertyValueName(UProperty.SCRIPT,
1345                 scriptCode,
1346                 UProperty.NameChoice.SHORT);
1347     }
1348 
1349     /**
1350      * Script metadata (script properties).
1351      * See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
1352      */
1353     private static final class ScriptMetadata {
1354         // 0 = NOT_ENCODED, no sample character, default false script properties.
1355         // Bits 20.. 0: sample character
1356 
1357         // Bits 23..21: usage
1358         private static final int UNKNOWN = 1 << 21;
1359         private static final int EXCLUSION = 2 << 21;
1360         private static final int LIMITED_USE = 3 << 21;
1361         // vate static final int ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10
1362         private static final int RECOMMENDED = 5 << 21;
1363 
1364         // Bits 31..24: Single-bit flags
1365         private static final int RTL = 1 << 24;
1366         private static final int LB_LETTERS = 1 << 25;
1367         private static final int CASED = 1 << 26;
1368 
1369         private static final int SCRIPT_PROPS[] = {
1370             // Begin copy-paste output from
1371             // tools/trunk/unicode/py/parsescriptmetadata.py
1372             // or from icu/trunk/source/common/uscript_props.cpp
1373             0x0040 | RECOMMENDED,  // Zyyy
1374             0x0308 | RECOMMENDED,  // Zinh
1375             0x0628 | RECOMMENDED | RTL,  // Arab
1376             0x0531 | RECOMMENDED | CASED,  // Armn
1377             0x0995 | RECOMMENDED,  // Beng
1378             0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
1379             0x13C4 | LIMITED_USE | CASED,  // Cher
1380             0x03E2 | EXCLUSION | CASED,  // Copt
1381             0x042F | RECOMMENDED | CASED,  // Cyrl
1382             0x10414 | EXCLUSION | CASED,  // Dsrt
1383             0x0905 | RECOMMENDED,  // Deva
1384             0x12A0 | RECOMMENDED,  // Ethi
1385             0x10D3 | RECOMMENDED,  // Geor
1386             0x10330 | EXCLUSION,  // Goth
1387             0x03A9 | RECOMMENDED | CASED,  // Grek
1388             0x0A95 | RECOMMENDED,  // Gujr
1389             0x0A15 | RECOMMENDED,  // Guru
1390             0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
1391             0xAC00 | RECOMMENDED,  // Hang
1392             0x05D0 | RECOMMENDED | RTL,  // Hebr
1393             0x304B | RECOMMENDED | LB_LETTERS,  // Hira
1394             0x0C95 | RECOMMENDED,  // Knda
1395             0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
1396             0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
1397             0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
1398             0x004C | RECOMMENDED | CASED,  // Latn
1399             0x0D15 | RECOMMENDED,  // Mlym
1400             0x1826 | EXCLUSION,  // Mong
1401             0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
1402             0x168F | EXCLUSION,  // Ogam
1403             0x10300 | EXCLUSION,  // Ital
1404             0x0B15 | RECOMMENDED,  // Orya
1405             0x16A0 | EXCLUSION,  // Runr
1406             0x0D85 | RECOMMENDED,  // Sinh
1407             0x0710 | LIMITED_USE | RTL,  // Syrc
1408             0x0B95 | RECOMMENDED,  // Taml
1409             0x0C15 | RECOMMENDED,  // Telu
1410             0x078C | RECOMMENDED | RTL,  // Thaa
1411             0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
1412             0x0F40 | RECOMMENDED,  // Tibt
1413             0x14C0 | LIMITED_USE,  // Cans
1414             0xA288 | LIMITED_USE | LB_LETTERS,  // Yiii
1415             0x1703 | EXCLUSION,  // Tglg
1416             0x1723 | EXCLUSION,  // Hano
1417             0x1743 | EXCLUSION,  // Buhd
1418             0x1763 | EXCLUSION,  // Tagb
1419             0x280E | UNKNOWN,  // Brai
1420             0x10800 | EXCLUSION | RTL,  // Cprt
1421             0x1900 | LIMITED_USE,  // Limb
1422             0x10000 | EXCLUSION,  // Linb
1423             0x10480 | EXCLUSION,  // Osma
1424             0x10450 | EXCLUSION,  // Shaw
1425             0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
1426             0x10380 | EXCLUSION,  // Ugar
1427             0,
1428             0x1A00 | EXCLUSION,  // Bugi
1429             0x2C00 | EXCLUSION | CASED,  // Glag
1430             0x10A00 | EXCLUSION | RTL,  // Khar
1431             0xA800 | LIMITED_USE,  // Sylo
1432             0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
1433             0x2D30 | LIMITED_USE,  // Tfng
1434             0x103A0 | EXCLUSION,  // Xpeo
1435             0x1B05 | LIMITED_USE,  // Bali
1436             0x1BC0 | LIMITED_USE,  // Batk
1437             0,
1438             0x11005 | EXCLUSION,  // Brah
1439             0xAA00 | LIMITED_USE,  // Cham
1440             0,
1441             0,
1442             0,
1443             0,
1444             0x13153 | EXCLUSION,  // Egyp
1445             0,
1446             0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
1447             0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
1448             0x16B1C | EXCLUSION,  // Hmng
1449             0x10CA1 | EXCLUSION | RTL | CASED,  // Hung
1450             0,
1451             0xA984 | LIMITED_USE,  // Java
1452             0xA90A | LIMITED_USE,  // Kali
1453             0,
1454             0,
1455             0x1C00 | LIMITED_USE,  // Lepc
1456             0x10647 | EXCLUSION,  // Lina
1457             0x0840 | LIMITED_USE | RTL,  // Mand
1458             0,
1459             0x10980 | EXCLUSION | RTL,  // Mero
1460             0x07CA | LIMITED_USE | RTL,  // Nkoo
1461             0x10C00 | EXCLUSION | RTL,  // Orkh
1462             0x1036B | EXCLUSION,  // Perm
1463             0xA840 | EXCLUSION,  // Phag
1464             0x10900 | EXCLUSION | RTL,  // Phnx
1465             0x16F00 | LIMITED_USE,  // Plrd
1466             0,
1467             0,
1468             0,
1469             0,
1470             0,
1471             0,
1472             0xA549 | LIMITED_USE,  // Vaii
1473             0,
1474             0x12000 | EXCLUSION,  // Xsux
1475             0,
1476             0xFDD0 | UNKNOWN,  // Zzzz
1477             0x102A0 | EXCLUSION,  // Cari
1478             0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
1479             0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
1480             0x10280 | EXCLUSION,  // Lyci
1481             0x10920 | EXCLUSION | RTL,  // Lydi
1482             0x1C5A | LIMITED_USE,  // Olck
1483             0xA930 | EXCLUSION,  // Rjng
1484             0xA882 | LIMITED_USE,  // Saur
1485             0x1D850 | EXCLUSION,  // Sgnw
1486             0x1B83 | LIMITED_USE,  // Sund
1487             0,
1488             0xABC0 | LIMITED_USE,  // Mtei
1489             0x10840 | EXCLUSION | RTL,  // Armi
1490             0x10B00 | EXCLUSION | RTL,  // Avst
1491             0x11103 | LIMITED_USE,  // Cakm
1492             0xAC00 | RECOMMENDED,  // Kore
1493             0x11083 | EXCLUSION,  // Kthi
1494             0x10AD8 | EXCLUSION | RTL,  // Mani
1495             0x10B60 | EXCLUSION | RTL,  // Phli
1496             0x10B8F | EXCLUSION | RTL,  // Phlp
1497             0,
1498             0x10B40 | EXCLUSION | RTL,  // Prti
1499             0x0800 | EXCLUSION | RTL,  // Samr
1500             0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
1501             0,
1502             0,
1503             0xA6A0 | LIMITED_USE,  // Bamu
1504             0xA4D0 | LIMITED_USE,  // Lisu
1505             0,
1506             0x10A60 | EXCLUSION | RTL,  // Sarb
1507             0x16AE6 | EXCLUSION,  // Bass
1508             0x1BC20 | EXCLUSION,  // Dupl
1509             0x10500 | EXCLUSION,  // Elba
1510             0x11315 | EXCLUSION,  // Gran
1511             0,
1512             0,
1513             0x1E802 | EXCLUSION | RTL,  // Mend
1514             0x109A0 | EXCLUSION | RTL,  // Merc
1515             0x10A95 | EXCLUSION | RTL,  // Narb
1516             0x10896 | EXCLUSION | RTL,  // Nbat
1517             0x10873 | EXCLUSION | RTL,  // Palm
1518             0x112BE | EXCLUSION,  // Sind
1519             0x118B4 | EXCLUSION | CASED,  // Wara
1520             0,
1521             0,
1522             0x16A4F | EXCLUSION,  // Mroo
1523             0x1B1C4 | EXCLUSION | LB_LETTERS,  // Nshu
1524             0x11183 | EXCLUSION,  // Shrd
1525             0x110D0 | EXCLUSION,  // Sora
1526             0x11680 | EXCLUSION,  // Takr
1527             0x18229 | EXCLUSION | LB_LETTERS,  // Tang
1528             0,
1529             0x14400 | EXCLUSION,  // Hluw
1530             0x11208 | EXCLUSION,  // Khoj
1531             0x11484 | EXCLUSION,  // Tirh
1532             0x10537 | EXCLUSION,  // Aghb
1533             0x11152 | EXCLUSION,  // Mahj
1534             0x11717 | EXCLUSION | LB_LETTERS,  // Ahom
1535             0x108F4 | EXCLUSION | RTL,  // Hatr
1536             0x1160E | EXCLUSION,  // Modi
1537             0x1128F | EXCLUSION,  // Mult
1538             0x11AC0 | EXCLUSION,  // Pauc
1539             0x1158E | EXCLUSION,  // Sidd
1540             0x1E909 | LIMITED_USE | RTL | CASED,  // Adlm
1541             0x11C0E | EXCLUSION,  // Bhks
1542             0x11C72 | EXCLUSION,  // Marc
1543             0x11412 | LIMITED_USE,  // Newa
1544             0x104B5 | LIMITED_USE | CASED,  // Osge
1545             0x5B57 | RECOMMENDED | LB_LETTERS,  // Hanb
1546             0x1112 | RECOMMENDED,  // Jamo
1547             0,
1548             0x11D10 | EXCLUSION,  // Gonm
1549             0x11A5C | EXCLUSION,  // Soyo
1550             0x11A0B | EXCLUSION,  // Zanb
1551             0x1180B | EXCLUSION,  // Dogr
1552             0x11D71 | LIMITED_USE,  // Gong
1553             0x11EE5 | EXCLUSION,  // Maka
1554             0x16E40 | EXCLUSION | CASED,  // Medf
1555             0x10D12 | LIMITED_USE | RTL,  // Rohg
1556             0x10F42 | EXCLUSION | RTL,  // Sogd
1557             0x10F19 | EXCLUSION | RTL,  // Sogo
1558             0x10FF1 | EXCLUSION | RTL,  // Elym
1559             0x1E108 | LIMITED_USE,  // Hmnp
1560             0x119CE | EXCLUSION,  // Nand
1561             0x1E2E1 | LIMITED_USE,  // Wcho
1562             0x10FBF | EXCLUSION | RTL,  // Chrs
1563             0x1190C | EXCLUSION,  // Diak
1564             0x18C65 | EXCLUSION | LB_LETTERS,  // Kits
1565             0x10E88 | EXCLUSION | RTL,  // Yezi
1566             0x12FE5 | EXCLUSION,  // Cpmn
1567             0x10F7C | EXCLUSION | RTL,  // Ougr
1568             0x16ABC | EXCLUSION,  // Tnsa
1569             0x1E290 | EXCLUSION,  // Toto
1570             0x10582 | EXCLUSION | CASED,  // Vith
1571             0x11F1B | EXCLUSION | LB_LETTERS,  // Kawi
1572             0x1E4E6 | EXCLUSION,  // Nagm
1573             // End copy-paste from parsescriptmetadata.py
1574         };
1575 
getScriptProps(int script)1576         private static final int getScriptProps(int script) {
1577             if (0 <= script && script < SCRIPT_PROPS.length) {
1578                 return SCRIPT_PROPS[script];
1579             } else {
1580                 return 0;
1581             }
1582         }
1583     }
1584 
1585     /**
1586      * Script usage constants.
1587      * See UAX #31 Unicode Identifier and Pattern Syntax.
1588      * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
1589      *
1590      * @stable ICU 51
1591      */
public enum ScriptUsage {
    // The declaration order is significant: getUsage() indexes values() with the
    // usage number stored in the script metadata, so constants must not be reordered.

    /**
     * The script is not encoded in Unicode.
     * @stable ICU 51
     */
    NOT_ENCODED,
    /**
     * The usage of the script is unknown.
     * @stable ICU 51
     */
    UNKNOWN,
    /**
     * The script is a Candidate for Exclusion from Identifiers.
     * @stable ICU 51
     */
    EXCLUDED,
    /**
     * The script is a Limited Use script.
     * @stable ICU 51
     */
    LIMITED_USE,
    /**
     * The script is an Aspirational Use script.
     * @stable ICU 51
     */
    ASPIRATIONAL,
    /**
     * The script is a Recommended script.
     * @stable ICU 51
     */
    RECOMMENDED
}
1624     private static final ScriptUsage[] usageValues = ScriptUsage.values();
1625 
1626     /**
1627      * Returns the script sample character string.
1628      * This string normally consists of one code point but might be longer.
1629      * The string is empty if the script is not encoded.
1630      *
1631      * @param script script code
1632      * @return the sample character string
1633      * @stable ICU 51
1634      */
getSampleString(int script)1635     public static final String getSampleString(int script) {
1636         int sampleChar = ScriptMetadata.getScriptProps(script) & 0x1fffff;
1637         if(sampleChar != 0) {
1638             return new StringBuilder().appendCodePoint(sampleChar).toString();
1639         }
1640         return "";
1641     }
1642 
1643     /**
1644      * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
1645      * Returns {@link ScriptUsage#NOT_ENCODED} if the script is not encoded in Unicode.
1646      *
1647      * @param script script code
1648      * @return script usage
1649      * @see ScriptUsage
1650      * @stable ICU 51
1651      */
getUsage(int script)1652     public static final ScriptUsage getUsage(int script) {
1653         return usageValues[(ScriptMetadata.getScriptProps(script) >> 21) & 7];
1654     }
1655 
1656     /**
1657      * Returns true if the script is written right-to-left.
1658      * For example, Arab and Hebr.
1659      *
1660      * @param script script code
1661      * @return true if the script is right-to-left
1662      * @stable ICU 51
1663      */
isRightToLeft(int script)1664     public static final boolean isRightToLeft(int script) {
1665         return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.RTL) != 0;
1666     }
1667 
1668     /**
1669      * Returns true if the script allows line breaks between letters (excluding hyphenation).
1670      * Such a script typically requires dictionary-based line breaking.
1671      * For example, Hani and Thai.
1672      *
1673      * @param script script code
1674      * @return true if the script allows line breaks between letters
1675      * @stable ICU 51
1676      */
breaksBetweenLetters(int script)1677     public static final boolean breaksBetweenLetters(int script) {
1678         return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.LB_LETTERS) != 0;
1679     }
1680 
1681     /**
1682      * Returns true if in modern (or most recent) usage of the script case distinctions are customary.
1683      * For example, Latn and Cyrl.
1684      *
1685      * @param script script code
1686      * @return true if the script is cased
1687      * @stable ICU 51
1688      */
isCased(int script)1689     public static final boolean isCased(int script) {
1690         return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.CASED) != 0;
1691     }
1692 
1693     ///CLOVER:OFF
1694     /**
1695      *  Private Constructor. Never default construct
1696      */
UScript()1697     private UScript(){}
1698     ///CLOVER:ON
1699 }
1700