• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /**
4  *******************************************************************************
5  * Copyright (C) 2001-2016 International Business Machines Corporation and
6  * others. All Rights Reserved.
7  *******************************************************************************
8  */
9 
10 package com.ibm.icu.lang;
11 
12 import java.util.BitSet;
13 import java.util.Locale;
14 
15 import com.ibm.icu.impl.UCharacterProperty;
16 import com.ibm.icu.util.ULocale;
17 
18 /**
19  * Constants for ISO 15924 script codes, and related functions.
20  *
21  * <p>The current set of script code constants supports at least all scripts
22  * that are encoded in the version of Unicode which ICU currently supports.
23  * The names of the constants are usually derived from the
24  * Unicode script property value aliases.
25  * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
26  * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
27  *
28  * <p>In addition, constants for many ISO 15924 script codes
29  * are included, for use with language tags, CLDR data, and similar.
30  * Some of those codes are not used in the Unicode Character Database (UCD).
31  * For example, there are no characters that have a UCD script property value of
32  * Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
33  *
34  * <p>Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
35  *
36  * <p>Starting with ICU 55, script codes are only added when their scripts
37  * have been or will certainly be encoded in Unicode,
38  * and have been assigned Unicode script property value aliases,
39  * to ensure that their script names are stable and match the names of the constants.
40  * Script codes like Latf and Aran that are not subject to separate encoding
41  * may be added at any time.
42  *
43  * @stable ICU 2.4
44  */
45 public final class UScript {
46     /**
47      * Invalid code
48      * @stable ICU 2.4
49      */
50     public static final int INVALID_CODE = -1;
51     /**
52      * Common
53      * @stable ICU 2.4
54      */
55     public static final int COMMON       =  0;  /* Zyyy */
56     /**
57      * Inherited
58      * @stable ICU 2.4
59      */
60     public static final int INHERITED    =  1;  /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
61     /**
62      * Arabic
63      * @stable ICU 2.4
64      */
65     public static final int ARABIC       =  2;  /* Arab */
66     /**
67      * Armenian
68      * @stable ICU 2.4
69      */
70     public static final int ARMENIAN     =  3;  /* Armn */
71     /**
72      * Bengali
73      * @stable ICU 2.4
74      */
75     public static final int BENGALI      =  4;  /* Beng */
76     /**
77      * Bopomofo
78      * @stable ICU 2.4
79      */
80     public static final int BOPOMOFO     =  5;  /* Bopo */
81     /**
82      * Cherokee
83      * @stable ICU 2.4
84      */
85     public static final int CHEROKEE     =  6;  /* Cher */
86     /**
87      * Coptic
88      * @stable ICU 2.4
89      */
90     public static final int COPTIC       =  7;  /* Copt; also Qaac */
91     /**
92      * Cyrillic
93      * @stable ICU 2.4
94      */
95     public static final int CYRILLIC     =  8;  /* Cyrl (Cyrs) */
96     /**
97      * Deseret
98      * @stable ICU 2.4
99      */
100     public static final int DESERET      =  9;  /* Dsrt */
101     /**
102      * Devanagari
103      * @stable ICU 2.4
104      */
105     public static final int DEVANAGARI   = 10;  /* Deva */
106     /**
107      * Ethiopic
108      * @stable ICU 2.4
109      */
110     public static final int ETHIOPIC     = 11;  /* Ethi */
111     /**
112      * Georgian
113      * @stable ICU 2.4
114      */
115     public static final int GEORGIAN     = 12;  /* Geor (Geon; Geoa) */
116     /**
117      * Gothic
118      * @stable ICU 2.4
119      */
120     public static final int GOTHIC       = 13;  /* Goth */
121     /**
122      * Greek
123      * @stable ICU 2.4
124      */
125     public static final int GREEK        = 14;  /* Grek */
126     /**
127      * Gujarati
128      * @stable ICU 2.4
129      */
130     public static final int GUJARATI     = 15;  /* Gujr */
131     /**
132      * Gurmukhi
133      * @stable ICU 2.4
134      */
135     public static final int GURMUKHI     = 16;  /* Guru */
136     /**
137      * Han
138      * @stable ICU 2.4
139      */
140     public static final int HAN          = 17;  /* Hani */
141     /**
142      * Hangul
143      * @stable ICU 2.4
144      */
145     public static final int HANGUL       = 18;  /* Hang */
146     /**
147      * Hebrew
148      * @stable ICU 2.4
149      */
150     public static final int HEBREW       = 19;  /* Hebr */
151     /**
152      * Hiragana
153      * @stable ICU 2.4
154      */
155     public static final int HIRAGANA     = 20;  /* Hira */
156     /**
157      * Kannada
158      * @stable ICU 2.4
159      */
160     public static final int KANNADA      = 21;  /* Knda */
161     /**
162      * Katakana
163      * @stable ICU 2.4
164      */
165     public static final int KATAKANA     = 22;  /* Kana */
166     /**
167      * Khmer
168      * @stable ICU 2.4
169      */
170     public static final int KHMER        = 23;  /* Khmr */
171     /**
172      * Lao
173      * @stable ICU 2.4
174      */
175     public static final int LAO          = 24;  /* Laoo */
176     /**
177      * Latin
178      * @stable ICU 2.4
179      */
180     public static final int LATIN        = 25;  /* Latn (Latf; Latg) */
181     /**
182      * Malayalam
183      * @stable ICU 2.4
184      */
185     public static final int MALAYALAM    = 26;  /* Mlym */
186     /**
187      * Mongolian
188      * @stable ICU 2.4
189      */
190     public static final int MONGOLIAN    = 27;  /* Mong */
191     /**
192      * Myanmar
193      * @stable ICU 2.4
194      */
195     public static final int MYANMAR      = 28;  /* Mymr */
196     /**
197      * Ogham
198      * @stable ICU 2.4
199      */
200     public static final int OGHAM        = 29;  /* Ogam */
201     /**
202      * Old Italic
203      * @stable ICU 2.4
204      */
205     public static final int OLD_ITALIC   = 30;  /* Ital */
206     /**
207      * Oriya
208      * @stable ICU 2.4
209      */
210     public static final int ORIYA        = 31;  /* Orya */
211     /**
212      * Runic
213      * @stable ICU 2.4
214      */
215     public static final int RUNIC        = 32;  /* Runr */
216     /**
217      * Sinhala
218      * @stable ICU 2.4
219      */
220     public static final int SINHALA      = 33;  /* Sinh */
221     /**
222      * Syriac
223      * @stable ICU 2.4
224      */
225     public static final int SYRIAC       = 34;  /* Syrc (Syrj; Syrn; Syre) */
226     /**
227      * Tamil
228      * @stable ICU 2.4
229      */
230     public static final int TAMIL        = 35;  /* Taml */
231     /**
232      * Telugu
233      * @stable ICU 2.4
234      */
235     public static final int TELUGU       = 36;  /* Telu */
236     /**
237      * Thaana
238      * @stable ICU 2.4
239      */
240     public static final int THAANA       = 37;  /* Thaa */
241     /**
242      * Thai
243      * @stable ICU 2.4
244      */
245     public static final int THAI         = 38;  /* Thai */
246     /**
247      * Tibetan
248      * @stable ICU 2.4
249      */
250     public static final int TIBETAN      = 39;  /* Tibt */
251     /**
252      * Unified Canadian Aboriginal Syllabics
253      * @stable ICU 2.6
254      */
255     public static final int CANADIAN_ABORIGINAL = 40;  /* Cans */
256     /**
257      * Unified Canadian Aboriginal Syllabics (alias)
258      * @stable ICU 2.4
259      */
260     public static final int UCAS         = CANADIAN_ABORIGINAL;  /* Cans */
261     /**
262      * Yi syllables
263      * @stable ICU 2.4
264      */
265     public static final int YI           = 41;  /* Yiii */
266     /**
267      * Tagalog
268      * @stable ICU 2.4
269      */
270     public static final int TAGALOG      = 42;  /* Tglg */
271     /**
272      * Hanunoo
273      * @stable ICU 2.4
274      */
275     public static final int HANUNOO      = 43;  /* Hano */
276     /**
277      * Buhid
278      * @stable ICU 2.4
279      */
280     public static final int BUHID        = 44;  /* Buhd */
281     /**
282      * Tagbanwa
283      * @stable ICU 2.4
284      */
285     public static final int TAGBANWA     = 45;  /* Tagb */
286     /**
287      * Braille
288      * Script in Unicode 4
289      * @stable ICU 2.6
290      *
291      */
292     public static final int BRAILLE      = 46;  /* Brai */
293     /**
294      * Cypriot
295      * Script in Unicode 4
296      * @stable ICU 2.6
297      *
298      */
299     public static final int CYPRIOT              = 47;  /* Cprt */
300     /**
301      * Limbu
302      * Script in Unicode 4
303      * @stable ICU 2.6
304      *
305      */
306     public static final int LIMBU                = 48;  /* Limb */
307     /**
308      * Linear B
309      * Script in Unicode 4
310      * @stable ICU 2.6
311      *
312      */
313     public static final int LINEAR_B     = 49;  /* Linb */
314     /**
315      * Osmanya
316      * Script in Unicode 4
317      * @stable ICU 2.6
318      *
319      */
320     public static final int OSMANYA              = 50;  /* Osma */
321     /**
322      * Shavian
323      * Script in Unicode 4
324      * @stable ICU 2.6
325      *
326      */
327     public static final int SHAVIAN              = 51;  /* Shaw */
328     /**
329      * Tai Le
330      * Script in Unicode 4
331      * @stable ICU 2.6
332      *
333      */
334     public static final int TAI_LE               = 52;  /* Tale */
335     /**
336      * Ugaritic
337      * Script in Unicode 4
338      * @stable ICU 2.6
339      *
340      */
341     public static final int UGARITIC     = 53;  /* Ugar */
342     /**
343      * Script in Unicode 4.0.1
344      * @stable ICU 3.0
345      */
346     public static final int KATAKANA_OR_HIRAGANA = 54;  /*Hrkt */
347 
348     /**
349      * Script in Unicode 4.1
350      * @stable ICU 3.4
351      */
352     public static final int BUGINESE = 55;           /* Bugi */
353     /**
354      * Script in Unicode 4.1
355      * @stable ICU 3.4
356      */
357     public static final int GLAGOLITIC = 56;         /* Glag */
358     /**
359      * Script in Unicode 4.1
360      * @stable ICU 3.4
361      */
362     public static final int KHAROSHTHI = 57;         /* Khar */
363     /**
364      * Script in Unicode 4.1
365      * @stable ICU 3.4
366      */
367     public static final int SYLOTI_NAGRI = 58;       /* Sylo */
368     /**
369      * Script in Unicode 4.1
370      * @stable ICU 3.4
371      */
372     public static final int NEW_TAI_LUE = 59;        /* Talu */
373     /**
374      * Script in Unicode 4.1
375      * @stable ICU 3.4
376      */
377     public static final int TIFINAGH = 60;           /* Tfng */
378     /**
379      * Script in Unicode 4.1
380      * @stable ICU 3.4
381      */
382     public static final int OLD_PERSIAN = 61;        /* Xpeo */
383 
384 
385     /**
386      * ISO 15924 script code
387      * @stable ICU 3.6
388      */
389     public static final int BALINESE                      = 62; /* Bali */
390     /**
391      * ISO 15924 script code
392      * @stable ICU 3.6
393      */
394     public static final int BATAK                         = 63; /* Batk */
395     /**
396      * ISO 15924 script code
397      * @stable ICU 3.6
398      */
399     public static final int BLISSYMBOLS                   = 64; /* Blis */
400     /**
401      * ISO 15924 script code
402      * @stable ICU 3.6
403      */
404     public static final int BRAHMI                        = 65; /* Brah */
405     /**
406      * ISO 15924 script code
407      * @stable ICU 3.6
408      */
409     public static final int CHAM                          = 66; /* Cham */
410     /**
411      * ISO 15924 script code
412      * @stable ICU 3.6
413      */
414     public static final int CIRTH                         = 67; /* Cirt */
415     /**
416      * ISO 15924 script code
417      * @stable ICU 3.6
418      */
419     public static final int OLD_CHURCH_SLAVONIC_CYRILLIC  = 68; /* Cyrs */
420     /**
421      * ISO 15924 script code
422      * @stable ICU 3.6
423      */
424     public static final int DEMOTIC_EGYPTIAN              = 69; /* Egyd */
425     /**
426      * ISO 15924 script code
427      * @stable ICU 3.6
428      */
429     public static final int HIERATIC_EGYPTIAN             = 70; /* Egyh */
430     /**
431      * ISO 15924 script code
432      * @stable ICU 3.6
433      */
434     public static final int EGYPTIAN_HIEROGLYPHS          = 71; /* Egyp */
435     /**
436      * ISO 15924 script code
437      * @stable ICU 3.6
438      */
439     public static final int KHUTSURI                      = 72; /* Geok */
440     /**
441      * ISO 15924 script code
442      * @stable ICU 3.6
443      */
444     public static final int SIMPLIFIED_HAN                = 73; /* Hans */
445     /**
446      * ISO 15924 script code
447      * @stable ICU 3.6
448      */
449     public static final int TRADITIONAL_HAN               = 74; /* Hant */
450     /**
451      * ISO 15924 script code
452      * @stable ICU 3.6
453      */
454     public static final int PAHAWH_HMONG                  = 75; /* Hmng */
455     /**
456      * ISO 15924 script code
457      * @stable ICU 3.6
458      */
459     public static final int OLD_HUNGARIAN                 = 76; /* Hung */
460     /**
461      * ISO 15924 script code
462      * @stable ICU 3.6
463      */
464     public static final int HARAPPAN_INDUS                = 77; /* Inds */
465     /**
466      * ISO 15924 script code
467      * @stable ICU 3.6
468      */
469     public static final int JAVANESE                      = 78; /* Java */
470     /**
471      * ISO 15924 script code
472      * @stable ICU 3.6
473      */
474     public static final int KAYAH_LI                      = 79; /* Kali */
475     /**
476      * ISO 15924 script code
477      * @stable ICU 3.6
478      */
479     public static final int LATIN_FRAKTUR                 = 80; /* Latf */
480     /**
481      * ISO 15924 script code
482      * @stable ICU 3.6
483      */
484     public static final int LATIN_GAELIC                  = 81; /* Latg */
485     /**
486      * ISO 15924 script code
487      * @stable ICU 3.6
488      */
489     public static final int LEPCHA                        = 82; /* Lepc */
490     /**
491      * ISO 15924 script code
492      * @stable ICU 3.6
493      */
494     public static final int LINEAR_A                      = 83; /* Lina */
495     /**
496      * ISO 15924 script code
497      * @stable ICU 4.6
498      */
499     public static final int MANDAIC                       = 84; /* Mand */
500     /**
501      * ISO 15924 script code
502      * @stable ICU 3.6
503      */
504     public static final int MANDAEAN                      = MANDAIC;
505     /**
506      * ISO 15924 script code
507      * @stable ICU 3.6
508      */
509     public static final int MAYAN_HIEROGLYPHS             = 85; /* Maya */
510     /**
511      * ISO 15924 script code
512      * @stable ICU 4.6
513      */
514     public static final int MEROITIC_HIEROGLYPHS          = 86; /* Mero */
515     /**
516      * ISO 15924 script code
517      * @stable ICU 3.6
518      */
519     public static final int MEROITIC                      = MEROITIC_HIEROGLYPHS;
520     /**
521      * ISO 15924 script code
522      * @stable ICU 3.6
523      */
524     public static final int NKO                           = 87; /* Nkoo */
525     /**
526      * ISO 15924 script code
527      * @stable ICU 3.6
528      */
529     public static final int ORKHON                        = 88; /* Orkh */
530     /**
531      * ISO 15924 script code
532      * @stable ICU 3.6
533      */
534     public static final int OLD_PERMIC                    = 89; /* Perm */
535     /**
536      * ISO 15924 script code
537      * @stable ICU 3.6
538      */
539     public static final int PHAGS_PA                      = 90; /* Phag */
540     /**
541      * ISO 15924 script code
542      * @stable ICU 3.6
543      */
544     public static final int PHOENICIAN                    = 91; /* Phnx */
545     /**
546      * ISO 15924 script code
547      * @stable ICU 52
548      */
549     public static final int MIAO                          = 92; /* Plrd */
550     /**
551      * ISO 15924 script code
552      * @stable ICU 3.6
553      */
554     public static final int PHONETIC_POLLARD              = MIAO;
555     /**
556      * ISO 15924 script code
557      * @stable ICU 3.6
558      */
559     public static final int RONGORONGO                    = 93; /* Roro */
560     /**
561      * ISO 15924 script code
562      * @stable ICU 3.6
563      */
564     public static final int SARATI                        = 94; /* Sara */
565     /**
566      * ISO 15924 script code
567      * @stable ICU 3.6
568      */
569     public static final int ESTRANGELO_SYRIAC             = 95; /* Syre */
570     /**
571      * ISO 15924 script code
572      * @stable ICU 3.6
573      */
574     public static final int WESTERN_SYRIAC                = 96; /* Syrj */
575     /**
576      * ISO 15924 script code
577      * @stable ICU 3.6
578      */
579     public static final int EASTERN_SYRIAC                = 97; /* Syrn */
580     /**
581      * ISO 15924 script code
582      * @stable ICU 3.6
583      */
584     public static final int TENGWAR                       = 98; /* Teng */
585     /**
586      * ISO 15924 script code
587      * @stable ICU 3.6
588      */
589     public static final int VAI                           = 99; /* Vaii */
590     /**
591      * ISO 15924 script code
592      * @stable ICU 3.6
593      */
594     public static final int VISIBLE_SPEECH                = 100;/* Visp */
595     /**
596      * ISO 15924 script code
597      * @stable ICU 3.6
598      */
599     public static final int CUNEIFORM                     = 101;/* Xsux */
600     /**
601      * ISO 15924 script code
602      * @stable ICU 3.6
603      */
604     public static final int UNWRITTEN_LANGUAGES           = 102;/* Zxxx */
605     /**
606      * ISO 15924 script code
607      * @stable ICU 3.6
608      */
609     public static final int UNKNOWN                       = 103;/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
610 
611     /**
612      * ISO 15924 script code
613      * @stable ICU 3.8
614      */
615     public static final int CARIAN                        = 104;/* Cari */
616     /**
617      * ISO 15924 script code
618      * @stable ICU 3.8
619      */
620     public static final int JAPANESE                      = 105;/* Jpan */
621     /**
622      * ISO 15924 script code
623      * @stable ICU 3.8
624      */
625     public static final int LANNA                         = 106;/* Lana */
626     /**
627      * ISO 15924 script code
628      * @stable ICU 3.8
629      */
630     public static final int LYCIAN                        = 107;/* Lyci */
631     /**
632      * ISO 15924 script code
633      * @stable ICU 3.8
634      */
635     public static final int LYDIAN                        = 108;/* Lydi */
636     /**
637      * ISO 15924 script code
638      * @stable ICU 3.8
639      */
640     public static final int OL_CHIKI                      = 109;/* Olck */
641     /**
642      * ISO 15924 script code
643      * @stable ICU 3.8
644      */
645     public static final int REJANG                        = 110;/* Rjng */
646     /**
647      * ISO 15924 script code
648      * @stable ICU 3.8
649      */
650     public static final int SAURASHTRA                    = 111;/* Saur */
651     /**
652      * ISO 15924 script code for Sutton SignWriting
653      * @stable ICU 3.8
654      */
655     public static final int SIGN_WRITING                  = 112;/* Sgnw */
656     /**
657      * ISO 15924 script code
658      * @stable ICU 3.8
659      */
660     public static final int SUNDANESE                     = 113;/* Sund */
661     /**
662      * ISO 15924 script code
663      * @stable ICU 3.8
664      */
665     public static final int MOON                          = 114;/* Moon */
666     /**
667      * ISO 15924 script code
668      * @stable ICU 3.8
669      */
670     public static final int MEITEI_MAYEK                  = 115;/* Mtei */
671 
672     /**
673      * ISO 15924 script code
674      * @stable ICU 4.0
675      */
676     public static final int IMPERIAL_ARAMAIC              = 116;/* Armi */
677 
678     /**
679      * ISO 15924 script code
680      * @stable ICU 4.0
681      */
682     public static final int AVESTAN                       = 117;/* Avst */
683 
684     /**
685      * ISO 15924 script code
686      * @stable ICU 4.0
687      */
688     public static final int CHAKMA                        = 118;/* Cakm */
689 
690     /**
691      * ISO 15924 script code
692      * @stable ICU 4.0
693      */
694     public static final int KOREAN                        = 119;/* Kore */
695 
696     /**
697      * ISO 15924 script code
698      * @stable ICU 4.0
699      */
700     public static final int KAITHI                        = 120;/* Kthi */
701 
702     /**
703      * ISO 15924 script code
704      * @stable ICU 4.0
705      */
706     public static final int MANICHAEAN                    = 121;/* Mani */
707 
708     /**
709      * ISO 15924 script code
710      * @stable ICU 4.0
711      */
712     public static final int INSCRIPTIONAL_PAHLAVI         = 122;/* Phli */
713 
714     /**
715      * ISO 15924 script code
716      * @stable ICU 4.0
717      */
718     public static final int PSALTER_PAHLAVI               = 123;/* Phlp */
719 
720     /**
721      * ISO 15924 script code
722      * @stable ICU 4.0
723      */
724     public static final int BOOK_PAHLAVI                  = 124;/* Phlv */
725 
726     /**
727      * ISO 15924 script code
728      * @stable ICU 4.0
729      */
730     public static final int INSCRIPTIONAL_PARTHIAN        = 125;/* Prti */
731 
732     /**
733      * ISO 15924 script code
734      * @stable ICU 4.0
735      */
736     public static final int SAMARITAN                     = 126;/* Samr */
737 
738     /**
739      * ISO 15924 script code
740      * @stable ICU 4.0
741      */
742     public static final int TAI_VIET                      = 127;/* Tavt */
743 
744     /**
745      * ISO 15924 script code
746      * @stable ICU 4.0
747      */
748     public static final int MATHEMATICAL_NOTATION         = 128;/* Zmth */
749 
750     /**
751      * ISO 15924 script code
752      * @stable ICU 4.0
753      */
754     public static final int SYMBOLS                       = 129;/* Zsym */
755 
756     /**
757      * ISO 15924 script code
758      * @stable ICU 4.4
759      */
760     public static final int BAMUM                         = 130;/* Bamu */
761     /**
762      * ISO 15924 script code
763      * @stable ICU 4.4
764      */
765     public static final int LISU                          = 131;/* Lisu */
766     /**
767      * ISO 15924 script code
768      * @stable ICU 4.4
769      */
770     public static final int NAKHI_GEBA                    = 132;/* Nkgb */
771     /**
772      * ISO 15924 script code
773      * @stable ICU 4.4
774      */
775     public static final int OLD_SOUTH_ARABIAN             = 133;/* Sarb */
776 
777     /**
778      * ISO 15924 script code
779      * @stable ICU 4.6
780      */
781     public static final int BASSA_VAH                     = 134;/* Bass */
782     /**
783      * ISO 15924 script code
784      * @stable ICU 54
785      */
786     public static final int DUPLOYAN                      = 135;/* Dupl */
787     /**
788      * Typo, use DUPLOYAN
789      * @deprecated ICU 54
790      */
791     @Deprecated
792     public static final int DUPLOYAN_SHORTAND             = DUPLOYAN;
793     /**
794      * ISO 15924 script code
795      * @stable ICU 4.6
796      */
797     public static final int ELBASAN                       = 136;/* Elba */
798     /**
799      * ISO 15924 script code
800      * @stable ICU 4.6
801      */
802     public static final int GRANTHA                       = 137;/* Gran */
803     /**
804      * ISO 15924 script code
805      * @stable ICU 4.6
806      */
807     public static final int KPELLE                        = 138;/* Kpel */
808     /**
809      * ISO 15924 script code
810      * @stable ICU 4.6
811      */
812     public static final int LOMA                          = 139;/* Loma */
813     /**
814      * Mende Kikakui
815      * ISO 15924 script code
816      * @stable ICU 4.6
817      */
818     public static final int MENDE                         = 140;/* Mend */
819     /**
820      * ISO 15924 script code
821      * @stable ICU 4.6
822      */
823     public static final int MEROITIC_CURSIVE              = 141;/* Merc */
824     /**
825      * ISO 15924 script code
826      * @stable ICU 4.6
827      */
828     public static final int OLD_NORTH_ARABIAN             = 142;/* Narb */
829     /**
830      * ISO 15924 script code
831      * @stable ICU 4.6
832      */
833     public static final int NABATAEAN                     = 143;/* Nbat */
834     /**
835      * ISO 15924 script code
836      * @stable ICU 4.6
837      */
838     public static final int PALMYRENE                     = 144;/* Palm */
839     /**
840      * ISO 15924 script code
841      * @stable ICU 54
842      */
843     public static final int KHUDAWADI                     = 145;/* Sind */
844     /**
845      * ISO 15924 script code
846      * @stable ICU 4.6
847      */
848     public static final int SINDHI = KHUDAWADI;
849     /**
850      * ISO 15924 script code
851      * @stable ICU 4.6
852      */
853     public static final int WARANG_CITI                   = 146;/* Wara */
854 
855     /**
856      * ISO 15924 script code
857      * @stable ICU 4.8
858      */
859     public static final int AFAKA = 147;/* Afak */
860     /**
861      * ISO 15924 script code
862      * @stable ICU 4.8
863      */
864     public static final int JURCHEN = 148;/* Jurc */
865     /**
866      * ISO 15924 script code
867      * @stable ICU 4.8
868      */
869     public static final int MRO = 149;/* Mroo */
870     /**
871      * ISO 15924 script code
872      * @stable ICU 4.8
873      */
874     public static final int NUSHU = 150;/* Nshu */
875     /**
876      * ISO 15924 script code
877      * @stable ICU 4.8
878      */
879     public static final int SHARADA = 151;/* Shrd */
880     /**
881      * ISO 15924 script code
882      * @stable ICU 4.8
883      */
884     public static final int SORA_SOMPENG = 152;/* Sora */
885     /**
886      * ISO 15924 script code
887      * @stable ICU 4.8
888      */
889     public static final int TAKRI = 153;/* Takr */
890     /**
891      * ISO 15924 script code
892      * @stable ICU 4.8
893      */
894     public static final int TANGUT = 154;/* Tang */
895     /**
896      * ISO 15924 script code
897      * @stable ICU 4.8
898      */
899     public static final int WOLEAI = 155;/* Wole */
900 
901     /**
902      * ISO 15924 script code
903      * @stable ICU 49
904      */
905     public static final int ANATOLIAN_HIEROGLYPHS = 156;/* Hluw */
906     /**
907      * ISO 15924 script code
908      * @stable ICU 49
909      */
910     public static final int KHOJKI = 157;/* Khoj */
911     /**
912      * ISO 15924 script code
913      * @stable ICU 49
914      */
915     public static final int TIRHUTA = 158;/* Tirh */
916     /**
917      * ISO 15924 script code
918      * @stable ICU 52
919      */
920     public static final int CAUCASIAN_ALBANIAN = 159; /* Aghb */
921     /**
922      * ISO 15924 script code
923      * @stable ICU 52
924      */
925     public static final int MAHAJANI = 160; /* Mahj */
926 
927     /**
928      * ISO 15924 script code
929      * @stable ICU 54
930      */
931     public static final int AHOM = 161; /* Ahom */
932     /**
933      * ISO 15924 script code
934      * @stable ICU 54
935      */
936     public static final int HATRAN = 162; /* Hatr */
937     /**
938      * ISO 15924 script code
939      * @stable ICU 54
940      */
941     public static final int MODI = 163; /* Modi */
942     /**
943      * ISO 15924 script code
944      * @stable ICU 54
945      */
946     public static final int MULTANI = 164; /* Mult */
947     /**
948      * ISO 15924 script code
949      * @stable ICU 54
950      */
951     public static final int PAU_CIN_HAU = 165; /* Pauc */
952     /**
953      * ISO 15924 script code
954      * @stable ICU 54
955      */
956     public static final int SIDDHAM = 166; /* Sidd */
957 
958     /**
959      * ISO 15924 script code
960      * @stable ICU 58
961      */
962     public static final int ADLAM = 167; /* Adlm */
963     /**
964      * ISO 15924 script code
965      * @stable ICU 58
966      */
967     public static final int BHAIKSUKI = 168; /* Bhks */
968     /**
969      * ISO 15924 script code
970      * @stable ICU 58
971      */
972     public static final int MARCHEN = 169; /* Marc */
973     /**
974      * ISO 15924 script code
975      * @stable ICU 58
976      */
977     public static final int NEWA = 170; /* Newa */
978     /**
979      * ISO 15924 script code
980      * @stable ICU 58
981      */
982     public static final int OSAGE = 171; /* Osge */
983 
984     /**
985      * ISO 15924 script code
986      * @stable ICU 58
987      */
988     public static final int HAN_WITH_BOPOMOFO = 172; /* Hanb */
989     /**
990      * ISO 15924 script code
991      * @stable ICU 58
992      */
993     public static final int JAMO = 173; /* Jamo */
    /**
     * ISO 15924 script code
     * @stable ICU 58
     */
    public static final int SYMBOLS_EMOJI = 174; /* Zsye */

    /**
     * ISO 15924 script code
     * @stable ICU 60
     */
    public static final int MASARAM_GONDI = 175; /* Gonm */
    /**
     * ISO 15924 script code
     * @stable ICU 60
     */
    public static final int SOYOMBO = 176; /* Soyo */
    /**
     * ISO 15924 script code
     * @stable ICU 60
     */
    public static final int ZANABAZAR_SQUARE = 177; /* Zanb */

    /**
     * ISO 15924 script code
     * @stable ICU 62
     */
    public static final int DOGRA = 178; /* Dogr */
    /**
     * ISO 15924 script code
     * @stable ICU 62
     */
    public static final int GUNJALA_GONDI = 179; /* Gong */
    /**
     * ISO 15924 script code
     * @stable ICU 62
     */
    public static final int MAKASAR = 180; /* Maka */
    /**
     * ISO 15924 script code
     * @stable ICU 62
     */
    public static final int MEDEFAIDRIN = 181; /* Medf */
    /**
     * ISO 15924 script code
     * @stable ICU 62
     */
    public static final int HANIFI_ROHINGYA = 182; /* Rohg */
    /**
     * ISO 15924 script code
     * @stable ICU 62
     */
    public static final int SOGDIAN = 183; /* Sogd */
    /**
     * ISO 15924 script code
     * @stable ICU 62
     */
    public static final int OLD_SOGDIAN = 184; /* Sogo */

    /**
     * ISO 15924 script code
     * @stable ICU 64
     */
    public static final int ELYMAIC = 185; /* Elym */
    /**
     * ISO 15924 script code
     * @stable ICU 64
     */
    public static final int NYIAKENG_PUACHUE_HMONG = 186; /* Hmnp */
    /**
     * ISO 15924 script code
     * @stable ICU 64
     */
    public static final int NANDINAGARI = 187; /* Nand */
    /**
     * ISO 15924 script code
     * @stable ICU 64
     */
    public static final int WANCHO = 188; /* Wcho */

    /**
     * ISO 15924 script code
     * @stable ICU 66
     */
    public static final int CHORASMIAN = 189; /* Chrs */
    /**
     * ISO 15924 script code
     * @stable ICU 66
     */
    public static final int DIVES_AKURU = 190; /* Diak */
    /**
     * ISO 15924 script code
     * @stable ICU 66
     */
    public static final int KHITAN_SMALL_SCRIPT = 191; /* Kits */
    /**
     * ISO 15924 script code
     * @stable ICU 66
     */
    public static final int YEZIDI = 192; /* Yezi */

    /**
     * ISO 15924 script code
     * @stable ICU 70
     */
    public static final int CYPRO_MINOAN = 193; /* Cpmn */
    /**
     * ISO 15924 script code
     * @stable ICU 70
     */
    public static final int OLD_UYGHUR = 194; /* Ougr */
    /**
     * ISO 15924 script code
     * @stable ICU 70
     */
    public static final int TANGSA = 195; /* Tnsa */
    /**
     * ISO 15924 script code
     * @stable ICU 70
     */
    public static final int TOTO = 196; /* Toto */
    /**
     * ISO 15924 script code
     * @stable ICU 70
     */
    public static final int VITHKUQI = 197; /* Vith */

    /**
     * One more than the highest normal UScript code.
     * The highest value is available via UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT).
     *
     * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
     */
    @Deprecated
    public static final int CODE_LIMIT   = 198;
1071 
getCodesFromLocale(ULocale locale)1072     private static int[] getCodesFromLocale(ULocale locale) {
1073         // Multi-script languages, equivalent to the LocaleScript data
1074         // that we used to load from locale resource bundles.
1075         String lang = locale.getLanguage();
1076         if(lang.equals("ja")) {
1077             return new int[] { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN };
1078         }
1079         if(lang.equals("ko")) {
1080             return new int[] { UScript.HANGUL, UScript.HAN };
1081         }
1082         String script = locale.getScript();
1083         if(lang.equals("zh") && script.equals("Hant")) {
1084             return new int[] { UScript.HAN, UScript.BOPOMOFO };
1085         }
1086         // Explicit script code.
1087         if(script.length() != 0) {
1088             int scriptCode = UScript.getCodeFromName(script);
1089             if(scriptCode != UScript.INVALID_CODE) {
1090                 if(scriptCode == UScript.SIMPLIFIED_HAN || scriptCode == UScript.TRADITIONAL_HAN) {
1091                     scriptCode = UScript.HAN;
1092                 }
1093                 return new int[] { scriptCode };
1094             }
1095         }
1096         return null;
1097     }
1098 
1099     /**
1100      * Helper function to find the code from locale.
1101      * @param locale The locale.
1102      */
findCodeFromLocale(ULocale locale)1103     private static int[] findCodeFromLocale(ULocale locale) {
1104         int[] result = getCodesFromLocale(locale);
1105         if(result != null) {
1106             return result;
1107         }
1108         ULocale likely = ULocale.addLikelySubtags(locale);
1109         return getCodesFromLocale(likely);
1110     }
1111 
1112     /**
1113      * Gets a script codes associated with the given locale or ISO 15924 abbreviation or name.
1114      * Returns MALAYAM given "Malayam" OR "Mlym".
1115      * Returns LATIN given "en" OR "en_US"
1116      * @param locale Locale
1117      * @return The script codes array. null if the the code cannot be found.
1118      * @stable ICU 2.4
1119      */
getCode(Locale locale)1120     public static final int[] getCode(Locale locale){
1121         return findCodeFromLocale(ULocale.forLocale(locale));
1122     }
1123     /**
1124      * Gets a script codes associated with the given locale or ISO 15924 abbreviation or name.
1125      * Returns MALAYAM given "Malayam" OR "Mlym".
1126      * Returns LATIN given "en" OR "en_US"
1127      * @param locale ULocale
1128      * @return The script codes array. null if the the code cannot be found.
1129      * @stable ICU 3.0
1130      */
getCode(ULocale locale)1131     public static final int[] getCode(ULocale locale){
1132         return findCodeFromLocale(locale);
1133     }
1134     /**
1135      * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
1136      * Returns MALAYAM given "Malayam" OR "Mlym".
1137      * Returns LATIN given "en" OR "en_US"
1138      *
1139      * <p>Note: To search by short or long script alias only, use
1140      * {@link #getCodeFromName(String)} instead.
1141      * That does a fast lookup with no access of the locale data.
1142      *
1143      * @param nameOrAbbrOrLocale name of the script or ISO 15924 code or locale
1144      * @return The script codes array. null if the the code cannot be found.
1145      * @stable ICU 2.4
1146      */
getCode(String nameOrAbbrOrLocale)1147     public static final int[] getCode(String nameOrAbbrOrLocale) {
1148         boolean triedCode = false;
1149         if (nameOrAbbrOrLocale.indexOf('_') < 0 && nameOrAbbrOrLocale.indexOf('-') < 0) {
1150             int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale);
1151             if (propNum != UProperty.UNDEFINED) {
1152                 return new int[] {propNum};
1153             }
1154             triedCode = true;
1155         }
1156         int[] scripts = findCodeFromLocale(new ULocale(nameOrAbbrOrLocale));
1157         if (scripts != null) {
1158             return scripts;
1159         }
1160         if (!triedCode) {
1161             int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbrOrLocale);
1162             if (propNum != UProperty.UNDEFINED) {
1163                 return new int[] {propNum};
1164             }
1165         }
1166         return null;
1167     }
1168 
1169     /**
1170      * Returns the script code associated with the given Unicode script property alias
1171      * (name or abbreviation).
1172      * Short aliases are ISO 15924 script codes.
1173      * Returns MALAYAM given "Malayam" OR "Mlym".
1174      *
1175      * @param nameOrAbbr name of the script or ISO 15924 code
1176      * @return The script code value, or INVALID_CODE if the code cannot be found.
1177      * @stable ICU 54
1178      */
getCodeFromName(String nameOrAbbr)1179     public static final int getCodeFromName(String nameOrAbbr) {
1180         int propNum = UCharacter.getPropertyValueEnumNoThrow(UProperty.SCRIPT, nameOrAbbr);
1181         return propNum == UProperty.UNDEFINED ? INVALID_CODE : propNum;
1182     }
1183 
1184     /**
1185      * Gets the script code associated with the given codepoint.
1186      * Returns UScript.MALAYAM given 0x0D02
1187      * @param codepoint UChar32 codepoint
1188      * @return The script code
1189      * @stable ICU 2.4
1190      */
getScript(int codepoint)1191     public static final int getScript(int codepoint){
1192         if (codepoint >= UCharacter.MIN_VALUE & codepoint <= UCharacter.MAX_VALUE) {
1193             int scriptX=UCharacterProperty.INSTANCE.getAdditional(codepoint, 0)&UCharacterProperty.SCRIPT_X_MASK;
1194             int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX);
1195             if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
1196                 return codeOrIndex;
1197             } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_INHERITED) {
1198                 return UScript.COMMON;
1199             } else if(scriptX<UCharacterProperty.SCRIPT_X_WITH_OTHER) {
1200                 return UScript.INHERITED;
1201             } else {
1202                 return UCharacterProperty.INSTANCE.m_scriptExtensions_[codeOrIndex];
1203             }
1204         }else{
1205             throw new IllegalArgumentException(Integer.toString(codepoint));
1206         }
1207     }
1208 
1209     /**
1210      * Do the Script_Extensions of code point c contain script sc?
1211      * If c does not have explicit Script_Extensions, then this tests whether
1212      * c has the Script property value sc.
1213      *
1214      * <p>Some characters are commonly used in multiple scripts.
1215      * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
1216      *
1217      * @param c code point
1218      * @param sc script code
1219      * @return true if sc is in Script_Extensions(c)
1220      * @stable ICU 49
1221      */
hasScript(int c, int sc)1222     public static final boolean hasScript(int c, int sc) {
1223         int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK;
1224         int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX);
1225         if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
1226             return sc==codeOrIndex;
1227         }
1228 
1229         char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_;
1230         int scx=codeOrIndex;  // index into scriptExtensions
1231         if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) {
1232             scx=scriptExtensions[scx+1];
1233         }
1234         if(sc>0x7fff) {
1235             // Guard against bogus input that would
1236             // make us go past the Script_Extensions terminator.
1237             return false;
1238         }
1239         while(sc>scriptExtensions[scx]) {
1240             ++scx;
1241         }
1242         return sc==(scriptExtensions[scx]&0x7fff);
1243     }
1244 
1245     /**
1246      * Sets code point c's Script_Extensions as script code integers into the output BitSet.
1247      * <ul>
1248      * <li>If c does have Script_Extensions, then the return value is
1249      * the negative number of Script_Extensions codes (= -set.cardinality());
1250      * in this case, the Script property value
1251      * (normally Common or Inherited) is not included in the set.
1252      * <li>If c does not have Script_Extensions, then the one Script code is put into the set
1253      * and also returned.
1254      * <li>If c is not a valid code point, then the one {@link #UNKNOWN} code is put into the set
1255      * and also returned.
1256      * </ul>
1257      * In other words, if the return value is non-negative, it is c's single Script code
1258      * and the set contains exactly this Script code.
1259      * If the return value is -n, then the set contains c's n&gt;=2 Script_Extensions script codes.
1260      *
1261      * <p>Some characters are commonly used in multiple scripts.
1262      * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
1263      *
1264      * @param c code point
1265      * @param set set of script code integers; will be cleared, then bits are set
1266      *            corresponding to c's Script_Extensions
1267      * @return negative number of script codes in c's Script_Extensions,
1268      *         or the non-negative single Script value
1269      * @stable ICU 49
1270      */
getScriptExtensions(int c, BitSet set)1271     public static final int getScriptExtensions(int c, BitSet set) {
1272         set.clear();
1273         int scriptX=UCharacterProperty.INSTANCE.getAdditional(c, 0)&UCharacterProperty.SCRIPT_X_MASK;
1274         int codeOrIndex=UCharacterProperty.mergeScriptCodeOrIndex(scriptX);
1275         if(scriptX<UCharacterProperty.SCRIPT_X_WITH_COMMON) {
1276             set.set(codeOrIndex);
1277             return codeOrIndex;
1278         }
1279 
1280         char[] scriptExtensions=UCharacterProperty.INSTANCE.m_scriptExtensions_;
1281         int scx=codeOrIndex;  // index into scriptExtensions
1282         if(scriptX>=UCharacterProperty.SCRIPT_X_WITH_OTHER) {
1283             scx=scriptExtensions[scx+1];
1284         }
1285         int length=0;
1286         int sx;
1287         do {
1288             sx=scriptExtensions[scx++];
1289             set.set(sx&0x7fff);
1290             ++length;
1291         } while(sx<0x8000);
1292         // length==set.cardinality()
1293         return -length;
1294     }
1295 
1296     /**
1297      * Returns the long Unicode script name, if there is one.
1298      * Otherwise returns the 4-letter ISO 15924 script code.
1299      * Returns "Malayam" given MALAYALAM.
1300      *
1301      * @param scriptCode int script code
1302      * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code
1303      * @throws IllegalArgumentException if the script code is not valid
1304      * @stable ICU 2.4
1305      */
getName(int scriptCode)1306     public static final String getName(int scriptCode){
1307         return UCharacter.getPropertyValueName(UProperty.SCRIPT,
1308                 scriptCode,
1309                 UProperty.NameChoice.LONG);
1310     }
1311 
1312     /**
1313      * Returns the 4-letter ISO 15924 script code,
1314      * which is the same as the short Unicode script name if Unicode has names for the script.
1315      * Returns "Mlym" given MALAYALAM.
1316      *
1317      * @param scriptCode int script code
1318      * @return short script name (4-letter code)
1319      * @throws IllegalArgumentException if the script code is not valid
1320      * @stable ICU 2.4
1321      */
getShortName(int scriptCode)1322     public static final String getShortName(int scriptCode){
1323         return UCharacter.getPropertyValueName(UProperty.SCRIPT,
1324                 scriptCode,
1325                 UProperty.NameChoice.SHORT);
1326     }
1327 
    /**
     * Script metadata (script properties).
     * See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
     *
     * <p>Each script's properties are packed into one int: a sample character,
     * a usage value and single-bit flags, laid out as described by the
     * comments on the constants below. The table is indexed by script code.
     */
    private static final class ScriptMetadata {
        // 0 = NOT_ENCODED, no sample character, default false script properties.
        // Bits 20.. 0: sample character

        // Bits 23..21: usage
        private static final int UNKNOWN = 1 << 21;
        private static final int EXCLUSION = 2 << 21;
        private static final int LIMITED_USE = 3 << 21;
        // private static final int ASPIRATIONAL = 4 << 21; -- not used any more since Unicode 10
        private static final int RECOMMENDED = 5 << 21;

        // Bits 31..24: Single-bit flags
        private static final int RTL = 1 << 24;
        private static final int LB_LETTERS = 1 << 25;
        private static final int CASED = 1 << 26;

        // One packed entry per script code, in script code order (the array index
        // is the script code, see getScriptProps). A plain 0 entry means NOT_ENCODED.
        // The contents are generated; replace the whole copy-paste section
        // rather than editing single entries.
        private static final int SCRIPT_PROPS[] = {
            // Begin copy-paste output from
            // tools/trunk/unicode/py/parsescriptmetadata.py
            // or from icu/trunk/source/common/uscript_props.cpp
            0x0040 | RECOMMENDED,  // Zyyy
            0x0308 | RECOMMENDED,  // Zinh
            0x0628 | RECOMMENDED | RTL,  // Arab
            0x0531 | RECOMMENDED | CASED,  // Armn
            0x0995 | RECOMMENDED,  // Beng
            0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
            0x13C4 | LIMITED_USE | CASED,  // Cher
            0x03E2 | EXCLUSION | CASED,  // Copt
            0x042F | RECOMMENDED | CASED,  // Cyrl
            0x10414 | EXCLUSION | CASED,  // Dsrt
            0x0905 | RECOMMENDED,  // Deva
            0x12A0 | RECOMMENDED,  // Ethi
            0x10D3 | RECOMMENDED,  // Geor
            0x10330 | EXCLUSION,  // Goth
            0x03A9 | RECOMMENDED | CASED,  // Grek
            0x0A95 | RECOMMENDED,  // Gujr
            0x0A15 | RECOMMENDED,  // Guru
            0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
            0xAC00 | RECOMMENDED,  // Hang
            0x05D0 | RECOMMENDED | RTL,  // Hebr
            0x304B | RECOMMENDED | LB_LETTERS,  // Hira
            0x0C95 | RECOMMENDED,  // Knda
            0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
            0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
            0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
            0x004C | RECOMMENDED | CASED,  // Latn
            0x0D15 | RECOMMENDED,  // Mlym
            0x1826 | EXCLUSION,  // Mong
            0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
            0x168F | EXCLUSION,  // Ogam
            0x10300 | EXCLUSION,  // Ital
            0x0B15 | RECOMMENDED,  // Orya
            0x16A0 | EXCLUSION,  // Runr
            0x0D85 | RECOMMENDED,  // Sinh
            0x0710 | LIMITED_USE | RTL,  // Syrc
            0x0B95 | RECOMMENDED,  // Taml
            0x0C15 | RECOMMENDED,  // Telu
            0x078C | RECOMMENDED | RTL,  // Thaa
            0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
            0x0F40 | RECOMMENDED,  // Tibt
            0x14C0 | LIMITED_USE,  // Cans
            0xA288 | LIMITED_USE | LB_LETTERS,  // Yiii
            0x1703 | EXCLUSION,  // Tglg
            0x1723 | EXCLUSION,  // Hano
            0x1743 | EXCLUSION,  // Buhd
            0x1763 | EXCLUSION,  // Tagb
            0x280E | UNKNOWN,  // Brai
            0x10800 | EXCLUSION | RTL,  // Cprt
            0x1900 | LIMITED_USE,  // Limb
            0x10000 | EXCLUSION,  // Linb
            0x10480 | EXCLUSION,  // Osma
            0x10450 | EXCLUSION,  // Shaw
            0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
            0x10380 | EXCLUSION,  // Ugar
            0,
            0x1A00 | EXCLUSION,  // Bugi
            0x2C00 | EXCLUSION | CASED,  // Glag
            0x10A00 | EXCLUSION | RTL,  // Khar
            0xA800 | LIMITED_USE,  // Sylo
            0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
            0x2D30 | LIMITED_USE,  // Tfng
            0x103A0 | EXCLUSION,  // Xpeo
            0x1B05 | LIMITED_USE,  // Bali
            0x1BC0 | LIMITED_USE,  // Batk
            0,
            0x11005 | EXCLUSION,  // Brah
            0xAA00 | LIMITED_USE,  // Cham
            0,
            0,
            0,
            0,
            0x13153 | EXCLUSION,  // Egyp
            0,
            0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
            0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
            0x16B1C | EXCLUSION,  // Hmng
            0x10CA1 | EXCLUSION | RTL | CASED,  // Hung
            0,
            0xA984 | LIMITED_USE,  // Java
            0xA90A | LIMITED_USE,  // Kali
            0,
            0,
            0x1C00 | LIMITED_USE,  // Lepc
            0x10647 | EXCLUSION,  // Lina
            0x0840 | LIMITED_USE | RTL,  // Mand
            0,
            0x10980 | EXCLUSION | RTL,  // Mero
            0x07CA | LIMITED_USE | RTL,  // Nkoo
            0x10C00 | EXCLUSION | RTL,  // Orkh
            0x1036B | EXCLUSION,  // Perm
            0xA840 | EXCLUSION,  // Phag
            0x10900 | EXCLUSION | RTL,  // Phnx
            0x16F00 | LIMITED_USE,  // Plrd
            0,
            0,
            0,
            0,
            0,
            0,
            0xA549 | LIMITED_USE,  // Vaii
            0,
            0x12000 | EXCLUSION,  // Xsux
            0,
            0xFDD0 | UNKNOWN,  // Zzzz
            0x102A0 | EXCLUSION,  // Cari
            0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
            0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
            0x10280 | EXCLUSION,  // Lyci
            0x10920 | EXCLUSION | RTL,  // Lydi
            0x1C5A | LIMITED_USE,  // Olck
            0xA930 | EXCLUSION,  // Rjng
            0xA882 | LIMITED_USE,  // Saur
            0x1D850 | EXCLUSION,  // Sgnw
            0x1B83 | LIMITED_USE,  // Sund
            0,
            0xABC0 | LIMITED_USE,  // Mtei
            0x10840 | EXCLUSION | RTL,  // Armi
            0x10B00 | EXCLUSION | RTL,  // Avst
            0x11103 | LIMITED_USE,  // Cakm
            0xAC00 | RECOMMENDED,  // Kore
            0x11083 | EXCLUSION,  // Kthi
            0x10AD8 | EXCLUSION | RTL,  // Mani
            0x10B60 | EXCLUSION | RTL,  // Phli
            0x10B8F | EXCLUSION | RTL,  // Phlp
            0,
            0x10B40 | EXCLUSION | RTL,  // Prti
            0x0800 | EXCLUSION | RTL,  // Samr
            0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
            0,
            0,
            0xA6A0 | LIMITED_USE,  // Bamu
            0xA4D0 | LIMITED_USE,  // Lisu
            0,
            0x10A60 | EXCLUSION | RTL,  // Sarb
            0x16AE6 | EXCLUSION,  // Bass
            0x1BC20 | EXCLUSION,  // Dupl
            0x10500 | EXCLUSION,  // Elba
            0x11315 | EXCLUSION,  // Gran
            0,
            0,
            0x1E802 | EXCLUSION | RTL,  // Mend
            0x109A0 | EXCLUSION | RTL,  // Merc
            0x10A95 | EXCLUSION | RTL,  // Narb
            0x10896 | EXCLUSION | RTL,  // Nbat
            0x10873 | EXCLUSION | RTL,  // Palm
            0x112BE | EXCLUSION,  // Sind
            0x118B4 | EXCLUSION | CASED,  // Wara
            0,
            0,
            0x16A4F | EXCLUSION,  // Mroo
            0x1B1C4 | EXCLUSION | LB_LETTERS,  // Nshu
            0x11183 | EXCLUSION,  // Shrd
            0x110D0 | EXCLUSION,  // Sora
            0x11680 | EXCLUSION,  // Takr
            0x18229 | EXCLUSION | LB_LETTERS,  // Tang
            0,
            0x14400 | EXCLUSION,  // Hluw
            0x11208 | EXCLUSION,  // Khoj
            0x11484 | EXCLUSION,  // Tirh
            0x10537 | EXCLUSION,  // Aghb
            0x11152 | EXCLUSION,  // Mahj
            0x11717 | EXCLUSION | LB_LETTERS,  // Ahom
            0x108F4 | EXCLUSION | RTL,  // Hatr
            0x1160E | EXCLUSION,  // Modi
            0x1128F | EXCLUSION,  // Mult
            0x11AC0 | EXCLUSION,  // Pauc
            0x1158E | EXCLUSION,  // Sidd
            0x1E909 | LIMITED_USE | RTL | CASED,  // Adlm
            0x11C0E | EXCLUSION,  // Bhks
            0x11C72 | EXCLUSION,  // Marc
            0x11412 | LIMITED_USE,  // Newa
            0x104B5 | LIMITED_USE | CASED,  // Osge
            0x5B57 | RECOMMENDED | LB_LETTERS,  // Hanb
            0x1112 | RECOMMENDED,  // Jamo
            0,
            0x11D10 | EXCLUSION,  // Gonm
            0x11A5C | EXCLUSION,  // Soyo
            0x11A0B | EXCLUSION,  // Zanb
            0x1180B | EXCLUSION,  // Dogr
            0x11D71 | LIMITED_USE,  // Gong
            0x11EE5 | EXCLUSION,  // Maka
            0x16E40 | EXCLUSION | CASED,  // Medf
            0x10D12 | LIMITED_USE | RTL,  // Rohg
            0x10F42 | EXCLUSION | RTL,  // Sogd
            0x10F19 | EXCLUSION | RTL,  // Sogo
            0x10FF1 | EXCLUSION | RTL,  // Elym
            0x1E108 | LIMITED_USE,  // Hmnp
            0x119CE | EXCLUSION,  // Nand
            0x1E2E1 | LIMITED_USE,  // Wcho
            0x10FBF | EXCLUSION | RTL,  // Chrs
            0x1190C | EXCLUSION,  // Diak
            0x18C65 | EXCLUSION | LB_LETTERS,  // Kits
            0x10E88 | EXCLUSION | RTL,  // Yezi
            0x12FE5 | EXCLUSION,  // Cpmn
            0x10F7C | EXCLUSION | RTL,  // Ougr
            0x16ABC | EXCLUSION,  // Tnsa
            0x1E290 | EXCLUSION,  // Toto
            0x10582 | EXCLUSION | CASED,  // Vith
            // End copy-paste from parsescriptmetadata.py
        };

        /**
         * Returns the packed properties of a script.
         *
         * @param script script code, used as the index into the table
         * @return the packed properties, or 0 (the NOT_ENCODED value) if the
         *         script code is negative or beyond the end of the table
         */
        private static final int getScriptProps(int script) {
            if (0 <= script && script < SCRIPT_PROPS.length) {
                return SCRIPT_PROPS[script];
            } else {
                return 0;
            }
        }
    }
1561 
    /**
     * Script usage constants, as returned by {@link UScript#getUsage(int)}.
     * See UAX #31 Unicode Identifier and Pattern Syntax.
     * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
     *
     * @stable ICU 51
     */
    public enum ScriptUsage {
        // Implementation note: getUsage(int) selects a constant by its position in
        // values(), using the 3-bit usage field of the script metadata as the index.
        // Do not reorder, remove or insert constants.

        /**
         * Not encoded in Unicode.
         * @stable ICU 51
         */
        NOT_ENCODED,
        /**
         * Unknown script usage.
         * @stable ICU 51
         */
        UNKNOWN,
        /**
         * Candidate for Exclusion from Identifiers.
         * @stable ICU 51
         */
        EXCLUDED,
        /**
         * Limited Use script.
         * @stable ICU 51
         */
        LIMITED_USE,
        /**
         * Aspirational Use script.
         * The internal script metadata table in this file notes that this usage
         * is not used any more since Unicode 10.
         * @stable ICU 51
         */
        ASPIRATIONAL,
        /**
         * Recommended script.
         * @stable ICU 51
         */
        RECOMMENDED
    }
    // ScriptUsage.values() fetched once and kept, so that getUsage(int) can index
    // into it with the usage bits instead of calling values() on every lookup.
    private static final ScriptUsage[] usageValues = ScriptUsage.values();
1602 
1603     /**
1604      * Returns the script sample character string.
1605      * This string normally consists of one code point but might be longer.
1606      * The string is empty if the script is not encoded.
1607      *
1608      * @param script script code
1609      * @return the sample character string
1610      * @stable ICU 51
1611      */
getSampleString(int script)1612     public static final String getSampleString(int script) {
1613         int sampleChar = ScriptMetadata.getScriptProps(script) & 0x1fffff;
1614         if(sampleChar != 0) {
1615             return new StringBuilder().appendCodePoint(sampleChar).toString();
1616         }
1617         return "";
1618     }
1619 
1620     /**
1621      * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
1622      * Returns {@link ScriptUsage#NOT_ENCODED} if the script is not encoded in Unicode.
1623      *
1624      * @param script script code
1625      * @return script usage
1626      * @see ScriptUsage
1627      * @stable ICU 51
1628      */
getUsage(int script)1629     public static final ScriptUsage getUsage(int script) {
1630         return usageValues[(ScriptMetadata.getScriptProps(script) >> 21) & 7];
1631     }
1632 
1633     /**
1634      * Returns true if the script is written right-to-left.
1635      * For example, Arab and Hebr.
1636      *
1637      * @param script script code
1638      * @return true if the script is right-to-left
1639      * @stable ICU 51
1640      */
isRightToLeft(int script)1641     public static final boolean isRightToLeft(int script) {
1642         return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.RTL) != 0;
1643     }
1644 
1645     /**
1646      * Returns true if the script allows line breaks between letters (excluding hyphenation).
1647      * Such a script typically requires dictionary-based line breaking.
1648      * For example, Hani and Thai.
1649      *
1650      * @param script script code
1651      * @return true if the script allows line breaks between letters
1652      * @stable ICU 51
1653      */
breaksBetweenLetters(int script)1654     public static final boolean breaksBetweenLetters(int script) {
1655         return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.LB_LETTERS) != 0;
1656     }
1657 
1658     /**
1659      * Returns true if in modern (or most recent) usage of the script case distinctions are customary.
1660      * For example, Latn and Cyrl.
1661      *
1662      * @param script script code
1663      * @return true if the script is cased
1664      * @stable ICU 51
1665      */
isCased(int script)1666     public static final boolean isCased(int script) {
1667         return (ScriptMetadata.getScriptProps(script) & ScriptMetadata.CASED) != 0;
1668     }
1669 
1670     ///CLOVER:OFF
1671     /**
1672      *  Private Constructor. Never default construct
1673      */
UScript()1674     private UScript(){}
1675     ///CLOVER:ON
1676 }
1677