• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html#License
3 /**
4 *******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9 
10 package com.ibm.icu.dev.test.lang;
11 
12 import java.util.BitSet;
13 import java.util.Locale;
14 
15 import org.junit.Test;
16 
17 import com.ibm.icu.dev.test.TestFmwk;
18 import com.ibm.icu.lang.UProperty;
19 import com.ibm.icu.lang.UScript;
20 import com.ibm.icu.lang.UScript.ScriptUsage;
21 import com.ibm.icu.text.UnicodeSet;
22 import com.ibm.icu.util.ULocale;
23 
24 public class TestUScript extends TestFmwk {
25 
26     /**
27     * Constructor
28     */
TestUScript()29     public TestUScript()
30     {
31     }
32 
scriptsToString(int[] scripts)33     private static String scriptsToString(int[] scripts) {
34         if(scripts == null) {
35             return "null";
36         }
37         StringBuilder sb = new StringBuilder();
38         for(int script : scripts) {
39             if(sb.length() > 0) {
40                 sb.append(' ');
41             }
42             sb.append(UScript.getShortName(script));
43         }
44         return sb.toString();
45     }
46 
assertEqualScripts(String msg, int[] expectedScripts, int[] actualScripts)47     private void assertEqualScripts(String msg, int[] expectedScripts, int[] actualScripts) {
48         assertEquals(msg, scriptsToString(expectedScripts), scriptsToString(actualScripts));
49     }
50 
51     @Test
TestLocaleGetCode()52     public void TestLocaleGetCode(){
53         final ULocale[] testNames={
54         /* test locale */
55         new ULocale("en"), new ULocale("en_US"),
56         new ULocale("sr"), new ULocale("ta") ,
57         new ULocale("te_IN"),
58         new ULocale("hi"),
59         new ULocale("he"), new ULocale("ar"),
60         new ULocale("abcde"),
61         new ULocale("abcde_cdef"),
62         new ULocale("iw")
63         };
64         final int[] expected ={
65                 /* locales should return */
66                 UScript.LATIN, UScript.LATIN,
67                 UScript.CYRILLIC, UScript.TAMIL,
68                 UScript.TELUGU,UScript.DEVANAGARI,
69                 UScript.HEBREW, UScript.ARABIC,
70                 UScript.INVALID_CODE,UScript.INVALID_CODE,
71                 UScript.HEBREW
72         };
73         int i =0;
74         int numErrors =0;
75 
76         for( ; i<testNames.length; i++){
77             int[] code = UScript.getCode(testNames[i]);
78 
79             if(code==null){
80                 if(expected[i]!=UScript.INVALID_CODE){
81                     logln("Error getting script code Got: null" + " Expected: " +expected[i] +" for name "+testNames[i]);
82                     numErrors++;
83                 }
84                 // getCode returns null if the code could not be found
85                 continue;
86             }
87             if((code[0] != expected[i])){
88                 logln("Error getting script code Got: " +code[0] + " Expected: " +expected[i] +" for name "+testNames[i]);
89                 numErrors++;
90             }
91         }
92         reportDataErrors(numErrors);
93 
94         //
95         ULocale defaultLoc = ULocale.getDefault();
96         ULocale esperanto = new ULocale("eo_DE");
97         ULocale.setDefault(esperanto);
98         int[] code = UScript.getCode(esperanto);
99         if(code != null){
100             if( code[0] != UScript.LATIN){
101                 errln("Did not get the expected script code for Esperanto");
102             }
103         }else{
104             warnln("Could not load the locale data.");
105         }
106         ULocale.setDefault(defaultLoc);
107 
108         // Should work regardless of whether we have locale data for the language.
109         assertEqualScripts("tg script: Cyrl",  // Tajik
110                 new int[] { UScript.CYRILLIC },
111                 UScript.getCode(new ULocale("tg")));
112         assertEqualScripts("xsr script: Deva",  // Sherpa
113                 new int[] { UScript.DEVANAGARI },
114                 UScript.getCode(new ULocale("xsr")));
115 
116         // Multi-script languages.
117         assertEqualScripts("ja scripts: Kana Hira Hani",
118                 new int[] { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN },
119                 UScript.getCode(ULocale.JAPANESE));
120         assertEqualScripts("ko scripts: Hang Hani",
121                 new int[] { UScript.HANGUL, UScript.HAN },
122                 UScript.getCode(ULocale.KOREAN));
123         assertEqualScripts("zh script: Hani",
124                 new int[] { UScript.HAN },
125                 UScript.getCode(ULocale.CHINESE));
126         assertEqualScripts("zh-Hant scripts: Hani Bopo",
127                 new int[] { UScript.HAN, UScript.BOPOMOFO },
128                 UScript.getCode(ULocale.TRADITIONAL_CHINESE));
129         assertEqualScripts("zh-TW scripts: Hani Bopo",
130                 new int[] { UScript.HAN, UScript.BOPOMOFO },
131                 UScript.getCode(ULocale.TAIWAN));
132 
133         // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
134         assertEqualScripts("ro-RO script: Latn",
135                 new int[] { UScript.LATIN },
136                 UScript.getCode("ro-RO"));  // String not ULocale
137     }
138 
139     // TODO(junit): remove this and convert the tests that use this to be parameterized
reportDataErrors(int numErrors)140     private void reportDataErrors(int numErrors) {
141         if (numErrors >0) {
142             // assume missing locale data, so not an error, just a warning
143             errln("encountered " + numErrors + " errors.");
144         }
145     }
146 
147     @Test
TestMultipleCode()148     public void TestMultipleCode(){
149         final String[] testNames = { "ja" ,"ko_KR","zh","zh_TW"};
150         final int[][] expected = {
151                                 {UScript.KATAKANA,UScript.HIRAGANA,UScript.HAN},
152                                 {UScript.HANGUL, UScript.HAN},
153                                 {UScript.HAN},
154                                 {UScript.HAN,UScript.BOPOMOFO}
155                               };
156 
157         int numErrors = 0;
158         for(int i=0; i<testNames.length;i++){
159             int[] code = UScript.getCode(testNames[i]);
160             int[] expt = (int[]) expected[i];
161             if(code!=null){
162                 for(int j =0; j< code.length;j++){
163                     if(code[j]!=expt[j]){
164                         numErrors++;
165                         logln("Error getting script code Got: " +code[j] + " Expected: " +expt[j] +" for name "+testNames[i]);
166                     }
167                 }
168             }else{
169                 numErrors++;
170                 logln("Error getting script code for name "+testNames[i]);
171             }
172         }
173         reportDataErrors(numErrors);
174 
175         //cover UScript.getCode(Locale)
176         Locale[] testLocales = new Locale[] {
177             Locale.JAPANESE,
178             Locale.KOREA,
179             Locale.CHINESE,
180             Locale.TAIWAN };
181         logln("Testing UScript.getCode(Locale) ...");
182         numErrors = 0;
183         for(int i=0; i<testNames.length;i++){
184             logln("  Testing locale: " + testLocales[i].getDisplayName());
185             int[] code = UScript.getCode(testLocales[i]);
186             int[] expt = (int[]) expected[i];
187             if(code!=null){
188                 for(int j =0; j< code.length;j++){
189                     if(code[j]!=expt[j]){
190                         numErrors++;
191                         logln("  Error getting script code Got: " +code[j] + " Expected: " +expt[j] +" for name "+testNames[i]);
192                     }
193                 }
194             }else{
195                 numErrors++;
196                 logln("  Error getting script code for name "+testNames[i]);
197             }
198         }
199         reportDataErrors(numErrors);
200     }
201 
202     @Test
TestGetCode()203     public void TestGetCode(){
204 
205         final String[] testNames={
206             /* test locale */
207             "en", "en_US", "sr", "ta", "gu", "te_IN",
208             "hi", "he", "ar",
209             /* test abbr */
210             "Hani", "Hang","Hebr","Hira",
211             "Knda","Kana","Khmr","Lao",
212             "Latn",/*"Latf","Latg",*/
213             "Mlym", "Mong",
214 
215             /* test names */
216             "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
217             "GOTHIC",  "GREEK",  "GUJARATI", "COMMON", "INHERITED",
218             /* test lower case names */
219             "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
220             "oriya",     "runic",     "sinhala", "syriac","tamil",
221             "telugu",    "thaana",    "thai",    "tibetan",
222             /* test the bounds*/
223             "Cans", "arabic","Yi","Zyyy"
224         };
225         final int[] expected ={
226             /* locales should return */
227             UScript.LATIN, UScript.LATIN,
228             UScript.CYRILLIC, UScript.TAMIL, UScript.GUJARATI,
229             UScript.TELUGU,UScript.DEVANAGARI,
230             UScript.HEBREW, UScript.ARABIC,
231             /* abbr should return */
232             UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA,
233             UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO,
234             UScript.LATIN,/* UScript.LATIN, UScript.LATIN,*/
235             UScript.MALAYALAM, UScript.MONGOLIAN,
236             /* names should return */
237             UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN,
238             UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI, UScript.COMMON, UScript.INHERITED,
239             /* lower case names should return */
240             UScript.MALAYALAM, UScript.MONGOLIAN, UScript.MYANMAR, UScript.OGHAM, UScript.OLD_ITALIC,
241             UScript.ORIYA, UScript.RUNIC, UScript.SINHALA, UScript.SYRIAC, UScript.TAMIL,
242             UScript.TELUGU, UScript.THAANA, UScript.THAI, UScript.TIBETAN,
243             /* bounds */
244             UScript.CANADIAN_ABORIGINAL, UScript.ARABIC, UScript.YI, UScript.COMMON
245         };
246         int i =0;
247         int numErrors =0;
248 
249         for( ; i<testNames.length; i++){
250             int[] code = UScript.getCode(testNames[i]);
251             if(code == null){
252                 if(expected[i]==UScript.INVALID_CODE){
253                     // getCode returns null if the code could not be found
254                     continue;
255                 }
256                 // currently commented out until jitterbug#2678 is fixed
257                 logln("Error getting script code Got: null" + " Expected: " +expected[i] +" for name "+testNames[i]);
258                 numErrors++;
259                 continue;
260             }
261             if((code[0] != expected[i])){
262                 logln("Error getting script code Got: " +code[0] + " Expected: " +expected[i] +" for name "+testNames[i]);
263                 numErrors++;
264             }
265         }
266         reportDataErrors(numErrors);
267     }
268 
269     @Test
TestGetName()270     public void TestGetName(){
271 
272         final int[] testCodes={
273             /* names should return */
274             UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN,
275             UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI,
276         };
277 
278         final String[] expectedNames={
279 
280             /* test names */
281             "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
282             "Gothic",  "Greek",  "Gujarati",
283         };
284         int i =0;
285         int numErrors=0;
286         while(i< testCodes.length){
287             String scriptName  = UScript.getName(testCodes[i]);
288             if(!expectedNames[i].equals(scriptName)){
289                 logln("Error getting abbreviations Got: " +scriptName +" Expected: "+expectedNames[i]);
290                 numErrors++;
291             }
292             i++;
293         }
294         if(numErrors >0 ){
295             warnln("encountered " + numErrors + " errors in UScript.getName()");
296         }
297 
298     }
299     @Test
TestGetShortName()300     public void TestGetShortName(){
301         final int[] testCodes={
302             /* abbr should return */
303             UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA,
304             UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO,
305             UScript.LATIN,
306             UScript.MALAYALAM, UScript.MONGOLIAN,
307         };
308 
309         final String[] expectedAbbr={
310               /* test abbr */
311             "Hani", "Hang","Hebr","Hira",
312             "Knda","Kana","Khmr","Laoo",
313             "Latn",
314             "Mlym", "Mong",
315         };
316         int i=0;
317         int numErrors=0;
318         while(i<testCodes.length){
319             String  shortName = UScript.getShortName(testCodes[i]);
320             if(!expectedAbbr[i].equals(shortName)){
321                 logln("Error getting abbreviations Got: " +shortName+ " Expected: " +expectedAbbr[i]);
322                 numErrors++;
323             }
324             i++;
325         }
326         if(numErrors >0 ){
327             warnln("encountered " + numErrors + " errors in UScript.getShortName()");
328         }
329     }
330     @Test
TestGetScript()331     public void TestGetScript(){
332         int codepoints[][] = new int[][] {
333                 {0x0000FF9D, UScript.KATAKANA },
334                 {0x0000FFBE, UScript.HANGUL },
335                 {0x0000FFC7, UScript.HANGUL },
336                 {0x0000FFCF, UScript.HANGUL },
337                 {0x0000FFD7, UScript.HANGUL},
338                 {0x0000FFDC, UScript.HANGUL},
339                 {0x00010300, UScript.OLD_ITALIC},
340                 {0x00010330, UScript.GOTHIC},
341                 {0x0001034A, UScript.GOTHIC},
342                 {0x00010400, UScript.DESERET},
343                 {0x00010428, UScript.DESERET},
344                 {0x0001D167, UScript.INHERITED},
345                 {0x0001D17B, UScript.INHERITED},
346                 {0x0001D185, UScript.INHERITED},
347                 {0x0001D1AA, UScript.INHERITED},
348                 {0x00020000, UScript.HAN},
349                 {0x00000D02, UScript.MALAYALAM},
350                 {0x00000D00, UScript.UNKNOWN},
351                 {0x00000000, UScript.COMMON},
352                 {0x0001D169, UScript.INHERITED },
353                 {0x0001D182, UScript.INHERITED },
354                 {0x0001D18B, UScript.INHERITED },
355                 {0x0001D1AD, UScript.INHERITED },
356         };
357 
358         int i =0;
359         int code = UScript.INVALID_CODE;
360         boolean passed = true;
361 
362         while(i< codepoints.length){
363             code = UScript.getScript(codepoints[i][0]);
364 
365             if(code != codepoints[i][1]){
366                 logln("UScript.getScript for codepoint 0x"+ hex(codepoints[i][0])+" failed");
367                 passed = false;
368             }
369 
370             i++;
371         }
372         if(!passed){
373            errln("UScript.getScript failed.");
374         }
375     }
376 
377     @Test
TestGetScriptOfCharsWithScriptExtensions()378     public void TestGetScriptOfCharsWithScriptExtensions() {
379         /* test characters which have Script_Extensions */
380         if(!(
381             UScript.COMMON==UScript.getScript(0x0640) &&
382             UScript.INHERITED==UScript.getScript(0x0650) &&
383             UScript.ARABIC==UScript.getScript(0xfdf2))
384         ) {
385             errln("UScript.getScript(character with Script_Extensions) failed");
386         }
387     }
388 
389     @Test
TestHasScript()390     public void TestHasScript() {
391         if(!(
392             !UScript.hasScript(0x063f, UScript.COMMON) &&
393             UScript.hasScript(0x063f, UScript.ARABIC) &&  /* main Script value */
394             !UScript.hasScript(0x063f, UScript.SYRIAC) &&
395             !UScript.hasScript(0x063f, UScript.THAANA))
396         ) {
397             errln("UScript.hasScript(U+063F, ...) is wrong");
398         }
399         if(!(
400             !UScript.hasScript(0x0640, UScript.COMMON) &&  /* main Script value */
401             UScript.hasScript(0x0640, UScript.ARABIC) &&
402             UScript.hasScript(0x0640, UScript.SYRIAC) &&
403             !UScript.hasScript(0x0640, UScript.THAANA))
404         ) {
405             errln("UScript.hasScript(U+0640, ...) is wrong");
406         }
407         if(!(
408             !UScript.hasScript(0x0650, UScript.INHERITED) &&  /* main Script value */
409             UScript.hasScript(0x0650, UScript.ARABIC) &&
410             UScript.hasScript(0x0650, UScript.SYRIAC) &&
411             !UScript.hasScript(0x0650, UScript.THAANA))
412         ) {
413             errln("UScript.hasScript(U+0650, ...) is wrong");
414         }
415         if(!(
416             !UScript.hasScript(0x0660, UScript.COMMON) &&  /* main Script value */
417             UScript.hasScript(0x0660, UScript.ARABIC) &&
418             !UScript.hasScript(0x0660, UScript.SYRIAC) &&
419             UScript.hasScript(0x0660, UScript.THAANA))
420         ) {
421             errln("UScript.hasScript(U+0660, ...) is wrong");
422         }
423         if(!(
424             !UScript.hasScript(0xfdf2, UScript.COMMON) &&
425             UScript.hasScript(0xfdf2, UScript.ARABIC) &&  /* main Script value */
426             !UScript.hasScript(0xfdf2, UScript.SYRIAC) &&
427             UScript.hasScript(0xfdf2, UScript.THAANA))
428         ) {
429             errln("UScript.hasScript(U+FDF2, ...) is wrong");
430         }
431         if(UScript.hasScript(0x0640, 0xaffe)) {
432             // An unguarded implementation might go into an infinite loop.
433             errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong");
434         }
435     }
436 
437     @Test
TestGetScriptExtensions()438     public void TestGetScriptExtensions() {
439         BitSet scripts=new BitSet(UScript.CODE_LIMIT);
440 
441         /* invalid code points */
442         if(UScript.getScriptExtensions(-1, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
443                 !scripts.get(UScript.UNKNOWN)) {
444             errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}");
445         }
446         if(UScript.getScriptExtensions(0x110000, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
447                 !scripts.get(UScript.UNKNOWN)) {
448             errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}");
449         }
450 
451         /* normal usage */
452         if(UScript.getScriptExtensions(0x063f, scripts)!=UScript.ARABIC || scripts.cardinality()!=1 ||
453                 !scripts.get(UScript.ARABIC)) {
454             errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}");
455         }
456         if(UScript.getScriptExtensions(0x0640, scripts)>-3 || scripts.cardinality()<3 ||
457            !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC) || !scripts.get(UScript.MANDAIC)
458         ) {
459             errln("UScript.getScriptExtensions(U+0640) failed");
460         }
461         if(UScript.getScriptExtensions(0xfdf2, scripts)!=-2 || scripts.cardinality()!=2 ||
462                 !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
463             errln("UScript.getScriptExtensions(U+FDF2) failed");
464         }
465         if(UScript.getScriptExtensions(0xff65, scripts)!=-6 || scripts.cardinality()!=6 ||
466                 !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
467             errln("UScript.getScriptExtensions(U+FF65) failed");
468         }
469     }
470 
471     @Test
TestScriptMetadataAPI()472     public void TestScriptMetadataAPI() {
473         /* API & code coverage. */
474         String sample = UScript.getSampleString(UScript.LATIN);
475         if(sample.length()!=1 || UScript.getScript(sample.charAt(0))!=UScript.LATIN) {
476             errln("UScript.getSampleString(Latn) failed");
477         }
478         sample = UScript.getSampleString(UScript.INVALID_CODE);
479         if(sample.length()!=0) {
480             errln("UScript.getSampleString(invalid) failed");
481         }
482 
483         if(UScript.getUsage(UScript.LATIN)!=ScriptUsage.RECOMMENDED ||
484                 UScript.getUsage(UScript.YI)!=ScriptUsage.ASPIRATIONAL ||
485                 UScript.getUsage(UScript.CHEROKEE)!=ScriptUsage.LIMITED_USE ||
486                 UScript.getUsage(UScript.COPTIC)!=ScriptUsage.EXCLUDED ||
487                 UScript.getUsage(UScript.CIRTH)!=ScriptUsage.NOT_ENCODED ||
488                 UScript.getUsage(UScript.INVALID_CODE)!=ScriptUsage.NOT_ENCODED ||
489                 UScript.getUsage(UScript.CODE_LIMIT)!=ScriptUsage.NOT_ENCODED) {
490             errln("UScript.getUsage() failed");
491         }
492 
493         if(UScript.isRightToLeft(UScript.LATIN) ||
494                 UScript.isRightToLeft(UScript.CIRTH) ||
495                 !UScript.isRightToLeft(UScript.ARABIC) ||
496                 !UScript.isRightToLeft(UScript.HEBREW)) {
497             errln("UScript.isRightToLeft() failed");
498         }
499 
500         if(UScript.breaksBetweenLetters(UScript.LATIN) ||
501                 UScript.breaksBetweenLetters(UScript.CIRTH) ||
502                 !UScript.breaksBetweenLetters(UScript.HAN) ||
503                 !UScript.breaksBetweenLetters(UScript.THAI)) {
504             errln("UScript.breaksBetweenLetters() failed");
505         }
506 
507         if(UScript.isCased(UScript.CIRTH) ||
508                 UScript.isCased(UScript.HAN) ||
509                 !UScript.isCased(UScript.LATIN) ||
510                 !UScript.isCased(UScript.GREEK)) {
511             errln("UScript.isCased() failed");
512         }
513     }
514 
515     /**
516      * Maps a special script code to the most common script of its encoded characters.
517      */
getCharScript(int script)518     private static final int getCharScript(int script) {
519         switch(script) {
520         case UScript.HAN_WITH_BOPOMOFO:
521         case UScript.SIMPLIFIED_HAN:
522         case UScript.TRADITIONAL_HAN:
523             return UScript.HAN;
524         case UScript.JAPANESE:
525             return UScript.HIRAGANA;
526         case UScript.JAMO:
527         case UScript.KOREAN:
528             return UScript.HANGUL;
529         case UScript.SYMBOLS_EMOJI:
530             return UScript.SYMBOLS;
531         default:
532             return script;
533         }
534     }
535 
536     @Test
TestScriptMetadata()537     public void TestScriptMetadata() {
538         UnicodeSet rtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]");
539         // So far, sample characters are uppercase.
540         // Georgian is special.
541         UnicodeSet cased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]");
542         for(int sc = 0; sc < UScript.CODE_LIMIT; ++sc) {
543             String sn = UScript.getShortName(sc);
544             ScriptUsage usage = UScript.getUsage(sc);
545             String sample = UScript.getSampleString(sc);
546             UnicodeSet scriptSet = new UnicodeSet();
547             scriptSet.applyIntPropertyValue(UProperty.SCRIPT, sc);
548             if(usage == ScriptUsage.NOT_ENCODED) {
549                 assertTrue(sn + " not encoded, no sample", sample.length() == 0);  // Java 6: sample.isEmpty()
550                 assertFalse(sn + " not encoded, not RTL", UScript.isRightToLeft(sc));
551                 assertFalse(sn + " not encoded, not LB letters", UScript.breaksBetweenLetters(sc));
552                 assertFalse(sn + " not encoded, not cased", UScript.isCased(sc));
553                 assertTrue(sn + " not encoded, no characters", scriptSet.isEmpty());
554             } else {
555                 assertFalse(sn + " encoded, has a sample character", sample.length() == 0);  // Java 6: sample.isEmpty()
556                 int firstChar = sample.codePointAt(0);
557                 int charScript = getCharScript(sc);
558                 assertEquals(sn + " script(sample(script))",
559                              charScript, UScript.getScript(firstChar));
560                 assertEquals(sn + " RTL vs. set", rtl.contains(firstChar), UScript.isRightToLeft(sc));
561                 assertEquals(sn + " cased vs. set", cased.contains(firstChar), UScript.isCased(sc));
562                 assertEquals(sn + " encoded, has characters", sc == charScript, !scriptSet.isEmpty());
563                 if(UScript.isRightToLeft(sc)) {
564                     rtl.removeAll(scriptSet);
565                 }
566                 if(UScript.isCased(sc)) {
567                     cased.removeAll(scriptSet);
568                 }
569             }
570         }
571         assertEquals("no remaining RTL characters", "[]", rtl.toPattern(true));
572         assertEquals("no remaining cased characters", "[]", cased.toPattern(true));
573 
574         assertTrue("Hani breaks between letters", UScript.breaksBetweenLetters(UScript.HAN));
575         assertTrue("Thai breaks between letters", UScript.breaksBetweenLetters(UScript.THAI));
576         assertFalse("Latn does not break between letters", UScript.breaksBetweenLetters(UScript.LATIN));
577     }
578 
579     @Test
TestScriptNames()580     public void TestScriptNames(){
581         for(int i=0; i<UScript.CODE_LIMIT;i++){
582             String name = UScript.getName(i);
583             if(name.equals("") ){
584                 errln("FAILED: getName for code : "+i);
585             }
586             String shortName= UScript.getShortName(i);
587             if(shortName.equals("")){
588                 errln("FAILED: getName for code : "+i);
589             }
590         }
591     }
592     @Test
TestAllCodepoints()593     public void TestAllCodepoints(){
594         int code;
595         //String oldId="";
596         //String oldAbbrId="";
597         for( int i =0; i <= 0x10ffff; i++){
598           code =UScript.INVALID_CODE;
599           code = UScript.getScript(i);
600           if(code==UScript.INVALID_CODE){
601                 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
602           }
603           String id =UScript.getName(code);
604           if(id.indexOf("INVALID")>=0){
605                  errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
606           }
607           String abbr = UScript.getShortName(code);
608           if(abbr.indexOf("INV")>=0){
609                  errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
610           }
611         }
612     }
613     @Test
TestNewCode()614     public void TestNewCode(){
615         /*
616          * These script codes were originally added to ICU pre-3.6, so that ICU would
617          * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
618          * These script codes were added with only short names because we don't
619          * want to invent long names ourselves.
620          * Unicode 5 and later encode some of these scripts and give them long names.
621          * Whenever this happens, the long script names here need to be updated.
622          */
623         String[] expectedLong = new String[]{
624             "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
625             "Egyd", "Egyh", "Egyptian_Hieroglyphs",
626             "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
627             "Javanese", "Kayah_Li", "Latf", "Latg",
628             "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
629             "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
630             "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
631             "Zxxx", "Unknown",
632             "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
633             "Moon", "Meetei_Mayek",
634             /* new in ICU 4.0 */
635             "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
636             "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
637             "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
638             "Zmth", "Zsym",
639             /* new in ICU 4.4 */
640             "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
641             /* new in ICU 4.6 */
642             "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
643             "Loma", "Mende_Kikakui", "Meroitic_Cursive",
644             "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
645             /* new in ICU 4.8 */
646             "Afak", "Jurc", "Mro", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
647             /* new in ICU 49 */
648             "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
649             /* new in ICU 52 */
650             "Caucasian_Albanian", "Mahajani",
651             /* new in ICU 54 */
652             "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
653             // new in ICU 58
654             "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye"
655         };
656         String[] expectedShort = new String[]{
657             "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
658             "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
659             "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
660             "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
661             "Zxxx", "Zzzz",
662             "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
663             "Moon", "Mtei",
664             /* new in ICU 4.0 */
665             "Armi", "Avst", "Cakm", "Kore",
666             "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
667             "Zmth", "Zsym",
668             /* new in ICU 4.4 */
669             "Bamu", "Lisu", "Nkgb", "Sarb",
670             /* new in ICU 4.6 */
671             "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
672             "Narb", "Nbat", "Palm", "Sind", "Wara",
673             /* new in ICU 4.8 */
674             "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
675             /* new in ICU 49 */
676             "Hluw", "Khoj", "Tirh",
677             /* new in ICU 52 */
678             "Aghb", "Mahj",
679             /* new in ICU 54 */
680             "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
681             // new in ICU 58
682             "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye"
683         };
684         if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) {
685             errln("need to add new script codes in lang.TestUScript.java!");
686             return;
687         }
688         int j = 0;
689         int i = 0;
690         for(i=UScript.BALINESE; i<UScript.CODE_LIMIT; i++, j++){
691             String name = UScript.getName(i);
692             if(name==null || !name.equals(expectedLong[j])){
693                 errln("UScript.getName failed for code"+ i + name +"!=" +expectedLong[j]);
694             }
695             name = UScript.getShortName(i);
696             if(name==null || !name.equals(expectedShort[j])){
697                 errln("UScript.getShortName failed for code"+ i + name +"!=" +expectedShort[j]);
698             }
699         }
700         for(i=0; i<expectedLong.length; i++){
701             int[] ret = UScript.getCode(expectedShort[i]);
702             if(ret.length>1){
703                 errln("UScript.getCode did not return expected number of codes for script"+ expectedShort[i]+". EXPECTED: 1 GOT: "+ ret.length);
704             }
705             if(ret[0]!= (UScript.BALINESE+i)){
706                 errln("UScript.getCode did not return expected code for script"+ expectedShort[i]+". EXPECTED: "+ (UScript.BALINESE+i)+" GOT: %i\n"+ ret[0] );
707             }
708         }
709     }
710 }
711