// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/**
 *******************************************************************************
 * Copyright (C) 1996-2016, International Business Machines Corporation and
 * others. All Rights Reserved.
 *******************************************************************************
 */

package com.ibm.icu.dev.test.lang;

import java.util.BitSet;
import java.util.Locale;

import org.junit.Test;

import com.ibm.icu.dev.test.TestFmwk;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.lang.UScript;
import com.ibm.icu.lang.UScript.ScriptUsage;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.ULocale;

/**
 * Tests for the {@link UScript} API: script code lookup by locale, name and
 * abbreviation, per-code-point script queries, Script_Extensions, and the
 * script metadata accessors.
 */
public class TestUScript extends TestFmwk {

    /**
     * Constructor
     */
    public TestUScript()
    {
    }

    /**
     * Formats script codes as a space-separated list of their short names.
     *
     * @param scripts script codes, may be {@code null}
     * @return the joined short names, or the string {@code "null"} for a null array
     */
    private static String scriptsToString(int[] scripts) {
        if (scripts == null) {
            return "null";
        }
        StringBuilder sb = new StringBuilder();
        for (int script : scripts) {
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append(UScript.getShortName(script));
        }
        return sb.toString();
    }

    /**
     * Asserts that two script code arrays are equal by comparing their
     * short-name renderings, which also makes failure messages readable.
     */
    private void assertEqualScripts(String msg, int[] expectedScripts, int[] actualScripts) {
        assertEquals(msg, scriptsToString(expectedScripts), scriptsToString(actualScripts));
    }

    /**
     * Compares an actual script code array against the expected one, logging every
     * difference.
     *
     * @param name     locale name used in the log messages
     * @param expected expected script codes, in order
     * @param actual   script codes returned by the API, may be {@code null}
     * @return the number of differences found (a null result or a length mismatch
     *         each count as one, plus one per differing element)
     */
    private int countScriptMismatches(String name, int[] expected, int[] actual) {
        if (actual == null) {
            logln("Error getting script code for name " + name);
            return 1;
        }
        int mismatches = 0;
        if (actual.length != expected.length) {
            // Without this check a shorter result would pass silently and a longer
            // one would index past the end of the expected array.
            logln("Error getting script code Got: " + actual.length + " codes Expected: "
                    + expected.length + " codes for name " + name);
            mismatches++;
        }
        int common = Math.min(actual.length, expected.length);
        for (int j = 0; j < common; j++) {
            if (actual[j] != expected[j]) {
                mismatches++;
                logln("Error getting script code Got: " + actual[j] + " Expected: " + expected[j]
                        + " for name " + name);
            }
        }
        return mismatches;
    }

    /**
     * Checks {@code UScript.getCode(ULocale)} for single-script locales, unknown
     * locales, languages without locale data, and multi-script languages.
     */
    @Test
    public void TestLocaleGetCode() {
        final ULocale[] testNames = {
            /* test locale */
            new ULocale("en"), new ULocale("en_US"),
            new ULocale("sr"), new ULocale("ta"),
            new ULocale("te_IN"),
            new ULocale("hi"),
            new ULocale("he"), new ULocale("ar"),
            new ULocale("abcde"),
            new ULocale("abcde_cdef"),
            new ULocale("iw")
        };
        final int[] expected = {
            /* locales should return */
            UScript.LATIN, UScript.LATIN,
            UScript.CYRILLIC, UScript.TAMIL,
            UScript.TELUGU, UScript.DEVANAGARI,
            UScript.HEBREW, UScript.ARABIC,
            UScript.INVALID_CODE, UScript.INVALID_CODE,
            UScript.HEBREW
        };
        int numErrors = 0;

        for (int i = 0; i < testNames.length; i++) {
            int[] code = UScript.getCode(testNames[i]);

            if (code == null) {
                // getCode returns null if the code could not be found
                if (expected[i] != UScript.INVALID_CODE) {
                    logln("Error getting script code Got: null" + " Expected: " + expected[i]
                            + " for name " + testNames[i]);
                    numErrors++;
                }
                continue;
            }
            if (code[0] != expected[i]) {
                logln("Error getting script code Got: " + code[0] + " Expected: " + expected[i]
                        + " for name " + testNames[i]);
                numErrors++;
            }
        }
        reportDataErrors(numErrors);

        // Look up the script while the default locale is temporarily set to Esperanto.
        ULocale defaultLoc = ULocale.getDefault();
        ULocale esperanto = new ULocale("eo_DE");
        ULocale.setDefault(esperanto);
        try {
            int[] code = UScript.getCode(esperanto);
            if (code != null) {
                if (code[0] != UScript.LATIN) {
                    errln("Did not get the expected script code for Esperanto");
                }
            } else {
                warnln("Could not load the locale data.");
            }
        } finally {
            // Always restore the default so a failure here cannot leak into other tests.
            ULocale.setDefault(defaultLoc);
        }

        // Should work regardless of whether we have locale data for the language.
        assertEqualScripts("tg script: Cyrl",  // Tajik
                new int[] { UScript.CYRILLIC },
                UScript.getCode(new ULocale("tg")));
        assertEqualScripts("xsr script: Deva",  // Sherpa
                new int[] { UScript.DEVANAGARI },
                UScript.getCode(new ULocale("xsr")));

        // Multi-script languages.
        assertEqualScripts("ja scripts: Kana Hira Hani",
                new int[] { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN },
                UScript.getCode(ULocale.JAPANESE));
        assertEqualScripts("ko scripts: Hang Hani",
                new int[] { UScript.HANGUL, UScript.HAN },
                UScript.getCode(ULocale.KOREAN));
        assertEqualScripts("zh script: Hani",
                new int[] { UScript.HAN },
                UScript.getCode(ULocale.CHINESE));
        assertEqualScripts("zh-Hant scripts: Hani Bopo",
                new int[] { UScript.HAN, UScript.BOPOMOFO },
                UScript.getCode(ULocale.TRADITIONAL_CHINESE));
        assertEqualScripts("zh-TW scripts: Hani Bopo",
                new int[] { UScript.HAN, UScript.BOPOMOFO },
                UScript.getCode(ULocale.TAIWAN));

        // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
        assertEqualScripts("ro-RO script: Latn",
                new int[] { UScript.LATIN },
                UScript.getCode("ro-RO"));  // String not ULocale
    }

    // TODO(junit): remove this and convert the tests that use this to be parameterized
    /**
     * Reports the accumulated number of lookup mismatches, if any.
     *
     * @param numErrors number of mismatches counted by the caller
     */
    private void reportDataErrors(int numErrors) {
        if (numErrors > 0) {
            // Mismatches are reported through errln, i.e. as errors rather than
            // as "missing data" warnings.
            errln("encountered " + numErrors + " errors.");
        }
    }

    /**
     * Checks that multi-script locales yield the complete, ordered list of script
     * codes, through both {@code getCode(String)} and {@code getCode(Locale)}.
     */
    @Test
    public void TestMultipleCode() {
        final String[] testNames = { "ja", "ko_KR", "zh", "zh_TW" };
        final int[][] expected = {
            { UScript.KATAKANA, UScript.HIRAGANA, UScript.HAN },
            { UScript.HANGUL, UScript.HAN },
            { UScript.HAN },
            { UScript.HAN, UScript.BOPOMOFO }
        };

        int numErrors = 0;
        for (int i = 0; i < testNames.length; i++) {
            numErrors += countScriptMismatches(
                    testNames[i], expected[i], UScript.getCode(testNames[i]));
        }
        reportDataErrors(numErrors);

        //cover UScript.getCode(Locale)
        // Must stay parallel to the expected table above.
        Locale[] testLocales = new Locale[] {
            Locale.JAPANESE,
            Locale.KOREA,
            Locale.CHINESE,
            Locale.TAIWAN };
        logln("Testing UScript.getCode(Locale) ...");
        numErrors = 0;
        for (int i = 0; i < testLocales.length; i++) {
            logln("  Testing locale: " + testLocales[i].getDisplayName());
            numErrors += countScriptMismatches(
                    testLocales[i].toString(), expected[i], UScript.getCode(testLocales[i]));
        }
        reportDataErrors(numErrors);
    }

    /**
     * Checks {@code UScript.getCode(String)} with locale IDs, four-letter
     * abbreviations, long names in upper and lower case, and boundary values.
     */
    @Test
    public void TestGetCode() {

        final String[] testNames = {
            /* test locale */
            "en", "en_US", "sr", "ta", "gu", "te_IN",
            "hi", "he", "ar",
            /* test abbr */
            "Hani", "Hang", "Hebr", "Hira",
            "Knda", "Kana", "Khmr", "Lao",
            "Latn", /*"Latf","Latg",*/
            "Mlym", "Mong",

            /* test names */
            "CYRILLIC", "DESERET", "DEVANAGARI", "ETHIOPIC", "GEORGIAN",
            "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED",
            /* test lower case names */
            "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
            "oriya", "runic", "sinhala", "syriac", "tamil",
            "telugu", "thaana", "thai", "tibetan",
            /* test the bounds*/
            "Cans", "arabic", "Yi", "Zyyy"
        };
        final int[] expected = {
            /* locales should return */
            UScript.LATIN, UScript.LATIN,
            UScript.CYRILLIC, UScript.TAMIL, UScript.GUJARATI,
            UScript.TELUGU, UScript.DEVANAGARI,
            UScript.HEBREW, UScript.ARABIC,
            /* abbr should return */
            UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA,
            UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO,
            UScript.LATIN, /* UScript.LATIN, UScript.LATIN,*/
            UScript.MALAYALAM, UScript.MONGOLIAN,
            /* names should return */
            UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN,
            UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI, UScript.COMMON, UScript.INHERITED,
            /* lower case names should return */
            UScript.MALAYALAM, UScript.MONGOLIAN, UScript.MYANMAR, UScript.OGHAM, UScript.OLD_ITALIC,
            UScript.ORIYA, UScript.RUNIC, UScript.SINHALA, UScript.SYRIAC, UScript.TAMIL,
            UScript.TELUGU, UScript.THAANA, UScript.THAI, UScript.TIBETAN,
            /* bounds */
            UScript.CANADIAN_ABORIGINAL, UScript.ARABIC, UScript.YI, UScript.COMMON
        };
        int numErrors = 0;

        for (int i = 0; i < testNames.length; i++) {
            int[] code = UScript.getCode(testNames[i]);
            if (code == null) {
                if (expected[i] == UScript.INVALID_CODE) {
                    // getCode returns null if the code could not be found
                    continue;
                }
                // A null result for a name that should resolve is a mismatch.
                logln("Error getting script code Got: null" + " Expected: " + expected[i]
                        + " for name " + testNames[i]);
                numErrors++;
                continue;
            }
            if (code[0] != expected[i]) {
                logln("Error getting script code Got: " + code[0] + " Expected: " + expected[i]
                        + " for name " + testNames[i]);
                numErrors++;
            }
        }
        reportDataErrors(numErrors);
    }

    /**
     * Checks that {@code UScript.getName} returns the expected long names.
     * Mismatches are reported as a warning, not an error.
     */
    @Test
    public void TestGetName() {

        final int[] testCodes = {
            /* names should return */
            UScript.CYRILLIC, UScript.DESERET, UScript.DEVANAGARI, UScript.ETHIOPIC, UScript.GEORGIAN,
            UScript.GOTHIC, UScript.GREEK, UScript.GUJARATI,
        };

        final String[] expectedNames = {
            /* test names */
            "Cyrillic", "Deseret", "Devanagari", "Ethiopic", "Georgian",
            "Gothic", "Greek", "Gujarati",
        };
        int numErrors = 0;
        for (int i = 0; i < testCodes.length; i++) {
            String scriptName = UScript.getName(testCodes[i]);
            if (!expectedNames[i].equals(scriptName)) {
                logln("Error getting names Got: " + scriptName + " Expected: " + expectedNames[i]);
                numErrors++;
            }
        }
        if (numErrors > 0) {
            warnln("encountered " + numErrors + " errors in UScript.getName()");
        }
    }

    /**
     * Checks that {@code UScript.getShortName} returns the expected four-letter
     * abbreviations. Mismatches are reported as a warning, not an error.
     */
    @Test
    public void TestGetShortName() {
        final int[] testCodes = {
            /* abbr should return */
            UScript.HAN, UScript.HANGUL, UScript.HEBREW, UScript.HIRAGANA,
            UScript.KANNADA, UScript.KATAKANA, UScript.KHMER, UScript.LAO,
            UScript.LATIN,
            UScript.MALAYALAM, UScript.MONGOLIAN,
        };

        final String[] expectedAbbr = {
            /* test abbr */
            "Hani", "Hang", "Hebr", "Hira",
            "Knda", "Kana", "Khmr", "Laoo",
            "Latn",
            "Mlym", "Mong",
        };
        int numErrors = 0;
        for (int i = 0; i < testCodes.length; i++) {
            String shortName = UScript.getShortName(testCodes[i]);
            if (!expectedAbbr[i].equals(shortName)) {
                logln("Error getting abbreviations Got: " + shortName + " Expected: " + expectedAbbr[i]);
                numErrors++;
            }
        }
        if (numErrors > 0) {
            warnln("encountered " + numErrors + " errors in UScript.getShortName()");
        }
    }

    /**
     * Checks {@code UScript.getScript(int)} for a table of {code point, script} pairs.
     */
    @Test
    public void TestGetScript() {
        // Each row is { code point, expected script code }.
        int codepoints[][] = new int[][] {
            {0x0000FF9D, UScript.KATAKANA },
            {0x0000FFBE, UScript.HANGUL },
            {0x0000FFC7, UScript.HANGUL },
            {0x0000FFCF, UScript.HANGUL },
            {0x0000FFD7, UScript.HANGUL},
            {0x0000FFDC, UScript.HANGUL},
            {0x00010300, UScript.OLD_ITALIC},
            {0x00010330, UScript.GOTHIC},
            {0x0001034A, UScript.GOTHIC},
            {0x00010400, UScript.DESERET},
            {0x00010428, UScript.DESERET},
            {0x0001D167, UScript.INHERITED},
            {0x0001D17B, UScript.INHERITED},
            {0x0001D185, UScript.INHERITED},
            {0x0001D1AA, UScript.INHERITED},
            {0x00020000, UScript.HAN},
            {0x00000D02, UScript.MALAYALAM},
            {0x00000D00, UScript.UNKNOWN},
            {0x00000000, UScript.COMMON},
            {0x0001D169, UScript.INHERITED },
            {0x0001D182, UScript.INHERITED },
            {0x0001D18B, UScript.INHERITED },
            {0x0001D1AD, UScript.INHERITED },
        };

        boolean passed = true;
        for (int[] row : codepoints) {
            int code = UScript.getScript(row[0]);
            if (code != row[1]) {
                logln("UScript.getScript for codepoint 0x" + hex(row[0]) + " failed");
                passed = false;
            }
        }
        if (!passed) {
            errln("UScript.getScript failed.");
        }
    }

    /**
     * Checks the main Script value of characters which have Script_Extensions.
     */
    @Test
    public void TestGetScriptOfCharsWithScriptExtensions() {
        /* test characters which have Script_Extensions */
        if (!(
                UScript.COMMON == UScript.getScript(0x0640) &&
                UScript.INHERITED == UScript.getScript(0x0650) &&
                UScript.ARABIC == UScript.getScript(0xfdf2))
        ) {
            errln("UScript.getScript(character with Script_Extensions) failed");
        }
    }

    /**
     * Checks {@code UScript.hasScript} for characters with and without
     * Script_Extensions, and for a bogus script code.
     */
    @Test
    public void TestHasScript() {
        if (!(
                !UScript.hasScript(0x063f, UScript.COMMON) &&
                UScript.hasScript(0x063f, UScript.ARABIC) &&  /* main Script value */
                !UScript.hasScript(0x063f, UScript.SYRIAC) &&
                !UScript.hasScript(0x063f, UScript.THAANA))
        ) {
            errln("UScript.hasScript(U+063F, ...) is wrong");
        }
        if (!(
                !UScript.hasScript(0x0640, UScript.COMMON) &&  /* main Script value */
                UScript.hasScript(0x0640, UScript.ARABIC) &&
                UScript.hasScript(0x0640, UScript.SYRIAC) &&
                !UScript.hasScript(0x0640, UScript.THAANA))
        ) {
            errln("UScript.hasScript(U+0640, ...) is wrong");
        }
        if (!(
                !UScript.hasScript(0x0650, UScript.INHERITED) &&  /* main Script value */
                UScript.hasScript(0x0650, UScript.ARABIC) &&
                UScript.hasScript(0x0650, UScript.SYRIAC) &&
                !UScript.hasScript(0x0650, UScript.THAANA))
        ) {
            errln("UScript.hasScript(U+0650, ...) is wrong");
        }
        if (!(
                !UScript.hasScript(0x0660, UScript.COMMON) &&  /* main Script value */
                UScript.hasScript(0x0660, UScript.ARABIC) &&
                !UScript.hasScript(0x0660, UScript.SYRIAC) &&
                UScript.hasScript(0x0660, UScript.THAANA))
        ) {
            errln("UScript.hasScript(U+0660, ...) is wrong");
        }
        if (!(
                !UScript.hasScript(0xfdf2, UScript.COMMON) &&
                UScript.hasScript(0xfdf2, UScript.ARABIC) &&  /* main Script value */
                !UScript.hasScript(0xfdf2, UScript.SYRIAC) &&
                UScript.hasScript(0xfdf2, UScript.THAANA))
        ) {
            errln("UScript.hasScript(U+FDF2, ...) is wrong");
        }
        if (UScript.hasScript(0x0640, 0xaffe)) {
            // An unguarded implementation might go into an infinite loop.
            errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong");
        }
    }

    /**
     * Checks {@code UScript.getScriptExtensions}. For a single script the checks
     * expect the script code as the return value; for several scripts they expect
     * the negated number of scripts, with the set filled accordingly.
     */
    @Test
    public void TestGetScriptExtensions() {
        BitSet scripts = new BitSet(UScript.CODE_LIMIT);

        /* invalid code points */
        if (UScript.getScriptExtensions(-1, scripts) != UScript.UNKNOWN || scripts.cardinality() != 1 ||
                !scripts.get(UScript.UNKNOWN)) {
            errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}");
        }
        if (UScript.getScriptExtensions(0x110000, scripts) != UScript.UNKNOWN || scripts.cardinality() != 1 ||
                !scripts.get(UScript.UNKNOWN)) {
            errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}");
        }

        /* normal usage */
        if (UScript.getScriptExtensions(0x063f, scripts) != UScript.ARABIC || scripts.cardinality() != 1 ||
                !scripts.get(UScript.ARABIC)) {
            errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}");
        }
        if (UScript.getScriptExtensions(0x0640, scripts) > -3 || scripts.cardinality() < 3 ||
                !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC) || !scripts.get(UScript.MANDAIC)
        ) {
            errln("UScript.getScriptExtensions(U+0640) failed");
        }
        if (UScript.getScriptExtensions(0xfdf2, scripts) != -2 || scripts.cardinality() != 2 ||
                !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
            errln("UScript.getScriptExtensions(U+FDF2) failed");
        }
        if (UScript.getScriptExtensions(0xff65, scripts) != -6 || scripts.cardinality() != 6 ||
                !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
            errln("UScript.getScriptExtensions(U+FF65) failed");
        }
    }

    /**
     * API and code coverage for the script metadata accessors: sample string,
     * usage, right-to-left, line breaking between letters, and casing.
     */
    @Test
    public void TestScriptMetadataAPI() {
        /* API & code coverage. */
        String sample = UScript.getSampleString(UScript.LATIN);
        if (sample.length() != 1 || UScript.getScript(sample.charAt(0)) != UScript.LATIN) {
            errln("UScript.getSampleString(Latn) failed");
        }
        sample = UScript.getSampleString(UScript.INVALID_CODE);
        if (sample.length() != 0) {
            errln("UScript.getSampleString(invalid) failed");
        }

        if (UScript.getUsage(UScript.LATIN) != ScriptUsage.RECOMMENDED ||
                UScript.getUsage(UScript.YI) != ScriptUsage.ASPIRATIONAL ||
                UScript.getUsage(UScript.CHEROKEE) != ScriptUsage.LIMITED_USE ||
                UScript.getUsage(UScript.COPTIC) != ScriptUsage.EXCLUDED ||
                UScript.getUsage(UScript.CIRTH) != ScriptUsage.NOT_ENCODED ||
                UScript.getUsage(UScript.INVALID_CODE) != ScriptUsage.NOT_ENCODED ||
                UScript.getUsage(UScript.CODE_LIMIT) != ScriptUsage.NOT_ENCODED) {
            errln("UScript.getUsage() failed");
        }

        if (UScript.isRightToLeft(UScript.LATIN) ||
                UScript.isRightToLeft(UScript.CIRTH) ||
                !UScript.isRightToLeft(UScript.ARABIC) ||
                !UScript.isRightToLeft(UScript.HEBREW)) {
            errln("UScript.isRightToLeft() failed");
        }

        if (UScript.breaksBetweenLetters(UScript.LATIN) ||
                UScript.breaksBetweenLetters(UScript.CIRTH) ||
                !UScript.breaksBetweenLetters(UScript.HAN) ||
                !UScript.breaksBetweenLetters(UScript.THAI)) {
            errln("UScript.breaksBetweenLetters() failed");
        }

        if (UScript.isCased(UScript.CIRTH) ||
                UScript.isCased(UScript.HAN) ||
                !UScript.isCased(UScript.LATIN) ||
                !UScript.isCased(UScript.GREEK)) {
            errln("UScript.isCased() failed");
        }
    }

    /**
     * Maps a special script code to the most common script of its encoded characters.
     *
     * @param script a script code
     * @return the mapped script code, or {@code script} itself if it is not one of
     *         the special codes handled here
     */
    private static int getCharScript(int script) {
        switch (script) {
        case UScript.HAN_WITH_BOPOMOFO:
        case UScript.SIMPLIFIED_HAN:
        case UScript.TRADITIONAL_HAN:
            return UScript.HAN;
        case UScript.JAPANESE:
            return UScript.HIRAGANA;
        case UScript.JAMO:
        case UScript.KOREAN:
            return UScript.HANGUL;
        case UScript.SYMBOLS_EMOJI:
            return UScript.SYMBOLS;
        default:
            return script;
        }
    }

    /**
     * Cross-checks the metadata of every script code against Unicode property
     * sets: the sample character's script, RTL and cased flags, and whether the
     * script has any characters. At the end no RTL or cased characters may remain
     * unaccounted for.
     */
    @Test
    public void TestScriptMetadata() {
        UnicodeSet rtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]");
        // So far, sample characters are uppercase.
        // Georgian is special.
        UnicodeSet cased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]");
        for (int sc = 0; sc < UScript.CODE_LIMIT; ++sc) {
            String sn = UScript.getShortName(sc);
            ScriptUsage usage = UScript.getUsage(sc);
            String sample = UScript.getSampleString(sc);
            UnicodeSet scriptSet = new UnicodeSet();
            scriptSet.applyIntPropertyValue(UProperty.SCRIPT, sc);
            if (usage == ScriptUsage.NOT_ENCODED) {
                assertTrue(sn + " not encoded, no sample", sample.length() == 0);  // Java 6: sample.isEmpty()
                assertFalse(sn + " not encoded, not RTL", UScript.isRightToLeft(sc));
                assertFalse(sn + " not encoded, not LB letters", UScript.breaksBetweenLetters(sc));
                assertFalse(sn + " not encoded, not cased", UScript.isCased(sc));
                assertTrue(sn + " not encoded, no characters", scriptSet.isEmpty());
            } else {
                assertFalse(sn + " encoded, has a sample character", sample.length() == 0);  // Java 6: sample.isEmpty()
                int firstChar = sample.codePointAt(0);
                int charScript = getCharScript(sc);
                assertEquals(sn + " script(sample(script))",
                        charScript, UScript.getScript(firstChar));
                assertEquals(sn + " RTL vs. set", rtl.contains(firstChar), UScript.isRightToLeft(sc));
                assertEquals(sn + " cased vs. set", cased.contains(firstChar), UScript.isCased(sc));
                assertEquals(sn + " encoded, has characters", sc == charScript, !scriptSet.isEmpty());
                // Remove the characters accounted for, so leftovers can be detected below.
                if (UScript.isRightToLeft(sc)) {
                    rtl.removeAll(scriptSet);
                }
                if (UScript.isCased(sc)) {
                    cased.removeAll(scriptSet);
                }
            }
        }
        assertEquals("no remaining RTL characters", "[]", rtl.toPattern(true));
        assertEquals("no remaining cased characters", "[]", cased.toPattern(true));

        assertTrue("Hani breaks between letters", UScript.breaksBetweenLetters(UScript.HAN));
        assertTrue("Thai breaks between letters", UScript.breaksBetweenLetters(UScript.THAI));
        assertFalse("Latn does not break between letters", UScript.breaksBetweenLetters(UScript.LATIN));
    }

    /**
     * Checks that every script code below {@code CODE_LIMIT} has a non-empty
     * long name and a non-empty short name.
     */
    @Test
    public void TestScriptNames() {
        for (int i = 0; i < UScript.CODE_LIMIT; i++) {
            String name = UScript.getName(i);
            if (name == null || name.length() == 0) {
                errln("FAILED: getName for code : " + i);
            }
            String shortName = UScript.getShortName(i);
            if (shortName == null || shortName.length() == 0) {
                errln("FAILED: getShortName for code : " + i);
            }
        }
    }

    /**
     * Checks that every code point from U+0000 to U+10FFFF maps to a valid
     * script code whose long and short names do not look invalid.
     */
    @Test
    public void TestAllCodepoints() {
        for (int i = 0; i <= 0x10ffff; i++) {
            int code = UScript.getScript(i);
            if (code == UScript.INVALID_CODE) {
                errln("UScript.getScript for codepoint 0x" + hex(i) + " failed");
                // Already reported; do not look up names for an invalid code.
                continue;
            }
            String id = UScript.getName(code);
            if (id.indexOf("INVALID") >= 0) {
                errln("UScript.getScript for codepoint 0x" + hex(i) + " failed");
            }
            String abbr = UScript.getShortName(code);
            if (abbr.indexOf("INV") >= 0) {
                errln("UScript.getScript for codepoint 0x" + hex(i) + " failed");
            }
        }
    }

    /**
     * Checks the long and short names of all script codes from
     * {@code UScript.BALINESE} up to {@code UScript.CODE_LIMIT}, and that each
     * short name maps back to exactly its own code.
     */
    @Test
    public void TestNewCode() {
        /*
         * These script codes were originally added to ICU pre-3.6, so that ICU would
         * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
         * These script codes were added with only short names because we don't
         * want to invent long names ourselves.
         * Unicode 5 and later encode some of these scripts and give them long names.
         * Whenever this happens, the long script names here need to be updated.
         */
        String[] expectedLong = new String[]{
            "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
            "Egyd", "Egyh", "Egyptian_Hieroglyphs",
            "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
            "Javanese", "Kayah_Li", "Latf", "Latg",
            "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
            "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
            "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
            "Zxxx", "Unknown",
            "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
            "Moon", "Meetei_Mayek",
            /* new in ICU 4.0 */
            "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
            "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
            "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
            "Zmth", "Zsym",
            /* new in ICU 4.4 */
            "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
            /* new in ICU 4.6 */
            "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
            "Loma", "Mende_Kikakui", "Meroitic_Cursive",
            "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
            /* new in ICU 4.8 */
            "Afak", "Jurc", "Mro", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
            /* new in ICU 49 */
            "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
            /* new in ICU 52 */
            "Caucasian_Albanian", "Mahajani",
            /* new in ICU 54 */
            "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
            // new in ICU 58
            "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye"
        };
        String[] expectedShort = new String[]{
            "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
            "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
            "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
            "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
            "Zxxx", "Zzzz",
            "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
            "Moon", "Mtei",
            /* new in ICU 4.0 */
            "Armi", "Avst", "Cakm", "Kore",
            "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
            "Zmth", "Zsym",
            /* new in ICU 4.4 */
            "Bamu", "Lisu", "Nkgb", "Sarb",
            /* new in ICU 4.6 */
            "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
            "Narb", "Nbat", "Palm", "Sind", "Wara",
            /* new in ICU 4.8 */
            "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
            /* new in ICU 49 */
            "Hluw", "Khoj", "Tirh",
            /* new in ICU 52 */
            "Aghb", "Mahj",
            /* new in ICU 54 */
            "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
            // new in ICU 58
            "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye"
        };
        // Both tables are indexed in parallel below, so both must cover the full range.
        int expectedCount = UScript.CODE_LIMIT - UScript.BALINESE;
        if (expectedLong.length != expectedCount || expectedShort.length != expectedCount) {
            errln("need to add new script codes in lang.TestUScript.java!");
            return;
        }
        for (int j = 0; j < expectedCount; j++) {
            int script = UScript.BALINESE + j;
            String name = UScript.getName(script);
            if (name == null || !name.equals(expectedLong[j])) {
                errln("UScript.getName failed for code " + script + ": " + name + " != " + expectedLong[j]);
            }
            name = UScript.getShortName(script);
            if (name == null || !name.equals(expectedShort[j])) {
                errln("UScript.getShortName failed for code " + script + ": " + name + " != " + expectedShort[j]);
            }
        }
        for (int i = 0; i < expectedCount; i++) {
            int[] ret = UScript.getCode(expectedShort[i]);
            // Treat a null result like an empty one instead of dereferencing it.
            int numCodes = (ret == null) ? 0 : ret.length;
            if (numCodes != 1) {
                errln("UScript.getCode did not return expected number of codes for script "
                        + expectedShort[i] + ". EXPECTED: 1 GOT: " + numCodes);
            }
            if (numCodes > 0 && ret[0] != (UScript.BALINESE + i)) {
                errln("UScript.getCode did not return expected code for script "
                        + expectedShort[i] + ". EXPECTED: " + (UScript.BALINESE + i) + " GOT: " + ret[0]);
            }
        }
    }
}