1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html#License 4 /** 5 ******************************************************************************* 6 * Copyright (C) 1996-2016, International Business Machines Corporation and 7 * others. All Rights Reserved. 8 ******************************************************************************* 9 */ 10 11 package ohos.global.icu.dev.test.lang; 12 13 import java.util.BitSet; 14 15 import org.junit.Test; 16 import org.junit.runner.RunWith; 17 import org.junit.runners.JUnit4; 18 19 import ohos.global.icu.dev.test.TestFmwk; 20 import ohos.global.icu.lang.UProperty; 21 import ohos.global.icu.lang.UScript; 22 import ohos.global.icu.lang.UScript.ScriptUsage; 23 import ohos.global.icu.text.UnicodeSet; 24 25 26 27 @RunWith(JUnit4.class) 28 public class TestUScript extends TestFmwk { 29 30 /** 31 * Constructor 32 */ TestUScript()33 public TestUScript() 34 { 35 } 36 37 @Test TestGetScriptOfCharsWithScriptExtensions()38 public void TestGetScriptOfCharsWithScriptExtensions() { 39 /* test characters which have Script_Extensions */ 40 if(!( 41 UScript.COMMON==UScript.getScript(0x0640) && 42 UScript.INHERITED==UScript.getScript(0x0650) && 43 UScript.ARABIC==UScript.getScript(0xfdf2)) 44 ) { 45 errln("UScript.getScript(character with Script_Extensions) failed"); 46 } 47 } 48 49 @Test TestHasScript()50 public void TestHasScript() { 51 if(!( 52 !UScript.hasScript(0x063f, UScript.COMMON) && 53 UScript.hasScript(0x063f, UScript.ARABIC) && /* main Script value */ 54 !UScript.hasScript(0x063f, UScript.SYRIAC) && 55 !UScript.hasScript(0x063f, UScript.THAANA)) 56 ) { 57 errln("UScript.hasScript(U+063F, ...) is wrong"); 58 } 59 if(!( 60 !UScript.hasScript(0x0640, UScript.COMMON) && /* main Script value */ 61 UScript.hasScript(0x0640, UScript.ARABIC) && 62 UScript.hasScript(0x0640, UScript.SYRIAC) && 63 !UScript.hasScript(0x0640, UScript.THAANA)) 64 ) { 65 errln("UScript.hasScript(U+0640, ...) is wrong"); 66 } 67 if(!( 68 !UScript.hasScript(0x0650, UScript.INHERITED) && /* main Script value */ 69 UScript.hasScript(0x0650, UScript.ARABIC) && 70 UScript.hasScript(0x0650, UScript.SYRIAC) && 71 !UScript.hasScript(0x0650, UScript.THAANA)) 72 ) { 73 errln("UScript.hasScript(U+0650, ...) is wrong"); 74 } 75 if(!( 76 !UScript.hasScript(0x0660, UScript.COMMON) && /* main Script value */ 77 UScript.hasScript(0x0660, UScript.ARABIC) && 78 !UScript.hasScript(0x0660, UScript.SYRIAC) && 79 UScript.hasScript(0x0660, UScript.THAANA)) 80 ) { 81 errln("UScript.hasScript(U+0660, ...) is wrong"); 82 } 83 if(!( 84 !UScript.hasScript(0xfdf2, UScript.COMMON) && 85 UScript.hasScript(0xfdf2, UScript.ARABIC) && /* main Script value */ 86 !UScript.hasScript(0xfdf2, UScript.SYRIAC) && 87 UScript.hasScript(0xfdf2, UScript.THAANA)) 88 ) { 89 errln("UScript.hasScript(U+FDF2, ...) is wrong"); 90 } 91 if(UScript.hasScript(0x0640, 0xaffe)) { 92 // An unguarded implementation might go into an infinite loop. 93 errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong"); 94 } 95 } 96 97 @Test TestGetScriptExtensions()98 public void TestGetScriptExtensions() { 99 BitSet scripts=new BitSet(UScript.CODE_LIMIT); 100 101 /* invalid code points */ 102 if(UScript.getScriptExtensions(-1, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 || 103 !scripts.get(UScript.UNKNOWN)) { 104 errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}"); 105 } 106 if(UScript.getScriptExtensions(0x110000, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 || 107 !scripts.get(UScript.UNKNOWN)) { 108 errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}"); 109 } 110 111 /* normal usage */ 112 if(UScript.getScriptExtensions(0x063f, scripts)!=UScript.ARABIC || scripts.cardinality()!=1 || 113 !scripts.get(UScript.ARABIC)) { 114 errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}"); 115 } 116 if(UScript.getScriptExtensions(0x0640, scripts)>-3 || scripts.cardinality()<3 || 117 !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC) || !scripts.get(UScript.MANDAIC) 118 ) { 119 errln("UScript.getScriptExtensions(U+0640) failed"); 120 } 121 if(UScript.getScriptExtensions(0xfdf2, scripts)!=-2 || scripts.cardinality()!=2 || 122 !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) { 123 errln("UScript.getScriptExtensions(U+FDF2) failed"); 124 } 125 if(UScript.getScriptExtensions(0xff65, scripts)!=-6 || scripts.cardinality()!=6 || 126 !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) { 127 errln("UScript.getScriptExtensions(U+FF65) failed"); 128 } 129 } 130 131 @Test TestDefaultScriptExtensions()132 public void TestDefaultScriptExtensions() { 133 // Block 3000..303F CJK Symbols and Punctuation defaults to scx=Bopo Hang Hani Hira Kana Yiii 134 // but some of its characters revert to scx=<script> which is usually Common. 135 BitSet scx = new BitSet(); 136 assertEquals("U+3000 num scx", // IDEOGRAPHIC SPACE 137 UScript.COMMON, 138 UScript.getScriptExtensions(0x3000, scx)); 139 scx.clear(); 140 assertEquals("U+3012 num scx", // POSTAL MARK 141 UScript.COMMON, 142 UScript.getScriptExtensions(0x3012, scx)); 143 } 144 145 @Test TestScriptMetadataAPI()146 public void TestScriptMetadataAPI() { 147 /* API & code coverage. */ 148 String sample = UScript.getSampleString(UScript.LATIN); 149 if(sample.length()!=1 || UScript.getScript(sample.charAt(0))!=UScript.LATIN) { 150 errln("UScript.getSampleString(Latn) failed"); 151 } 152 sample = UScript.getSampleString(UScript.INVALID_CODE); 153 if(sample.length()!=0) { 154 errln("UScript.getSampleString(invalid) failed"); 155 } 156 157 if(UScript.getUsage(UScript.LATIN)!=ScriptUsage.RECOMMENDED || 158 // Unicode 10 gives up on "aspirational". 159 UScript.getUsage(UScript.YI)!=ScriptUsage.LIMITED_USE || 160 UScript.getUsage(UScript.CHEROKEE)!=ScriptUsage.LIMITED_USE || 161 UScript.getUsage(UScript.COPTIC)!=ScriptUsage.EXCLUDED || 162 UScript.getUsage(UScript.CIRTH)!=ScriptUsage.NOT_ENCODED || 163 UScript.getUsage(UScript.INVALID_CODE)!=ScriptUsage.NOT_ENCODED || 164 UScript.getUsage(UScript.CODE_LIMIT)!=ScriptUsage.NOT_ENCODED) { 165 errln("UScript.getUsage() failed"); 166 } 167 168 if(UScript.isRightToLeft(UScript.LATIN) || 169 UScript.isRightToLeft(UScript.CIRTH) || 170 !UScript.isRightToLeft(UScript.ARABIC) || 171 !UScript.isRightToLeft(UScript.HEBREW)) { 172 errln("UScript.isRightToLeft() failed"); 173 } 174 175 if(UScript.breaksBetweenLetters(UScript.LATIN) || 176 UScript.breaksBetweenLetters(UScript.CIRTH) || 177 !UScript.breaksBetweenLetters(UScript.HAN) || 178 !UScript.breaksBetweenLetters(UScript.THAI)) { 179 errln("UScript.breaksBetweenLetters() failed"); 180 } 181 182 if(UScript.isCased(UScript.CIRTH) || 183 UScript.isCased(UScript.HAN) || 184 !UScript.isCased(UScript.LATIN) || 185 !UScript.isCased(UScript.GREEK)) { 186 errln("UScript.isCased() failed"); 187 } 188 } 189 190 /** 191 * Maps a special script code to the most common script of its encoded characters. 192 */ getCharScript(int script)193 private static final int getCharScript(int script) { 194 switch(script) { 195 case UScript.HAN_WITH_BOPOMOFO: 196 case UScript.SIMPLIFIED_HAN: 197 case UScript.TRADITIONAL_HAN: 198 return UScript.HAN; 199 case UScript.JAPANESE: 200 return UScript.HIRAGANA; 201 case UScript.JAMO: 202 case UScript.KOREAN: 203 return UScript.HANGUL; 204 case UScript.SYMBOLS_EMOJI: 205 return UScript.SYMBOLS; 206 default: 207 return script; 208 } 209 } 210 211 @Test TestScriptMetadata()212 public void TestScriptMetadata() { 213 UnicodeSet rtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]"); 214 // So far, sample characters are uppercase. 215 // Georgian is special. 216 UnicodeSet cased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]"); 217 for(int sc = 0; sc < UScript.CODE_LIMIT; ++sc) { 218 String sn = UScript.getShortName(sc); 219 ScriptUsage usage = UScript.getUsage(sc); 220 String sample = UScript.getSampleString(sc); 221 UnicodeSet scriptSet = new UnicodeSet(); 222 scriptSet.applyIntPropertyValue(UProperty.SCRIPT, sc); 223 if(usage == ScriptUsage.NOT_ENCODED) { 224 assertTrue(sn + " not encoded, no sample", sample.isEmpty()); 225 assertFalse(sn + " not encoded, not RTL", UScript.isRightToLeft(sc)); 226 assertFalse(sn + " not encoded, not LB letters", UScript.breaksBetweenLetters(sc)); 227 assertFalse(sn + " not encoded, not cased", UScript.isCased(sc)); 228 assertTrue(sn + " not encoded, no characters", scriptSet.isEmpty()); 229 } else { 230 assertFalse(sn + " encoded, has a sample character", sample.isEmpty()); 231 int firstChar = sample.codePointAt(0); 232 int charScript = getCharScript(sc); 233 assertEquals(sn + " script(sample(script))", 234 charScript, UScript.getScript(firstChar)); 235 assertEquals(sn + " RTL vs. set", rtl.contains(firstChar), UScript.isRightToLeft(sc)); 236 assertEquals(sn + " cased vs. set", cased.contains(firstChar), UScript.isCased(sc)); 237 assertEquals(sn + " encoded, has characters", sc == charScript, !scriptSet.isEmpty()); 238 if(UScript.isRightToLeft(sc)) { 239 rtl.removeAll(scriptSet); 240 } 241 if(UScript.isCased(sc)) { 242 cased.removeAll(scriptSet); 243 } 244 } 245 } 246 assertEquals("no remaining RTL characters", "[]", rtl.toPattern(true)); 247 assertEquals("no remaining cased characters", "[]", cased.toPattern(true)); 248 249 assertTrue("Hani breaks between letters", UScript.breaksBetweenLetters(UScript.HAN)); 250 assertTrue("Thai breaks between letters", UScript.breaksBetweenLetters(UScript.THAI)); 251 assertFalse("Latn does not break between letters", UScript.breaksBetweenLetters(UScript.LATIN)); 252 } 253 254 @Test TestScriptNames()255 public void TestScriptNames(){ 256 for(int i=0; i<UScript.CODE_LIMIT;i++){ 257 String name = UScript.getName(i); 258 if(name.equals("") ){ 259 errln("FAILED: getName for code : "+i); 260 } 261 String shortName= UScript.getShortName(i); 262 if(shortName.equals("")){ 263 errln("FAILED: getName for code : "+i); 264 } 265 } 266 } 267 @Test TestAllCodepoints()268 public void TestAllCodepoints(){ 269 int code; 270 //String oldId=""; 271 //String oldAbbrId=""; 272 for( int i =0; i <= 0x10ffff; i++){ 273 code =UScript.INVALID_CODE; 274 code = UScript.getScript(i); 275 if(code==UScript.INVALID_CODE){ 276 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 277 } 278 String id =UScript.getName(code); 279 if(id.indexOf("INVALID")>=0){ 280 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 281 } 282 String abbr = UScript.getShortName(code); 283 if(abbr.indexOf("INV")>=0){ 284 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed"); 285 } 286 } 287 } 288 @Test TestNewCode()289 public void TestNewCode(){ 290 /* 291 * These script codes were originally added to ICU pre-3.6, so that ICU would 292 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1. 293 * These script codes were added with only short names because we don't 294 * want to invent long names ourselves. 295 * Unicode 5 and later encode some of these scripts and give them long names. 296 * Whenever this happens, the long script names here need to be updated. 297 */ 298 String[] expectedLong = new String[]{ 299 "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", 300 "Egyd", "Egyh", "Egyptian_Hieroglyphs", 301 "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds", 302 "Javanese", "Kayah_Li", "Latf", "Latg", 303 "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs", 304 "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician", 305 "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform", 306 "Zxxx", "Unknown", 307 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese", 308 "Moon", "Meetei_Mayek", 309 /* new in ICU 4.0 */ 310 "Imperial_Aramaic", "Avestan", "Chakma", "Kore", 311 "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv", 312 "Inscriptional_Parthian", "Samaritan", "Tai_Viet", 313 "Zmth", "Zsym", 314 /* new in ICU 4.4 */ 315 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian", 316 /* new in ICU 4.6 */ 317 "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel", 318 "Loma", "Mende_Kikakui", "Meroitic_Cursive", 319 "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi", 320 /* new in ICU 4.8 */ 321 "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole", 322 /* new in ICU 49 */ 323 "Anatolian_Hieroglyphs", "Khojki", "Tirhuta", 324 /* new in ICU 52 */ 325 "Caucasian_Albanian", "Mahajani", 326 /* new in ICU 54 */ 327 "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham", 328 // new in ICU 58 329 "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye", 330 // new in ICU 60 331 "Masaram_Gondi", "Soyombo", "Zanabazar_Square", 332 // new in ICU 61 333 "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin", 334 "Hanifi_Rohingya", "Sogdian", "Old_Sogdian", 335 // new in ICU 64 336 "Elymaic", "Nyiakeng_Puachue_Hmong", "Nandinagari", "Wancho", 337 // new in ICU 66 338 "Chorasmian", "Dives_Akuru", "Khitan_Small_Script", "Yezidi", 339 }; 340 String[] expectedShort = new String[]{ 341 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp", 342 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg", 343 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx", 344 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux", 345 "Zxxx", "Zzzz", 346 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund", 347 "Moon", "Mtei", 348 /* new in ICU 4.0 */ 349 "Armi", "Avst", "Cakm", "Kore", 350 "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt", 351 "Zmth", "Zsym", 352 /* new in ICU 4.4 */ 353 "Bamu", "Lisu", "Nkgb", "Sarb", 354 /* new in ICU 4.6 */ 355 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc", 356 "Narb", "Nbat", "Palm", "Sind", "Wara", 357 /* new in ICU 4.8 */ 358 "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole", 359 /* new in ICU 49 */ 360 "Hluw", "Khoj", "Tirh", 361 /* new in ICU 52 */ 362 "Aghb", "Mahj", 363 /* new in ICU 54 */ 364 "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd", 365 // new in ICU 58 366 "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye", 367 // new in ICU 60 368 "Gonm", "Soyo", "Zanb", 369 // new in ICU 61 370 "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo", 371 // new in ICU 64 372 "Elym", "Hmnp", "Nand", "Wcho", 373 // new in ICU 66 374 "Chrs", "Diak", "Kits", "Yezi", 375 }; 376 if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) { 377 errln("need to add new script codes in lang.TestUScript.java!"); 378 return; 379 } 380 int j = 0; 381 int i = 0; 382 for(i=UScript.BALINESE; i<UScript.CODE_LIMIT; i++, j++){ 383 String name = UScript.getName(i); 384 if(name==null || !name.equals(expectedLong[j])){ 385 errln("UScript.getName failed for code"+ i + name +"!=" +expectedLong[j]); 386 } 387 name = UScript.getShortName(i); 388 if(name==null || !name.equals(expectedShort[j])){ 389 errln("UScript.getShortName failed for code"+ i + name +"!=" +expectedShort[j]); 390 } 391 } 392 for(i=0; i<expectedLong.length; i++){ 393 int[] ret = UScript.getCode(expectedShort[i]); 394 if(ret.length>1){ 395 errln("UScript.getCode did not return expected number of codes for script"+ expectedShort[i]+". EXPECTED: 1 GOT: "+ ret.length); 396 } 397 if(ret[0]!= (UScript.BALINESE+i)){ 398 errln("UScript.getCode did not return expected code for script"+ expectedShort[i]+". EXPECTED: "+ (UScript.BALINESE+i)+" GOT: %i\n"+ ret[0] ); 399 } 400 } 401 } 402 } 403