• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* GENERATED SOURCE. DO NOT MODIFY. */
2 // © 2016 and later: Unicode, Inc. and others.
3 // License & terms of use: http://www.unicode.org/copyright.html#License
4 /**
5 *******************************************************************************
6 * Copyright (C) 1996-2016, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 *******************************************************************************
9 */
10 
11 package ohos.global.icu.dev.test.lang;
12 
13 import java.util.BitSet;
14 
15 import org.junit.Test;
16 import org.junit.runner.RunWith;
17 import org.junit.runners.JUnit4;
18 
19 import ohos.global.icu.dev.test.TestFmwk;
20 import ohos.global.icu.lang.UProperty;
21 import ohos.global.icu.lang.UScript;
22 import ohos.global.icu.lang.UScript.ScriptUsage;
23 import ohos.global.icu.text.UnicodeSet;
24 
25 
26 
27 @RunWith(JUnit4.class)
28 public class TestUScript extends TestFmwk {
29 
30     /**
31     * Constructor
32     */
TestUScript()33     public TestUScript()
34     {
35     }
36 
37     @Test
TestGetScriptOfCharsWithScriptExtensions()38     public void TestGetScriptOfCharsWithScriptExtensions() {
39         /* test characters which have Script_Extensions */
40         if(!(
41             UScript.COMMON==UScript.getScript(0x0640) &&
42             UScript.INHERITED==UScript.getScript(0x0650) &&
43             UScript.ARABIC==UScript.getScript(0xfdf2))
44         ) {
45             errln("UScript.getScript(character with Script_Extensions) failed");
46         }
47     }
48 
49     @Test
TestHasScript()50     public void TestHasScript() {
51         if(!(
52             !UScript.hasScript(0x063f, UScript.COMMON) &&
53             UScript.hasScript(0x063f, UScript.ARABIC) &&  /* main Script value */
54             !UScript.hasScript(0x063f, UScript.SYRIAC) &&
55             !UScript.hasScript(0x063f, UScript.THAANA))
56         ) {
57             errln("UScript.hasScript(U+063F, ...) is wrong");
58         }
59         if(!(
60             !UScript.hasScript(0x0640, UScript.COMMON) &&  /* main Script value */
61             UScript.hasScript(0x0640, UScript.ARABIC) &&
62             UScript.hasScript(0x0640, UScript.SYRIAC) &&
63             !UScript.hasScript(0x0640, UScript.THAANA))
64         ) {
65             errln("UScript.hasScript(U+0640, ...) is wrong");
66         }
67         if(!(
68             !UScript.hasScript(0x0650, UScript.INHERITED) &&  /* main Script value */
69             UScript.hasScript(0x0650, UScript.ARABIC) &&
70             UScript.hasScript(0x0650, UScript.SYRIAC) &&
71             !UScript.hasScript(0x0650, UScript.THAANA))
72         ) {
73             errln("UScript.hasScript(U+0650, ...) is wrong");
74         }
75         if(!(
76             !UScript.hasScript(0x0660, UScript.COMMON) &&  /* main Script value */
77             UScript.hasScript(0x0660, UScript.ARABIC) &&
78             !UScript.hasScript(0x0660, UScript.SYRIAC) &&
79             UScript.hasScript(0x0660, UScript.THAANA))
80         ) {
81             errln("UScript.hasScript(U+0660, ...) is wrong");
82         }
83         if(!(
84             !UScript.hasScript(0xfdf2, UScript.COMMON) &&
85             UScript.hasScript(0xfdf2, UScript.ARABIC) &&  /* main Script value */
86             !UScript.hasScript(0xfdf2, UScript.SYRIAC) &&
87             UScript.hasScript(0xfdf2, UScript.THAANA))
88         ) {
89             errln("UScript.hasScript(U+FDF2, ...) is wrong");
90         }
91         if(UScript.hasScript(0x0640, 0xaffe)) {
92             // An unguarded implementation might go into an infinite loop.
93             errln("UScript.hasScript(U+0640, bogus 0xaffe) is wrong");
94         }
95     }
96 
97     @Test
TestGetScriptExtensions()98     public void TestGetScriptExtensions() {
99         BitSet scripts=new BitSet(UScript.CODE_LIMIT);
100 
101         /* invalid code points */
102         if(UScript.getScriptExtensions(-1, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
103                 !scripts.get(UScript.UNKNOWN)) {
104             errln("UScript.getScriptExtensions(-1) is not {UNKNOWN}");
105         }
106         if(UScript.getScriptExtensions(0x110000, scripts)!=UScript.UNKNOWN || scripts.cardinality()!=1 ||
107                 !scripts.get(UScript.UNKNOWN)) {
108             errln("UScript.getScriptExtensions(0x110000) is not {UNKNOWN}");
109         }
110 
111         /* normal usage */
112         if(UScript.getScriptExtensions(0x063f, scripts)!=UScript.ARABIC || scripts.cardinality()!=1 ||
113                 !scripts.get(UScript.ARABIC)) {
114             errln("UScript.getScriptExtensions(U+063F) is not {ARABIC}");
115         }
116         if(UScript.getScriptExtensions(0x0640, scripts)>-3 || scripts.cardinality()<3 ||
117            !scripts.get(UScript.ARABIC) || !scripts.get(UScript.SYRIAC) || !scripts.get(UScript.MANDAIC)
118         ) {
119             errln("UScript.getScriptExtensions(U+0640) failed");
120         }
121         if(UScript.getScriptExtensions(0xfdf2, scripts)!=-2 || scripts.cardinality()!=2 ||
122                 !scripts.get(UScript.ARABIC) || !scripts.get(UScript.THAANA)) {
123             errln("UScript.getScriptExtensions(U+FDF2) failed");
124         }
125         if(UScript.getScriptExtensions(0xff65, scripts)!=-6 || scripts.cardinality()!=6 ||
126                 !scripts.get(UScript.BOPOMOFO) || !scripts.get(UScript.YI)) {
127             errln("UScript.getScriptExtensions(U+FF65) failed");
128         }
129     }
130 
131     @Test
TestDefaultScriptExtensions()132     public void TestDefaultScriptExtensions() {
133         // Block 3000..303F CJK Symbols and Punctuation defaults to scx=Bopo Hang Hani Hira Kana Yiii
134         // but some of its characters revert to scx=<script> which is usually Common.
135         BitSet scx = new BitSet();
136         assertEquals("U+3000 num scx",  // IDEOGRAPHIC SPACE
137                 UScript.COMMON,
138                 UScript.getScriptExtensions(0x3000, scx));
139         scx.clear();
140         assertEquals("U+3012 num scx",  // POSTAL MARK
141                 UScript.COMMON,
142                 UScript.getScriptExtensions(0x3012, scx));
143     }
144 
145     @Test
TestScriptMetadataAPI()146     public void TestScriptMetadataAPI() {
147         /* API & code coverage. */
148         String sample = UScript.getSampleString(UScript.LATIN);
149         if(sample.length()!=1 || UScript.getScript(sample.charAt(0))!=UScript.LATIN) {
150             errln("UScript.getSampleString(Latn) failed");
151         }
152         sample = UScript.getSampleString(UScript.INVALID_CODE);
153         if(sample.length()!=0) {
154             errln("UScript.getSampleString(invalid) failed");
155         }
156 
157         if(UScript.getUsage(UScript.LATIN)!=ScriptUsage.RECOMMENDED ||
158                 // Unicode 10 gives up on "aspirational".
159                 UScript.getUsage(UScript.YI)!=ScriptUsage.LIMITED_USE ||
160                 UScript.getUsage(UScript.CHEROKEE)!=ScriptUsage.LIMITED_USE ||
161                 UScript.getUsage(UScript.COPTIC)!=ScriptUsage.EXCLUDED ||
162                 UScript.getUsage(UScript.CIRTH)!=ScriptUsage.NOT_ENCODED ||
163                 UScript.getUsage(UScript.INVALID_CODE)!=ScriptUsage.NOT_ENCODED ||
164                 UScript.getUsage(UScript.CODE_LIMIT)!=ScriptUsage.NOT_ENCODED) {
165             errln("UScript.getUsage() failed");
166         }
167 
168         if(UScript.isRightToLeft(UScript.LATIN) ||
169                 UScript.isRightToLeft(UScript.CIRTH) ||
170                 !UScript.isRightToLeft(UScript.ARABIC) ||
171                 !UScript.isRightToLeft(UScript.HEBREW)) {
172             errln("UScript.isRightToLeft() failed");
173         }
174 
175         if(UScript.breaksBetweenLetters(UScript.LATIN) ||
176                 UScript.breaksBetweenLetters(UScript.CIRTH) ||
177                 !UScript.breaksBetweenLetters(UScript.HAN) ||
178                 !UScript.breaksBetweenLetters(UScript.THAI)) {
179             errln("UScript.breaksBetweenLetters() failed");
180         }
181 
182         if(UScript.isCased(UScript.CIRTH) ||
183                 UScript.isCased(UScript.HAN) ||
184                 !UScript.isCased(UScript.LATIN) ||
185                 !UScript.isCased(UScript.GREEK)) {
186             errln("UScript.isCased() failed");
187         }
188     }
189 
190     /**
191      * Maps a special script code to the most common script of its encoded characters.
192      */
getCharScript(int script)193     private static final int getCharScript(int script) {
194         switch(script) {
195         case UScript.HAN_WITH_BOPOMOFO:
196         case UScript.SIMPLIFIED_HAN:
197         case UScript.TRADITIONAL_HAN:
198             return UScript.HAN;
199         case UScript.JAPANESE:
200             return UScript.HIRAGANA;
201         case UScript.JAMO:
202         case UScript.KOREAN:
203             return UScript.HANGUL;
204         case UScript.SYMBOLS_EMOJI:
205             return UScript.SYMBOLS;
206         default:
207             return script;
208         }
209     }
210 
211     @Test
TestScriptMetadata()212     public void TestScriptMetadata() {
213         UnicodeSet rtl = new UnicodeSet("[[:bc=R:][:bc=AL:]-[:Cn:]-[:sc=Common:]]");
214         // So far, sample characters are uppercase.
215         // Georgian is special.
216         UnicodeSet cased = new UnicodeSet("[[:Lu:]-[:sc=Common:]-[:sc=Geor:]]");
217         for(int sc = 0; sc < UScript.CODE_LIMIT; ++sc) {
218             String sn = UScript.getShortName(sc);
219             ScriptUsage usage = UScript.getUsage(sc);
220             String sample = UScript.getSampleString(sc);
221             UnicodeSet scriptSet = new UnicodeSet();
222             scriptSet.applyIntPropertyValue(UProperty.SCRIPT, sc);
223             if(usage == ScriptUsage.NOT_ENCODED) {
224                 assertTrue(sn + " not encoded, no sample", sample.isEmpty());
225                 assertFalse(sn + " not encoded, not RTL", UScript.isRightToLeft(sc));
226                 assertFalse(sn + " not encoded, not LB letters", UScript.breaksBetweenLetters(sc));
227                 assertFalse(sn + " not encoded, not cased", UScript.isCased(sc));
228                 assertTrue(sn + " not encoded, no characters", scriptSet.isEmpty());
229             } else {
230                 assertFalse(sn + " encoded, has a sample character", sample.isEmpty());
231                 int firstChar = sample.codePointAt(0);
232                 int charScript = getCharScript(sc);
233                 assertEquals(sn + " script(sample(script))",
234                              charScript, UScript.getScript(firstChar));
235                 assertEquals(sn + " RTL vs. set", rtl.contains(firstChar), UScript.isRightToLeft(sc));
236                 assertEquals(sn + " cased vs. set", cased.contains(firstChar), UScript.isCased(sc));
237                 assertEquals(sn + " encoded, has characters", sc == charScript, !scriptSet.isEmpty());
238                 if(UScript.isRightToLeft(sc)) {
239                     rtl.removeAll(scriptSet);
240                 }
241                 if(UScript.isCased(sc)) {
242                     cased.removeAll(scriptSet);
243                 }
244             }
245         }
246         assertEquals("no remaining RTL characters", "[]", rtl.toPattern(true));
247         assertEquals("no remaining cased characters", "[]", cased.toPattern(true));
248 
249         assertTrue("Hani breaks between letters", UScript.breaksBetweenLetters(UScript.HAN));
250         assertTrue("Thai breaks between letters", UScript.breaksBetweenLetters(UScript.THAI));
251         assertFalse("Latn does not break between letters", UScript.breaksBetweenLetters(UScript.LATIN));
252     }
253 
254     @Test
TestScriptNames()255     public void TestScriptNames(){
256         for(int i=0; i<UScript.CODE_LIMIT;i++){
257             String name = UScript.getName(i);
258             if(name.equals("") ){
259                 errln("FAILED: getName for code : "+i);
260             }
261             String shortName= UScript.getShortName(i);
262             if(shortName.equals("")){
263                 errln("FAILED: getName for code : "+i);
264             }
265         }
266     }
267     @Test
TestAllCodepoints()268     public void TestAllCodepoints(){
269         int code;
270         //String oldId="";
271         //String oldAbbrId="";
272         for( int i =0; i <= 0x10ffff; i++){
273           code =UScript.INVALID_CODE;
274           code = UScript.getScript(i);
275           if(code==UScript.INVALID_CODE){
276                 errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
277           }
278           String id =UScript.getName(code);
279           if(id.indexOf("INVALID")>=0){
280                  errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
281           }
282           String abbr = UScript.getShortName(code);
283           if(abbr.indexOf("INV")>=0){
284                  errln("UScript.getScript for codepoint 0x"+ hex(i)+" failed");
285           }
286         }
287     }
288     @Test
TestNewCode()289     public void TestNewCode(){
290         /*
291          * These script codes were originally added to ICU pre-3.6, so that ICU would
292          * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
293          * These script codes were added with only short names because we don't
294          * want to invent long names ourselves.
295          * Unicode 5 and later encode some of these scripts and give them long names.
296          * Whenever this happens, the long script names here need to be updated.
297          */
298         String[] expectedLong = new String[]{
299             "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
300             "Egyd", "Egyh", "Egyptian_Hieroglyphs",
301             "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
302             "Javanese", "Kayah_Li", "Latf", "Latg",
303             "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
304             "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
305             "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
306             "Zxxx", "Unknown",
307             "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
308             "Moon", "Meetei_Mayek",
309             /* new in ICU 4.0 */
310             "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
311             "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
312             "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
313             "Zmth", "Zsym",
314             /* new in ICU 4.4 */
315             "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
316             /* new in ICU 4.6 */
317             "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
318             "Loma", "Mende_Kikakui", "Meroitic_Cursive",
319             "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
320             /* new in ICU 4.8 */
321             "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
322             /* new in ICU 49 */
323             "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
324             /* new in ICU 52 */
325             "Caucasian_Albanian", "Mahajani",
326             /* new in ICU 54 */
327             "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
328             // new in ICU 58
329             "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
330             // new in ICU 60
331             "Masaram_Gondi", "Soyombo", "Zanabazar_Square",
332             // new in ICU 61
333             "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin",
334             "Hanifi_Rohingya", "Sogdian", "Old_Sogdian",
335             // new in ICU 64
336             "Elymaic", "Nyiakeng_Puachue_Hmong", "Nandinagari", "Wancho",
337             // new in ICU 66
338             "Chorasmian", "Dives_Akuru", "Khitan_Small_Script", "Yezidi",
339         };
340         String[] expectedShort = new String[]{
341             "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
342             "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
343             "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
344             "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
345             "Zxxx", "Zzzz",
346             "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
347             "Moon", "Mtei",
348             /* new in ICU 4.0 */
349             "Armi", "Avst", "Cakm", "Kore",
350             "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
351             "Zmth", "Zsym",
352             /* new in ICU 4.4 */
353             "Bamu", "Lisu", "Nkgb", "Sarb",
354             /* new in ICU 4.6 */
355             "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
356             "Narb", "Nbat", "Palm", "Sind", "Wara",
357             /* new in ICU 4.8 */
358             "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
359             /* new in ICU 49 */
360             "Hluw", "Khoj", "Tirh",
361             /* new in ICU 52 */
362             "Aghb", "Mahj",
363             /* new in ICU 54 */
364             "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
365             // new in ICU 58
366             "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
367             // new in ICU 60
368             "Gonm", "Soyo", "Zanb",
369             // new in ICU 61
370             "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo",
371             // new in ICU 64
372             "Elym", "Hmnp", "Nand", "Wcho",
373             // new in ICU 66
374             "Chrs", "Diak", "Kits", "Yezi",
375         };
376         if(expectedLong.length!=(UScript.CODE_LIMIT-UScript.BALINESE)) {
377             errln("need to add new script codes in lang.TestUScript.java!");
378             return;
379         }
380         int j = 0;
381         int i = 0;
382         for(i=UScript.BALINESE; i<UScript.CODE_LIMIT; i++, j++){
383             String name = UScript.getName(i);
384             if(name==null || !name.equals(expectedLong[j])){
385                 errln("UScript.getName failed for code"+ i + name +"!=" +expectedLong[j]);
386             }
387             name = UScript.getShortName(i);
388             if(name==null || !name.equals(expectedShort[j])){
389                 errln("UScript.getShortName failed for code"+ i + name +"!=" +expectedShort[j]);
390             }
391         }
392         for(i=0; i<expectedLong.length; i++){
393             int[] ret = UScript.getCode(expectedShort[i]);
394             if(ret.length>1){
395                 errln("UScript.getCode did not return expected number of codes for script"+ expectedShort[i]+". EXPECTED: 1 GOT: "+ ret.length);
396             }
397             if(ret[0]!= (UScript.BALINESE+i)){
398                 errln("UScript.getCode did not return expected code for script"+ expectedShort[i]+". EXPECTED: "+ (UScript.BALINESE+i)+" GOT: %i\n"+ ret[0] );
399             }
400         }
401     }
402 }
403