• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * Copyright (c) 1997-2016, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  ********************************************************************/
7 
8 #include <stdbool.h>
9 #include <string.h>
10 #include "unicode/utypes.h"
11 #include "unicode/uscript.h"
12 #include "unicode/uchar.h"
13 #include "cintltst.h"
14 #include "cucdapi.h"
15 #include "cmemory.h"
16 
/**
 * Formats a list of script codes as a space-separated string of their
 * short (ISO 15924 style) names, for use in test failure messages.
 *
 * @param scripts array of script codes to format
 * @param length  number of entries in scripts; 0 yields "(no scripts)"
 * @param s       caller-provided output buffer; it must be large enough for
 *                all names plus separators and the terminating NUL
 *                (no capacity is passed, so no bounds check is possible here)
 */
static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
    int32_t i;
    if(length == 0) {
        strcpy(s, "(no scripts)");
        return;
    }
    s[0] = 0;
    for(i = 0; i < length; ++i) {
        const char *shortName = uscript_getShortName(scripts[i]);
        if(i > 0) {
            strcat(s, " ");
        }
        /*
         * uscript_getShortName() can return NULL (invalid code or missing data);
         * passing NULL to strcat() is undefined behavior, so use a placeholder.
         */
        strcat(s, shortName != NULL ? shortName : "(invalid)");
    }
}
31 
/**
 * Compares two script-code lists by their string renderings and reports
 * a failure if they differ.
 *
 * @param msg       description of the check, included in any failure message
 * @param scripts1  expected script codes
 * @param length1   number of entries in scripts1
 * @param scripts2  actual script codes
 * @param length2   number of entries in scripts2
 * @param errorCode status of the call that produced scripts2; if it is a
 *                  failure, that is reported and no comparison is made
 */
static void assertEqualScripts(const char *msg,
                               const UScriptCode scripts1[], int32_t length1,
                               const UScriptCode scripts2[], int32_t length2,
                               UErrorCode errorCode) {
    char expectedText[80];
    char actualText[80];

    if(U_SUCCESS(errorCode)) {
        /* Render both lists the same way, then compare the renderings. */
        scriptsToString(scripts1, length1, expectedText);
        scriptsToString(scripts2, length2, actualText);
        if(strcmp(expectedText, actualText) != 0) {
            log_data_err("Failed: %s: expected %s but got %s\n", msg, expectedText, actualText);
        }
    } else {
        log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
    }
}
48 
TestUScriptCodeAPI(void)49 void TestUScriptCodeAPI(void){
50     int i =0;
51     int numErrors =0;
52     {
53         const char* testNames[]={
54         /* test locale */
55         "en", "en_US", "sr", "ta" , "te_IN",
56         "hi", "he", "ar",
57         /* test abbr */
58         "Hani", "Hang","Hebr","Hira",
59         "Knda","Kana","Khmr","Lao",
60         "Latn",/*"Latf","Latg",*/
61         "Mlym", "Mong",
62 
63         /* test names */
64         "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
65         "GOTHIC",  "GREEK",  "GUJARATI", "COMMON", "INHERITED",
66         /* test lower case names */
67         "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
68         "oriya",     "runic",     "sinhala", "syriac","tamil",
69         "telugu",    "thaana",    "thai",    "tibetan",
70         /* test the bounds*/
71         "tagb", "arabic",
72         /* test bogus */
73         "asfdasd", "5464", "12235",
74         /* test the last index */
75         "zyyy", "YI",
76          /* test other cases that are ambiguous (script alias vs language tag) */
77          "han", "mro", "nko", "old-hungarian", "new-tai-lue",
78        NULL
79         };
80         UScriptCode expected[] ={
81             /* locales should return */
82             USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
83             USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
84             /* abbr should return */
85             USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
86             USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
87             USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
88             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
89             /* names should return */
90             USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
91             USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
92             /* lower case names should return */
93             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
94             USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
95             USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
96             /* bounds */
97             USCRIPT_TAGBANWA, USCRIPT_ARABIC,
98             /* bogus names should return invalid code */
99             USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
100             /* test the last index */
101             USCRIPT_COMMON, USCRIPT_YI,
102             /* test other cases that are ambiguous (script alias vs language tag) */
103             USCRIPT_HAN, USCRIPT_MRO, USCRIPT_NKO, USCRIPT_OLD_HUNGARIAN, USCRIPT_NEW_TAI_LUE,
104         };
105 
106         UErrorCode err = U_ZERO_ERROR;
107 
108         const int32_t capacity = 10;
109 
110         for( ; testNames[i]!=NULL; i++){
111             UScriptCode script[10]={USCRIPT_INVALID_CODE};
112             uscript_getCode(testNames[i],script,capacity, &err);
113             if( script[0] != expected[i]){
114                    log_data_err("Error getting script code Got: %i  Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
115                        script[0],expected[i],testNames[i]);
116                    numErrors++;
117             }
118         }
119         if(numErrors >0 ){
120             log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
121         }
122     }
123 
124     {
125         UErrorCode err = U_ZERO_ERROR;
126         int32_t capacity=0;
127         int32_t j;
128         UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
129         UScriptCode script[10]={USCRIPT_INVALID_CODE};
130         int32_t num = uscript_getCode("ja",script,capacity, &err);
131         /* preflight */
132         if(err==U_BUFFER_OVERFLOW_ERROR){
133             err = U_ZERO_ERROR;
134             capacity = 10;
135             num = uscript_getCode("ja",script,capacity, &err);
136             if(num!=UPRV_LENGTHOF(jaCode)){
137                 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
138                         num, UPRV_LENGTHOF(jaCode));
139             }
140             for(j=0;j<UPRV_LENGTHOF(jaCode);j++) {
141                 if(script[j]!=jaCode[j]) {
142                     log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
143                             script[j], uscript_getName(script[j]),
144                             jaCode[j], uscript_getName(jaCode[j]));
145 
146                 }
147             }
148         }else{
149             log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
150                 "U_BUFFER_OVERFLOW_ERROR",
151                  u_errorName(err));
152         }
153 
154     }
155     {
156         static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
157         static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
158         static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
159         static const UScriptCode HAN[1] = { USCRIPT_HAN };
160         static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
161         static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
162         static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
163         UScriptCode scripts[5];
164         UErrorCode err;
165         int32_t num;
166 
167         // Should work regardless of whether we have locale data for the language.
168         err = U_ZERO_ERROR;
169         num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
170         assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err);  // Tajik
171         err = U_ZERO_ERROR;
172         num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
173         assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err);  // Sherpa
174 
175         // Multi-script languages.
176         err = U_ZERO_ERROR;
177         num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
178         assertEqualScripts("ja scripts: Kana Hira Hani",
179                            JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
180         err = U_ZERO_ERROR;
181         num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
182         assertEqualScripts("ko scripts: Hang Hani",
183                            KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
184         err = U_ZERO_ERROR;
185         num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
186         assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
187         err = U_ZERO_ERROR;
188         num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
189         assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
190         err = U_ZERO_ERROR;
191         num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
192         assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
193 
194         // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
195         err = U_ZERO_ERROR;
196         num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
197         assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
198     }
199 
200     {
201         UScriptCode testAbbr[]={
202             /* names should return */
203             USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
204             USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
205         };
206 
207         const char* expectedNames[]={
208 
209             /* test names */
210             "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
211             "Gothic",  "Greek",  "Gujarati",
212              NULL
213         };
214         i=0;
215         while(i<UPRV_LENGTHOF(testAbbr)){
216             const char* name = uscript_getName(testAbbr[i]);
217              if(name == NULL) {
218                log_data_err("Couldn't get script name\n");
219                return;
220              }
221             numErrors=0;
222             if(strcmp(expectedNames[i],name)!=0){
223                 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
224                 numErrors++;
225             }
226             if(numErrors > 0){
227                 if(numErrors >0 ){
228                     log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
229                 }
230             }
231             i++;
232         }
233 
234     }
235 
236     {
237         UScriptCode testAbbr[]={
238             /* abbr should return */
239             USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
240             USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
241             USCRIPT_LATIN,
242             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
243         };
244 
245         const char* expectedAbbr[]={
246               /* test abbr */
247             "Hani", "Hang","Hebr","Hira",
248             "Knda","Kana","Khmr","Laoo",
249             "Latn",
250             "Mlym", "Mong",
251              NULL
252         };
253         i=0;
254         while(i<UPRV_LENGTHOF(testAbbr)){
255             const char* name = uscript_getShortName(testAbbr[i]);
256             numErrors=0;
257             if(strcmp(expectedAbbr[i],name)!=0){
258                 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
259                 numErrors++;
260             }
261             if(numErrors > 0){
262                 if(numErrors >0 ){
263                     log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
264                 }
265             }
266             i++;
267         }
268 
269     }
270     /* now test uscript_getScript() API */
271     {
272         uint32_t codepoints[] = {
273                 0x0000FF9D, /* USCRIPT_KATAKANA*/
274                 0x0000FFBE, /* USCRIPT_HANGUL*/
275                 0x0000FFC7, /* USCRIPT_HANGUL*/
276                 0x0000FFCF, /* USCRIPT_HANGUL*/
277                 0x0000FFD7, /* USCRIPT_HANGUL*/
278                 0x0000FFDC, /* USCRIPT_HANGUL*/
279                 0x00010300, /* USCRIPT_OLD_ITALIC*/
280                 0x00010330, /* USCRIPT_GOTHIC*/
281                 0x0001034A, /* USCRIPT_GOTHIC*/
282                 0x00010400, /* USCRIPT_DESERET*/
283                 0x00010428, /* USCRIPT_DESERET*/
284                 0x0001D167, /* USCRIPT_INHERITED*/
285                 0x0001D17B, /* USCRIPT_INHERITED*/
286                 0x0001D185, /* USCRIPT_INHERITED*/
287                 0x0001D1AA, /* USCRIPT_INHERITED*/
288                 0x00020000, /* USCRIPT_HAN*/
289                 0x00000D02, /* USCRIPT_MALAYALAM*/
290                 0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
291                 0x00000000, /* USCRIPT_COMMON*/
292                 0x0001D169, /* USCRIPT_INHERITED*/
293                 0x0001D182, /* USCRIPT_INHERITED*/
294                 0x0001D18B, /* USCRIPT_INHERITED*/
295                 0x0001D1AD, /* USCRIPT_INHERITED*/
296         };
297 
298         UScriptCode expected[] = {
299                 USCRIPT_KATAKANA ,
300                 USCRIPT_HANGUL ,
301                 USCRIPT_HANGUL ,
302                 USCRIPT_HANGUL ,
303                 USCRIPT_HANGUL ,
304                 USCRIPT_HANGUL ,
305                 USCRIPT_OLD_ITALIC,
306                 USCRIPT_GOTHIC ,
307                 USCRIPT_GOTHIC ,
308                 USCRIPT_DESERET ,
309                 USCRIPT_DESERET ,
310                 USCRIPT_INHERITED,
311                 USCRIPT_INHERITED,
312                 USCRIPT_INHERITED,
313                 USCRIPT_INHERITED,
314                 USCRIPT_HAN ,
315                 USCRIPT_MALAYALAM,
316                 USCRIPT_UNKNOWN,
317                 USCRIPT_COMMON,
318                 USCRIPT_INHERITED ,
319                 USCRIPT_INHERITED ,
320                 USCRIPT_INHERITED ,
321                 USCRIPT_INHERITED ,
322         };
323         UScriptCode code = USCRIPT_INVALID_CODE;
324         UErrorCode status = U_ZERO_ERROR;
325         UBool passed = true;
326 
327         for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
328             code = uscript_getScript(codepoints[i],&status);
329             if(U_SUCCESS(status)){
330                 if( code != expected[i] ||
331                     code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
332                 ) {
333                     log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
334                     passed = false;
335                 }
336             }else{
337                 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
338                          codepoints[i],u_errorName(status));
339                 break;
340             }
341         }
342 
343         if(passed==false){
344            log_err("uscript_getScript failed.\n");
345         }
346     }
347     {
348         UScriptCode code= USCRIPT_INVALID_CODE;
349         UErrorCode  status = U_ZERO_ERROR;
350         code = uscript_getScript(0x001D169,&status);
351         if(code != USCRIPT_INHERITED){
352             log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
353         }
354     }
355     {
356         UScriptCode code= USCRIPT_INVALID_CODE;
357         UErrorCode  status = U_ZERO_ERROR;
358         int32_t err = 0;
359 
360         for(i = 0; i<=0x10ffff; i++){
361             code =  uscript_getScript(i,&status);
362             if(code == USCRIPT_INVALID_CODE){
363                 err++;
364                 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
365             }
366         }
367         if(err>0){
368             log_err("uscript_getScript failed for %d codepoints\n", err);
369         }
370     }
371     {
372         for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
373             const char* name = uscript_getName((UScriptCode)i);
374             if(name==NULL || strcmp(name,"")==0){
375                 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
376             }
377         }
378     }
379 
380     {
381         /*
382          * These script codes were originally added to ICU pre-3.6, so that ICU would
383          * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
384          * These script codes were added with only short names because we don't
385          * want to invent long names ourselves.
386          * Unicode 5 and later encode some of these scripts and give them long names.
387          * Whenever this happens, the long script names here need to be updated.
388          */
389         static const char* expectedLong[] = {
390             "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
391             "Egyd", "Egyh", "Egyptian_Hieroglyphs",
392             "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
393             "Javanese", "Kayah_Li", "Latf", "Latg",
394             "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
395             "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
396             "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
397             "Zxxx", "Unknown",
398             "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
399             "Moon", "Meetei_Mayek",
400             /* new in ICU 4.0 */
401             "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
402             "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
403             "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
404             "Zmth", "Zsym",
405             /* new in ICU 4.4 */
406             "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
407             /* new in ICU 4.6 */
408             "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
409             "Loma", "Mende_Kikakui", "Meroitic_Cursive",
410             "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
411             /* new in ICU 4.8 */
412             "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
413             /* new in ICU 49 */
414             "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
415             /* new in ICU 52 */
416             "Caucasian_Albanian", "Mahajani",
417             /* new in ICU 54 */
418             "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
419             // new in ICU 58
420             "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
421             // new in ICU 60
422             "Masaram_Gondi", "Soyombo", "Zanabazar_Square",
423             // new in ICU 61
424             "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin",
425             "Hanifi_Rohingya", "Sogdian", "Old_Sogdian",
426             // new in ICU 64
427             "Elymaic", "Nyiakeng_Puachue_Hmong", "Nandinagari", "Wancho",
428             // new in ICU 66
429             "Chorasmian", "Dives_Akuru", "Khitan_Small_Script", "Yezidi",
430             // new in ICU 70
431             "Cypro_Minoan", "Old_Uyghur", "Tangsa", "Toto", "Vithkuqi",
432             // new in ICU 72
433             "Kawi", "Nag_Mundari",
434             // new in ICU 75
435             "Aran",
436         };
437         static const char* expectedShort[] = {
438             "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
439             "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
440             "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
441             "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
442             "Zxxx", "Zzzz",
443             "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
444             "Moon", "Mtei",
445             /* new in ICU 4.0 */
446             "Armi", "Avst", "Cakm", "Kore",
447             "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
448             "Zmth", "Zsym",
449             /* new in ICU 4.4 */
450             "Bamu", "Lisu", "Nkgb", "Sarb",
451             /* new in ICU 4.6 */
452             "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
453             "Narb", "Nbat", "Palm", "Sind", "Wara",
454             /* new in ICU 4.8 */
455             "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
456             /* new in ICU 49 */
457             "Hluw", "Khoj", "Tirh",
458             /* new in ICU 52 */
459             "Aghb", "Mahj",
460             /* new in ICU 54 */
461             "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
462             // new in ICU 58
463             "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
464             // new in ICU 60
465             "Gonm", "Soyo", "Zanb",
466             // new in ICU 61
467             "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo",
468             // new in ICU 64
469             "Elym", "Hmnp", "Nand", "Wcho",
470             // new in ICU 66
471             "Chrs", "Diak", "Kits", "Yezi",
472             // new in ICU 70
473             "Cpmn", "Ougr", "Tnsa", "Toto", "Vith",
474             // new in ICU 72
475             "Kawi", "Nagm",
476             // new in ICU 75
477             "Aran",
478         };
479         int32_t j = 0;
480         if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
481             log_err("need to add new script codes in cucdapi.c!\n");
482             return;
483         }
484         for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
485             const char* name = uscript_getName((UScriptCode)i);
486             if(name==NULL || strcmp(name,expectedLong[j])!=0){
487                 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
488             }
489             name = uscript_getShortName((UScriptCode)i);
490             if(name==NULL || strcmp(name,expectedShort[j])!=0){
491                 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
492             }
493         }
494         for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
495             UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
496             UErrorCode status = U_ZERO_ERROR;
497             int32_t len = 0;
498             len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
499             if(U_FAILURE(status)){
500                 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
501             }
502             if(len>1){
503                 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
504             }
505             if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
506                 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
507             }
508         }
509     }
510 
511     {
512         /* test characters which have Script_Extensions */
513         UErrorCode errorCode=U_ZERO_ERROR;
514         if(!(
515                 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
516                 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
517                 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
518             U_FAILURE(errorCode)
519         ) {
520             log_err("uscript_getScript(character with Script_Extensions) failed\n");
521         }
522     }
523 }
524 
TestHasScript(void)525 void TestHasScript(void) {
526     if(!(
527         !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
528         uscript_hasScript(0x063f, USCRIPT_ARABIC) &&  /* main Script value */
529         !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
530         !uscript_hasScript(0x063f, USCRIPT_THAANA))
531     ) {
532         log_err("uscript_hasScript(U+063F, ...) is wrong\n");
533     }
534     if(!(
535         !uscript_hasScript(0x0640, USCRIPT_COMMON) &&  /* main Script value */
536         uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
537         uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
538         !uscript_hasScript(0x0640, USCRIPT_THAANA))
539     ) {
540         log_err("uscript_hasScript(U+0640, ...) is wrong\n");
541     }
542     if(!(
543         !uscript_hasScript(0x0650, USCRIPT_INHERITED) &&  /* main Script value */
544         uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
545         uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
546         !uscript_hasScript(0x0650, USCRIPT_THAANA))
547     ) {
548         log_err("uscript_hasScript(U+0650, ...) is wrong\n");
549     }
550     if(!(
551         !uscript_hasScript(0x0660, USCRIPT_COMMON) &&  /* main Script value */
552         uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
553         !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
554         uscript_hasScript(0x0660, USCRIPT_THAANA))
555     ) {
556         log_err("uscript_hasScript(U+0660, ...) is wrong\n");
557     }
558     if(!(
559         !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
560         uscript_hasScript(0xfdf2, USCRIPT_ARABIC) &&  /* main Script value */
561         !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
562         uscript_hasScript(0xfdf2, USCRIPT_THAANA))
563     ) {
564         log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
565     }
566     if(uscript_hasScript(0x0640, 0xaffe)) {
567         /* An unguarded implementation might go into an infinite loop. */
568         log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
569     }
570 }
571 
scriptsContain(UScriptCode scripts[],int32_t length,UScriptCode script)572 static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) {
573     UBool contain=false;
574     int32_t prev=-1, i;
575     for(i=0; i<length; ++i) {
576         int32_t s=scripts[i];
577         if(s<=prev) {
578             log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
579         }
580         if(s==script) { contain=true; }
581     }
582     return contain;
583 }
584 
TestGetScriptExtensions(void)585 void TestGetScriptExtensions(void) {
586     UScriptCode scripts[20];
587     int32_t length;
588     UErrorCode errorCode;
589 
590     /* errors and overflows */
591     errorCode=U_PARSE_ERROR;
592     length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
593     if(errorCode!=U_PARSE_ERROR) {
594         log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
595               u_errorName(errorCode));
596     }
597     errorCode=U_ZERO_ERROR;
598     length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
599     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
600         log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
601               u_errorName(errorCode));
602     }
603     errorCode=U_ZERO_ERROR;
604     length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
605     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
606         log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
607               u_errorName(errorCode));
608     }
609     errorCode=U_ZERO_ERROR;
610     length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
611     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
612         log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
613               (int)length, u_errorName(errorCode));
614     }
615     errorCode=U_ZERO_ERROR;
616     length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
617     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
618         log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
619               (int)length, u_errorName(errorCode));
620     }
621     /* U+063F has only a Script code, no Script_Extensions. */
622     errorCode=U_ZERO_ERROR;
623     length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
624     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
625         log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
626               (int)length, u_errorName(errorCode));
627     }
628 
629     /* invalid code points */
630     errorCode=U_ZERO_ERROR;
631     length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
632     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
633         log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
634               (int)length, u_errorName(errorCode));
635     }
636     errorCode=U_ZERO_ERROR;
637     length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
638     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
639         log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
640               (int)length, u_errorName(errorCode));
641     }
642 
643     /* normal usage */
644     errorCode=U_ZERO_ERROR;
645     length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
646     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
647         log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
648               (int)length, u_errorName(errorCode));
649     }
650     errorCode=U_ZERO_ERROR;
651     length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
652     if(U_FAILURE(errorCode) || length<3 ||
653             !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
654             !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
655             !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
656         log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
657               (int)length, u_errorName(errorCode));
658     }
659     errorCode=U_ZERO_ERROR;
660     length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
661     if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
662         log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
663               (int)length, u_errorName(errorCode));
664     }
665     errorCode=U_ZERO_ERROR;
666     length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
667     if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
668         log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
669               (int)length, u_errorName(errorCode));
670     }
671 }
672 
TestScriptMetadataAPI(void)673 void TestScriptMetadataAPI(void) {
674     /* API & code coverage. More testing in intltest/ucdtest.cpp. */
675     UErrorCode errorCode=U_ZERO_ERROR;
676     UChar sample[8];
677 
678     if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
679             U_FAILURE(errorCode) ||
680             uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
681             sample[1]!=0) {
682         log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
683     }
684     sample[0]=0xfffe;
685     if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
686             errorCode!=U_BUFFER_OVERFLOW_ERROR ||
687             sample[0]!=0xfffe) {
688         log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
689     }
690     errorCode=U_ZERO_ERROR;
691     if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
692             U_FAILURE(errorCode) ||
693             sample[0]!=0) {
694         log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
695     }
696     sample[0]=0xfffe;
697     if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
698             errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
699             sample[0]!=0xfffe) {
700         log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
701     }
702 
703     if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
704             // Unicode 10 gives up on "aspirational".
705             uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_LIMITED_USE ||
706             uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
707             uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
708             uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
709             uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
710             uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
711         log_err("uscript_getUsage() failed\n");
712     }
713 
714     if(uscript_isRightToLeft(USCRIPT_LATIN) ||
715             uscript_isRightToLeft(USCRIPT_CIRTH) ||
716             !uscript_isRightToLeft(USCRIPT_ARABIC) ||
717             !uscript_isRightToLeft(USCRIPT_HEBREW)) {
718         log_err("uscript_isRightToLeft() failed\n");
719     }
720 
721     if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
722             uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
723             !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
724             !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
725         log_err("uscript_breaksBetweenLetters() failed\n");
726     }
727 
728     if(uscript_isCased(USCRIPT_CIRTH) ||
729             uscript_isCased(USCRIPT_HAN) ||
730             !uscript_isCased(USCRIPT_LATIN) ||
731             !uscript_isCased(USCRIPT_GREEK)) {
732         log_err("uscript_isCased() failed\n");
733     }
734 }
735 
TestBinaryValues(void)736 void TestBinaryValues(void) {
737     /*
738      * Unicode 5.1 explicitly defines binary property value aliases.
739      * Verify that they are all recognized.
740      */
741     static const char *const falseValues[]={ "N", "No", "F", "False" };
742     static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
743     int32_t i;
744     for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
745         if(false!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
746             log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=false (Are you missing data?)\n", falseValues[i]);
747         }
748     }
749     for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
750         if(true!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
751             log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=true (Are you missing data?)\n", trueValues[i]);
752         }
753     }
754 }
755