• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * Copyright (c) 1997-2016, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  ********************************************************************/
7 
8 #include <stdbool.h>
9 #include <string.h>
10 #include "unicode/utypes.h"
11 #include "unicode/uscript.h"
12 #include "unicode/uchar.h"
13 #include "cintltst.h"
14 #include "cucdapi.h"
15 #include "cmemory.h"
16 
/**
 * Renders a list of script codes as a space-separated string of their
 * short names (as returned by uscript_getShortName()), e.g. "Kana Hira Hani".
 *
 * @param scripts array of script codes to render
 * @param length  number of entries in scripts; exactly 0 yields "(no scripts)"
 * @param s       output buffer. Its capacity is not passed in, so the caller
 *                must size it for all names, separators and the final NUL.
 */
static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
    int32_t i;
    if(length == 0) {
        strcpy(s, "(no scripts)");
        return;
    }
    s[0] = 0;
    for(i = 0; i < length; ++i) {
        const char *shortName = uscript_getShortName(scripts[i]);
        if(i > 0) {
            strcat(s, " ");
        }
        /*
         * uscript_getShortName() can return NULL for a code it does not know;
         * passing NULL to strcat() is undefined behavior, so substitute a
         * placeholder of the same width as a real short name.
         */
        strcat(s, shortName != NULL ? shortName : "????");
    }
}
31 
/**
 * Compares two script-code lists by their scriptsToString() rendering and
 * logs a data error when they differ.
 *
 * @param msg       label included in every log message
 * @param scripts1  expected script codes
 * @param length1   number of entries in scripts1
 * @param scripts2  actual script codes
 * @param length2   number of entries in scripts2
 * @param errorCode status of the call that produced scripts2; on failure the
 *                  error is logged and no comparison is attempted
 */
static void assertEqualScripts(const char *msg,
                               const UScriptCode scripts1[], int32_t length1,
                               const UScriptCode scripts2[], int32_t length2,
                               UErrorCode errorCode) {
    if(U_SUCCESS(errorCode)) {
        char expectedText[80];
        char actualText[80];

        scriptsToString(scripts1, length1, expectedText);
        scriptsToString(scripts2, length2, actualText);
        if(strcmp(expectedText, actualText) != 0) {
            log_data_err("Failed: %s: expected %s but got %s\n", msg, expectedText, actualText);
        }
    } else {
        /* The producing call failed, so the script arrays are not meaningful. */
        log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
    }
}
48 
TestUScriptCodeAPI()49 void TestUScriptCodeAPI(){
50     int i =0;
51     int numErrors =0;
52     {
53         const char* testNames[]={
54         /* test locale */
55         "en", "en_US", "sr", "ta" , "te_IN",
56         "hi", "he", "ar",
57         /* test abbr */
58         "Hani", "Hang","Hebr","Hira",
59         "Knda","Kana","Khmr","Lao",
60         "Latn",/*"Latf","Latg",*/
61         "Mlym", "Mong",
62 
63         /* test names */
64         "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
65         "GOTHIC",  "GREEK",  "GUJARATI", "COMMON", "INHERITED",
66         /* test lower case names */
67         "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
68         "oriya",     "runic",     "sinhala", "syriac","tamil",
69         "telugu",    "thaana",    "thai",    "tibetan",
70         /* test the bounds*/
71         "tagb", "arabic",
72         /* test bogus */
73         "asfdasd", "5464", "12235",
74         /* test the last index */
75         "zyyy", "YI",
76         NULL
77         };
78         UScriptCode expected[] ={
79             /* locales should return */
80             USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
81             USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
82             /* abbr should return */
83             USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
84             USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
85             USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
86             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
87             /* names should return */
88             USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
89             USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
90             /* lower case names should return */
91             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
92             USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
93             USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
94             /* bounds */
95             USCRIPT_TAGBANWA, USCRIPT_ARABIC,
96             /* bogus names should return invalid code */
97             USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
98             USCRIPT_COMMON, USCRIPT_YI,
99         };
100 
101         UErrorCode err = U_ZERO_ERROR;
102 
103         const int32_t capacity = 10;
104 
105         for( ; testNames[i]!=NULL; i++){
106             UScriptCode script[10]={USCRIPT_INVALID_CODE};
107             uscript_getCode(testNames[i],script,capacity, &err);
108             if( script[0] != expected[i]){
109                    log_data_err("Error getting script code Got: %i  Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
110                        script[0],expected[i],testNames[i]);
111                    numErrors++;
112             }
113         }
114         if(numErrors >0 ){
115             log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
116         }
117     }
118 
119     {
120         UErrorCode err = U_ZERO_ERROR;
121         int32_t capacity=0;
122         int32_t j;
123         UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
124         UScriptCode script[10]={USCRIPT_INVALID_CODE};
125         int32_t num = uscript_getCode("ja",script,capacity, &err);
126         /* preflight */
127         if(err==U_BUFFER_OVERFLOW_ERROR){
128             err = U_ZERO_ERROR;
129             capacity = 10;
130             num = uscript_getCode("ja",script,capacity, &err);
131             if(num!=UPRV_LENGTHOF(jaCode)){
132                 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
133                         num, UPRV_LENGTHOF(jaCode));
134             }
135             for(j=0;j<UPRV_LENGTHOF(jaCode);j++) {
136                 if(script[j]!=jaCode[j]) {
137                     log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
138                             script[j], uscript_getName(script[j]),
139                             jaCode[j], uscript_getName(jaCode[j]));
140 
141                 }
142             }
143         }else{
144             log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
145                 "U_BUFFER_OVERFLOW_ERROR",
146                  u_errorName(err));
147         }
148 
149     }
150     {
151         static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
152         static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
153         static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
154         static const UScriptCode HAN[1] = { USCRIPT_HAN };
155         static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
156         static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
157         static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
158         UScriptCode scripts[5];
159         UErrorCode err;
160         int32_t num;
161 
162         // Should work regardless of whether we have locale data for the language.
163         err = U_ZERO_ERROR;
164         num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
165         assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err);  // Tajik
166         err = U_ZERO_ERROR;
167         num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
168         assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err);  // Sherpa
169 
170         // Multi-script languages.
171         err = U_ZERO_ERROR;
172         num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
173         assertEqualScripts("ja scripts: Kana Hira Hani",
174                            JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
175         err = U_ZERO_ERROR;
176         num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
177         assertEqualScripts("ko scripts: Hang Hani",
178                            KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
179         err = U_ZERO_ERROR;
180         num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
181         assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
182         err = U_ZERO_ERROR;
183         num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
184         assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
185         err = U_ZERO_ERROR;
186         num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
187         assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
188 
189         // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
190         err = U_ZERO_ERROR;
191         num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
192         assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
193     }
194 
195     {
196         UScriptCode testAbbr[]={
197             /* names should return */
198             USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
199             USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
200         };
201 
202         const char* expectedNames[]={
203 
204             /* test names */
205             "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
206             "Gothic",  "Greek",  "Gujarati",
207              NULL
208         };
209         i=0;
210         while(i<UPRV_LENGTHOF(testAbbr)){
211             const char* name = uscript_getName(testAbbr[i]);
212              if(name == NULL) {
213                log_data_err("Couldn't get script name\n");
214                return;
215              }
216             numErrors=0;
217             if(strcmp(expectedNames[i],name)!=0){
218                 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
219                 numErrors++;
220             }
221             if(numErrors > 0){
222                 if(numErrors >0 ){
223                     log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
224                 }
225             }
226             i++;
227         }
228 
229     }
230 
231     {
232         UScriptCode testAbbr[]={
233             /* abbr should return */
234             USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
235             USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
236             USCRIPT_LATIN,
237             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
238         };
239 
240         const char* expectedAbbr[]={
241               /* test abbr */
242             "Hani", "Hang","Hebr","Hira",
243             "Knda","Kana","Khmr","Laoo",
244             "Latn",
245             "Mlym", "Mong",
246              NULL
247         };
248         i=0;
249         while(i<UPRV_LENGTHOF(testAbbr)){
250             const char* name = uscript_getShortName(testAbbr[i]);
251             numErrors=0;
252             if(strcmp(expectedAbbr[i],name)!=0){
253                 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
254                 numErrors++;
255             }
256             if(numErrors > 0){
257                 if(numErrors >0 ){
258                     log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
259                 }
260             }
261             i++;
262         }
263 
264     }
265     /* now test uscript_getScript() API */
266     {
267         uint32_t codepoints[] = {
268                 0x0000FF9D, /* USCRIPT_KATAKANA*/
269                 0x0000FFBE, /* USCRIPT_HANGUL*/
270                 0x0000FFC7, /* USCRIPT_HANGUL*/
271                 0x0000FFCF, /* USCRIPT_HANGUL*/
272                 0x0000FFD7, /* USCRIPT_HANGUL*/
273                 0x0000FFDC, /* USCRIPT_HANGUL*/
274                 0x00010300, /* USCRIPT_OLD_ITALIC*/
275                 0x00010330, /* USCRIPT_GOTHIC*/
276                 0x0001034A, /* USCRIPT_GOTHIC*/
277                 0x00010400, /* USCRIPT_DESERET*/
278                 0x00010428, /* USCRIPT_DESERET*/
279                 0x0001D167, /* USCRIPT_INHERITED*/
280                 0x0001D17B, /* USCRIPT_INHERITED*/
281                 0x0001D185, /* USCRIPT_INHERITED*/
282                 0x0001D1AA, /* USCRIPT_INHERITED*/
283                 0x00020000, /* USCRIPT_HAN*/
284                 0x00000D02, /* USCRIPT_MALAYALAM*/
285                 0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
286                 0x00000000, /* USCRIPT_COMMON*/
287                 0x0001D169, /* USCRIPT_INHERITED*/
288                 0x0001D182, /* USCRIPT_INHERITED*/
289                 0x0001D18B, /* USCRIPT_INHERITED*/
290                 0x0001D1AD, /* USCRIPT_INHERITED*/
291         };
292 
293         UScriptCode expected[] = {
294                 USCRIPT_KATAKANA ,
295                 USCRIPT_HANGUL ,
296                 USCRIPT_HANGUL ,
297                 USCRIPT_HANGUL ,
298                 USCRIPT_HANGUL ,
299                 USCRIPT_HANGUL ,
300                 USCRIPT_OLD_ITALIC,
301                 USCRIPT_GOTHIC ,
302                 USCRIPT_GOTHIC ,
303                 USCRIPT_DESERET ,
304                 USCRIPT_DESERET ,
305                 USCRIPT_INHERITED,
306                 USCRIPT_INHERITED,
307                 USCRIPT_INHERITED,
308                 USCRIPT_INHERITED,
309                 USCRIPT_HAN ,
310                 USCRIPT_MALAYALAM,
311                 USCRIPT_UNKNOWN,
312                 USCRIPT_COMMON,
313                 USCRIPT_INHERITED ,
314                 USCRIPT_INHERITED ,
315                 USCRIPT_INHERITED ,
316                 USCRIPT_INHERITED ,
317         };
318         UScriptCode code = USCRIPT_INVALID_CODE;
319         UErrorCode status = U_ZERO_ERROR;
320         UBool passed = true;
321 
322         for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
323             code = uscript_getScript(codepoints[i],&status);
324             if(U_SUCCESS(status)){
325                 if( code != expected[i] ||
326                     code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
327                 ) {
328                     log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
329                     passed = false;
330                 }
331             }else{
332                 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
333                          codepoints[i],u_errorName(status));
334                 break;
335             }
336         }
337 
338         if(passed==false){
339            log_err("uscript_getScript failed.\n");
340         }
341     }
342     {
343         UScriptCode code= USCRIPT_INVALID_CODE;
344         UErrorCode  status = U_ZERO_ERROR;
345         code = uscript_getScript(0x001D169,&status);
346         if(code != USCRIPT_INHERITED){
347             log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
348         }
349     }
350     {
351         UScriptCode code= USCRIPT_INVALID_CODE;
352         UErrorCode  status = U_ZERO_ERROR;
353         int32_t err = 0;
354 
355         for(i = 0; i<=0x10ffff; i++){
356             code =  uscript_getScript(i,&status);
357             if(code == USCRIPT_INVALID_CODE){
358                 err++;
359                 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
360             }
361         }
362         if(err>0){
363             log_err("uscript_getScript failed for %d codepoints\n", err);
364         }
365     }
366     {
367         for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
368             const char* name = uscript_getName((UScriptCode)i);
369             if(name==NULL || strcmp(name,"")==0){
370                 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
371             }
372         }
373     }
374 
375     {
376         /*
377          * These script codes were originally added to ICU pre-3.6, so that ICU would
378          * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
379          * These script codes were added with only short names because we don't
380          * want to invent long names ourselves.
381          * Unicode 5 and later encode some of these scripts and give them long names.
382          * Whenever this happens, the long script names here need to be updated.
383          */
384         static const char* expectedLong[] = {
385             "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
386             "Egyd", "Egyh", "Egyptian_Hieroglyphs",
387             "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
388             "Javanese", "Kayah_Li", "Latf", "Latg",
389             "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
390             "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
391             "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
392             "Zxxx", "Unknown",
393             "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
394             "Moon", "Meetei_Mayek",
395             /* new in ICU 4.0 */
396             "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
397             "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
398             "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
399             "Zmth", "Zsym",
400             /* new in ICU 4.4 */
401             "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
402             /* new in ICU 4.6 */
403             "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
404             "Loma", "Mende_Kikakui", "Meroitic_Cursive",
405             "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
406             /* new in ICU 4.8 */
407             "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
408             /* new in ICU 49 */
409             "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
410             /* new in ICU 52 */
411             "Caucasian_Albanian", "Mahajani",
412             /* new in ICU 54 */
413             "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
414             // new in ICU 58
415             "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
416             // new in ICU 60
417             "Masaram_Gondi", "Soyombo", "Zanabazar_Square",
418             // new in ICU 61
419             "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin",
420             "Hanifi_Rohingya", "Sogdian", "Old_Sogdian",
421             // new in ICU 64
422             "Elymaic", "Nyiakeng_Puachue_Hmong", "Nandinagari", "Wancho",
423             // new in ICU 66
424             "Chorasmian", "Dives_Akuru", "Khitan_Small_Script", "Yezidi",
425             // new in ICU 70
426             "Cypro_Minoan", "Old_Uyghur", "Tangsa", "Toto", "Vithkuqi",
427             // new in ICU 72
428             "Kawi", "Nag_Mundari",
429         };
430         static const char* expectedShort[] = {
431             "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
432             "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
433             "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
434             "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
435             "Zxxx", "Zzzz",
436             "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
437             "Moon", "Mtei",
438             /* new in ICU 4.0 */
439             "Armi", "Avst", "Cakm", "Kore",
440             "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
441             "Zmth", "Zsym",
442             /* new in ICU 4.4 */
443             "Bamu", "Lisu", "Nkgb", "Sarb",
444             /* new in ICU 4.6 */
445             "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
446             "Narb", "Nbat", "Palm", "Sind", "Wara",
447             /* new in ICU 4.8 */
448             "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
449             /* new in ICU 49 */
450             "Hluw", "Khoj", "Tirh",
451             /* new in ICU 52 */
452             "Aghb", "Mahj",
453             /* new in ICU 54 */
454             "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
455             // new in ICU 58
456             "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
457             // new in ICU 60
458             "Gonm", "Soyo", "Zanb",
459             // new in ICU 61
460             "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo",
461             // new in ICU 64
462             "Elym", "Hmnp", "Nand", "Wcho",
463             // new in ICU 66
464             "Chrs", "Diak", "Kits", "Yezi",
465             // new in ICU 70
466             "Cpmn", "Ougr", "Tnsa", "Toto", "Vith",
467             // new in ICU 72
468             "Kawi", "Nagm",
469         };
470         int32_t j = 0;
471         if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
472             log_err("need to add new script codes in cucdapi.c!\n");
473             return;
474         }
475         for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
476             const char* name = uscript_getName((UScriptCode)i);
477             if(name==NULL || strcmp(name,expectedLong[j])!=0){
478                 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
479             }
480             name = uscript_getShortName((UScriptCode)i);
481             if(name==NULL || strcmp(name,expectedShort[j])!=0){
482                 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
483             }
484         }
485         for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
486             UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
487             UErrorCode status = U_ZERO_ERROR;
488             int32_t len = 0;
489             len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
490             if(U_FAILURE(status)){
491                 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
492             }
493             if(len>1){
494                 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
495             }
496             if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
497                 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
498             }
499         }
500     }
501 
502     {
503         /* test characters which have Script_Extensions */
504         UErrorCode errorCode=U_ZERO_ERROR;
505         if(!(
506                 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
507                 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
508                 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
509             U_FAILURE(errorCode)
510         ) {
511             log_err("uscript_getScript(character with Script_Extensions) failed\n");
512         }
513     }
514 }
515 
TestHasScript()516 void TestHasScript() {
517     if(!(
518         !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
519         uscript_hasScript(0x063f, USCRIPT_ARABIC) &&  /* main Script value */
520         !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
521         !uscript_hasScript(0x063f, USCRIPT_THAANA))
522     ) {
523         log_err("uscript_hasScript(U+063F, ...) is wrong\n");
524     }
525     if(!(
526         !uscript_hasScript(0x0640, USCRIPT_COMMON) &&  /* main Script value */
527         uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
528         uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
529         !uscript_hasScript(0x0640, USCRIPT_THAANA))
530     ) {
531         log_err("uscript_hasScript(U+0640, ...) is wrong\n");
532     }
533     if(!(
534         !uscript_hasScript(0x0650, USCRIPT_INHERITED) &&  /* main Script value */
535         uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
536         uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
537         !uscript_hasScript(0x0650, USCRIPT_THAANA))
538     ) {
539         log_err("uscript_hasScript(U+0650, ...) is wrong\n");
540     }
541     if(!(
542         !uscript_hasScript(0x0660, USCRIPT_COMMON) &&  /* main Script value */
543         uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
544         !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
545         uscript_hasScript(0x0660, USCRIPT_THAANA))
546     ) {
547         log_err("uscript_hasScript(U+0660, ...) is wrong\n");
548     }
549     if(!(
550         !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
551         uscript_hasScript(0xfdf2, USCRIPT_ARABIC) &&  /* main Script value */
552         !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
553         uscript_hasScript(0xfdf2, USCRIPT_THAANA))
554     ) {
555         log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
556     }
557     if(uscript_hasScript(0x0640, 0xaffe)) {
558         /* An unguarded implementation might go into an infinite loop. */
559         log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
560     }
561 }
562 
scriptsContain(UScriptCode scripts[],int32_t length,UScriptCode script)563 static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) {
564     UBool contain=false;
565     int32_t prev=-1, i;
566     for(i=0; i<length; ++i) {
567         int32_t s=scripts[i];
568         if(s<=prev) {
569             log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
570         }
571         if(s==script) { contain=true; }
572     }
573     return contain;
574 }
575 
TestGetScriptExtensions()576 void TestGetScriptExtensions() {
577     UScriptCode scripts[20];
578     int32_t length;
579     UErrorCode errorCode;
580 
581     /* errors and overflows */
582     errorCode=U_PARSE_ERROR;
583     length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
584     if(errorCode!=U_PARSE_ERROR) {
585         log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
586               u_errorName(errorCode));
587     }
588     errorCode=U_ZERO_ERROR;
589     length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
590     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
591         log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
592               u_errorName(errorCode));
593     }
594     errorCode=U_ZERO_ERROR;
595     length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
596     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
597         log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
598               u_errorName(errorCode));
599     }
600     errorCode=U_ZERO_ERROR;
601     length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
602     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
603         log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
604               (int)length, u_errorName(errorCode));
605     }
606     errorCode=U_ZERO_ERROR;
607     length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
608     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
609         log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
610               (int)length, u_errorName(errorCode));
611     }
612     /* U+063F has only a Script code, no Script_Extensions. */
613     errorCode=U_ZERO_ERROR;
614     length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
615     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
616         log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
617               (int)length, u_errorName(errorCode));
618     }
619 
620     /* invalid code points */
621     errorCode=U_ZERO_ERROR;
622     length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
623     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
624         log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
625               (int)length, u_errorName(errorCode));
626     }
627     errorCode=U_ZERO_ERROR;
628     length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
629     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
630         log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
631               (int)length, u_errorName(errorCode));
632     }
633 
634     /* normal usage */
635     errorCode=U_ZERO_ERROR;
636     length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
637     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
638         log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
639               (int)length, u_errorName(errorCode));
640     }
641     errorCode=U_ZERO_ERROR;
642     length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
643     if(U_FAILURE(errorCode) || length<3 ||
644             !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
645             !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
646             !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
647         log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
648               (int)length, u_errorName(errorCode));
649     }
650     errorCode=U_ZERO_ERROR;
651     length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
652     if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
653         log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
654               (int)length, u_errorName(errorCode));
655     }
656     errorCode=U_ZERO_ERROR;
657     length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
658     if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
659         log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
660               (int)length, u_errorName(errorCode));
661     }
662 }
663 
TestScriptMetadataAPI()664 void TestScriptMetadataAPI() {
665     /* API & code coverage. More testing in intltest/ucdtest.cpp. */
666     UErrorCode errorCode=U_ZERO_ERROR;
667     UChar sample[8];
668 
669     if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
670             U_FAILURE(errorCode) ||
671             uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
672             sample[1]!=0) {
673         log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
674     }
675     sample[0]=0xfffe;
676     if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
677             errorCode!=U_BUFFER_OVERFLOW_ERROR ||
678             sample[0]!=0xfffe) {
679         log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
680     }
681     errorCode=U_ZERO_ERROR;
682     if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
683             U_FAILURE(errorCode) ||
684             sample[0]!=0) {
685         log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
686     }
687     sample[0]=0xfffe;
688     if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
689             errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
690             sample[0]!=0xfffe) {
691         log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
692     }
693 
694     if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
695             // Unicode 10 gives up on "aspirational".
696             uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_LIMITED_USE ||
697             uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
698             uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
699             uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
700             uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
701             uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
702         log_err("uscript_getUsage() failed\n");
703     }
704 
705     if(uscript_isRightToLeft(USCRIPT_LATIN) ||
706             uscript_isRightToLeft(USCRIPT_CIRTH) ||
707             !uscript_isRightToLeft(USCRIPT_ARABIC) ||
708             !uscript_isRightToLeft(USCRIPT_HEBREW)) {
709         log_err("uscript_isRightToLeft() failed\n");
710     }
711 
712     if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
713             uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
714             !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
715             !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
716         log_err("uscript_breaksBetweenLetters() failed\n");
717     }
718 
719     if(uscript_isCased(USCRIPT_CIRTH) ||
720             uscript_isCased(USCRIPT_HAN) ||
721             !uscript_isCased(USCRIPT_LATIN) ||
722             !uscript_isCased(USCRIPT_GREEK)) {
723         log_err("uscript_isCased() failed\n");
724     }
725 }
726 
TestBinaryValues()727 void TestBinaryValues() {
728     /*
729      * Unicode 5.1 explicitly defines binary property value aliases.
730      * Verify that they are all recognized.
731      */
732     static const char *const falseValues[]={ "N", "No", "F", "False" };
733     static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
734     int32_t i;
735     for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
736         if(false!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
737             log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=false (Are you missing data?)\n", falseValues[i]);
738         }
739     }
740     for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
741         if(true!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
742             log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=true (Are you missing data?)\n", trueValues[i]);
743         }
744     }
745 }
746