• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * Copyright (c) 1997-2014, International Business Machines
3  * Corporation and others. All Rights Reserved.
4  ********************************************************************/
5 
6 #include <string.h>
7 #include "unicode/utypes.h"
8 #include "unicode/uscript.h"
9 #include "unicode/uchar.h"
10 #include "cintltst.h"
11 #include "cucdapi.h"
12 #include "cmemory.h"
13 
/*
 * Writes a space-separated list of the short names of scripts[0..length-1]
 * into s. When length is exactly 0, s receives the text "(no scripts)".
 *
 * s carries no capacity: the caller must supply a buffer large enough for
 * every short name, the separating spaces and the terminating NUL.
 */
static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
    const char *separator = "";
    int32_t idx = 0;

    if(length == 0) {
        strcpy(s, "(no scripts)");
        return;
    }

    s[0] = 0;
    while(idx < length) {
        /* The separator is empty before the first name, a single space afterwards. */
        strcat(s, separator);
        strcat(s, uscript_getShortName(scripts[idx]));
        separator = " ";
        ++idx;
    }
}
28 
/*
 * Compares two script-code lists by their textual rendering (see
 * scriptsToString) and logs a data error when they differ.
 *
 * msg        label that prefixes any logged message
 * scripts1   expected script codes, length1 entries
 * scripts2   actual script codes, length2 entries
 * errorCode  status of the call that produced scripts2; on failure only the
 *            error name is logged and the lists are not compared
 */
static void assertEqualScripts(const char *msg,
                               const UScriptCode scripts1[], int32_t length1,
                               const UScriptCode scripts2[], int32_t length2,
                               UErrorCode errorCode) {
    char expectedText[80];
    char actualText[80];

    if(U_SUCCESS(errorCode)) {
        scriptsToString(scripts1, length1, expectedText);
        scriptsToString(scripts2, length2, actualText);
        if(strcmp(expectedText, actualText) != 0) {
            log_data_err("Failed: %s: expected %s but got %s\n", msg, expectedText, actualText);
        }
    } else {
        log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
    }
}
45 
TestUScriptCodeAPI()46 void TestUScriptCodeAPI(){
47     int i =0;
48     int numErrors =0;
49     {
50         const char* testNames[]={
51         /* test locale */
52         "en", "en_US", "sr", "ta" , "te_IN",
53         "hi", "he", "ar",
54         /* test abbr */
55         "Hani", "Hang","Hebr","Hira",
56         "Knda","Kana","Khmr","Lao",
57         "Latn",/*"Latf","Latg",*/
58         "Mlym", "Mong",
59 
60         /* test names */
61         "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
62         "GOTHIC",  "GREEK",  "GUJARATI", "COMMON", "INHERITED",
63         /* test lower case names */
64         "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
65         "oriya",     "runic",     "sinhala", "syriac","tamil",
66         "telugu",    "thaana",    "thai",    "tibetan",
67         /* test the bounds*/
68         "tagb", "arabic",
69         /* test bogus */
70         "asfdasd", "5464", "12235",
71         /* test the last index */
72         "zyyy", "YI",
73         NULL
74         };
75         UScriptCode expected[] ={
76             /* locales should return */
77             USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
78             USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
79             /* abbr should return */
80             USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
81             USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
82             USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
83             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
84             /* names should return */
85             USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
86             USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
87             /* lower case names should return */
88             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
89             USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
90             USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
91             /* bounds */
92             USCRIPT_TAGBANWA, USCRIPT_ARABIC,
93             /* bogus names should return invalid code */
94             USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
95             USCRIPT_COMMON, USCRIPT_YI,
96         };
97 
98         UErrorCode err = U_ZERO_ERROR;
99 
100         const int32_t capacity = 10;
101 
102         for( ; testNames[i]!=NULL; i++){
103             UScriptCode script[10]={USCRIPT_INVALID_CODE};
104             uscript_getCode(testNames[i],script,capacity, &err);
105             if( script[0] != expected[i]){
106                    log_data_err("Error getting script code Got: %i  Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
107                        script[0],expected[i],testNames[i]);
108                    numErrors++;
109             }
110         }
111         if(numErrors >0 ){
112             log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
113         }
114     }
115 
116     {
117         UErrorCode err = U_ZERO_ERROR;
118         int32_t capacity=0;
119         int32_t j;
120         UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
121         UScriptCode script[10]={USCRIPT_INVALID_CODE};
122         int32_t num = uscript_getCode("ja",script,capacity, &err);
123         /* preflight */
124         if(err==U_BUFFER_OVERFLOW_ERROR){
125             err = U_ZERO_ERROR;
126             capacity = 10;
127             num = uscript_getCode("ja",script,capacity, &err);
128             if(num!=(sizeof(jaCode)/sizeof(UScriptCode))){
129                 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
130                         num, (sizeof(jaCode)/sizeof(UScriptCode)));
131             }
132             for(j=0;j<sizeof(jaCode)/sizeof(UScriptCode);j++) {
133                 if(script[j]!=jaCode[j]) {
134                     log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
135                             script[j], uscript_getName(script[j]),
136                             jaCode[j], uscript_getName(jaCode[j]));
137 
138                 }
139             }
140         }else{
141             log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
142                 "U_BUFFER_OVERFLOW_ERROR",
143                  u_errorName(err));
144         }
145 
146     }
147     {
148         static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
149         static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
150         static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
151         static const UScriptCode HAN[1] = { USCRIPT_HAN };
152         static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
153         static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
154         static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
155         UScriptCode scripts[5];
156         UErrorCode err;
157         int32_t num;
158 
159         // Should work regardless of whether we have locale data for the language.
160         err = U_ZERO_ERROR;
161         num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
162         assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err);  // Tajik
163         err = U_ZERO_ERROR;
164         num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
165         assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err);  // Sherpa
166 
167         // Multi-script languages.
168         err = U_ZERO_ERROR;
169         num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
170         assertEqualScripts("ja scripts: Kana Hira Hani",
171                            JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
172         err = U_ZERO_ERROR;
173         num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
174         assertEqualScripts("ko scripts: Hang Hani",
175                            KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
176         err = U_ZERO_ERROR;
177         num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
178         assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
179         err = U_ZERO_ERROR;
180         num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
181         assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
182         err = U_ZERO_ERROR;
183         num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
184         assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
185 
186         // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
187         err = U_ZERO_ERROR;
188         num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
189         assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
190     }
191 
192     {
193         UScriptCode testAbbr[]={
194             /* names should return */
195             USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
196             USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
197         };
198 
199         const char* expectedNames[]={
200 
201             /* test names */
202             "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
203             "Gothic",  "Greek",  "Gujarati",
204              NULL
205         };
206         i=0;
207         while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
208             const char* name = uscript_getName(testAbbr[i]);
209              if(name == NULL) {
210                log_data_err("Couldn't get script name\n");
211                return;
212              }
213             numErrors=0;
214             if(strcmp(expectedNames[i],name)!=0){
215                 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
216                 numErrors++;
217             }
218             if(numErrors > 0){
219                 if(numErrors >0 ){
220                     log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
221                 }
222             }
223             i++;
224         }
225 
226     }
227 
228     {
229         UScriptCode testAbbr[]={
230             /* abbr should return */
231             USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
232             USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
233             USCRIPT_LATIN,
234             USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
235         };
236 
237         const char* expectedAbbr[]={
238               /* test abbr */
239             "Hani", "Hang","Hebr","Hira",
240             "Knda","Kana","Khmr","Laoo",
241             "Latn",
242             "Mlym", "Mong",
243              NULL
244         };
245         i=0;
246         while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
247             const char* name = uscript_getShortName(testAbbr[i]);
248             numErrors=0;
249             if(strcmp(expectedAbbr[i],name)!=0){
250                 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
251                 numErrors++;
252             }
253             if(numErrors > 0){
254                 if(numErrors >0 ){
255                     log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
256                 }
257             }
258             i++;
259         }
260 
261     }
262     /* now test uscript_getScript() API */
263     {
264         uint32_t codepoints[] = {
265                 0x0000FF9D, /* USCRIPT_KATAKANA*/
266                 0x0000FFBE, /* USCRIPT_HANGUL*/
267                 0x0000FFC7, /* USCRIPT_HANGUL*/
268                 0x0000FFCF, /* USCRIPT_HANGUL*/
269                 0x0000FFD7, /* USCRIPT_HANGUL*/
270                 0x0000FFDC, /* USCRIPT_HANGUL*/
271                 0x00010300, /* USCRIPT_OLD_ITALIC*/
272                 0x00010330, /* USCRIPT_GOTHIC*/
273                 0x0001034A, /* USCRIPT_GOTHIC*/
274                 0x00010400, /* USCRIPT_DESERET*/
275                 0x00010428, /* USCRIPT_DESERET*/
276                 0x0001D167, /* USCRIPT_INHERITED*/
277                 0x0001D17B, /* USCRIPT_INHERITED*/
278                 0x0001D185, /* USCRIPT_INHERITED*/
279                 0x0001D1AA, /* USCRIPT_INHERITED*/
280                 0x00020000, /* USCRIPT_HAN*/
281                 0x00000D02, /* USCRIPT_MALAYALAM*/
282                 0x00000D00, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
283                 0x00000000, /* USCRIPT_COMMON*/
284                 0x0001D169, /* USCRIPT_INHERITED*/
285                 0x0001D182, /* USCRIPT_INHERITED*/
286                 0x0001D18B, /* USCRIPT_INHERITED*/
287                 0x0001D1AD, /* USCRIPT_INHERITED*/
288         };
289 
290         UScriptCode expected[] = {
291                 USCRIPT_KATAKANA ,
292                 USCRIPT_HANGUL ,
293                 USCRIPT_HANGUL ,
294                 USCRIPT_HANGUL ,
295                 USCRIPT_HANGUL ,
296                 USCRIPT_HANGUL ,
297                 USCRIPT_OLD_ITALIC,
298                 USCRIPT_GOTHIC ,
299                 USCRIPT_GOTHIC ,
300                 USCRIPT_DESERET ,
301                 USCRIPT_DESERET ,
302                 USCRIPT_INHERITED,
303                 USCRIPT_INHERITED,
304                 USCRIPT_INHERITED,
305                 USCRIPT_INHERITED,
306                 USCRIPT_HAN ,
307                 USCRIPT_MALAYALAM,
308                 USCRIPT_UNKNOWN,
309                 USCRIPT_COMMON,
310                 USCRIPT_INHERITED ,
311                 USCRIPT_INHERITED ,
312                 USCRIPT_INHERITED ,
313                 USCRIPT_INHERITED ,
314         };
315         UScriptCode code = USCRIPT_INVALID_CODE;
316         UErrorCode status = U_ZERO_ERROR;
317         UBool passed = TRUE;
318 
319         for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
320             code = uscript_getScript(codepoints[i],&status);
321             if(U_SUCCESS(status)){
322                 if( code != expected[i] ||
323                     code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
324                 ) {
325                     log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
326                     passed = FALSE;
327                 }
328             }else{
329                 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
330                          codepoints[i],u_errorName(status));
331                 break;
332             }
333         }
334 
335         if(passed==FALSE){
336            log_err("uscript_getScript failed.\n");
337         }
338     }
339     {
340         UScriptCode code= USCRIPT_INVALID_CODE;
341         UErrorCode  status = U_ZERO_ERROR;
342         code = uscript_getScript(0x001D169,&status);
343         if(code != USCRIPT_INHERITED){
344             log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
345         }
346     }
347     {
348         UScriptCode code= USCRIPT_INVALID_CODE;
349         UErrorCode  status = U_ZERO_ERROR;
350         int32_t err = 0;
351 
352         for(i = 0; i<=0x10ffff; i++){
353             code =  uscript_getScript(i,&status);
354             if(code == USCRIPT_INVALID_CODE){
355                 err++;
356                 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
357             }
358         }
359         if(err>0){
360             log_err("uscript_getScript failed for %d codepoints\n", err);
361         }
362     }
363     {
364         for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
365             const char* name = uscript_getName((UScriptCode)i);
366             if(name==NULL || strcmp(name,"")==0){
367                 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
368             }
369         }
370     }
371 
372     {
373         /*
374          * These script codes were originally added to ICU pre-3.6, so that ICU would
375          * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
376          * These script codes were added with only short names because we don't
377          * want to invent long names ourselves.
378          * Unicode 5 and later encode some of these scripts and give them long names.
379          * Whenever this happens, the long script names here need to be updated.
380          */
381         static const char* expectedLong[] = {
382             "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
383             "Egyd", "Egyh", "Egyptian_Hieroglyphs",
384             "Geok", "Hans", "Hant", "Pahawh_Hmong", "Hung", "Inds",
385             "Javanese", "Kayah_Li", "Latf", "Latg",
386             "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
387             "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
388             "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
389             "Zxxx", "Unknown",
390             "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
391             "Moon", "Meetei_Mayek",
392             /* new in ICU 4.0 */
393             "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
394             "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
395             "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
396             "Zmth", "Zsym",
397             /* new in ICU 4.4 */
398             "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
399             /* new in ICU 4.6 */
400             "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
401             "Loma", "Mende_Kikakui", "Meroitic_Cursive",
402             "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
403             /* new in ICU 4.8 */
404             "Afak", "Jurc", "Mro", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
405             /* new in ICU 49 */
406             "Hluw", "Khojki", "Tirhuta",
407             /* new in ICU 52 */
408             "Caucasian_Albanian", "Mahajani",
409             /* new in ICU 54 */
410             "Ahom", "Hatr", "Modi", "Mult", "Pau_Cin_Hau", "Siddham"
411         };
412         static const char* expectedShort[] = {
413             "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
414             "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
415             "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
416             "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
417             "Zxxx", "Zzzz",
418             "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
419             "Moon", "Mtei",
420             /* new in ICU 4.0 */
421             "Armi", "Avst", "Cakm", "Kore",
422             "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
423             "Zmth", "Zsym",
424             /* new in ICU 4.4 */
425             "Bamu", "Lisu", "Nkgb", "Sarb",
426             /* new in ICU 4.6 */
427             "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
428             "Narb", "Nbat", "Palm", "Sind", "Wara",
429             /* new in ICU 4.8 */
430             "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
431             /* new in ICU 49 */
432             "Hluw", "Khoj", "Tirh",
433             /* new in ICU 52 */
434             "Aghb", "Mahj",
435             /* new in ICU 54 */
436             "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd"
437         };
438         int32_t j = 0;
439         if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
440             log_err("need to add new script codes in cucdapi.c!\n");
441             return;
442         }
443         for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
444             const char* name = uscript_getName((UScriptCode)i);
445             if(name==NULL || strcmp(name,expectedLong[j])!=0){
446                 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
447             }
448             name = uscript_getShortName((UScriptCode)i);
449             if(name==NULL || strcmp(name,expectedShort[j])!=0){
450                 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
451             }
452         }
453         for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
454             UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
455             UErrorCode status = U_ZERO_ERROR;
456             int32_t len = 0;
457             len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
458             if(U_FAILURE(status)){
459                 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
460             }
461             if(len>1){
462                 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
463             }
464             if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
465                 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
466             }
467         }
468     }
469 
470     {
471         /* test characters which have Script_Extensions */
472         UErrorCode errorCode=U_ZERO_ERROR;
473         if(!(
474                 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
475                 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
476                 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
477             U_FAILURE(errorCode)
478         ) {
479             log_err("uscript_getScript(character with Script_Extensions) failed\n");
480         }
481     }
482 }
483 
TestHasScript()484 void TestHasScript() {
485     if(!(
486         !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
487         uscript_hasScript(0x063f, USCRIPT_ARABIC) &&  /* main Script value */
488         !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
489         !uscript_hasScript(0x063f, USCRIPT_THAANA))
490     ) {
491         log_err("uscript_hasScript(U+063F, ...) is wrong\n");
492     }
493     if(!(
494         !uscript_hasScript(0x0640, USCRIPT_COMMON) &&  /* main Script value */
495         uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
496         uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
497         !uscript_hasScript(0x0640, USCRIPT_THAANA))
498     ) {
499         log_err("uscript_hasScript(U+0640, ...) is wrong\n");
500     }
501     if(!(
502         !uscript_hasScript(0x0650, USCRIPT_INHERITED) &&  /* main Script value */
503         uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
504         uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
505         !uscript_hasScript(0x0650, USCRIPT_THAANA))
506     ) {
507         log_err("uscript_hasScript(U+0650, ...) is wrong\n");
508     }
509     if(!(
510         !uscript_hasScript(0x0660, USCRIPT_COMMON) &&  /* main Script value */
511         uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
512         !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
513         uscript_hasScript(0x0660, USCRIPT_THAANA))
514     ) {
515         log_err("uscript_hasScript(U+0660, ...) is wrong\n");
516     }
517     if(!(
518         !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
519         uscript_hasScript(0xfdf2, USCRIPT_ARABIC) &&  /* main Script value */
520         !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
521         uscript_hasScript(0xfdf2, USCRIPT_THAANA))
522     ) {
523         log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
524     }
525     if(uscript_hasScript(0x0640, 0xaffe)) {
526         /* An unguarded implementation might go into an infinite loop. */
527         log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
528     }
529 }
530 
scriptsContain(int32_t scripts[],int32_t length,int32_t script)531 static UBool scriptsContain(int32_t scripts[], int32_t length, int32_t script) {
532     UBool contain=FALSE;
533     int32_t prev=-1, i;
534     for(i=0; i<length; ++i) {
535         int32_t s=scripts[i];
536         if(s<=prev) {
537             log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
538         }
539         if(s==script) { contain=TRUE; }
540     }
541     return contain;
542 }
543 
TestGetScriptExtensions()544 void TestGetScriptExtensions() {
545     UScriptCode scripts[20];
546     int32_t length;
547     UErrorCode errorCode;
548 
549     /* errors and overflows */
550     errorCode=U_PARSE_ERROR;
551     length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
552     if(errorCode!=U_PARSE_ERROR) {
553         log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
554               u_errorName(errorCode));
555     }
556     errorCode=U_ZERO_ERROR;
557     length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
558     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
559         log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
560               u_errorName(errorCode));
561     }
562     errorCode=U_ZERO_ERROR;
563     length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
564     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
565         log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
566               u_errorName(errorCode));
567     }
568     errorCode=U_ZERO_ERROR;
569     length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
570     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
571         log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
572               (int)length, u_errorName(errorCode));
573     }
574     errorCode=U_ZERO_ERROR;
575     length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
576     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
577         log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
578               (int)length, u_errorName(errorCode));
579     }
580     /* U+063F has only a Script code, no Script_Extensions. */
581     errorCode=U_ZERO_ERROR;
582     length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
583     if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
584         log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
585               (int)length, u_errorName(errorCode));
586     }
587 
588     /* invalid code points */
589     errorCode=U_ZERO_ERROR;
590     length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
591     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
592         log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
593               (int)length, u_errorName(errorCode));
594     }
595     errorCode=U_ZERO_ERROR;
596     length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
597     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
598         log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
599               (int)length, u_errorName(errorCode));
600     }
601 
602     /* normal usage */
603     errorCode=U_ZERO_ERROR;
604     length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
605     if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
606         log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
607               (int)length, u_errorName(errorCode));
608     }
609     errorCode=U_ZERO_ERROR;
610     length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
611     if(U_FAILURE(errorCode) || length<3 ||
612             !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
613             !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
614             !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
615         log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
616               (int)length, u_errorName(errorCode));
617     }
618     errorCode=U_ZERO_ERROR;
619     length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
620     if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
621         log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
622               (int)length, u_errorName(errorCode));
623     }
624     errorCode=U_ZERO_ERROR;
625     length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
626     if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
627         log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
628               (int)length, u_errorName(errorCode));
629     }
630 }
631 
TestScriptMetadataAPI()632 void TestScriptMetadataAPI() {
633     /* API & code coverage. More testing in intltest/ucdtest.cpp. */
634     UErrorCode errorCode=U_ZERO_ERROR;
635     UChar sample[8];
636 
637     if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
638             U_FAILURE(errorCode) ||
639             uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
640             sample[1]!=0) {
641         log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
642     }
643     sample[0]=0xfffe;
644     if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
645             errorCode!=U_BUFFER_OVERFLOW_ERROR ||
646             sample[0]!=0xfffe) {
647         log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
648     }
649     errorCode=U_ZERO_ERROR;
650     if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
651             U_FAILURE(errorCode) ||
652             sample[0]!=0) {
653         log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
654     }
655     sample[0]=0xfffe;
656     if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
657             errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
658             sample[0]!=0xfffe) {
659         log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
660     }
661 
662     if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
663             uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
664             uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
665             uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
666             uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
667             uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
668             uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
669         log_err("uscript_getUsage() failed\n");
670     }
671 
672     if(uscript_isRightToLeft(USCRIPT_LATIN) ||
673             uscript_isRightToLeft(USCRIPT_CIRTH) ||
674             !uscript_isRightToLeft(USCRIPT_ARABIC) ||
675             !uscript_isRightToLeft(USCRIPT_HEBREW)) {
676         log_err("uscript_isRightToLeft() failed\n");
677     }
678 
679     if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
680             uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
681             !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
682             !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
683         log_err("uscript_breaksBetweenLetters() failed\n");
684     }
685 
686     if(uscript_isCased(USCRIPT_CIRTH) ||
687             uscript_isCased(USCRIPT_HAN) ||
688             !uscript_isCased(USCRIPT_LATIN) ||
689             !uscript_isCased(USCRIPT_GREEK)) {
690         log_err("uscript_isCased() failed\n");
691     }
692 }
693 
TestBinaryValues()694 void TestBinaryValues() {
695     /*
696      * Unicode 5.1 explicitly defines binary property value aliases.
697      * Verify that they are all recognized.
698      */
699     static const char *const falseValues[]={ "N", "No", "F", "False" };
700     static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
701     int32_t i;
702     for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
703         if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
704             log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]);
705         }
706     }
707     for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
708         if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
709             log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]);
710         }
711     }
712 }
713