1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1997-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************/
7
8 #include <stdbool.h>
9 #include <string.h>
10 #include "unicode/utypes.h"
11 #include "unicode/uscript.h"
12 #include "unicode/uchar.h"
13 #include "cintltst.h"
14 #include "cucdapi.h"
15 #include "cmemory.h"
16
/**
 * Formats a list of script codes as their short names separated by single spaces.
 *
 * @param scripts array of script codes to format
 * @param length  number of entries in scripts; 0 yields the text "(no scripts)"
 * @param s       output buffer, NUL-terminated on return; its size is not passed in,
 *                so the caller must provide enough room for all names and separators
 */
static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
    char *out = s;      /* write cursor: always points at the terminating NUL */
    int32_t idx = 0;

    if(length == 0) {
        strcpy(s, "(no scripts)");
        return;
    }

    *out = 0;
    while(idx < length) {
        if(idx != 0) {
            /* separator before every name except the first */
            *out++ = ' ';
        }
        strcpy(out, uscript_getShortName(scripts[idx]));
        out += strlen(out);
        ++idx;
    }
}
31
/**
 * Compares two script-code lists by their space-separated short-name renderings
 * and logs a data error if they differ.
 *
 * @param msg       label used in the log output
 * @param scripts1  expected script codes
 * @param length1   number of entries in scripts1
 * @param scripts2  actual script codes
 * @param length2   number of entries in scripts2
 * @param errorCode error code from the call that produced scripts2; if it is a failure,
 *                  it is logged and no comparison is made
 */
static void assertEqualScripts(const char *msg,
                               const UScriptCode scripts1[], int32_t length1,
                               const UScriptCode scripts2[], int32_t length2,
                               UErrorCode errorCode) {
    if(!U_FAILURE(errorCode)) {
        char expectedText[80];
        char actualText[80];

        scriptsToString(scripts1, length1, expectedText);
        scriptsToString(scripts2, length2, actualText);
        if(strcmp(expectedText, actualText) != 0) {
            log_data_err("Failed: %s: expected %s but got %s\n", msg, expectedText, actualText);
        }
    } else {
        log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
    }
}
48
TestUScriptCodeAPI(void)49 void TestUScriptCodeAPI(void){
50 int i =0;
51 int numErrors =0;
52 {
53 const char* testNames[]={
54 /* test locale */
55 "en", "en_US", "sr", "ta" , "te_IN",
56 "hi", "he", "ar",
57 /* test abbr */
58 "Hani", "Hang","Hebr","Hira",
59 "Knda","Kana","Khmr","Lao",
60 "Latn",/*"Latf","Latg",*/
61 "Mlym", "Mong",
62
63 /* test names */
64 "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
65 "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED",
66 /* test lower case names */
67 "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
68 "oriya", "runic", "sinhala", "syriac","tamil",
69 "telugu", "thaana", "thai", "tibetan",
70 /* test the bounds*/
71 "tagb", "arabic",
72 /* test bogus */
73 "asfdasd", "5464", "12235",
74 /* test the last index */
75 "zyyy", "YI",
76 /* test other cases that are ambiguous (script alias vs language tag) */
77 "han", "mro", "nko", "old-hungarian", "new-tai-lue",
78 NULL
79 };
80 UScriptCode expected[] ={
81 /* locales should return */
82 USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
83 USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
84 /* abbr should return */
85 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
86 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
87 USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
88 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
89 /* names should return */
90 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
91 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
92 /* lower case names should return */
93 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
94 USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
95 USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
96 /* bounds */
97 USCRIPT_TAGBANWA, USCRIPT_ARABIC,
98 /* bogus names should return invalid code */
99 USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
100 /* test the last index */
101 USCRIPT_COMMON, USCRIPT_YI,
102 /* test other cases that are ambiguous (script alias vs language tag) */
103 USCRIPT_HAN, USCRIPT_MRO, USCRIPT_NKO, USCRIPT_OLD_HUNGARIAN, USCRIPT_NEW_TAI_LUE,
104 };
105
106 UErrorCode err = U_ZERO_ERROR;
107
108 const int32_t capacity = 10;
109
110 for( ; testNames[i]!=NULL; i++){
111 UScriptCode script[10]={USCRIPT_INVALID_CODE};
112 uscript_getCode(testNames[i],script,capacity, &err);
113 if( script[0] != expected[i]){
114 log_data_err("Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
115 script[0],expected[i],testNames[i]);
116 numErrors++;
117 }
118 }
119 if(numErrors >0 ){
120 log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
121 }
122 }
123
124 {
125 UErrorCode err = U_ZERO_ERROR;
126 int32_t capacity=0;
127 int32_t j;
128 UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
129 UScriptCode script[10]={USCRIPT_INVALID_CODE};
130 int32_t num = uscript_getCode("ja",script,capacity, &err);
131 /* preflight */
132 if(err==U_BUFFER_OVERFLOW_ERROR){
133 err = U_ZERO_ERROR;
134 capacity = 10;
135 num = uscript_getCode("ja",script,capacity, &err);
136 if(num!=UPRV_LENGTHOF(jaCode)){
137 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
138 num, UPRV_LENGTHOF(jaCode));
139 }
140 for(j=0;j<UPRV_LENGTHOF(jaCode);j++) {
141 if(script[j]!=jaCode[j]) {
142 log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
143 script[j], uscript_getName(script[j]),
144 jaCode[j], uscript_getName(jaCode[j]));
145
146 }
147 }
148 }else{
149 log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
150 "U_BUFFER_OVERFLOW_ERROR",
151 u_errorName(err));
152 }
153
154 }
155 {
156 static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
157 static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
158 static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
159 static const UScriptCode HAN[1] = { USCRIPT_HAN };
160 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
161 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
162 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
163 UScriptCode scripts[5];
164 UErrorCode err;
165 int32_t num;
166
167 // Should work regardless of whether we have locale data for the language.
168 err = U_ZERO_ERROR;
169 num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
170 assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err); // Tajik
171 err = U_ZERO_ERROR;
172 num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
173 assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err); // Sherpa
174
175 // Multi-script languages.
176 err = U_ZERO_ERROR;
177 num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
178 assertEqualScripts("ja scripts: Kana Hira Hani",
179 JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
180 err = U_ZERO_ERROR;
181 num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
182 assertEqualScripts("ko scripts: Hang Hani",
183 KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
184 err = U_ZERO_ERROR;
185 num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
186 assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
187 err = U_ZERO_ERROR;
188 num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
189 assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
190 err = U_ZERO_ERROR;
191 num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
192 assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
193
194 // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
195 err = U_ZERO_ERROR;
196 num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
197 assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
198 }
199
200 {
201 UScriptCode testAbbr[]={
202 /* names should return */
203 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
204 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
205 };
206
207 const char* expectedNames[]={
208
209 /* test names */
210 "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
211 "Gothic", "Greek", "Gujarati",
212 NULL
213 };
214 i=0;
215 while(i<UPRV_LENGTHOF(testAbbr)){
216 const char* name = uscript_getName(testAbbr[i]);
217 if(name == NULL) {
218 log_data_err("Couldn't get script name\n");
219 return;
220 }
221 numErrors=0;
222 if(strcmp(expectedNames[i],name)!=0){
223 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
224 numErrors++;
225 }
226 if(numErrors > 0){
227 if(numErrors >0 ){
228 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
229 }
230 }
231 i++;
232 }
233
234 }
235
236 {
237 UScriptCode testAbbr[]={
238 /* abbr should return */
239 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
240 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
241 USCRIPT_LATIN,
242 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
243 };
244
245 const char* expectedAbbr[]={
246 /* test abbr */
247 "Hani", "Hang","Hebr","Hira",
248 "Knda","Kana","Khmr","Laoo",
249 "Latn",
250 "Mlym", "Mong",
251 NULL
252 };
253 i=0;
254 while(i<UPRV_LENGTHOF(testAbbr)){
255 const char* name = uscript_getShortName(testAbbr[i]);
256 numErrors=0;
257 if(strcmp(expectedAbbr[i],name)!=0){
258 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
259 numErrors++;
260 }
261 if(numErrors > 0){
262 if(numErrors >0 ){
263 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
264 }
265 }
266 i++;
267 }
268
269 }
270 /* now test uscript_getScript() API */
271 {
272 uint32_t codepoints[] = {
273 0x0000FF9D, /* USCRIPT_KATAKANA*/
274 0x0000FFBE, /* USCRIPT_HANGUL*/
275 0x0000FFC7, /* USCRIPT_HANGUL*/
276 0x0000FFCF, /* USCRIPT_HANGUL*/
277 0x0000FFD7, /* USCRIPT_HANGUL*/
278 0x0000FFDC, /* USCRIPT_HANGUL*/
279 0x00010300, /* USCRIPT_OLD_ITALIC*/
280 0x00010330, /* USCRIPT_GOTHIC*/
281 0x0001034A, /* USCRIPT_GOTHIC*/
282 0x00010400, /* USCRIPT_DESERET*/
283 0x00010428, /* USCRIPT_DESERET*/
284 0x0001D167, /* USCRIPT_INHERITED*/
285 0x0001D17B, /* USCRIPT_INHERITED*/
286 0x0001D185, /* USCRIPT_INHERITED*/
287 0x0001D1AA, /* USCRIPT_INHERITED*/
288 0x00020000, /* USCRIPT_HAN*/
289 0x00000D02, /* USCRIPT_MALAYALAM*/
290 0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
291 0x00000000, /* USCRIPT_COMMON*/
292 0x0001D169, /* USCRIPT_INHERITED*/
293 0x0001D182, /* USCRIPT_INHERITED*/
294 0x0001D18B, /* USCRIPT_INHERITED*/
295 0x0001D1AD, /* USCRIPT_INHERITED*/
296 };
297
298 UScriptCode expected[] = {
299 USCRIPT_KATAKANA ,
300 USCRIPT_HANGUL ,
301 USCRIPT_HANGUL ,
302 USCRIPT_HANGUL ,
303 USCRIPT_HANGUL ,
304 USCRIPT_HANGUL ,
305 USCRIPT_OLD_ITALIC,
306 USCRIPT_GOTHIC ,
307 USCRIPT_GOTHIC ,
308 USCRIPT_DESERET ,
309 USCRIPT_DESERET ,
310 USCRIPT_INHERITED,
311 USCRIPT_INHERITED,
312 USCRIPT_INHERITED,
313 USCRIPT_INHERITED,
314 USCRIPT_HAN ,
315 USCRIPT_MALAYALAM,
316 USCRIPT_UNKNOWN,
317 USCRIPT_COMMON,
318 USCRIPT_INHERITED ,
319 USCRIPT_INHERITED ,
320 USCRIPT_INHERITED ,
321 USCRIPT_INHERITED ,
322 };
323 UScriptCode code = USCRIPT_INVALID_CODE;
324 UErrorCode status = U_ZERO_ERROR;
325 UBool passed = true;
326
327 for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
328 code = uscript_getScript(codepoints[i],&status);
329 if(U_SUCCESS(status)){
330 if( code != expected[i] ||
331 code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
332 ) {
333 log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
334 passed = false;
335 }
336 }else{
337 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
338 codepoints[i],u_errorName(status));
339 break;
340 }
341 }
342
343 if(passed==false){
344 log_err("uscript_getScript failed.\n");
345 }
346 }
347 {
348 UScriptCode code= USCRIPT_INVALID_CODE;
349 UErrorCode status = U_ZERO_ERROR;
350 code = uscript_getScript(0x001D169,&status);
351 if(code != USCRIPT_INHERITED){
352 log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
353 }
354 }
355 {
356 UScriptCode code= USCRIPT_INVALID_CODE;
357 UErrorCode status = U_ZERO_ERROR;
358 int32_t err = 0;
359
360 for(i = 0; i<=0x10ffff; i++){
361 code = uscript_getScript(i,&status);
362 if(code == USCRIPT_INVALID_CODE){
363 err++;
364 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
365 }
366 }
367 if(err>0){
368 log_err("uscript_getScript failed for %d codepoints\n", err);
369 }
370 }
371 {
372 for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
373 const char* name = uscript_getName((UScriptCode)i);
374 if(name==NULL || strcmp(name,"")==0){
375 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
376 }
377 }
378 }
379
380 {
381 /*
382 * These script codes were originally added to ICU pre-3.6, so that ICU would
383 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
384 * These script codes were added with only short names because we don't
385 * want to invent long names ourselves.
386 * Unicode 5 and later encode some of these scripts and give them long names.
387 * Whenever this happens, the long script names here need to be updated.
388 */
389 static const char* expectedLong[] = {
390 "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
391 "Egyd", "Egyh", "Egyptian_Hieroglyphs",
392 "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
393 "Javanese", "Kayah_Li", "Latf", "Latg",
394 "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
395 "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
396 "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
397 "Zxxx", "Unknown",
398 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
399 "Moon", "Meetei_Mayek",
400 /* new in ICU 4.0 */
401 "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
402 "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
403 "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
404 "Zmth", "Zsym",
405 /* new in ICU 4.4 */
406 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
407 /* new in ICU 4.6 */
408 "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
409 "Loma", "Mende_Kikakui", "Meroitic_Cursive",
410 "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
411 /* new in ICU 4.8 */
412 "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
413 /* new in ICU 49 */
414 "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
415 /* new in ICU 52 */
416 "Caucasian_Albanian", "Mahajani",
417 /* new in ICU 54 */
418 "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
419 // new in ICU 58
420 "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
421 // new in ICU 60
422 "Masaram_Gondi", "Soyombo", "Zanabazar_Square",
423 // new in ICU 61
424 "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin",
425 "Hanifi_Rohingya", "Sogdian", "Old_Sogdian",
426 // new in ICU 64
427 "Elymaic", "Nyiakeng_Puachue_Hmong", "Nandinagari", "Wancho",
428 // new in ICU 66
429 "Chorasmian", "Dives_Akuru", "Khitan_Small_Script", "Yezidi",
430 // new in ICU 70
431 "Cypro_Minoan", "Old_Uyghur", "Tangsa", "Toto", "Vithkuqi",
432 // new in ICU 72
433 "Kawi", "Nag_Mundari",
434 // new in ICU 75
435 "Aran",
436 };
437 static const char* expectedShort[] = {
438 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
439 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
440 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
441 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
442 "Zxxx", "Zzzz",
443 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
444 "Moon", "Mtei",
445 /* new in ICU 4.0 */
446 "Armi", "Avst", "Cakm", "Kore",
447 "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
448 "Zmth", "Zsym",
449 /* new in ICU 4.4 */
450 "Bamu", "Lisu", "Nkgb", "Sarb",
451 /* new in ICU 4.6 */
452 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
453 "Narb", "Nbat", "Palm", "Sind", "Wara",
454 /* new in ICU 4.8 */
455 "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
456 /* new in ICU 49 */
457 "Hluw", "Khoj", "Tirh",
458 /* new in ICU 52 */
459 "Aghb", "Mahj",
460 /* new in ICU 54 */
461 "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
462 // new in ICU 58
463 "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
464 // new in ICU 60
465 "Gonm", "Soyo", "Zanb",
466 // new in ICU 61
467 "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo",
468 // new in ICU 64
469 "Elym", "Hmnp", "Nand", "Wcho",
470 // new in ICU 66
471 "Chrs", "Diak", "Kits", "Yezi",
472 // new in ICU 70
473 "Cpmn", "Ougr", "Tnsa", "Toto", "Vith",
474 // new in ICU 72
475 "Kawi", "Nagm",
476 // new in ICU 75
477 "Aran",
478 };
479 int32_t j = 0;
480 if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
481 log_err("need to add new script codes in cucdapi.c!\n");
482 return;
483 }
484 for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
485 const char* name = uscript_getName((UScriptCode)i);
486 if(name==NULL || strcmp(name,expectedLong[j])!=0){
487 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
488 }
489 name = uscript_getShortName((UScriptCode)i);
490 if(name==NULL || strcmp(name,expectedShort[j])!=0){
491 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
492 }
493 }
494 for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
495 UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
496 UErrorCode status = U_ZERO_ERROR;
497 int32_t len = 0;
498 len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
499 if(U_FAILURE(status)){
500 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
501 }
502 if(len>1){
503 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
504 }
505 if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
506 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
507 }
508 }
509 }
510
511 {
512 /* test characters which have Script_Extensions */
513 UErrorCode errorCode=U_ZERO_ERROR;
514 if(!(
515 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
516 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
517 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
518 U_FAILURE(errorCode)
519 ) {
520 log_err("uscript_getScript(character with Script_Extensions) failed\n");
521 }
522 }
523 }
524
TestHasScript(void)525 void TestHasScript(void) {
526 if(!(
527 !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
528 uscript_hasScript(0x063f, USCRIPT_ARABIC) && /* main Script value */
529 !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
530 !uscript_hasScript(0x063f, USCRIPT_THAANA))
531 ) {
532 log_err("uscript_hasScript(U+063F, ...) is wrong\n");
533 }
534 if(!(
535 !uscript_hasScript(0x0640, USCRIPT_COMMON) && /* main Script value */
536 uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
537 uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
538 !uscript_hasScript(0x0640, USCRIPT_THAANA))
539 ) {
540 log_err("uscript_hasScript(U+0640, ...) is wrong\n");
541 }
542 if(!(
543 !uscript_hasScript(0x0650, USCRIPT_INHERITED) && /* main Script value */
544 uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
545 uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
546 !uscript_hasScript(0x0650, USCRIPT_THAANA))
547 ) {
548 log_err("uscript_hasScript(U+0650, ...) is wrong\n");
549 }
550 if(!(
551 !uscript_hasScript(0x0660, USCRIPT_COMMON) && /* main Script value */
552 uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
553 !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
554 uscript_hasScript(0x0660, USCRIPT_THAANA))
555 ) {
556 log_err("uscript_hasScript(U+0660, ...) is wrong\n");
557 }
558 if(!(
559 !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
560 uscript_hasScript(0xfdf2, USCRIPT_ARABIC) && /* main Script value */
561 !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
562 uscript_hasScript(0xfdf2, USCRIPT_THAANA))
563 ) {
564 log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
565 }
566 if(uscript_hasScript(0x0640, 0xaffe)) {
567 /* An unguarded implementation might go into an infinite loop. */
568 log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
569 }
570 }
571
scriptsContain(UScriptCode scripts[],int32_t length,UScriptCode script)572 static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) {
573 UBool contain=false;
574 int32_t prev=-1, i;
575 for(i=0; i<length; ++i) {
576 int32_t s=scripts[i];
577 if(s<=prev) {
578 log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
579 }
580 if(s==script) { contain=true; }
581 }
582 return contain;
583 }
584
TestGetScriptExtensions(void)585 void TestGetScriptExtensions(void) {
586 UScriptCode scripts[20];
587 int32_t length;
588 UErrorCode errorCode;
589
590 /* errors and overflows */
591 errorCode=U_PARSE_ERROR;
592 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
593 if(errorCode!=U_PARSE_ERROR) {
594 log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
595 u_errorName(errorCode));
596 }
597 errorCode=U_ZERO_ERROR;
598 length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
599 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
600 log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
601 u_errorName(errorCode));
602 }
603 errorCode=U_ZERO_ERROR;
604 length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
605 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
606 log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
607 u_errorName(errorCode));
608 }
609 errorCode=U_ZERO_ERROR;
610 length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
611 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
612 log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
613 (int)length, u_errorName(errorCode));
614 }
615 errorCode=U_ZERO_ERROR;
616 length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
617 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
618 log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
619 (int)length, u_errorName(errorCode));
620 }
621 /* U+063F has only a Script code, no Script_Extensions. */
622 errorCode=U_ZERO_ERROR;
623 length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
624 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
625 log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
626 (int)length, u_errorName(errorCode));
627 }
628
629 /* invalid code points */
630 errorCode=U_ZERO_ERROR;
631 length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
632 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
633 log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
634 (int)length, u_errorName(errorCode));
635 }
636 errorCode=U_ZERO_ERROR;
637 length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
638 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
639 log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
640 (int)length, u_errorName(errorCode));
641 }
642
643 /* normal usage */
644 errorCode=U_ZERO_ERROR;
645 length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
646 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
647 log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
648 (int)length, u_errorName(errorCode));
649 }
650 errorCode=U_ZERO_ERROR;
651 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
652 if(U_FAILURE(errorCode) || length<3 ||
653 !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
654 !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
655 !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
656 log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
657 (int)length, u_errorName(errorCode));
658 }
659 errorCode=U_ZERO_ERROR;
660 length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
661 if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
662 log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
663 (int)length, u_errorName(errorCode));
664 }
665 errorCode=U_ZERO_ERROR;
666 length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
667 if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
668 log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
669 (int)length, u_errorName(errorCode));
670 }
671 }
672
TestScriptMetadataAPI(void)673 void TestScriptMetadataAPI(void) {
674 /* API & code coverage. More testing in intltest/ucdtest.cpp. */
675 UErrorCode errorCode=U_ZERO_ERROR;
676 UChar sample[8];
677
678 if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
679 U_FAILURE(errorCode) ||
680 uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
681 sample[1]!=0) {
682 log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
683 }
684 sample[0]=0xfffe;
685 if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
686 errorCode!=U_BUFFER_OVERFLOW_ERROR ||
687 sample[0]!=0xfffe) {
688 log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
689 }
690 errorCode=U_ZERO_ERROR;
691 if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
692 U_FAILURE(errorCode) ||
693 sample[0]!=0) {
694 log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
695 }
696 sample[0]=0xfffe;
697 if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
698 errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
699 sample[0]!=0xfffe) {
700 log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
701 }
702
703 if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
704 // Unicode 10 gives up on "aspirational".
705 uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_LIMITED_USE ||
706 uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
707 uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
708 uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
709 uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
710 uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
711 log_err("uscript_getUsage() failed\n");
712 }
713
714 if(uscript_isRightToLeft(USCRIPT_LATIN) ||
715 uscript_isRightToLeft(USCRIPT_CIRTH) ||
716 !uscript_isRightToLeft(USCRIPT_ARABIC) ||
717 !uscript_isRightToLeft(USCRIPT_HEBREW)) {
718 log_err("uscript_isRightToLeft() failed\n");
719 }
720
721 if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
722 uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
723 !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
724 !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
725 log_err("uscript_breaksBetweenLetters() failed\n");
726 }
727
728 if(uscript_isCased(USCRIPT_CIRTH) ||
729 uscript_isCased(USCRIPT_HAN) ||
730 !uscript_isCased(USCRIPT_LATIN) ||
731 !uscript_isCased(USCRIPT_GREEK)) {
732 log_err("uscript_isCased() failed\n");
733 }
734 }
735
TestBinaryValues(void)736 void TestBinaryValues(void) {
737 /*
738 * Unicode 5.1 explicitly defines binary property value aliases.
739 * Verify that they are all recognized.
740 */
741 static const char *const falseValues[]={ "N", "No", "F", "False" };
742 static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
743 int32_t i;
744 for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
745 if(false!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
746 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=false (Are you missing data?)\n", falseValues[i]);
747 }
748 }
749 for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
750 if(true!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
751 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=true (Are you missing data?)\n", trueValues[i]);
752 }
753 }
754 }
755