1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1997-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************/
7
8 #include <stdbool.h>
9 #include <string.h>
10 #include "unicode/utypes.h"
11 #include "unicode/uscript.h"
12 #include "unicode/uchar.h"
13 #include "cintltst.h"
14 #include "cucdapi.h"
15 #include "cmemory.h"
16
/**
 * Formats a list of script codes as a space-separated string of their
 * short names, for use in test failure messages.
 *
 * @param scripts array of script codes to format
 * @param length  number of entries in scripts; 0 yields "(no scripts)"
 * @param s       output buffer. No capacity is passed in, so nothing is
 *                bounds-checked here: the caller must provide room for all
 *                names, the separating spaces and the terminating NUL.
 */
static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
    int32_t i;
    if(length == 0) {
        strcpy(s, "(no scripts)");
        return;
    }
    s[0] = 0;
    for(i = 0; i < length; ++i) {
        const char *shortName = uscript_getShortName(scripts[i]);
        if(i > 0) {
            strcat(s, " ");
        }
        /*
         * The codes may come straight from the API under test and can be bogus.
         * Append a placeholder instead of handing a NULL name to strcat().
         */
        strcat(s, shortName != NULL ? shortName : "(null)");
    }
}
31
/**
 * Logs a failure unless two lists of script codes render to the same text.
 *
 * A failing errorCode is reported via log_err() and the lists are then not
 * compared. Otherwise both lists are formatted with scriptsToString() and a
 * difference is reported via log_data_err().
 *
 * @param msg       description of the test case, printed with any failure
 * @param scripts1  expected script codes
 * @param length1   number of codes in scripts1
 * @param scripts2  actual script codes
 * @param length2   number of codes in scripts2
 * @param errorCode error code to check before the lists are compared
 */
static void assertEqualScripts(const char *msg,
                               const UScriptCode scripts1[], int32_t length1,
                               const UScriptCode scripts2[], int32_t length2,
                               UErrorCode errorCode) {
    char expectedText[80];
    char actualText[80];

    if(U_SUCCESS(errorCode)) {
        scriptsToString(scripts1, length1, expectedText);
        scriptsToString(scripts2, length2, actualText);
        if(strcmp(expectedText, actualText) != 0) {
            log_data_err("Failed: %s: expected %s but got %s\n", msg, expectedText, actualText);
        }
    } else {
        log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
    }
}
48
TestUScriptCodeAPI()49 void TestUScriptCodeAPI(){
50 int i =0;
51 int numErrors =0;
52 {
53 const char* testNames[]={
54 /* test locale */
55 "en", "en_US", "sr", "ta" , "te_IN",
56 "hi", "he", "ar",
57 /* test abbr */
58 "Hani", "Hang","Hebr","Hira",
59 "Knda","Kana","Khmr","Lao",
60 "Latn",/*"Latf","Latg",*/
61 "Mlym", "Mong",
62
63 /* test names */
64 "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
65 "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED",
66 /* test lower case names */
67 "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
68 "oriya", "runic", "sinhala", "syriac","tamil",
69 "telugu", "thaana", "thai", "tibetan",
70 /* test the bounds*/
71 "tagb", "arabic",
72 /* test bogus */
73 "asfdasd", "5464", "12235",
74 /* test the last index */
75 "zyyy", "YI",
76 /* test other cases that are ambiguous (script alias vs language tag) */
77 "han", "mro", "nko", "old-hungarian", "new-tai-lue",
78 NULL
79 };
80 UScriptCode expected[] ={
81 /* locales should return */
82 USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
83 USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
84 /* abbr should return */
85 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
86 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
87 USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
88 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
89 /* names should return */
90 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
91 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
92 /* lower case names should return */
93 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
94 USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
95 USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
96 /* bounds */
97 USCRIPT_TAGBANWA, USCRIPT_ARABIC,
98 /* bogus names should return invalid code */
99 USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
100 /* test the last index */
101 USCRIPT_COMMON, USCRIPT_YI,
102 /* test other cases that are ambiguous (script alias vs language tag) */
103 USCRIPT_HAN, USCRIPT_MRO, USCRIPT_NKO, USCRIPT_OLD_HUNGARIAN, USCRIPT_NEW_TAI_LUE,
104 };
105
106 UErrorCode err = U_ZERO_ERROR;
107
108 const int32_t capacity = 10;
109
110 for( ; testNames[i]!=NULL; i++){
111 UScriptCode script[10]={USCRIPT_INVALID_CODE};
112 uscript_getCode(testNames[i],script,capacity, &err);
113 if( script[0] != expected[i]){
114 log_data_err("Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
115 script[0],expected[i],testNames[i]);
116 numErrors++;
117 }
118 }
119 if(numErrors >0 ){
120 log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
121 }
122 }
123
124 {
125 UErrorCode err = U_ZERO_ERROR;
126 int32_t capacity=0;
127 int32_t j;
128 UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
129 UScriptCode script[10]={USCRIPT_INVALID_CODE};
130 int32_t num = uscript_getCode("ja",script,capacity, &err);
131 /* preflight */
132 if(err==U_BUFFER_OVERFLOW_ERROR){
133 err = U_ZERO_ERROR;
134 capacity = 10;
135 num = uscript_getCode("ja",script,capacity, &err);
136 if(num!=UPRV_LENGTHOF(jaCode)){
137 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
138 num, UPRV_LENGTHOF(jaCode));
139 }
140 for(j=0;j<UPRV_LENGTHOF(jaCode);j++) {
141 if(script[j]!=jaCode[j]) {
142 log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
143 script[j], uscript_getName(script[j]),
144 jaCode[j], uscript_getName(jaCode[j]));
145
146 }
147 }
148 }else{
149 log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
150 "U_BUFFER_OVERFLOW_ERROR",
151 u_errorName(err));
152 }
153
154 }
155 {
156 static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
157 static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
158 static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
159 static const UScriptCode HAN[1] = { USCRIPT_HAN };
160 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
161 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
162 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
163 UScriptCode scripts[5];
164 UErrorCode err;
165 int32_t num;
166
167 // Should work regardless of whether we have locale data for the language.
168 err = U_ZERO_ERROR;
169 num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
170 assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err); // Tajik
171 err = U_ZERO_ERROR;
172 num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
173 assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err); // Sherpa
174
175 // Multi-script languages.
176 err = U_ZERO_ERROR;
177 num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
178 assertEqualScripts("ja scripts: Kana Hira Hani",
179 JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
180 err = U_ZERO_ERROR;
181 num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
182 assertEqualScripts("ko scripts: Hang Hani",
183 KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
184 err = U_ZERO_ERROR;
185 num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
186 assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
187 err = U_ZERO_ERROR;
188 num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
189 assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
190 err = U_ZERO_ERROR;
191 num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
192 assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
193
194 // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
195 err = U_ZERO_ERROR;
196 num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
197 assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
198 }
199
200 {
201 UScriptCode testAbbr[]={
202 /* names should return */
203 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
204 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
205 };
206
207 const char* expectedNames[]={
208
209 /* test names */
210 "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
211 "Gothic", "Greek", "Gujarati",
212 NULL
213 };
214 i=0;
215 while(i<UPRV_LENGTHOF(testAbbr)){
216 const char* name = uscript_getName(testAbbr[i]);
217 if(name == NULL) {
218 log_data_err("Couldn't get script name\n");
219 return;
220 }
221 numErrors=0;
222 if(strcmp(expectedNames[i],name)!=0){
223 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
224 numErrors++;
225 }
226 if(numErrors > 0){
227 if(numErrors >0 ){
228 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
229 }
230 }
231 i++;
232 }
233
234 }
235
236 {
237 UScriptCode testAbbr[]={
238 /* abbr should return */
239 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
240 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
241 USCRIPT_LATIN,
242 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
243 };
244
245 const char* expectedAbbr[]={
246 /* test abbr */
247 "Hani", "Hang","Hebr","Hira",
248 "Knda","Kana","Khmr","Laoo",
249 "Latn",
250 "Mlym", "Mong",
251 NULL
252 };
253 i=0;
254 while(i<UPRV_LENGTHOF(testAbbr)){
255 const char* name = uscript_getShortName(testAbbr[i]);
256 numErrors=0;
257 if(strcmp(expectedAbbr[i],name)!=0){
258 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
259 numErrors++;
260 }
261 if(numErrors > 0){
262 if(numErrors >0 ){
263 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
264 }
265 }
266 i++;
267 }
268
269 }
270 /* now test uscript_getScript() API */
271 {
272 uint32_t codepoints[] = {
273 0x0000FF9D, /* USCRIPT_KATAKANA*/
274 0x0000FFBE, /* USCRIPT_HANGUL*/
275 0x0000FFC7, /* USCRIPT_HANGUL*/
276 0x0000FFCF, /* USCRIPT_HANGUL*/
277 0x0000FFD7, /* USCRIPT_HANGUL*/
278 0x0000FFDC, /* USCRIPT_HANGUL*/
279 0x00010300, /* USCRIPT_OLD_ITALIC*/
280 0x00010330, /* USCRIPT_GOTHIC*/
281 0x0001034A, /* USCRIPT_GOTHIC*/
282 0x00010400, /* USCRIPT_DESERET*/
283 0x00010428, /* USCRIPT_DESERET*/
284 0x0001D167, /* USCRIPT_INHERITED*/
285 0x0001D17B, /* USCRIPT_INHERITED*/
286 0x0001D185, /* USCRIPT_INHERITED*/
287 0x0001D1AA, /* USCRIPT_INHERITED*/
288 0x00020000, /* USCRIPT_HAN*/
289 0x00000D02, /* USCRIPT_MALAYALAM*/
290 0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
291 0x00000000, /* USCRIPT_COMMON*/
292 0x0001D169, /* USCRIPT_INHERITED*/
293 0x0001D182, /* USCRIPT_INHERITED*/
294 0x0001D18B, /* USCRIPT_INHERITED*/
295 0x0001D1AD, /* USCRIPT_INHERITED*/
296 };
297
298 UScriptCode expected[] = {
299 USCRIPT_KATAKANA ,
300 USCRIPT_HANGUL ,
301 USCRIPT_HANGUL ,
302 USCRIPT_HANGUL ,
303 USCRIPT_HANGUL ,
304 USCRIPT_HANGUL ,
305 USCRIPT_OLD_ITALIC,
306 USCRIPT_GOTHIC ,
307 USCRIPT_GOTHIC ,
308 USCRIPT_DESERET ,
309 USCRIPT_DESERET ,
310 USCRIPT_INHERITED,
311 USCRIPT_INHERITED,
312 USCRIPT_INHERITED,
313 USCRIPT_INHERITED,
314 USCRIPT_HAN ,
315 USCRIPT_MALAYALAM,
316 USCRIPT_UNKNOWN,
317 USCRIPT_COMMON,
318 USCRIPT_INHERITED ,
319 USCRIPT_INHERITED ,
320 USCRIPT_INHERITED ,
321 USCRIPT_INHERITED ,
322 };
323 UScriptCode code = USCRIPT_INVALID_CODE;
324 UErrorCode status = U_ZERO_ERROR;
325 UBool passed = true;
326
327 for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
328 code = uscript_getScript(codepoints[i],&status);
329 if(U_SUCCESS(status)){
330 if( code != expected[i] ||
331 code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
332 ) {
333 log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
334 passed = false;
335 }
336 }else{
337 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
338 codepoints[i],u_errorName(status));
339 break;
340 }
341 }
342
343 if(passed==false){
344 log_err("uscript_getScript failed.\n");
345 }
346 }
347 {
348 UScriptCode code= USCRIPT_INVALID_CODE;
349 UErrorCode status = U_ZERO_ERROR;
350 code = uscript_getScript(0x001D169,&status);
351 if(code != USCRIPT_INHERITED){
352 log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
353 }
354 }
355 {
356 UScriptCode code= USCRIPT_INVALID_CODE;
357 UErrorCode status = U_ZERO_ERROR;
358 int32_t err = 0;
359
360 for(i = 0; i<=0x10ffff; i++){
361 code = uscript_getScript(i,&status);
362 if(code == USCRIPT_INVALID_CODE){
363 err++;
364 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
365 }
366 }
367 if(err>0){
368 log_err("uscript_getScript failed for %d codepoints\n", err);
369 }
370 }
371 {
372 for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
373 const char* name = uscript_getName((UScriptCode)i);
374 if(name==NULL || strcmp(name,"")==0){
375 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
376 }
377 }
378 }
379
380 {
381 /*
382 * These script codes were originally added to ICU pre-3.6, so that ICU would
383 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
384 * These script codes were added with only short names because we don't
385 * want to invent long names ourselves.
386 * Unicode 5 and later encode some of these scripts and give them long names.
387 * Whenever this happens, the long script names here need to be updated.
388 */
389 static const char* expectedLong[] = {
390 "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
391 "Egyd", "Egyh", "Egyptian_Hieroglyphs",
392 "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
393 "Javanese", "Kayah_Li", "Latf", "Latg",
394 "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
395 "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
396 "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
397 "Zxxx", "Unknown",
398 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
399 "Moon", "Meetei_Mayek",
400 /* new in ICU 4.0 */
401 "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
402 "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
403 "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
404 "Zmth", "Zsym",
405 /* new in ICU 4.4 */
406 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
407 /* new in ICU 4.6 */
408 "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
409 "Loma", "Mende_Kikakui", "Meroitic_Cursive",
410 "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
411 /* new in ICU 4.8 */
412 "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
413 /* new in ICU 49 */
414 "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
415 /* new in ICU 52 */
416 "Caucasian_Albanian", "Mahajani",
417 /* new in ICU 54 */
418 "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
419 // new in ICU 58
420 "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
421 // new in ICU 60
422 "Masaram_Gondi", "Soyombo", "Zanabazar_Square",
423 // new in ICU 61
424 "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin",
425 "Hanifi_Rohingya", "Sogdian", "Old_Sogdian",
426 // new in ICU 64
427 "Elymaic", "Nyiakeng_Puachue_Hmong", "Nandinagari", "Wancho",
428 // new in ICU 66
429 "Chorasmian", "Dives_Akuru", "Khitan_Small_Script", "Yezidi",
430 // new in ICU 70
431 "Cypro_Minoan", "Old_Uyghur", "Tangsa", "Toto", "Vithkuqi",
432 // new in ICU 72
433 "Kawi", "Nag_Mundari",
434 };
435 static const char* expectedShort[] = {
436 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
437 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
438 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
439 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
440 "Zxxx", "Zzzz",
441 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
442 "Moon", "Mtei",
443 /* new in ICU 4.0 */
444 "Armi", "Avst", "Cakm", "Kore",
445 "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
446 "Zmth", "Zsym",
447 /* new in ICU 4.4 */
448 "Bamu", "Lisu", "Nkgb", "Sarb",
449 /* new in ICU 4.6 */
450 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
451 "Narb", "Nbat", "Palm", "Sind", "Wara",
452 /* new in ICU 4.8 */
453 "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
454 /* new in ICU 49 */
455 "Hluw", "Khoj", "Tirh",
456 /* new in ICU 52 */
457 "Aghb", "Mahj",
458 /* new in ICU 54 */
459 "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
460 // new in ICU 58
461 "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
462 // new in ICU 60
463 "Gonm", "Soyo", "Zanb",
464 // new in ICU 61
465 "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo",
466 // new in ICU 64
467 "Elym", "Hmnp", "Nand", "Wcho",
468 // new in ICU 66
469 "Chrs", "Diak", "Kits", "Yezi",
470 // new in ICU 70
471 "Cpmn", "Ougr", "Tnsa", "Toto", "Vith",
472 // new in ICU 72
473 "Kawi", "Nagm",
474 };
475 int32_t j = 0;
476 if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
477 log_err("need to add new script codes in cucdapi.c!\n");
478 return;
479 }
480 for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
481 const char* name = uscript_getName((UScriptCode)i);
482 if(name==NULL || strcmp(name,expectedLong[j])!=0){
483 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
484 }
485 name = uscript_getShortName((UScriptCode)i);
486 if(name==NULL || strcmp(name,expectedShort[j])!=0){
487 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
488 }
489 }
490 for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
491 UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
492 UErrorCode status = U_ZERO_ERROR;
493 int32_t len = 0;
494 len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
495 if(U_FAILURE(status)){
496 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
497 }
498 if(len>1){
499 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
500 }
501 if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
502 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
503 }
504 }
505 }
506
507 {
508 /* test characters which have Script_Extensions */
509 UErrorCode errorCode=U_ZERO_ERROR;
510 if(!(
511 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
512 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
513 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
514 U_FAILURE(errorCode)
515 ) {
516 log_err("uscript_getScript(character with Script_Extensions) failed\n");
517 }
518 }
519 }
520
TestHasScript()521 void TestHasScript() {
522 if(!(
523 !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
524 uscript_hasScript(0x063f, USCRIPT_ARABIC) && /* main Script value */
525 !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
526 !uscript_hasScript(0x063f, USCRIPT_THAANA))
527 ) {
528 log_err("uscript_hasScript(U+063F, ...) is wrong\n");
529 }
530 if(!(
531 !uscript_hasScript(0x0640, USCRIPT_COMMON) && /* main Script value */
532 uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
533 uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
534 !uscript_hasScript(0x0640, USCRIPT_THAANA))
535 ) {
536 log_err("uscript_hasScript(U+0640, ...) is wrong\n");
537 }
538 if(!(
539 !uscript_hasScript(0x0650, USCRIPT_INHERITED) && /* main Script value */
540 uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
541 uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
542 !uscript_hasScript(0x0650, USCRIPT_THAANA))
543 ) {
544 log_err("uscript_hasScript(U+0650, ...) is wrong\n");
545 }
546 if(!(
547 !uscript_hasScript(0x0660, USCRIPT_COMMON) && /* main Script value */
548 uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
549 !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
550 uscript_hasScript(0x0660, USCRIPT_THAANA))
551 ) {
552 log_err("uscript_hasScript(U+0660, ...) is wrong\n");
553 }
554 if(!(
555 !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
556 uscript_hasScript(0xfdf2, USCRIPT_ARABIC) && /* main Script value */
557 !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
558 uscript_hasScript(0xfdf2, USCRIPT_THAANA))
559 ) {
560 log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
561 }
562 if(uscript_hasScript(0x0640, 0xaffe)) {
563 /* An unguarded implementation might go into an infinite loop. */
564 log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
565 }
566 }
567
scriptsContain(UScriptCode scripts[],int32_t length,UScriptCode script)568 static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) {
569 UBool contain=false;
570 int32_t prev=-1, i;
571 for(i=0; i<length; ++i) {
572 int32_t s=scripts[i];
573 if(s<=prev) {
574 log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
575 }
576 if(s==script) { contain=true; }
577 }
578 return contain;
579 }
580
TestGetScriptExtensions()581 void TestGetScriptExtensions() {
582 UScriptCode scripts[20];
583 int32_t length;
584 UErrorCode errorCode;
585
586 /* errors and overflows */
587 errorCode=U_PARSE_ERROR;
588 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
589 if(errorCode!=U_PARSE_ERROR) {
590 log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
591 u_errorName(errorCode));
592 }
593 errorCode=U_ZERO_ERROR;
594 length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
595 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
596 log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
597 u_errorName(errorCode));
598 }
599 errorCode=U_ZERO_ERROR;
600 length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
601 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
602 log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
603 u_errorName(errorCode));
604 }
605 errorCode=U_ZERO_ERROR;
606 length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
607 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
608 log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
609 (int)length, u_errorName(errorCode));
610 }
611 errorCode=U_ZERO_ERROR;
612 length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
613 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
614 log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
615 (int)length, u_errorName(errorCode));
616 }
617 /* U+063F has only a Script code, no Script_Extensions. */
618 errorCode=U_ZERO_ERROR;
619 length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
620 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
621 log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
622 (int)length, u_errorName(errorCode));
623 }
624
625 /* invalid code points */
626 errorCode=U_ZERO_ERROR;
627 length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
628 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
629 log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
630 (int)length, u_errorName(errorCode));
631 }
632 errorCode=U_ZERO_ERROR;
633 length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
634 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
635 log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
636 (int)length, u_errorName(errorCode));
637 }
638
639 /* normal usage */
640 errorCode=U_ZERO_ERROR;
641 length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
642 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
643 log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
644 (int)length, u_errorName(errorCode));
645 }
646 errorCode=U_ZERO_ERROR;
647 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
648 if(U_FAILURE(errorCode) || length<3 ||
649 !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
650 !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
651 !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
652 log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
653 (int)length, u_errorName(errorCode));
654 }
655 errorCode=U_ZERO_ERROR;
656 length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
657 if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
658 log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
659 (int)length, u_errorName(errorCode));
660 }
661 errorCode=U_ZERO_ERROR;
662 length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
663 if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
664 log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
665 (int)length, u_errorName(errorCode));
666 }
667 }
668
TestScriptMetadataAPI()669 void TestScriptMetadataAPI() {
670 /* API & code coverage. More testing in intltest/ucdtest.cpp. */
671 UErrorCode errorCode=U_ZERO_ERROR;
672 UChar sample[8];
673
674 if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
675 U_FAILURE(errorCode) ||
676 uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
677 sample[1]!=0) {
678 log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
679 }
680 sample[0]=0xfffe;
681 if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
682 errorCode!=U_BUFFER_OVERFLOW_ERROR ||
683 sample[0]!=0xfffe) {
684 log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
685 }
686 errorCode=U_ZERO_ERROR;
687 if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
688 U_FAILURE(errorCode) ||
689 sample[0]!=0) {
690 log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
691 }
692 sample[0]=0xfffe;
693 if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
694 errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
695 sample[0]!=0xfffe) {
696 log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
697 }
698
699 if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
700 // Unicode 10 gives up on "aspirational".
701 uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_LIMITED_USE ||
702 uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
703 uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
704 uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
705 uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
706 uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
707 log_err("uscript_getUsage() failed\n");
708 }
709
710 if(uscript_isRightToLeft(USCRIPT_LATIN) ||
711 uscript_isRightToLeft(USCRIPT_CIRTH) ||
712 !uscript_isRightToLeft(USCRIPT_ARABIC) ||
713 !uscript_isRightToLeft(USCRIPT_HEBREW)) {
714 log_err("uscript_isRightToLeft() failed\n");
715 }
716
717 if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
718 uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
719 !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
720 !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
721 log_err("uscript_breaksBetweenLetters() failed\n");
722 }
723
724 if(uscript_isCased(USCRIPT_CIRTH) ||
725 uscript_isCased(USCRIPT_HAN) ||
726 !uscript_isCased(USCRIPT_LATIN) ||
727 !uscript_isCased(USCRIPT_GREEK)) {
728 log_err("uscript_isCased() failed\n");
729 }
730 }
731
TestBinaryValues()732 void TestBinaryValues() {
733 /*
734 * Unicode 5.1 explicitly defines binary property value aliases.
735 * Verify that they are all recognized.
736 */
737 static const char *const falseValues[]={ "N", "No", "F", "False" };
738 static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
739 int32_t i;
740 for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
741 if(false!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
742 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=false (Are you missing data?)\n", falseValues[i]);
743 }
744 }
745 for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
746 if(true!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
747 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=true (Are you missing data?)\n", trueValues[i]);
748 }
749 }
750 }
751