1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1997-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************/
7
8 #include <stdbool.h>
9 #include <string.h>
10 #include "unicode/utypes.h"
11 #include "unicode/uscript.h"
12 #include "unicode/uchar.h"
13 #include "cintltst.h"
14 #include "cucdapi.h"
15 #include "cmemory.h"
16
/**
 * Formats a list of script codes as a space-separated string of their
 * short names (as returned by uscript_getShortName()), e.g. "Kana Hira Hani".
 *
 * @param scripts array of script codes to format
 * @param length  number of entries in scripts; 0 yields "(no scripts)"
 * @param s       caller-provided output buffer. No capacity is passed in and
 *                no bounds checking is done here, so the caller must make it
 *                large enough for all names, the separating spaces and the
 *                terminating NUL.
 */
static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
    int32_t i;
    if(length == 0) {
        strcpy(s, "(no scripts)");
        return;
    }
    s[0] = 0;
    for(i = 0; i < length; ++i) {
        const char *shortName = uscript_getShortName(scripts[i]);
        if(i > 0) {
            strcat(s, " ");
        }
        /*
         * uscript_getShortName() can return NULL (e.g. for an invalid script code);
         * handing NULL to strcat() would be undefined behavior.
         * The placeholder is 4 characters long, like a regular short name,
         * so it does not change how much buffer space one entry can need.
         */
        strcat(s, shortName != NULL ? shortName : "????");
    }
}
31
/**
 * Compares two lists of script codes via their short-name string forms and
 * reports a mismatch.
 *
 * @param msg       description of the check, used as a prefix in log output
 * @param scripts1  expected script codes
 * @param length1   number of entries in scripts1
 * @param scripts2  actual script codes
 * @param length2   number of entries in scripts2
 * @param errorCode error code from the call that produced scripts2; if it
 *                  indicates a failure, that is logged and no comparison is made
 */
static void assertEqualScripts(const char *msg,
                               const UScriptCode scripts1[], int32_t length1,
                               const UScriptCode scripts2[], int32_t length2,
                               UErrorCode errorCode) {
    char expectedText[80];
    char actualText[80];

    if(U_FAILURE(errorCode)) {
        log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
    } else {
        /* Compare the readable forms so that a failure message can show both lists. */
        scriptsToString(scripts1, length1, expectedText);
        scriptsToString(scripts2, length2, actualText);
        if(strcmp(expectedText, actualText) != 0) {
            log_data_err("Failed: %s: expected %s but got %s\n", msg, expectedText, actualText);
        }
    }
}
48
TestUScriptCodeAPI()49 void TestUScriptCodeAPI(){
50 int i =0;
51 int numErrors =0;
52 {
53 const char* testNames[]={
54 /* test locale */
55 "en", "en_US", "sr", "ta" , "te_IN",
56 "hi", "he", "ar",
57 /* test abbr */
58 "Hani", "Hang","Hebr","Hira",
59 "Knda","Kana","Khmr","Lao",
60 "Latn",/*"Latf","Latg",*/
61 "Mlym", "Mong",
62
63 /* test names */
64 "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
65 "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED",
66 /* test lower case names */
67 "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
68 "oriya", "runic", "sinhala", "syriac","tamil",
69 "telugu", "thaana", "thai", "tibetan",
70 /* test the bounds*/
71 "tagb", "arabic",
72 /* test bogus */
73 "asfdasd", "5464", "12235",
74 /* test the last index */
75 "zyyy", "YI",
76 NULL
77 };
78 UScriptCode expected[] ={
79 /* locales should return */
80 USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
81 USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
82 /* abbr should return */
83 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
84 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
85 USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
86 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
87 /* names should return */
88 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
89 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
90 /* lower case names should return */
91 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
92 USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
93 USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
94 /* bounds */
95 USCRIPT_TAGBANWA, USCRIPT_ARABIC,
96 /* bogus names should return invalid code */
97 USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
98 USCRIPT_COMMON, USCRIPT_YI,
99 };
100
101 UErrorCode err = U_ZERO_ERROR;
102
103 const int32_t capacity = 10;
104
105 for( ; testNames[i]!=NULL; i++){
106 UScriptCode script[10]={USCRIPT_INVALID_CODE};
107 uscript_getCode(testNames[i],script,capacity, &err);
108 if( script[0] != expected[i]){
109 log_data_err("Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
110 script[0],expected[i],testNames[i]);
111 numErrors++;
112 }
113 }
114 if(numErrors >0 ){
115 log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
116 }
117 }
118
119 {
120 UErrorCode err = U_ZERO_ERROR;
121 int32_t capacity=0;
122 int32_t j;
123 UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
124 UScriptCode script[10]={USCRIPT_INVALID_CODE};
125 int32_t num = uscript_getCode("ja",script,capacity, &err);
126 /* preflight */
127 if(err==U_BUFFER_OVERFLOW_ERROR){
128 err = U_ZERO_ERROR;
129 capacity = 10;
130 num = uscript_getCode("ja",script,capacity, &err);
131 if(num!=UPRV_LENGTHOF(jaCode)){
132 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
133 num, UPRV_LENGTHOF(jaCode));
134 }
135 for(j=0;j<UPRV_LENGTHOF(jaCode);j++) {
136 if(script[j]!=jaCode[j]) {
137 log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
138 script[j], uscript_getName(script[j]),
139 jaCode[j], uscript_getName(jaCode[j]));
140
141 }
142 }
143 }else{
144 log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
145 "U_BUFFER_OVERFLOW_ERROR",
146 u_errorName(err));
147 }
148
149 }
150 {
151 static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
152 static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
153 static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
154 static const UScriptCode HAN[1] = { USCRIPT_HAN };
155 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
156 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
157 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
158 UScriptCode scripts[5];
159 UErrorCode err;
160 int32_t num;
161
162 // Should work regardless of whether we have locale data for the language.
163 err = U_ZERO_ERROR;
164 num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
165 assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err); // Tajik
166 err = U_ZERO_ERROR;
167 num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
168 assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err); // Sherpa
169
170 // Multi-script languages.
171 err = U_ZERO_ERROR;
172 num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
173 assertEqualScripts("ja scripts: Kana Hira Hani",
174 JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
175 err = U_ZERO_ERROR;
176 num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
177 assertEqualScripts("ko scripts: Hang Hani",
178 KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
179 err = U_ZERO_ERROR;
180 num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
181 assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
182 err = U_ZERO_ERROR;
183 num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
184 assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
185 err = U_ZERO_ERROR;
186 num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
187 assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
188
189 // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
190 err = U_ZERO_ERROR;
191 num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
192 assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
193 }
194
195 {
196 UScriptCode testAbbr[]={
197 /* names should return */
198 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
199 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
200 };
201
202 const char* expectedNames[]={
203
204 /* test names */
205 "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
206 "Gothic", "Greek", "Gujarati",
207 NULL
208 };
209 i=0;
210 while(i<UPRV_LENGTHOF(testAbbr)){
211 const char* name = uscript_getName(testAbbr[i]);
212 if(name == NULL) {
213 log_data_err("Couldn't get script name\n");
214 return;
215 }
216 numErrors=0;
217 if(strcmp(expectedNames[i],name)!=0){
218 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
219 numErrors++;
220 }
221 if(numErrors > 0){
222 if(numErrors >0 ){
223 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
224 }
225 }
226 i++;
227 }
228
229 }
230
231 {
232 UScriptCode testAbbr[]={
233 /* abbr should return */
234 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
235 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
236 USCRIPT_LATIN,
237 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
238 };
239
240 const char* expectedAbbr[]={
241 /* test abbr */
242 "Hani", "Hang","Hebr","Hira",
243 "Knda","Kana","Khmr","Laoo",
244 "Latn",
245 "Mlym", "Mong",
246 NULL
247 };
248 i=0;
249 while(i<UPRV_LENGTHOF(testAbbr)){
250 const char* name = uscript_getShortName(testAbbr[i]);
251 numErrors=0;
252 if(strcmp(expectedAbbr[i],name)!=0){
253 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
254 numErrors++;
255 }
256 if(numErrors > 0){
257 if(numErrors >0 ){
258 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
259 }
260 }
261 i++;
262 }
263
264 }
265 /* now test uscript_getScript() API */
266 {
267 uint32_t codepoints[] = {
268 0x0000FF9D, /* USCRIPT_KATAKANA*/
269 0x0000FFBE, /* USCRIPT_HANGUL*/
270 0x0000FFC7, /* USCRIPT_HANGUL*/
271 0x0000FFCF, /* USCRIPT_HANGUL*/
272 0x0000FFD7, /* USCRIPT_HANGUL*/
273 0x0000FFDC, /* USCRIPT_HANGUL*/
274 0x00010300, /* USCRIPT_OLD_ITALIC*/
275 0x00010330, /* USCRIPT_GOTHIC*/
276 0x0001034A, /* USCRIPT_GOTHIC*/
277 0x00010400, /* USCRIPT_DESERET*/
278 0x00010428, /* USCRIPT_DESERET*/
279 0x0001D167, /* USCRIPT_INHERITED*/
280 0x0001D17B, /* USCRIPT_INHERITED*/
281 0x0001D185, /* USCRIPT_INHERITED*/
282 0x0001D1AA, /* USCRIPT_INHERITED*/
283 0x00020000, /* USCRIPT_HAN*/
284 0x00000D02, /* USCRIPT_MALAYALAM*/
285 0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
286 0x00000000, /* USCRIPT_COMMON*/
287 0x0001D169, /* USCRIPT_INHERITED*/
288 0x0001D182, /* USCRIPT_INHERITED*/
289 0x0001D18B, /* USCRIPT_INHERITED*/
290 0x0001D1AD, /* USCRIPT_INHERITED*/
291 };
292
293 UScriptCode expected[] = {
294 USCRIPT_KATAKANA ,
295 USCRIPT_HANGUL ,
296 USCRIPT_HANGUL ,
297 USCRIPT_HANGUL ,
298 USCRIPT_HANGUL ,
299 USCRIPT_HANGUL ,
300 USCRIPT_OLD_ITALIC,
301 USCRIPT_GOTHIC ,
302 USCRIPT_GOTHIC ,
303 USCRIPT_DESERET ,
304 USCRIPT_DESERET ,
305 USCRIPT_INHERITED,
306 USCRIPT_INHERITED,
307 USCRIPT_INHERITED,
308 USCRIPT_INHERITED,
309 USCRIPT_HAN ,
310 USCRIPT_MALAYALAM,
311 USCRIPT_UNKNOWN,
312 USCRIPT_COMMON,
313 USCRIPT_INHERITED ,
314 USCRIPT_INHERITED ,
315 USCRIPT_INHERITED ,
316 USCRIPT_INHERITED ,
317 };
318 UScriptCode code = USCRIPT_INVALID_CODE;
319 UErrorCode status = U_ZERO_ERROR;
320 UBool passed = true;
321
322 for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
323 code = uscript_getScript(codepoints[i],&status);
324 if(U_SUCCESS(status)){
325 if( code != expected[i] ||
326 code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
327 ) {
328 log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
329 passed = false;
330 }
331 }else{
332 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
333 codepoints[i],u_errorName(status));
334 break;
335 }
336 }
337
338 if(passed==false){
339 log_err("uscript_getScript failed.\n");
340 }
341 }
342 {
343 UScriptCode code= USCRIPT_INVALID_CODE;
344 UErrorCode status = U_ZERO_ERROR;
345 code = uscript_getScript(0x001D169,&status);
346 if(code != USCRIPT_INHERITED){
347 log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
348 }
349 }
350 {
351 UScriptCode code= USCRIPT_INVALID_CODE;
352 UErrorCode status = U_ZERO_ERROR;
353 int32_t err = 0;
354
355 for(i = 0; i<=0x10ffff; i++){
356 code = uscript_getScript(i,&status);
357 if(code == USCRIPT_INVALID_CODE){
358 err++;
359 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
360 }
361 }
362 if(err>0){
363 log_err("uscript_getScript failed for %d codepoints\n", err);
364 }
365 }
366 {
367 for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
368 const char* name = uscript_getName((UScriptCode)i);
369 if(name==NULL || strcmp(name,"")==0){
370 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
371 }
372 }
373 }
374
375 {
376 /*
377 * These script codes were originally added to ICU pre-3.6, so that ICU would
378 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
379 * These script codes were added with only short names because we don't
380 * want to invent long names ourselves.
381 * Unicode 5 and later encode some of these scripts and give them long names.
382 * Whenever this happens, the long script names here need to be updated.
383 */
384 static const char* expectedLong[] = {
385 "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
386 "Egyd", "Egyh", "Egyptian_Hieroglyphs",
387 "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
388 "Javanese", "Kayah_Li", "Latf", "Latg",
389 "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
390 "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
391 "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
392 "Zxxx", "Unknown",
393 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
394 "Moon", "Meetei_Mayek",
395 /* new in ICU 4.0 */
396 "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
397 "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
398 "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
399 "Zmth", "Zsym",
400 /* new in ICU 4.4 */
401 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
402 /* new in ICU 4.6 */
403 "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
404 "Loma", "Mende_Kikakui", "Meroitic_Cursive",
405 "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
406 /* new in ICU 4.8 */
407 "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
408 /* new in ICU 49 */
409 "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
410 /* new in ICU 52 */
411 "Caucasian_Albanian", "Mahajani",
412 /* new in ICU 54 */
413 "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
414 // new in ICU 58
415 "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
416 // new in ICU 60
417 "Masaram_Gondi", "Soyombo", "Zanabazar_Square",
418 // new in ICU 61
419 "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin",
420 "Hanifi_Rohingya", "Sogdian", "Old_Sogdian",
421 // new in ICU 64
422 "Elymaic", "Nyiakeng_Puachue_Hmong", "Nandinagari", "Wancho",
423 // new in ICU 66
424 "Chorasmian", "Dives_Akuru", "Khitan_Small_Script", "Yezidi",
425 // new in ICU 70
426 "Cypro_Minoan", "Old_Uyghur", "Tangsa", "Toto", "Vithkuqi",
427 // new in ICU 72
428 "Kawi", "Nag_Mundari",
429 };
430 static const char* expectedShort[] = {
431 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
432 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
433 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
434 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
435 "Zxxx", "Zzzz",
436 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
437 "Moon", "Mtei",
438 /* new in ICU 4.0 */
439 "Armi", "Avst", "Cakm", "Kore",
440 "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
441 "Zmth", "Zsym",
442 /* new in ICU 4.4 */
443 "Bamu", "Lisu", "Nkgb", "Sarb",
444 /* new in ICU 4.6 */
445 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
446 "Narb", "Nbat", "Palm", "Sind", "Wara",
447 /* new in ICU 4.8 */
448 "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
449 /* new in ICU 49 */
450 "Hluw", "Khoj", "Tirh",
451 /* new in ICU 52 */
452 "Aghb", "Mahj",
453 /* new in ICU 54 */
454 "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
455 // new in ICU 58
456 "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
457 // new in ICU 60
458 "Gonm", "Soyo", "Zanb",
459 // new in ICU 61
460 "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo",
461 // new in ICU 64
462 "Elym", "Hmnp", "Nand", "Wcho",
463 // new in ICU 66
464 "Chrs", "Diak", "Kits", "Yezi",
465 // new in ICU 70
466 "Cpmn", "Ougr", "Tnsa", "Toto", "Vith",
467 // new in ICU 72
468 "Kawi", "Nagm",
469 };
470 int32_t j = 0;
471 if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
472 log_err("need to add new script codes in cucdapi.c!\n");
473 return;
474 }
475 for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
476 const char* name = uscript_getName((UScriptCode)i);
477 if(name==NULL || strcmp(name,expectedLong[j])!=0){
478 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
479 }
480 name = uscript_getShortName((UScriptCode)i);
481 if(name==NULL || strcmp(name,expectedShort[j])!=0){
482 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
483 }
484 }
485 for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
486 UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
487 UErrorCode status = U_ZERO_ERROR;
488 int32_t len = 0;
489 len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
490 if(U_FAILURE(status)){
491 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
492 }
493 if(len>1){
494 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
495 }
496 if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
497 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
498 }
499 }
500 }
501
502 {
503 /* test characters which have Script_Extensions */
504 UErrorCode errorCode=U_ZERO_ERROR;
505 if(!(
506 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
507 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
508 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
509 U_FAILURE(errorCode)
510 ) {
511 log_err("uscript_getScript(character with Script_Extensions) failed\n");
512 }
513 }
514 }
515
TestHasScript()516 void TestHasScript() {
517 if(!(
518 !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
519 uscript_hasScript(0x063f, USCRIPT_ARABIC) && /* main Script value */
520 !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
521 !uscript_hasScript(0x063f, USCRIPT_THAANA))
522 ) {
523 log_err("uscript_hasScript(U+063F, ...) is wrong\n");
524 }
525 if(!(
526 !uscript_hasScript(0x0640, USCRIPT_COMMON) && /* main Script value */
527 uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
528 uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
529 !uscript_hasScript(0x0640, USCRIPT_THAANA))
530 ) {
531 log_err("uscript_hasScript(U+0640, ...) is wrong\n");
532 }
533 if(!(
534 !uscript_hasScript(0x0650, USCRIPT_INHERITED) && /* main Script value */
535 uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
536 uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
537 !uscript_hasScript(0x0650, USCRIPT_THAANA))
538 ) {
539 log_err("uscript_hasScript(U+0650, ...) is wrong\n");
540 }
541 if(!(
542 !uscript_hasScript(0x0660, USCRIPT_COMMON) && /* main Script value */
543 uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
544 !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
545 uscript_hasScript(0x0660, USCRIPT_THAANA))
546 ) {
547 log_err("uscript_hasScript(U+0660, ...) is wrong\n");
548 }
549 if(!(
550 !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
551 uscript_hasScript(0xfdf2, USCRIPT_ARABIC) && /* main Script value */
552 !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
553 uscript_hasScript(0xfdf2, USCRIPT_THAANA))
554 ) {
555 log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
556 }
557 if(uscript_hasScript(0x0640, 0xaffe)) {
558 /* An unguarded implementation might go into an infinite loop. */
559 log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
560 }
561 }
562
scriptsContain(UScriptCode scripts[],int32_t length,UScriptCode script)563 static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) {
564 UBool contain=false;
565 int32_t prev=-1, i;
566 for(i=0; i<length; ++i) {
567 int32_t s=scripts[i];
568 if(s<=prev) {
569 log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
570 }
571 if(s==script) { contain=true; }
572 }
573 return contain;
574 }
575
TestGetScriptExtensions()576 void TestGetScriptExtensions() {
577 UScriptCode scripts[20];
578 int32_t length;
579 UErrorCode errorCode;
580
581 /* errors and overflows */
582 errorCode=U_PARSE_ERROR;
583 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
584 if(errorCode!=U_PARSE_ERROR) {
585 log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
586 u_errorName(errorCode));
587 }
588 errorCode=U_ZERO_ERROR;
589 length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
590 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
591 log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
592 u_errorName(errorCode));
593 }
594 errorCode=U_ZERO_ERROR;
595 length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
596 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
597 log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
598 u_errorName(errorCode));
599 }
600 errorCode=U_ZERO_ERROR;
601 length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
602 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
603 log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
604 (int)length, u_errorName(errorCode));
605 }
606 errorCode=U_ZERO_ERROR;
607 length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
608 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
609 log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
610 (int)length, u_errorName(errorCode));
611 }
612 /* U+063F has only a Script code, no Script_Extensions. */
613 errorCode=U_ZERO_ERROR;
614 length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
615 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
616 log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
617 (int)length, u_errorName(errorCode));
618 }
619
620 /* invalid code points */
621 errorCode=U_ZERO_ERROR;
622 length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
623 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
624 log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
625 (int)length, u_errorName(errorCode));
626 }
627 errorCode=U_ZERO_ERROR;
628 length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
629 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
630 log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
631 (int)length, u_errorName(errorCode));
632 }
633
634 /* normal usage */
635 errorCode=U_ZERO_ERROR;
636 length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
637 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
638 log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
639 (int)length, u_errorName(errorCode));
640 }
641 errorCode=U_ZERO_ERROR;
642 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
643 if(U_FAILURE(errorCode) || length<3 ||
644 !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
645 !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
646 !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
647 log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
648 (int)length, u_errorName(errorCode));
649 }
650 errorCode=U_ZERO_ERROR;
651 length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
652 if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
653 log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
654 (int)length, u_errorName(errorCode));
655 }
656 errorCode=U_ZERO_ERROR;
657 length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
658 if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
659 log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
660 (int)length, u_errorName(errorCode));
661 }
662 }
663
TestScriptMetadataAPI()664 void TestScriptMetadataAPI() {
665 /* API & code coverage. More testing in intltest/ucdtest.cpp. */
666 UErrorCode errorCode=U_ZERO_ERROR;
667 UChar sample[8];
668
669 if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
670 U_FAILURE(errorCode) ||
671 uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
672 sample[1]!=0) {
673 log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
674 }
675 sample[0]=0xfffe;
676 if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
677 errorCode!=U_BUFFER_OVERFLOW_ERROR ||
678 sample[0]!=0xfffe) {
679 log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
680 }
681 errorCode=U_ZERO_ERROR;
682 if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
683 U_FAILURE(errorCode) ||
684 sample[0]!=0) {
685 log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
686 }
687 sample[0]=0xfffe;
688 if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
689 errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
690 sample[0]!=0xfffe) {
691 log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
692 }
693
694 if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
695 // Unicode 10 gives up on "aspirational".
696 uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_LIMITED_USE ||
697 uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
698 uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
699 uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
700 uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
701 uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
702 log_err("uscript_getUsage() failed\n");
703 }
704
705 if(uscript_isRightToLeft(USCRIPT_LATIN) ||
706 uscript_isRightToLeft(USCRIPT_CIRTH) ||
707 !uscript_isRightToLeft(USCRIPT_ARABIC) ||
708 !uscript_isRightToLeft(USCRIPT_HEBREW)) {
709 log_err("uscript_isRightToLeft() failed\n");
710 }
711
712 if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
713 uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
714 !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
715 !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
716 log_err("uscript_breaksBetweenLetters() failed\n");
717 }
718
719 if(uscript_isCased(USCRIPT_CIRTH) ||
720 uscript_isCased(USCRIPT_HAN) ||
721 !uscript_isCased(USCRIPT_LATIN) ||
722 !uscript_isCased(USCRIPT_GREEK)) {
723 log_err("uscript_isCased() failed\n");
724 }
725 }
726
TestBinaryValues()727 void TestBinaryValues() {
728 /*
729 * Unicode 5.1 explicitly defines binary property value aliases.
730 * Verify that they are all recognized.
731 */
732 static const char *const falseValues[]={ "N", "No", "F", "False" };
733 static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
734 int32_t i;
735 for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
736 if(false!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
737 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=false (Are you missing data?)\n", falseValues[i]);
738 }
739 }
740 for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
741 if(true!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
742 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=true (Are you missing data?)\n", trueValues[i]);
743 }
744 }
745 }
746