1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1997-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************/
7
8 #include <string.h>
9 #include "unicode/utypes.h"
10 #include "unicode/uscript.h"
11 #include "unicode/uchar.h"
12 #include "cintltst.h"
13 #include "cucdapi.h"
14 #include "cmemory.h"
15
/*
 * Renders scripts[0..length) into s as the short names returned by
 * uscript_getShortName(), separated by single spaces.
 * A list of length 0 is rendered as "(no scripts)".
 * No capacity is passed in: the caller must supply a buffer that is large enough.
 */
static void scriptsToString(const UScriptCode scripts[], int32_t length, char s[]) {
    char *out = s;
    int32_t k;
    if(length == 0) {
        strcpy(s, "(no scripts)");
        return;
    }
    *out = 0;
    for(k = 0; k < length; ++k) {
        if(k != 0) {
            /* The separator overwrites the current terminator; strcpy() below re-terminates. */
            *out++ = ' ';
        }
        strcpy(out, uscript_getShortName(scripts[k]));
        out += strlen(out);
    }
}
30
/*
 * Compares two script lists by their scriptsToString() renderings.
 * If errorCode is a failure, it is reported via log_err() together with msg
 * and no comparison takes place. Otherwise a mismatch is reported via
 * log_data_err(), with scripts1 shown as the expected list and scripts2 as the actual one.
 */
static void assertEqualScripts(const char *msg,
                               const UScriptCode scripts1[], int32_t length1,
                               const UScriptCode scripts2[], int32_t length2,
                               UErrorCode errorCode) {
    char expectedText[80];
    char actualText[80];
    if(U_SUCCESS(errorCode)) {
        scriptsToString(scripts1, length1, expectedText);
        scriptsToString(scripts2, length2, actualText);
        if(strcmp(expectedText, actualText) != 0) {
            log_data_err("Failed: %s: expected %s but got %s\n", msg, expectedText, actualText);
        }
    } else {
        log_err("Failed: %s - %s\n", msg, u_errorName(errorCode));
    }
}
47
TestUScriptCodeAPI()48 void TestUScriptCodeAPI(){
49 int i =0;
50 int numErrors =0;
51 {
52 const char* testNames[]={
53 /* test locale */
54 "en", "en_US", "sr", "ta" , "te_IN",
55 "hi", "he", "ar",
56 /* test abbr */
57 "Hani", "Hang","Hebr","Hira",
58 "Knda","Kana","Khmr","Lao",
59 "Latn",/*"Latf","Latg",*/
60 "Mlym", "Mong",
61
62 /* test names */
63 "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
64 "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED",
65 /* test lower case names */
66 "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
67 "oriya", "runic", "sinhala", "syriac","tamil",
68 "telugu", "thaana", "thai", "tibetan",
69 /* test the bounds*/
70 "tagb", "arabic",
71 /* test bogus */
72 "asfdasd", "5464", "12235",
73 /* test the last index */
74 "zyyy", "YI",
75 NULL
76 };
77 UScriptCode expected[] ={
78 /* locales should return */
79 USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
80 USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
81 /* abbr should return */
82 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
83 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
84 USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
85 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
86 /* names should return */
87 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
88 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
89 /* lower case names should return */
90 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
91 USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
92 USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
93 /* bounds */
94 USCRIPT_TAGBANWA, USCRIPT_ARABIC,
95 /* bogus names should return invalid code */
96 USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
97 USCRIPT_COMMON, USCRIPT_YI,
98 };
99
100 UErrorCode err = U_ZERO_ERROR;
101
102 const int32_t capacity = 10;
103
104 for( ; testNames[i]!=NULL; i++){
105 UScriptCode script[10]={USCRIPT_INVALID_CODE};
106 uscript_getCode(testNames[i],script,capacity, &err);
107 if( script[0] != expected[i]){
108 log_data_err("Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
109 script[0],expected[i],testNames[i]);
110 numErrors++;
111 }
112 }
113 if(numErrors >0 ){
114 log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
115 }
116 }
117
118 {
119 UErrorCode err = U_ZERO_ERROR;
120 int32_t capacity=0;
121 int32_t j;
122 UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
123 UScriptCode script[10]={USCRIPT_INVALID_CODE};
124 int32_t num = uscript_getCode("ja",script,capacity, &err);
125 /* preflight */
126 if(err==U_BUFFER_OVERFLOW_ERROR){
127 err = U_ZERO_ERROR;
128 capacity = 10;
129 num = uscript_getCode("ja",script,capacity, &err);
130 if(num!=UPRV_LENGTHOF(jaCode)){
131 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
132 num, UPRV_LENGTHOF(jaCode));
133 }
134 for(j=0;j<UPRV_LENGTHOF(jaCode);j++) {
135 if(script[j]!=jaCode[j]) {
136 log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
137 script[j], uscript_getName(script[j]),
138 jaCode[j], uscript_getName(jaCode[j]));
139
140 }
141 }
142 }else{
143 log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
144 "U_BUFFER_OVERFLOW_ERROR",
145 u_errorName(err));
146 }
147
148 }
149 {
150 static const UScriptCode LATIN[1] = { USCRIPT_LATIN };
151 static const UScriptCode CYRILLIC[1] = { USCRIPT_CYRILLIC };
152 static const UScriptCode DEVANAGARI[1] = { USCRIPT_DEVANAGARI };
153 static const UScriptCode HAN[1] = { USCRIPT_HAN };
154 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
155 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
156 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
157 UScriptCode scripts[5];
158 UErrorCode err;
159 int32_t num;
160
161 // Should work regardless of whether we have locale data for the language.
162 err = U_ZERO_ERROR;
163 num = uscript_getCode("tg", scripts, UPRV_LENGTHOF(scripts), &err);
164 assertEqualScripts("tg script: Cyrl", CYRILLIC, 1, scripts, num, err); // Tajik
165 err = U_ZERO_ERROR;
166 num = uscript_getCode("xsr", scripts, UPRV_LENGTHOF(scripts), &err);
167 assertEqualScripts("xsr script: Deva", DEVANAGARI, 1, scripts, num, err); // Sherpa
168
169 // Multi-script languages.
170 err = U_ZERO_ERROR;
171 num = uscript_getCode("ja", scripts, UPRV_LENGTHOF(scripts), &err);
172 assertEqualScripts("ja scripts: Kana Hira Hani",
173 JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, num, err);
174 err = U_ZERO_ERROR;
175 num = uscript_getCode("ko", scripts, UPRV_LENGTHOF(scripts), &err);
176 assertEqualScripts("ko scripts: Hang Hani",
177 KOREAN, UPRV_LENGTHOF(KOREAN), scripts, num, err);
178 err = U_ZERO_ERROR;
179 num = uscript_getCode("zh", scripts, UPRV_LENGTHOF(scripts), &err);
180 assertEqualScripts("zh script: Hani", HAN, 1, scripts, num, err);
181 err = U_ZERO_ERROR;
182 num = uscript_getCode("zh-Hant", scripts, UPRV_LENGTHOF(scripts), &err);
183 assertEqualScripts("zh-Hant scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
184 err = U_ZERO_ERROR;
185 num = uscript_getCode("zh-TW", scripts, UPRV_LENGTHOF(scripts), &err);
186 assertEqualScripts("zh-TW scripts: Hani Bopo", HAN_BOPO, 2, scripts, num, err);
187
188 // Ambiguous API, but this probably wants to return Latin rather than Rongorongo (Roro).
189 err = U_ZERO_ERROR;
190 num = uscript_getCode("ro-RO", scripts, UPRV_LENGTHOF(scripts), &err);
191 assertEqualScripts("ro-RO script: Latn", LATIN, 1, scripts, num, err);
192 }
193
194 {
195 UScriptCode testAbbr[]={
196 /* names should return */
197 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
198 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
199 };
200
201 const char* expectedNames[]={
202
203 /* test names */
204 "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
205 "Gothic", "Greek", "Gujarati",
206 NULL
207 };
208 i=0;
209 while(i<UPRV_LENGTHOF(testAbbr)){
210 const char* name = uscript_getName(testAbbr[i]);
211 if(name == NULL) {
212 log_data_err("Couldn't get script name\n");
213 return;
214 }
215 numErrors=0;
216 if(strcmp(expectedNames[i],name)!=0){
217 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
218 numErrors++;
219 }
220 if(numErrors > 0){
221 if(numErrors >0 ){
222 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
223 }
224 }
225 i++;
226 }
227
228 }
229
230 {
231 UScriptCode testAbbr[]={
232 /* abbr should return */
233 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
234 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
235 USCRIPT_LATIN,
236 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
237 };
238
239 const char* expectedAbbr[]={
240 /* test abbr */
241 "Hani", "Hang","Hebr","Hira",
242 "Knda","Kana","Khmr","Laoo",
243 "Latn",
244 "Mlym", "Mong",
245 NULL
246 };
247 i=0;
248 while(i<UPRV_LENGTHOF(testAbbr)){
249 const char* name = uscript_getShortName(testAbbr[i]);
250 numErrors=0;
251 if(strcmp(expectedAbbr[i],name)!=0){
252 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
253 numErrors++;
254 }
255 if(numErrors > 0){
256 if(numErrors >0 ){
257 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
258 }
259 }
260 i++;
261 }
262
263 }
264 /* now test uscript_getScript() API */
265 {
266 uint32_t codepoints[] = {
267 0x0000FF9D, /* USCRIPT_KATAKANA*/
268 0x0000FFBE, /* USCRIPT_HANGUL*/
269 0x0000FFC7, /* USCRIPT_HANGUL*/
270 0x0000FFCF, /* USCRIPT_HANGUL*/
271 0x0000FFD7, /* USCRIPT_HANGUL*/
272 0x0000FFDC, /* USCRIPT_HANGUL*/
273 0x00010300, /* USCRIPT_OLD_ITALIC*/
274 0x00010330, /* USCRIPT_GOTHIC*/
275 0x0001034A, /* USCRIPT_GOTHIC*/
276 0x00010400, /* USCRIPT_DESERET*/
277 0x00010428, /* USCRIPT_DESERET*/
278 0x0001D167, /* USCRIPT_INHERITED*/
279 0x0001D17B, /* USCRIPT_INHERITED*/
280 0x0001D185, /* USCRIPT_INHERITED*/
281 0x0001D1AA, /* USCRIPT_INHERITED*/
282 0x00020000, /* USCRIPT_HAN*/
283 0x00000D02, /* USCRIPT_MALAYALAM*/
284 0x00050005, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
285 0x00000000, /* USCRIPT_COMMON*/
286 0x0001D169, /* USCRIPT_INHERITED*/
287 0x0001D182, /* USCRIPT_INHERITED*/
288 0x0001D18B, /* USCRIPT_INHERITED*/
289 0x0001D1AD, /* USCRIPT_INHERITED*/
290 };
291
292 UScriptCode expected[] = {
293 USCRIPT_KATAKANA ,
294 USCRIPT_HANGUL ,
295 USCRIPT_HANGUL ,
296 USCRIPT_HANGUL ,
297 USCRIPT_HANGUL ,
298 USCRIPT_HANGUL ,
299 USCRIPT_OLD_ITALIC,
300 USCRIPT_GOTHIC ,
301 USCRIPT_GOTHIC ,
302 USCRIPT_DESERET ,
303 USCRIPT_DESERET ,
304 USCRIPT_INHERITED,
305 USCRIPT_INHERITED,
306 USCRIPT_INHERITED,
307 USCRIPT_INHERITED,
308 USCRIPT_HAN ,
309 USCRIPT_MALAYALAM,
310 USCRIPT_UNKNOWN,
311 USCRIPT_COMMON,
312 USCRIPT_INHERITED ,
313 USCRIPT_INHERITED ,
314 USCRIPT_INHERITED ,
315 USCRIPT_INHERITED ,
316 };
317 UScriptCode code = USCRIPT_INVALID_CODE;
318 UErrorCode status = U_ZERO_ERROR;
319 UBool passed = TRUE;
320
321 for(i=0; i<UPRV_LENGTHOF(codepoints); ++i){
322 code = uscript_getScript(codepoints[i],&status);
323 if(U_SUCCESS(status)){
324 if( code != expected[i] ||
325 code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
326 ) {
327 log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
328 passed = FALSE;
329 }
330 }else{
331 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
332 codepoints[i],u_errorName(status));
333 break;
334 }
335 }
336
337 if(passed==FALSE){
338 log_err("uscript_getScript failed.\n");
339 }
340 }
341 {
342 UScriptCode code= USCRIPT_INVALID_CODE;
343 UErrorCode status = U_ZERO_ERROR;
344 code = uscript_getScript(0x001D169,&status);
345 if(code != USCRIPT_INHERITED){
346 log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
347 }
348 }
349 {
350 UScriptCode code= USCRIPT_INVALID_CODE;
351 UErrorCode status = U_ZERO_ERROR;
352 int32_t err = 0;
353
354 for(i = 0; i<=0x10ffff; i++){
355 code = uscript_getScript(i,&status);
356 if(code == USCRIPT_INVALID_CODE){
357 err++;
358 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
359 }
360 }
361 if(err>0){
362 log_err("uscript_getScript failed for %d codepoints\n", err);
363 }
364 }
365 {
366 for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
367 const char* name = uscript_getName((UScriptCode)i);
368 if(name==NULL || strcmp(name,"")==0){
369 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
370 }
371 }
372 }
373
374 {
375 /*
376 * These script codes were originally added to ICU pre-3.6, so that ICU would
377 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
378 * These script codes were added with only short names because we don't
379 * want to invent long names ourselves.
380 * Unicode 5 and later encode some of these scripts and give them long names.
381 * Whenever this happens, the long script names here need to be updated.
382 */
383 static const char* expectedLong[] = {
384 "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs",
385 "Egyd", "Egyh", "Egyptian_Hieroglyphs",
386 "Geok", "Hans", "Hant", "Pahawh_Hmong", "Old_Hungarian", "Inds",
387 "Javanese", "Kayah_Li", "Latf", "Latg",
388 "Lepcha", "Linear_A", "Mandaic", "Maya", "Meroitic_Hieroglyphs",
389 "Nko", "Old_Turkic", "Old_Permic", "Phags_Pa", "Phoenician",
390 "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
391 "Zxxx", "Unknown",
392 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "SignWriting", "Sundanese",
393 "Moon", "Meetei_Mayek",
394 /* new in ICU 4.0 */
395 "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
396 "Kaithi", "Manichaean", "Inscriptional_Pahlavi", "Psalter_Pahlavi", "Phlv",
397 "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
398 "Zmth", "Zsym",
399 /* new in ICU 4.4 */
400 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
401 /* new in ICU 4.6 */
402 "Bassa_Vah", "Duployan", "Elbasan", "Grantha", "Kpel",
403 "Loma", "Mende_Kikakui", "Meroitic_Cursive",
404 "Old_North_Arabian", "Nabataean", "Palmyrene", "Khudawadi", "Warang_Citi",
405 /* new in ICU 4.8 */
406 "Afak", "Jurc", "Mro", "Nushu", "Sharada", "Sora_Sompeng", "Takri", "Tangut", "Wole",
407 /* new in ICU 49 */
408 "Anatolian_Hieroglyphs", "Khojki", "Tirhuta",
409 /* new in ICU 52 */
410 "Caucasian_Albanian", "Mahajani",
411 /* new in ICU 54 */
412 "Ahom", "Hatran", "Modi", "Multani", "Pau_Cin_Hau", "Siddham",
413 // new in ICU 58
414 "Adlam", "Bhaiksuki", "Marchen", "Newa", "Osage", "Hanb", "Jamo", "Zsye",
415 // new in ICU 60
416 "Masaram_Gondi", "Soyombo", "Zanabazar_Square",
417 // new in ICU 61
418 "Dogra", "Gunjala_Gondi", "Makasar", "Medefaidrin",
419 "Hanifi_Rohingya", "Sogdian", "Old_Sogdian",
420 // new in ICU 64
421 "Elymaic", "Nyiakeng_Puachue_Hmong", "Nandinagari", "Wancho",
422 // new in ICU 66
423 "Chorasmian", "Dives_Akuru", "Khitan_Small_Script", "Yezidi",
424 };
425 static const char* expectedShort[] = {
426 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
427 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
428 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
429 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
430 "Zxxx", "Zzzz",
431 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
432 "Moon", "Mtei",
433 /* new in ICU 4.0 */
434 "Armi", "Avst", "Cakm", "Kore",
435 "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
436 "Zmth", "Zsym",
437 /* new in ICU 4.4 */
438 "Bamu", "Lisu", "Nkgb", "Sarb",
439 /* new in ICU 4.6 */
440 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
441 "Narb", "Nbat", "Palm", "Sind", "Wara",
442 /* new in ICU 4.8 */
443 "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
444 /* new in ICU 49 */
445 "Hluw", "Khoj", "Tirh",
446 /* new in ICU 52 */
447 "Aghb", "Mahj",
448 /* new in ICU 54 */
449 "Ahom", "Hatr", "Modi", "Mult", "Pauc", "Sidd",
450 // new in ICU 58
451 "Adlm", "Bhks", "Marc", "Newa", "Osge", "Hanb", "Jamo", "Zsye",
452 // new in ICU 60
453 "Gonm", "Soyo", "Zanb",
454 // new in ICU 61
455 "Dogr", "Gong", "Maka", "Medf", "Rohg", "Sogd", "Sogo",
456 // new in ICU 64
457 "Elym", "Hmnp", "Nand", "Wcho",
458 // new in ICU 66
459 "Chrs", "Diak", "Kits", "Yezi",
460 };
461 int32_t j = 0;
462 if(UPRV_LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
463 log_err("need to add new script codes in cucdapi.c!\n");
464 return;
465 }
466 for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
467 const char* name = uscript_getName((UScriptCode)i);
468 if(name==NULL || strcmp(name,expectedLong[j])!=0){
469 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
470 }
471 name = uscript_getShortName((UScriptCode)i);
472 if(name==NULL || strcmp(name,expectedShort[j])!=0){
473 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
474 }
475 }
476 for(i=0; i<UPRV_LENGTHOF(expectedLong); i++){
477 UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
478 UErrorCode status = U_ZERO_ERROR;
479 int32_t len = 0;
480 len = uscript_getCode(expectedShort[i], fillIn, UPRV_LENGTHOF(fillIn), &status);
481 if(U_FAILURE(status)){
482 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
483 }
484 if(len>1){
485 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
486 }
487 if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
488 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
489 }
490 }
491 }
492
493 {
494 /* test characters which have Script_Extensions */
495 UErrorCode errorCode=U_ZERO_ERROR;
496 if(!(
497 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
498 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
499 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
500 U_FAILURE(errorCode)
501 ) {
502 log_err("uscript_getScript(character with Script_Extensions) failed\n");
503 }
504 }
505 }
506
TestHasScript()507 void TestHasScript() {
508 if(!(
509 !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
510 uscript_hasScript(0x063f, USCRIPT_ARABIC) && /* main Script value */
511 !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
512 !uscript_hasScript(0x063f, USCRIPT_THAANA))
513 ) {
514 log_err("uscript_hasScript(U+063F, ...) is wrong\n");
515 }
516 if(!(
517 !uscript_hasScript(0x0640, USCRIPT_COMMON) && /* main Script value */
518 uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
519 uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
520 !uscript_hasScript(0x0640, USCRIPT_THAANA))
521 ) {
522 log_err("uscript_hasScript(U+0640, ...) is wrong\n");
523 }
524 if(!(
525 !uscript_hasScript(0x0650, USCRIPT_INHERITED) && /* main Script value */
526 uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
527 uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
528 !uscript_hasScript(0x0650, USCRIPT_THAANA))
529 ) {
530 log_err("uscript_hasScript(U+0650, ...) is wrong\n");
531 }
532 if(!(
533 !uscript_hasScript(0x0660, USCRIPT_COMMON) && /* main Script value */
534 uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
535 !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
536 uscript_hasScript(0x0660, USCRIPT_THAANA))
537 ) {
538 log_err("uscript_hasScript(U+0660, ...) is wrong\n");
539 }
540 if(!(
541 !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
542 uscript_hasScript(0xfdf2, USCRIPT_ARABIC) && /* main Script value */
543 !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
544 uscript_hasScript(0xfdf2, USCRIPT_THAANA))
545 ) {
546 log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
547 }
548 if(uscript_hasScript(0x0640, 0xaffe)) {
549 /* An unguarded implementation might go into an infinite loop. */
550 log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
551 }
552 }
553
scriptsContain(UScriptCode scripts[],int32_t length,UScriptCode script)554 static UBool scriptsContain(UScriptCode scripts[], int32_t length, UScriptCode script) {
555 UBool contain=FALSE;
556 int32_t prev=-1, i;
557 for(i=0; i<length; ++i) {
558 int32_t s=scripts[i];
559 if(s<=prev) {
560 log_err("uscript_getScriptExtensions() not in sorted order: %d %d\n", (int)prev, (int)s);
561 }
562 if(s==script) { contain=TRUE; }
563 }
564 return contain;
565 }
566
TestGetScriptExtensions()567 void TestGetScriptExtensions() {
568 UScriptCode scripts[20];
569 int32_t length;
570 UErrorCode errorCode;
571
572 /* errors and overflows */
573 errorCode=U_PARSE_ERROR;
574 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
575 if(errorCode!=U_PARSE_ERROR) {
576 log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
577 u_errorName(errorCode));
578 }
579 errorCode=U_ZERO_ERROR;
580 length=uscript_getScriptExtensions(0x0640, NULL, UPRV_LENGTHOF(scripts), &errorCode);
581 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
582 log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
583 u_errorName(errorCode));
584 }
585 errorCode=U_ZERO_ERROR;
586 length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
587 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
588 log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
589 u_errorName(errorCode));
590 }
591 errorCode=U_ZERO_ERROR;
592 length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
593 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
594 log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d < 3 - %s\n",
595 (int)length, u_errorName(errorCode));
596 }
597 errorCode=U_ZERO_ERROR;
598 length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
599 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length<3) {
600 log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d < 3 - %s\n",
601 (int)length, u_errorName(errorCode));
602 }
603 /* U+063F has only a Script code, no Script_Extensions. */
604 errorCode=U_ZERO_ERROR;
605 length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
606 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
607 log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
608 (int)length, u_errorName(errorCode));
609 }
610
611 /* invalid code points */
612 errorCode=U_ZERO_ERROR;
613 length=uscript_getScriptExtensions(-1, scripts, UPRV_LENGTHOF(scripts), &errorCode);
614 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
615 log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
616 (int)length, u_errorName(errorCode));
617 }
618 errorCode=U_ZERO_ERROR;
619 length=uscript_getScriptExtensions(0x110000, scripts, UPRV_LENGTHOF(scripts), &errorCode);
620 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
621 log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
622 (int)length, u_errorName(errorCode));
623 }
624
625 /* normal usage */
626 errorCode=U_ZERO_ERROR;
627 length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
628 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
629 log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
630 (int)length, u_errorName(errorCode));
631 }
632 errorCode=U_ZERO_ERROR;
633 length=uscript_getScriptExtensions(0x0640, scripts, UPRV_LENGTHOF(scripts), &errorCode);
634 if(U_FAILURE(errorCode) || length<3 ||
635 !scriptsContain(scripts, length, USCRIPT_ARABIC) ||
636 !scriptsContain(scripts, length, USCRIPT_SYRIAC) ||
637 !scriptsContain(scripts, length, USCRIPT_MANDAIC)) {
638 log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
639 (int)length, u_errorName(errorCode));
640 }
641 errorCode=U_ZERO_ERROR;
642 length=uscript_getScriptExtensions(0xfdf2, scripts, UPRV_LENGTHOF(scripts), &errorCode);
643 if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
644 log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
645 (int)length, u_errorName(errorCode));
646 }
647 errorCode=U_ZERO_ERROR;
648 length=uscript_getScriptExtensions(0xff65, scripts, UPRV_LENGTHOF(scripts), &errorCode);
649 if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
650 log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
651 (int)length, u_errorName(errorCode));
652 }
653 }
654
TestScriptMetadataAPI()655 void TestScriptMetadataAPI() {
656 /* API & code coverage. More testing in intltest/ucdtest.cpp. */
657 UErrorCode errorCode=U_ZERO_ERROR;
658 UChar sample[8];
659
660 if(uscript_getSampleString(USCRIPT_LATIN, sample, UPRV_LENGTHOF(sample), &errorCode)!=1 ||
661 U_FAILURE(errorCode) ||
662 uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
663 sample[1]!=0) {
664 log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
665 }
666 sample[0]=0xfffe;
667 if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
668 errorCode!=U_BUFFER_OVERFLOW_ERROR ||
669 sample[0]!=0xfffe) {
670 log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
671 }
672 errorCode=U_ZERO_ERROR;
673 if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, UPRV_LENGTHOF(sample), &errorCode)!=0 ||
674 U_FAILURE(errorCode) ||
675 sample[0]!=0) {
676 log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
677 }
678 sample[0]=0xfffe;
679 if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
680 errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
681 sample[0]!=0xfffe) {
682 log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
683 }
684
685 if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
686 // Unicode 10 gives up on "aspirational".
687 uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_LIMITED_USE ||
688 uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
689 uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
690 uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
691 uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
692 uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
693 log_err("uscript_getUsage() failed\n");
694 }
695
696 if(uscript_isRightToLeft(USCRIPT_LATIN) ||
697 uscript_isRightToLeft(USCRIPT_CIRTH) ||
698 !uscript_isRightToLeft(USCRIPT_ARABIC) ||
699 !uscript_isRightToLeft(USCRIPT_HEBREW)) {
700 log_err("uscript_isRightToLeft() failed\n");
701 }
702
703 if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
704 uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
705 !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
706 !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
707 log_err("uscript_breaksBetweenLetters() failed\n");
708 }
709
710 if(uscript_isCased(USCRIPT_CIRTH) ||
711 uscript_isCased(USCRIPT_HAN) ||
712 !uscript_isCased(USCRIPT_LATIN) ||
713 !uscript_isCased(USCRIPT_GREEK)) {
714 log_err("uscript_isCased() failed\n");
715 }
716 }
717
TestBinaryValues()718 void TestBinaryValues() {
719 /*
720 * Unicode 5.1 explicitly defines binary property value aliases.
721 * Verify that they are all recognized.
722 */
723 static const char *const falseValues[]={ "N", "No", "F", "False" };
724 static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
725 int32_t i;
726 for(i=0; i<UPRV_LENGTHOF(falseValues); ++i) {
727 if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
728 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]);
729 }
730 }
731 for(i=0; i<UPRV_LENGTHOF(trueValues); ++i) {
732 if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
733 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]);
734 }
735 }
736 }
737