1 /********************************************************************
2 * Copyright (c) 1997-2014, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 ********************************************************************/
5
6 #include <string.h>
7 #include "unicode/utypes.h"
8 #include "unicode/uscript.h"
9 #include "unicode/uchar.h"
10 #include "cintltst.h"
11 #include "cucdapi.h"
12
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The argument and the whole expansion are parenthesized so the macro
 * composes safely with surrounding operators.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
14
TestUScriptCodeAPI()15 void TestUScriptCodeAPI(){
16 int i =0;
17 int numErrors =0;
18 {
19 const char* testNames[]={
20 /* test locale */
21 "en", "en_US", "sr", "ta" , "te_IN",
22 "hi", "he", "ar",
23 /* test abbr */
24 "Hani", "Hang","Hebr","Hira",
25 "Knda","Kana","Khmr","Lao",
26 "Latn",/*"Latf","Latg",*/
27 "Mlym", "Mong",
28
29 /* test names */
30 "CYRILLIC","DESERET","DEVANAGARI","ETHIOPIC","GEORGIAN",
31 "GOTHIC", "GREEK", "GUJARATI", "COMMON", "INHERITED",
32 /* test lower case names */
33 "malayalam", "mongolian", "myanmar", "ogham", "old-italic",
34 "oriya", "runic", "sinhala", "syriac","tamil",
35 "telugu", "thaana", "thai", "tibetan",
36 /* test the bounds*/
37 "tagb", "arabic",
38 /* test bogus */
39 "asfdasd", "5464", "12235",
40 /* test the last index */
41 "zyyy", "YI",
42 NULL
43 };
44 UScriptCode expected[] ={
45 /* locales should return */
46 USCRIPT_LATIN, USCRIPT_LATIN, USCRIPT_CYRILLIC, USCRIPT_TAMIL, USCRIPT_TELUGU,
47 USCRIPT_DEVANAGARI, USCRIPT_HEBREW, USCRIPT_ARABIC,
48 /* abbr should return */
49 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
50 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
51 USCRIPT_LATIN,/* USCRIPT_LATIN, USCRIPT_LATIN,*/
52 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
53 /* names should return */
54 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
55 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI, USCRIPT_COMMON, USCRIPT_INHERITED,
56 /* lower case names should return */
57 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN, USCRIPT_MYANMAR, USCRIPT_OGHAM, USCRIPT_OLD_ITALIC,
58 USCRIPT_ORIYA, USCRIPT_RUNIC, USCRIPT_SINHALA, USCRIPT_SYRIAC, USCRIPT_TAMIL,
59 USCRIPT_TELUGU, USCRIPT_THAANA, USCRIPT_THAI, USCRIPT_TIBETAN,
60 /* bounds */
61 USCRIPT_TAGBANWA, USCRIPT_ARABIC,
62 /* bogus names should return invalid code */
63 USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE, USCRIPT_INVALID_CODE,
64 USCRIPT_COMMON, USCRIPT_YI,
65 };
66
67 UErrorCode err = U_ZERO_ERROR;
68
69 const int32_t capacity = 10;
70
71 for( ; testNames[i]!=NULL; i++){
72 UScriptCode script[10]={USCRIPT_INVALID_CODE};
73 uscript_getCode(testNames[i],script,capacity, &err);
74 if( script[0] != expected[i]){
75 log_data_err("Error getting script code Got: %i Expected: %i for name %s (Error code does not propagate if data is not present. Are you missing data?)\n",
76 script[0],expected[i],testNames[i]);
77 numErrors++;
78 }
79 }
80 if(numErrors >0 ){
81 log_data_err("Errors uchar_getScriptCode() : %i \n",numErrors);
82 }
83 }
84
85 {
86 UErrorCode err = U_ZERO_ERROR;
87 int32_t capacity=0;
88 int32_t j;
89 UScriptCode jaCode[]={USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
90 UScriptCode script[10]={USCRIPT_INVALID_CODE};
91 int32_t num = uscript_getCode("ja",script,capacity, &err);
92 /* preflight */
93 if(err==U_BUFFER_OVERFLOW_ERROR){
94 err = U_ZERO_ERROR;
95 capacity = 10;
96 num = uscript_getCode("ja",script,capacity, &err);
97 if(num!=(sizeof(jaCode)/sizeof(UScriptCode))){
98 log_err("Errors uscript_getScriptCode() for Japanese locale: num=%d, expected %d \n",
99 num, (sizeof(jaCode)/sizeof(UScriptCode)));
100 }
101 for(j=0;j<sizeof(jaCode)/sizeof(UScriptCode);j++) {
102 if(script[j]!=jaCode[j]) {
103 log_err("Japanese locale: code #%d was %d (%s) but expected %d (%s)\n", j,
104 script[j], uscript_getName(script[j]),
105 jaCode[j], uscript_getName(jaCode[j]));
106
107 }
108 }
109 }else{
110 log_data_err("Errors in uscript_getScriptCode() expected error : %s got: %s \n",
111 "U_BUFFER_OVERFLOW_ERROR",
112 u_errorName(err));
113 }
114
115 }
116
117 {
118 UScriptCode testAbbr[]={
119 /* names should return */
120 USCRIPT_CYRILLIC, USCRIPT_DESERET, USCRIPT_DEVANAGARI, USCRIPT_ETHIOPIC, USCRIPT_GEORGIAN,
121 USCRIPT_GOTHIC, USCRIPT_GREEK, USCRIPT_GUJARATI,
122 };
123
124 const char* expectedNames[]={
125
126 /* test names */
127 "Cyrillic","Deseret","Devanagari","Ethiopic","Georgian",
128 "Gothic", "Greek", "Gujarati",
129 NULL
130 };
131 i=0;
132 while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
133 const char* name = uscript_getName(testAbbr[i]);
134 if(name == NULL) {
135 log_data_err("Couldn't get script name\n");
136 return;
137 }
138 numErrors=0;
139 if(strcmp(expectedNames[i],name)!=0){
140 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedNames[i]);
141 numErrors++;
142 }
143 if(numErrors > 0){
144 if(numErrors >0 ){
145 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
146 }
147 }
148 i++;
149 }
150
151 }
152
153 {
154 UScriptCode testAbbr[]={
155 /* abbr should return */
156 USCRIPT_HAN, USCRIPT_HANGUL, USCRIPT_HEBREW, USCRIPT_HIRAGANA,
157 USCRIPT_KANNADA, USCRIPT_KATAKANA, USCRIPT_KHMER, USCRIPT_LAO,
158 USCRIPT_LATIN,
159 USCRIPT_MALAYALAM, USCRIPT_MONGOLIAN,
160 };
161
162 const char* expectedAbbr[]={
163 /* test abbr */
164 "Hani", "Hang","Hebr","Hira",
165 "Knda","Kana","Khmr","Laoo",
166 "Latn",
167 "Mlym", "Mong",
168 NULL
169 };
170 i=0;
171 while(i<sizeof(testAbbr)/sizeof(UScriptCode)){
172 const char* name = uscript_getShortName(testAbbr[i]);
173 numErrors=0;
174 if(strcmp(expectedAbbr[i],name)!=0){
175 log_err("Error getting abbreviations Got: %s Expected: %s\n",name,expectedAbbr[i]);
176 numErrors++;
177 }
178 if(numErrors > 0){
179 if(numErrors >0 ){
180 log_err("Errors uchar_getScriptAbbr() : %i \n",numErrors);
181 }
182 }
183 i++;
184 }
185
186 }
187 /* now test uscript_getScript() API */
188 {
189 uint32_t codepoints[] = {
190 0x0000FF9D, /* USCRIPT_KATAKANA*/
191 0x0000FFBE, /* USCRIPT_HANGUL*/
192 0x0000FFC7, /* USCRIPT_HANGUL*/
193 0x0000FFCF, /* USCRIPT_HANGUL*/
194 0x0000FFD7, /* USCRIPT_HANGUL*/
195 0x0000FFDC, /* USCRIPT_HANGUL*/
196 0x00010300, /* USCRIPT_OLD_ITALIC*/
197 0x00010330, /* USCRIPT_GOTHIC*/
198 0x0001034A, /* USCRIPT_GOTHIC*/
199 0x00010400, /* USCRIPT_DESERET*/
200 0x00010428, /* USCRIPT_DESERET*/
201 0x0001D167, /* USCRIPT_INHERITED*/
202 0x0001D17B, /* USCRIPT_INHERITED*/
203 0x0001D185, /* USCRIPT_INHERITED*/
204 0x0001D1AA, /* USCRIPT_INHERITED*/
205 0x00020000, /* USCRIPT_HAN*/
206 0x00000D02, /* USCRIPT_MALAYALAM*/
207 0x00000D00, /* USCRIPT_UNKNOWN (new Zzzz value in Unicode 5.0) */
208 0x00000000, /* USCRIPT_COMMON*/
209 0x0001D169, /* USCRIPT_INHERITED*/
210 0x0001D182, /* USCRIPT_INHERITED*/
211 0x0001D18B, /* USCRIPT_INHERITED*/
212 0x0001D1AD, /* USCRIPT_INHERITED*/
213 };
214
215 UScriptCode expected[] = {
216 USCRIPT_KATAKANA ,
217 USCRIPT_HANGUL ,
218 USCRIPT_HANGUL ,
219 USCRIPT_HANGUL ,
220 USCRIPT_HANGUL ,
221 USCRIPT_HANGUL ,
222 USCRIPT_OLD_ITALIC,
223 USCRIPT_GOTHIC ,
224 USCRIPT_GOTHIC ,
225 USCRIPT_DESERET ,
226 USCRIPT_DESERET ,
227 USCRIPT_INHERITED,
228 USCRIPT_INHERITED,
229 USCRIPT_INHERITED,
230 USCRIPT_INHERITED,
231 USCRIPT_HAN ,
232 USCRIPT_MALAYALAM,
233 USCRIPT_UNKNOWN,
234 USCRIPT_COMMON,
235 USCRIPT_INHERITED ,
236 USCRIPT_INHERITED ,
237 USCRIPT_INHERITED ,
238 USCRIPT_INHERITED ,
239 };
240 UScriptCode code = USCRIPT_INVALID_CODE;
241 UErrorCode status = U_ZERO_ERROR;
242 UBool passed = TRUE;
243
244 for(i=0; i<LENGTHOF(codepoints); ++i){
245 code = uscript_getScript(codepoints[i],&status);
246 if(U_SUCCESS(status)){
247 if( code != expected[i] ||
248 code != (UScriptCode)u_getIntPropertyValue(codepoints[i], UCHAR_SCRIPT)
249 ) {
250 log_err("uscript_getScript for codepoint \\U%08X failed\n",codepoints[i]);
251 passed = FALSE;
252 }
253 }else{
254 log_err("uscript_getScript for codepoint \\U%08X failed. Error: %s\n",
255 codepoints[i],u_errorName(status));
256 break;
257 }
258 }
259
260 if(passed==FALSE){
261 log_err("uscript_getScript failed.\n");
262 }
263 }
264 {
265 UScriptCode code= USCRIPT_INVALID_CODE;
266 UErrorCode status = U_ZERO_ERROR;
267 code = uscript_getScript(0x001D169,&status);
268 if(code != USCRIPT_INHERITED){
269 log_err("\\U001D169 is not contained in USCRIPT_INHERITED");
270 }
271 }
272 {
273 UScriptCode code= USCRIPT_INVALID_CODE;
274 UErrorCode status = U_ZERO_ERROR;
275 int32_t err = 0;
276
277 for(i = 0; i<=0x10ffff; i++){
278 code = uscript_getScript(i,&status);
279 if(code == USCRIPT_INVALID_CODE){
280 err++;
281 log_err("uscript_getScript for codepoint \\U%08X failed.\n", i);
282 }
283 }
284 if(err>0){
285 log_err("uscript_getScript failed for %d codepoints\n", err);
286 }
287 }
288 {
289 for(i=0; (UScriptCode)i< USCRIPT_CODE_LIMIT; i++){
290 const char* name = uscript_getName((UScriptCode)i);
291 if(name==NULL || strcmp(name,"")==0){
292 log_err("uscript_getName failed for code %i: name is NULL or \"\"\n",i);
293 }
294 }
295 }
296
297 {
298 /*
299 * These script codes were originally added to ICU pre-3.6, so that ICU would
300 * have all ISO 15924 script codes. ICU was then based on Unicode 4.1.
301 * These script codes were added with only short names because we don't
302 * want to invent long names ourselves.
303 * Unicode 5 and later encode some of these scripts and give them long names.
304 * Whenever this happens, the long script names here need to be updated.
305 */
306 static const char* expectedLong[] = {
307 "Balinese", "Batak", "Blis", "Brahmi", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyptian_Hieroglyphs",
308 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Javanese", "Kayah_Li", "Latf", "Latg",
309 "Lepcha", "Lina", "Mandaic", "Maya", "Meroitic_Hieroglyphs", "Nko", "Old_Turkic", "Perm", "Phags_Pa", "Phoenician",
310 "Miao", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vai", "Visp", "Cuneiform",
311 "Zxxx", "Unknown",
312 "Carian", "Jpan", "Tai_Tham", "Lycian", "Lydian", "Ol_Chiki", "Rejang", "Saurashtra", "Sgnw", "Sundanese",
313 "Moon", "Meetei_Mayek",
314 /* new in ICU 4.0 */
315 "Imperial_Aramaic", "Avestan", "Chakma", "Kore",
316 "Kaithi", "Mani", "Inscriptional_Pahlavi", "Phlp", "Phlv", "Inscriptional_Parthian", "Samaritan", "Tai_Viet",
317 "Zmth", "Zsym",
318 /* new in ICU 4.4 */
319 "Bamum", "Lisu", "Nkgb", "Old_South_Arabian",
320 /* new in ICU 4.6 */
321 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Meroitic_Cursive",
322 "Narb", "Nbat", "Palm", "Sind", "Wara",
323 /* new in ICU 4.8 */
324 "Afak", "Jurc", "Mroo", "Nshu", "Sharada", "Sora_Sompeng", "Takri", "Tang", "Wole",
325 /* new in ICU 49 */
326 "Hluw", "Khoj", "Tirh",
327 /* new in ICU 52 */
328 "Aghb", "Mahj"
329 };
330 static const char* expectedShort[] = {
331 "Bali", "Batk", "Blis", "Brah", "Cham", "Cirt", "Cyrs", "Egyd", "Egyh", "Egyp",
332 "Geok", "Hans", "Hant", "Hmng", "Hung", "Inds", "Java", "Kali", "Latf", "Latg",
333 "Lepc", "Lina", "Mand", "Maya", "Mero", "Nkoo", "Orkh", "Perm", "Phag", "Phnx",
334 "Plrd", "Roro", "Sara", "Syre", "Syrj", "Syrn", "Teng", "Vaii", "Visp", "Xsux",
335 "Zxxx", "Zzzz",
336 "Cari", "Jpan", "Lana", "Lyci", "Lydi", "Olck", "Rjng", "Saur", "Sgnw", "Sund",
337 "Moon", "Mtei",
338 /* new in ICU 4.0 */
339 "Armi", "Avst", "Cakm", "Kore",
340 "Kthi", "Mani", "Phli", "Phlp", "Phlv", "Prti", "Samr", "Tavt",
341 "Zmth", "Zsym",
342 /* new in ICU 4.4 */
343 "Bamu", "Lisu", "Nkgb", "Sarb",
344 /* new in ICU 4.6 */
345 "Bass", "Dupl", "Elba", "Gran", "Kpel", "Loma", "Mend", "Merc",
346 "Narb", "Nbat", "Palm", "Sind", "Wara",
347 /* new in ICU 4.8 */
348 "Afak", "Jurc", "Mroo", "Nshu", "Shrd", "Sora", "Takr", "Tang", "Wole",
349 /* new in ICU 49 */
350 "Hluw", "Khoj", "Tirh",
351 /* new in ICU 52 */
352 "Aghb", "Mahj"
353 };
354 int32_t j = 0;
355 if(LENGTHOF(expectedLong)!=(USCRIPT_CODE_LIMIT-USCRIPT_BALINESE)) {
356 log_err("need to add new script codes in cucdapi.c!\n");
357 return;
358 }
359 for(i=USCRIPT_BALINESE; (UScriptCode)i<USCRIPT_CODE_LIMIT; i++, j++){
360 const char* name = uscript_getName((UScriptCode)i);
361 if(name==NULL || strcmp(name,expectedLong[j])!=0){
362 log_err("uscript_getName failed for code %i: %s!=%s\n", i, name, expectedLong[j]);
363 }
364 name = uscript_getShortName((UScriptCode)i);
365 if(name==NULL || strcmp(name,expectedShort[j])!=0){
366 log_err("uscript_getShortName failed for code %i: %s!=%s\n", i, name, expectedShort[j]);
367 }
368 }
369 for(i=0; i<LENGTHOF(expectedLong); i++){
370 UScriptCode fillIn[5] = {USCRIPT_INVALID_CODE};
371 UErrorCode status = U_ZERO_ERROR;
372 int32_t len = 0;
373 len = uscript_getCode(expectedShort[i], fillIn, LENGTHOF(fillIn), &status);
374 if(U_FAILURE(status)){
375 log_err("uscript_getCode failed for script name %s. Error: %s\n",expectedShort[i], u_errorName(status));
376 }
377 if(len>1){
378 log_err("uscript_getCode did not return expected number of codes for script %s. EXPECTED: 1 GOT: %i\n", expectedShort[i], len);
379 }
380 if(fillIn[0]!= (UScriptCode)(USCRIPT_BALINESE+i)){
381 log_err("uscript_getCode did not return expected code for script %s. EXPECTED: %i GOT: %i\n", expectedShort[i], (USCRIPT_BALINESE+i), fillIn[0] );
382 }
383 }
384 }
385
386 {
387 /* test characters which have Script_Extensions */
388 UErrorCode errorCode=U_ZERO_ERROR;
389 if(!(
390 USCRIPT_COMMON==uscript_getScript(0x0640, &errorCode) &&
391 USCRIPT_INHERITED==uscript_getScript(0x0650, &errorCode) &&
392 USCRIPT_ARABIC==uscript_getScript(0xfdf2, &errorCode)) ||
393 U_FAILURE(errorCode)
394 ) {
395 log_err("uscript_getScript(character with Script_Extensions) failed\n");
396 }
397 }
398 }
399
TestHasScript()400 void TestHasScript() {
401 if(!(
402 !uscript_hasScript(0x063f, USCRIPT_COMMON) &&
403 uscript_hasScript(0x063f, USCRIPT_ARABIC) && /* main Script value */
404 !uscript_hasScript(0x063f, USCRIPT_SYRIAC) &&
405 !uscript_hasScript(0x063f, USCRIPT_THAANA))
406 ) {
407 log_err("uscript_hasScript(U+063F, ...) is wrong\n");
408 }
409 if(!(
410 !uscript_hasScript(0x0640, USCRIPT_COMMON) && /* main Script value */
411 uscript_hasScript(0x0640, USCRIPT_ARABIC) &&
412 uscript_hasScript(0x0640, USCRIPT_SYRIAC) &&
413 !uscript_hasScript(0x0640, USCRIPT_THAANA))
414 ) {
415 log_err("uscript_hasScript(U+0640, ...) is wrong\n");
416 }
417 if(!(
418 !uscript_hasScript(0x0650, USCRIPT_INHERITED) && /* main Script value */
419 uscript_hasScript(0x0650, USCRIPT_ARABIC) &&
420 uscript_hasScript(0x0650, USCRIPT_SYRIAC) &&
421 !uscript_hasScript(0x0650, USCRIPT_THAANA))
422 ) {
423 log_err("uscript_hasScript(U+0650, ...) is wrong\n");
424 }
425 if(!(
426 !uscript_hasScript(0x0660, USCRIPT_COMMON) && /* main Script value */
427 uscript_hasScript(0x0660, USCRIPT_ARABIC) &&
428 !uscript_hasScript(0x0660, USCRIPT_SYRIAC) &&
429 uscript_hasScript(0x0660, USCRIPT_THAANA))
430 ) {
431 log_err("uscript_hasScript(U+0660, ...) is wrong\n");
432 }
433 if(!(
434 !uscript_hasScript(0xfdf2, USCRIPT_COMMON) &&
435 uscript_hasScript(0xfdf2, USCRIPT_ARABIC) && /* main Script value */
436 !uscript_hasScript(0xfdf2, USCRIPT_SYRIAC) &&
437 uscript_hasScript(0xfdf2, USCRIPT_THAANA))
438 ) {
439 log_err("uscript_hasScript(U+FDF2, ...) is wrong\n");
440 }
441 if(uscript_hasScript(0x0640, 0xaffe)) {
442 /* An unguarded implementation might go into an infinite loop. */
443 log_err("uscript_hasScript(U+0640, bogus 0xaffe) is wrong\n");
444 }
445 }
446
TestGetScriptExtensions()447 void TestGetScriptExtensions() {
448 UScriptCode scripts[20];
449 int32_t length;
450 UErrorCode errorCode;
451
452 /* errors and overflows */
453 errorCode=U_PARSE_ERROR;
454 length=uscript_getScriptExtensions(0x0640, scripts, LENGTHOF(scripts), &errorCode);
455 if(errorCode!=U_PARSE_ERROR) {
456 log_err("uscript_getScriptExtensions(U+0640, U_PARSE_ERROR) did not preserve the UErrorCode - %s\n",
457 u_errorName(errorCode));
458 }
459 errorCode=U_ZERO_ERROR;
460 length=uscript_getScriptExtensions(0x0640, NULL, LENGTHOF(scripts), &errorCode);
461 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
462 log_err("uscript_getScriptExtensions(U+0640, NULL) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
463 u_errorName(errorCode));
464 }
465 errorCode=U_ZERO_ERROR;
466 length=uscript_getScriptExtensions(0x0640, scripts, -1, &errorCode);
467 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
468 log_err("uscript_getScriptExtensions(U+0640, capacity<0) did not set U_ILLEGAL_ARGUMENT_ERROR - %s\n",
469 u_errorName(errorCode));
470 }
471 errorCode=U_ZERO_ERROR;
472 length=uscript_getScriptExtensions(0x0640, scripts, 0, &errorCode);
473 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
474 log_err("uscript_getScriptExtensions(U+0640, capacity=0: pure preflighting)=%d != 3 - %s\n",
475 (int)length, u_errorName(errorCode));
476 }
477 errorCode=U_ZERO_ERROR;
478 length=uscript_getScriptExtensions(0x0640, scripts, 1, &errorCode);
479 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=3) {
480 log_err("uscript_getScriptExtensions(U+0640, capacity=1: preflighting)=%d != 3 - %s\n",
481 (int)length, u_errorName(errorCode));
482 }
483 /* U+063F has only a Script code, no Script_Extensions. */
484 errorCode=U_ZERO_ERROR;
485 length=uscript_getScriptExtensions(0x063f, scripts, 0, &errorCode);
486 if(errorCode!=U_BUFFER_OVERFLOW_ERROR || length!=1) {
487 log_err("uscript_getScriptExtensions(U+063F, capacity=0)=%d != 1 - %s\n",
488 (int)length, u_errorName(errorCode));
489 }
490
491 /* invalid code points */
492 errorCode=U_ZERO_ERROR;
493 length=uscript_getScriptExtensions(-1, scripts, LENGTHOF(scripts), &errorCode);
494 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
495 log_err("uscript_getScriptExtensions(-1)=%d does not return {UNKNOWN} - %s\n",
496 (int)length, u_errorName(errorCode));
497 }
498 errorCode=U_ZERO_ERROR;
499 length=uscript_getScriptExtensions(0x110000, scripts, LENGTHOF(scripts), &errorCode);
500 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_UNKNOWN) {
501 log_err("uscript_getScriptExtensions(0x110000)=%d does not return {UNKNOWN} - %s\n",
502 (int)length, u_errorName(errorCode));
503 }
504
505 /* normal usage */
506 errorCode=U_ZERO_ERROR;
507 length=uscript_getScriptExtensions(0x063f, scripts, 1, &errorCode);
508 if(U_FAILURE(errorCode) || length!=1 || scripts[0]!=USCRIPT_ARABIC) {
509 log_err("uscript_getScriptExtensions(U+063F, capacity=1)=%d does not return {ARABIC} - %s\n",
510 (int)length, u_errorName(errorCode));
511 }
512 errorCode=U_ZERO_ERROR;
513 length=uscript_getScriptExtensions(0x0640, scripts, LENGTHOF(scripts), &errorCode);
514 if(U_FAILURE(errorCode) || length!=3 ||
515 scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_SYRIAC || scripts[2]!=USCRIPT_MANDAIC
516 ) {
517 log_err("uscript_getScriptExtensions(U+0640)=%d failed - %s\n",
518 (int)length, u_errorName(errorCode));
519 }
520 errorCode=U_ZERO_ERROR;
521 length=uscript_getScriptExtensions(0xfdf2, scripts, LENGTHOF(scripts), &errorCode);
522 if(U_FAILURE(errorCode) || length!=2 || scripts[0]!=USCRIPT_ARABIC || scripts[1]!=USCRIPT_THAANA) {
523 log_err("uscript_getScriptExtensions(U+FDF2)=%d failed - %s\n",
524 (int)length, u_errorName(errorCode));
525 }
526 errorCode=U_ZERO_ERROR;
527 length=uscript_getScriptExtensions(0xff65, scripts, LENGTHOF(scripts), &errorCode);
528 if(U_FAILURE(errorCode) || length!=6 || scripts[0]!=USCRIPT_BOPOMOFO || scripts[5]!=USCRIPT_YI) {
529 log_err("uscript_getScriptExtensions(U+FF65)=%d failed - %s\n",
530 (int)length, u_errorName(errorCode));
531 }
532 }
533
TestScriptMetadataAPI()534 void TestScriptMetadataAPI() {
535 /* API & code coverage. More testing in intltest/ucdtest.cpp. */
536 UErrorCode errorCode=U_ZERO_ERROR;
537 UChar sample[8];
538
539 if(uscript_getSampleString(USCRIPT_LATIN, sample, LENGTHOF(sample), &errorCode)!=1 ||
540 U_FAILURE(errorCode) ||
541 uscript_getScript(sample[0], &errorCode)!=USCRIPT_LATIN ||
542 sample[1]!=0) {
543 log_err("uscript_getSampleString(Latn) failed - %s\n", u_errorName(errorCode));
544 }
545 sample[0]=0xfffe;
546 if(uscript_getSampleString(USCRIPT_LATIN, sample, 0, &errorCode)!=1 ||
547 errorCode!=U_BUFFER_OVERFLOW_ERROR ||
548 sample[0]!=0xfffe) {
549 log_err("uscript_getSampleString(Latn, capacity=0) failed - %s\n", u_errorName(errorCode));
550 }
551 errorCode=U_ZERO_ERROR;
552 if(uscript_getSampleString(USCRIPT_INVALID_CODE, sample, LENGTHOF(sample), &errorCode)!=0 ||
553 U_FAILURE(errorCode) ||
554 sample[0]!=0) {
555 log_err("uscript_getSampleString(invalid) failed - %s\n", u_errorName(errorCode));
556 }
557 sample[0]=0xfffe;
558 if(uscript_getSampleString(USCRIPT_CODE_LIMIT, sample, 0, &errorCode)!=0 ||
559 errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
560 sample[0]!=0xfffe) {
561 log_err("uscript_getSampleString(limit, capacity=0) failed - %s\n", u_errorName(errorCode));
562 }
563
564 if(uscript_getUsage(USCRIPT_LATIN)!=USCRIPT_USAGE_RECOMMENDED ||
565 uscript_getUsage(USCRIPT_YI)!=USCRIPT_USAGE_ASPIRATIONAL ||
566 uscript_getUsage(USCRIPT_CHEROKEE)!=USCRIPT_USAGE_LIMITED_USE ||
567 uscript_getUsage(USCRIPT_COPTIC)!=USCRIPT_USAGE_EXCLUDED ||
568 uscript_getUsage(USCRIPT_CIRTH)!=USCRIPT_USAGE_NOT_ENCODED ||
569 uscript_getUsage(USCRIPT_INVALID_CODE)!=USCRIPT_USAGE_NOT_ENCODED ||
570 uscript_getUsage(USCRIPT_CODE_LIMIT)!=USCRIPT_USAGE_NOT_ENCODED) {
571 log_err("uscript_getUsage() failed\n");
572 }
573
574 if(uscript_isRightToLeft(USCRIPT_LATIN) ||
575 uscript_isRightToLeft(USCRIPT_CIRTH) ||
576 !uscript_isRightToLeft(USCRIPT_ARABIC) ||
577 !uscript_isRightToLeft(USCRIPT_HEBREW)) {
578 log_err("uscript_isRightToLeft() failed\n");
579 }
580
581 if(uscript_breaksBetweenLetters(USCRIPT_LATIN) ||
582 uscript_breaksBetweenLetters(USCRIPT_CIRTH) ||
583 !uscript_breaksBetweenLetters(USCRIPT_HAN) ||
584 !uscript_breaksBetweenLetters(USCRIPT_THAI)) {
585 log_err("uscript_breaksBetweenLetters() failed\n");
586 }
587
588 if(uscript_isCased(USCRIPT_CIRTH) ||
589 uscript_isCased(USCRIPT_HAN) ||
590 !uscript_isCased(USCRIPT_LATIN) ||
591 !uscript_isCased(USCRIPT_GREEK)) {
592 log_err("uscript_isCased() failed\n");
593 }
594 }
595
TestBinaryValues()596 void TestBinaryValues() {
597 /*
598 * Unicode 5.1 explicitly defines binary property value aliases.
599 * Verify that they are all recognized.
600 */
601 static const char *const falseValues[]={ "N", "No", "F", "False" };
602 static const char *const trueValues[]={ "Y", "Yes", "T", "True" };
603 int32_t i;
604 for(i=0; i<LENGTHOF(falseValues); ++i) {
605 if(FALSE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, falseValues[i])) {
606 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=FALSE (Are you missing data?)\n", falseValues[i]);
607 }
608 }
609 for(i=0; i<LENGTHOF(trueValues); ++i) {
610 if(TRUE!=u_getPropertyValueEnum(UCHAR_ALPHABETIC, trueValues[i])) {
611 log_data_err("u_getPropertyValueEnum(UCHAR_ALPHABETIC, \"%s\")!=TRUE (Are you missing data?)\n", trueValues[i]);
612 }
613 }
614 }
615