1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1997-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 *
8 * File UCNVSELTST.C
9 *
10 * Modification History:
11 * Name Description
12 * MOHAMED ELDAWY Creation
13 ********************************************************************
14 */
15
16 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
17
18 #include "ucnvseltst.h"
19
20 #include <stdbool.h>
21 #include <stdio.h>
22
23 #include "unicode/utypes.h"
24 #include "unicode/ucnvsel.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utf8.h"
27 #include "cmemory.h"
28 #include "cstring.h"
29 #include "propsvec.h"
30
31 #define FILENAME_BUFFER 1024
32
33 #define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
34
35 static void TestSelector(void);
36 static void TestUPropsVector(void);
37 void addCnvSelTest(TestNode** root); /* Declaration required to suppress compiler warnings. */
38
/* Registers the converter selector tests of this file in the test tree at root. */
void addCnvSelTest(TestNode **root)
{
    addTest(root, TestSelector, "tsconv/ucnvseltst/TestSelector");
    addTest(root, TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector");
}
44
45 static const char **gAvailableNames = NULL;
46 static int32_t gCountAvailable = 0;
47
48 static UBool
getAvailableNames()49 getAvailableNames() {
50 int32_t i;
51 if (gAvailableNames != NULL) {
52 return true;
53 }
54 gCountAvailable = ucnv_countAvailable();
55 if (gCountAvailable == 0) {
56 log_data_err("No converters available.\n");
57 return false;
58 }
59 gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
60 if (gAvailableNames == NULL) {
61 log_err("unable to allocate memory for %ld available converter names\n",
62 (long)gCountAvailable);
63 return false;
64 }
65 for (i = 0; i < gCountAvailable; ++i) {
66 gAvailableNames[i] = ucnv_getAvailableName(i);
67 }
68 return true;
69 }
70
71 static void
releaseAvailableNames()72 releaseAvailableNames() {
73 uprv_free((void *)gAvailableNames);
74 gAvailableNames = NULL;
75 gCountAvailable = 0;
76 }
77
78 static const char **
getEncodings(int32_t start,int32_t step,int32_t count,int32_t * pCount)79 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
80 const char **names;
81 int32_t i;
82
83 *pCount = 0;
84 if (count <= 0) {
85 return NULL;
86 }
87 names = (const char **)uprv_malloc(count * sizeof(char *));
88 if (names == NULL) {
89 log_err("memory allocation error for %ld pointers\n", (long)count);
90 return NULL;
91 }
92 if (step == 0 && count > 0) {
93 step = 1;
94 }
95 for (i = 0; i < count; ++i) {
96 if (0 <= start && start < gCountAvailable) {
97 names[i] = gAvailableNames[start];
98 start += step;
99 ++*pCount;
100 }
101 }
102 return names;
103 }
104
#if 0
/*
 * Disabled because ucnvsel_open() has no "no encodings" mode:
 * when given 0 encodings it opens a selector for all available ones.
 */
static const char **
getNoEncodings(int32_t *pCount) {
    const char **none = NULL;

    *pCount = 0;
    return none;
}
#endif
116
117 static const char **
getOneEncoding(int32_t * pCount)118 getOneEncoding(int32_t *pCount) {
119 return getEncodings(1, 0, 1, pCount);
120 }
121
122 static const char **
getFirstEvenEncodings(int32_t * pCount)123 getFirstEvenEncodings(int32_t *pCount) {
124 return getEncodings(0, 2, 25, pCount);
125 }
126
127 static const char **
getMiddleEncodings(int32_t * pCount)128 getMiddleEncodings(int32_t *pCount) {
129 return getEncodings(gCountAvailable - 12, 1, 22, pCount);
130 }
131
132 static const char **
getLastEncodings(int32_t * pCount)133 getLastEncodings(int32_t *pCount) {
134 return getEncodings(gCountAvailable - 1, -1, 25, pCount);
135 }
136
137 static const char **
getSomeEncodings(int32_t * pCount)138 getSomeEncodings(int32_t *pCount) {
139 /* 20 evenly distributed */
140 return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
141 }
142
143 static const char **
getEveryThirdEncoding(int32_t * pCount)144 getEveryThirdEncoding(int32_t *pCount) {
145 return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
146 }
147
148 static const char **
getAllEncodings(int32_t * pCount)149 getAllEncodings(int32_t *pCount) {
150 return getEncodings(0, 1, gCountAvailable, pCount);
151 }
152
153 typedef const char **GetEncodingsFn(int32_t *);
154
155 static GetEncodingsFn *const getEncodingsFns[] = {
156 getOneEncoding,
157 getFirstEvenEncodings,
158 getMiddleEncodings,
159 getLastEncodings,
160 getSomeEncodings,
161 getEveryThirdEncoding,
162 getAllEncodings
163 };
164
fopenOrError(const char * filename)165 static FILE *fopenOrError(const char *filename) {
166 int32_t needLen;
167 FILE *f;
168 char fnbuf[FILENAME_BUFFER];
169 const char* directory = ctest_dataSrcDir();
170 needLen = (int32_t)(uprv_strlen(directory) + uprv_strlen(TDSRCPATH) + uprv_strlen(filename) + 1);
171 if(needLen > FILENAME_BUFFER) {
172 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
173 filename, needLen, FILENAME_BUFFER);
174 return NULL;
175 }
176
177 strcpy(fnbuf, directory);
178 strcat(fnbuf, TDSRCPATH);
179 strcat(fnbuf, filename);
180
181 f = fopen(fnbuf, "rb");
182
183 if(f == NULL) {
184 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
185 }
186 return f;
187 }
188
/* Test text held in one buffer as a sequence of NUL-terminated strings, plus an iteration cursor. */
typedef struct TestText {
    char *text;        /* start of the text buffer */
    char *textLimit;   /* one past the last text byte */
    char *limit;       /* end of the string returned most recently (== text before the first one) */
    int32_t number;    /* zero-based index of the string returned most recently */
} TestText;
194
195 static void
text_reset(TestText * tt)196 text_reset(TestText *tt) {
197 tt->limit = tt->text;
198 tt->number = 0;
199 }
200
201 static char *
text_nextString(TestText * tt,int32_t * pLength)202 text_nextString(TestText *tt, int32_t *pLength) {
203 char *s = tt->limit;
204 if (s == tt->textLimit) {
205 /* we already delivered the last string */
206 return NULL;
207 } else if (s == tt->text) {
208 /* first string */
209 if ((tt->textLimit - tt->text) >= 3 &&
210 s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
211 ) {
212 s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */
213 }
214 } else {
215 /* skip the string terminator */
216 ++s;
217 ++tt->number;
218 }
219
220 /* find the end of this string */
221 tt->limit = uprv_strchr(s, 0);
222 *pLength = (int32_t)(tt->limit - s);
223 return s;
224 }
225
226 static UBool
text_open(TestText * tt)227 text_open(TestText *tt) {
228 FILE *f;
229 char *s;
230 int32_t length;
231 uprv_memset(tt, 0, sizeof(TestText));
232 f = fopenOrError("ConverterSelectorTestUTF8.txt");
233 if(!f) {
234 return false;
235 }
236 fseek(f, 0, SEEK_END);
237 length = (int32_t)ftell(f);
238 fseek(f, 0, SEEK_SET);
239 tt->text = (char *)uprv_malloc(length + 1);
240 if (tt->text == NULL) {
241 fclose(f);
242 return false;
243 }
244 if (length != (int32_t)fread(tt->text, 1, length, f)) {
245 log_err("error reading %ld bytes from test text file\n", (long)length);
246 length = 0;
247 uprv_free(tt->text);
248 }
249 fclose(f);
250 tt->textLimit = tt->text + length;
251 *tt->textLimit = 0;
252 /* replace all Unicode '#' (U+0023) with NUL */
253 for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
254 text_reset(tt);
255 return true;
256 }
257
258 static void
text_close(TestText * tt)259 text_close(TestText *tt) {
260 uprv_free(tt->text);
261 }
262
findIndex(const char * converterName)263 static int32_t findIndex(const char* converterName) {
264 int32_t i;
265 for (i = 0 ; i < gCountAvailable; i++) {
266 if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
267 return i;
268 }
269 }
270 return -1;
271 }
272
273 static UBool *
getResultsManually(const char ** encodings,int32_t num_encodings,const char * utf8,int32_t length,const USet * excludedCodePoints,const UConverterUnicodeSet whichSet)274 getResultsManually(const char** encodings, int32_t num_encodings,
275 const char *utf8, int32_t length,
276 const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
277 UBool* resultsManually;
278 int32_t i;
279
280 resultsManually = (UBool*) uprv_malloc(gCountAvailable);
281 uprv_memset(resultsManually, 0, gCountAvailable);
282
283 for(i = 0 ; i < num_encodings ; i++) {
284 UErrorCode status = U_ZERO_ERROR;
285 /* get unicode set for that converter */
286 USet* set;
287 UConverter* test_converter;
288 UChar32 cp;
289 int32_t encIndex, offset;
290
291 set = uset_openEmpty();
292 test_converter = ucnv_open(encodings[i], &status);
293 ucnv_getUnicodeSet(test_converter, set,
294 whichSet, &status);
295 if (excludedCodePoints != NULL) {
296 uset_addAll(set, excludedCodePoints);
297 }
298 uset_freeze(set);
299 offset = 0;
300 cp = 0;
301
302 encIndex = findIndex(encodings[i]);
303 /*
304 * The following is almost, but not entirely, the same as
305 * resultsManually[encIndex] =
306 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
307 * They might be different if the set contains strings,
308 * or if the utf8 string contains an illegal sequence.
309 *
310 * The UConverterSelector does not currently handle strings that can be
311 * converted, and it treats an illegal sequence as convertible
312 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
313 */
314 resultsManually[encIndex] = true;
315 while(offset<length) {
316 U8_NEXT(utf8, offset, length, cp);
317 if (cp >= 0 && !uset_contains(set, cp)) {
318 resultsManually[encIndex] = false;
319 break;
320 }
321 }
322 uset_close(set);
323 ucnv_close(test_converter);
324 }
325 return resultsManually;
326 }
327
328 /* closes res but does not free resultsManually */
verifyResult(UEnumeration * res,const UBool * resultsManually)329 static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
330 UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
331 const char* name;
332 UErrorCode status = U_ZERO_ERROR;
333 int32_t i;
334
335 /* fill the bool for the selector results! */
336 uprv_memset(resultsFromSystem, 0, gCountAvailable);
337 while ((name = uenum_next(res,NULL, &status)) != NULL) {
338 resultsFromSystem[findIndex(name)] = true;
339 }
340 for(i = 0 ; i < gCountAvailable; i++) {
341 if(resultsManually[i] != resultsFromSystem[i]) {
342 log_err("failure in converter selector\n"
343 "converter %s had conflicting results -- manual: %d, system %d\n",
344 gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
345 }
346 }
347 uprv_free(resultsFromSystem);
348 uenum_close(res);
349 }
350
351 static UConverterSelector *
serializeAndUnserialize(UConverterSelector * sel,char ** buffer,UErrorCode * status)352 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
353 char *new_buffer;
354 int32_t ser_len, ser_len2;
355 /* preflight */
356 ser_len = ucnvsel_serialize(sel, NULL, 0, status);
357 if (*status != U_BUFFER_OVERFLOW_ERROR) {
358 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
359 return sel;
360 }
361 new_buffer = (char *)uprv_malloc(ser_len);
362 *status = U_ZERO_ERROR;
363 ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
364 if (U_FAILURE(*status) || ser_len != ser_len2) {
365 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
366 uprv_free(new_buffer);
367 return sel;
368 }
369 ucnvsel_close(sel);
370 uprv_free(*buffer);
371 *buffer = new_buffer;
372 sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
373 if (U_FAILURE(*status)) {
374 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
375 return NULL;
376 }
377 return sel;
378 }
379
TestSelector()380 static void TestSelector()
381 {
382 TestText text;
383 USet* excluded_sets[3] = { NULL };
384 int32_t i, testCaseIdx;
385
386 if (!getAvailableNames()) {
387 return;
388 }
389 if (!text_open(&text)) {
390 releaseAvailableNames();
391 }
392
393 excluded_sets[0] = uset_openEmpty();
394 for(i = 1 ; i < 3 ; i++) {
395 excluded_sets[i] = uset_open(i*30, i*30+500);
396 }
397
398 for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx++)
399 {
400 int32_t excluded_set_id;
401 int32_t num_encodings;
402 const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
403 if (getTestOption(QUICK_OPTION) && num_encodings > 25) {
404 uprv_free((void *)encodings);
405 continue;
406 }
407
408 /*
409 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
410 *
411 * This loop was replaced by the following statement because
412 * the loop made the test run longer without adding to the code coverage.
413 * The handling of the exclusion set is independent of the
414 * set of encodings, so there is no need to test every combination.
415 */
416 excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets);
417 {
418 UConverterSelector *sel_rt, *sel_fb;
419 char *buffer_fb = NULL;
420 UErrorCode status = U_ZERO_ERROR;
421 sel_rt = ucnvsel_open(encodings, num_encodings,
422 excluded_sets[excluded_set_id],
423 UCNV_ROUNDTRIP_SET, &status);
424 if (num_encodings == gCountAvailable) {
425 /* test the special "all converters" parameter values */
426 sel_fb = ucnvsel_open(NULL, 0,
427 excluded_sets[excluded_set_id],
428 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
429 } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
430 /* test that a NULL set gives the same results as an empty set */
431 sel_fb = ucnvsel_open(encodings, num_encodings,
432 NULL,
433 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
434 } else {
435 sel_fb = ucnvsel_open(encodings, num_encodings,
436 excluded_sets[excluded_set_id],
437 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
438 }
439 if (U_FAILURE(status)) {
440 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
441 ucnvsel_close(sel_rt);
442 uprv_free((void *)encodings);
443 continue;
444 }
445
446 text_reset(&text);
447 for (;;) {
448 UBool *manual_rt, *manual_fb;
449 static UChar utf16[10000];
450 char *s;
451 int32_t length8, length16;
452
453 s = text_nextString(&text, &length8);
454 if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) {
455 break;
456 }
457
458 manual_rt = getResultsManually(encodings, num_encodings,
459 s, length8,
460 excluded_sets[excluded_set_id],
461 UCNV_ROUNDTRIP_SET);
462 manual_fb = getResultsManually(encodings, num_encodings,
463 s, length8,
464 excluded_sets[excluded_set_id],
465 UCNV_ROUNDTRIP_AND_FALLBACK_SET);
466 /* UTF-8 with length */
467 status = U_ZERO_ERROR;
468 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
469 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
470 /* UTF-8 NUL-terminated */
471 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
472 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
473
474 u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &status);
475 if (U_FAILURE(status)) {
476 log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
477 (long)text.number, u_errorName(status));
478 } else {
479 if (text.number == 0) {
480 sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
481 }
482 if (U_SUCCESS(status)) {
483 /* UTF-16 with length */
484 verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
485 verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
486 /* UTF-16 NUL-terminated */
487 verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
488 verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
489 }
490 }
491
492 uprv_free(manual_rt);
493 uprv_free(manual_fb);
494 }
495 ucnvsel_close(sel_rt);
496 ucnvsel_close(sel_fb);
497 uprv_free(buffer_fb);
498 }
499 uprv_free((void *)encodings);
500 }
501
502 releaseAvailableNames();
503 text_close(&text);
504 for(i = 0 ; i < 3 ; i++) {
505 uset_close(excluded_sets[i]);
506 }
507 }
508
509 /* Improve code coverage of UPropsVectors */
TestUPropsVector()510 static void TestUPropsVector() {
511 UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR;
512 UPropsVectors *pv = upvec_open(100, &errorCode);
513 if (pv != NULL) {
514 log_err("Should have returned NULL if UErrorCode is an error.");
515 return;
516 }
517 errorCode = U_ZERO_ERROR;
518 pv = upvec_open(-1, &errorCode);
519 if (pv != NULL || U_SUCCESS(errorCode)) {
520 log_err("Should have returned NULL if column is less than 0.\n");
521 return;
522 }
523 errorCode = U_ZERO_ERROR;
524 pv = upvec_open(100, &errorCode);
525 if (pv == NULL || U_FAILURE(errorCode)) {
526 log_err("Unable to open UPropsVectors.\n");
527 return;
528 }
529
530 if (upvec_getValue(pv, 0, 1) != 0) {
531 log_err("upvec_getValue should return 0.\n");
532 }
533 if (upvec_getRow(pv, 0, NULL, NULL) == NULL) {
534 log_err("upvec_getRow should not return NULL.\n");
535 }
536 if (upvec_getArray(pv, NULL, NULL) != NULL) {
537 log_err("upvec_getArray should return NULL.\n");
538 }
539
540 upvec_close(pv);
541 }
542