1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * Copyright (c) 1997-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 ********************************************************************
7 *
8 * File UCNVSELTST.C
9 *
10 * Modification History:
11 * Name Description
12 * MOHAMED ELDAWY Creation
13 ********************************************************************
14 */
15
16 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
17
18 #include "ucnvseltst.h"
19
20 #include <stdio.h>
21
22 #include "unicode/utypes.h"
23 #include "unicode/ucnvsel.h"
24 #include "unicode/ustring.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "propsvec.h"
28
29 #define FILENAME_BUFFER 1024
30
31 #define TDSRCPATH ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
32
33 static void TestSelector(void);
34 static void TestUPropsVector(void);
35 void addCnvSelTest(TestNode** root); /* Declaration required to suppress compiler warnings. */
36
/*
 * Adds the converter selector tests defined in this file to the test tree
 * passed in through root.
 */
void addCnvSelTest(TestNode **root)
{
    addTest(root, TestSelector, "tsconv/ucnvseltst/TestSelector");
    addTest(root, TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector");
}
42
43 static const char **gAvailableNames = NULL;
44 static int32_t gCountAvailable = 0;
45
46 static UBool
getAvailableNames()47 getAvailableNames() {
48 int32_t i;
49 if (gAvailableNames != NULL) {
50 return TRUE;
51 }
52 gCountAvailable = ucnv_countAvailable();
53 if (gCountAvailable == 0) {
54 log_data_err("No converters available.\n");
55 return FALSE;
56 }
57 gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
58 if (gAvailableNames == NULL) {
59 log_err("unable to allocate memory for %ld available converter names\n",
60 (long)gCountAvailable);
61 return FALSE;
62 }
63 for (i = 0; i < gCountAvailable; ++i) {
64 gAvailableNames[i] = ucnv_getAvailableName(i);
65 }
66 return TRUE;
67 }
68
69 static void
releaseAvailableNames()70 releaseAvailableNames() {
71 uprv_free((void *)gAvailableNames);
72 gAvailableNames = NULL;
73 gCountAvailable = 0;
74 }
75
76 static const char **
getEncodings(int32_t start,int32_t step,int32_t count,int32_t * pCount)77 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
78 const char **names;
79 int32_t i;
80
81 *pCount = 0;
82 if (count <= 0) {
83 return NULL;
84 }
85 names = (const char **)uprv_malloc(count * sizeof(char *));
86 if (names == NULL) {
87 log_err("memory allocation error for %ld pointers\n", (long)count);
88 return NULL;
89 }
90 if (step == 0 && count > 0) {
91 step = 1;
92 }
93 for (i = 0; i < count; ++i) {
94 if (0 <= start && start < gCountAvailable) {
95 names[i] = gAvailableNames[start];
96 start += step;
97 ++*pCount;
98 }
99 }
100 return names;
101 }
102
#if 0
/*
 * Disabled: ucnvsel_open() has no notion of "no encodings".
 * When it is given 0 encodings it opens a selector for all available ones.
 */
static const char **
getNoEncodings(int32_t *pCount) {
    const char **none = NULL;

    *pCount = 0;
    return none;
}
#endif
114
/* Test case: a single encoding, the available converter at index 1. */
static const char **
getOneEncoding(int32_t *pCount) {
    const char **encodings = getEncodings(1, 0, 1, pCount);

    return encodings;
}
119
/* Test case: up to 25 encodings taken from the even indexes 0, 2, 4, ... */
static const char **
getFirstEvenEncodings(int32_t *pCount) {
    const char **encodings = getEncodings(0, 2, 25, pCount);

    return encodings;
}
124
125 static const char **
getMiddleEncodings(int32_t * pCount)126 getMiddleEncodings(int32_t *pCount) {
127 return getEncodings(gCountAvailable - 12, 1, 22, pCount);
128 }
129
130 static const char **
getLastEncodings(int32_t * pCount)131 getLastEncodings(int32_t *pCount) {
132 return getEncodings(gCountAvailable - 1, -1, 25, pCount);
133 }
134
135 static const char **
getSomeEncodings(int32_t * pCount)136 getSomeEncodings(int32_t *pCount) {
137 /* 20 evenly distributed */
138 return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
139 }
140
141 static const char **
getEveryThirdEncoding(int32_t * pCount)142 getEveryThirdEncoding(int32_t *pCount) {
143 return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
144 }
145
146 static const char **
getAllEncodings(int32_t * pCount)147 getAllEncodings(int32_t *pCount) {
148 return getEncodings(0, 1, gCountAvailable, pCount);
149 }
150
151 typedef const char **GetEncodingsFn(int32_t *);
152
153 static GetEncodingsFn *const getEncodingsFns[] = {
154 getOneEncoding,
155 getFirstEvenEncodings,
156 getMiddleEncodings,
157 getLastEncodings,
158 getSomeEncodings,
159 getEveryThirdEncoding,
160 getAllEncodings
161 };
162
fopenOrError(const char * filename)163 static FILE *fopenOrError(const char *filename) {
164 int32_t needLen;
165 FILE *f;
166 char fnbuf[FILENAME_BUFFER];
167 const char* directory= ctest_dataSrcDir();
168 needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1;
169 if(needLen > FILENAME_BUFFER) {
170 log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
171 filename, needLen, FILENAME_BUFFER);
172 return NULL;
173 }
174
175 strcpy(fnbuf, directory);
176 strcat(fnbuf, TDSRCPATH);
177 strcat(fnbuf, filename);
178
179 f = fopen(fnbuf, "rb");
180
181 if(f == NULL) {
182 log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
183 }
184 return f;
185 }
186
/*
 * The test text file loaded into memory as a sequence of NUL-terminated
 * strings, together with the state of the iteration over those strings.
 */
typedef struct TestText {
    char *text;       /* start of the loaded buffer */
    char *textLimit;  /* terminating NUL written just past the file contents */
    char *limit;      /* end of the most recently returned string */
    int32_t number;   /* 0-based number of the most recently returned string */
} TestText;
192
193 static void
text_reset(TestText * tt)194 text_reset(TestText *tt) {
195 tt->limit = tt->text;
196 tt->number = 0;
197 }
198
199 static char *
text_nextString(TestText * tt,int32_t * pLength)200 text_nextString(TestText *tt, int32_t *pLength) {
201 char *s = tt->limit;
202 if (s == tt->textLimit) {
203 /* we already delivered the last string */
204 return NULL;
205 } else if (s == tt->text) {
206 /* first string */
207 if ((tt->textLimit - tt->text) >= 3 &&
208 s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
209 ) {
210 s += 3; /* skip the UTF-8 signature byte sequence (U+FEFF) */
211 }
212 } else {
213 /* skip the string terminator */
214 ++s;
215 ++tt->number;
216 }
217
218 /* find the end of this string */
219 tt->limit = uprv_strchr(s, 0);
220 *pLength = (int32_t)(tt->limit - s);
221 return s;
222 }
223
224 static UBool
text_open(TestText * tt)225 text_open(TestText *tt) {
226 FILE *f;
227 char *s;
228 int32_t length;
229 uprv_memset(tt, 0, sizeof(TestText));
230 f = fopenOrError("ConverterSelectorTestUTF8.txt");
231 if(!f) {
232 return FALSE;
233 }
234 fseek(f, 0, SEEK_END);
235 length = (int32_t)ftell(f);
236 fseek(f, 0, SEEK_SET);
237 tt->text = (char *)uprv_malloc(length + 1);
238 if (tt->text == NULL) {
239 fclose(f);
240 return FALSE;
241 }
242 if (length != fread(tt->text, 1, length, f)) {
243 log_err("error reading %ld bytes from test text file\n", (long)length);
244 length = 0;
245 uprv_free(tt->text);
246 }
247 fclose(f);
248 tt->textLimit = tt->text + length;
249 *tt->textLimit = 0;
250 /* replace all Unicode '#' (U+0023) with NUL */
251 for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
252 text_reset(tt);
253 return TRUE;
254 }
255
256 static void
text_close(TestText * tt)257 text_close(TestText *tt) {
258 uprv_free(tt->text);
259 }
260
findIndex(const char * converterName)261 static int32_t findIndex(const char* converterName) {
262 int32_t i;
263 for (i = 0 ; i < gCountAvailable; i++) {
264 if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
265 return i;
266 }
267 }
268 return -1;
269 }
270
271 static UBool *
getResultsManually(const char ** encodings,int32_t num_encodings,const char * utf8,int32_t length,const USet * excludedCodePoints,const UConverterUnicodeSet whichSet)272 getResultsManually(const char** encodings, int32_t num_encodings,
273 const char *utf8, int32_t length,
274 const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
275 UBool* resultsManually;
276 int32_t i;
277
278 resultsManually = (UBool*) uprv_malloc(gCountAvailable);
279 uprv_memset(resultsManually, 0, gCountAvailable);
280
281 for(i = 0 ; i < num_encodings ; i++) {
282 UErrorCode status = U_ZERO_ERROR;
283 /* get unicode set for that converter */
284 USet* set;
285 UConverter* test_converter;
286 UChar32 cp;
287 int32_t encIndex, offset;
288
289 set = uset_openEmpty();
290 test_converter = ucnv_open(encodings[i], &status);
291 ucnv_getUnicodeSet(test_converter, set,
292 whichSet, &status);
293 if (excludedCodePoints != NULL) {
294 uset_addAll(set, excludedCodePoints);
295 }
296 uset_freeze(set);
297 offset = 0;
298 cp = 0;
299
300 encIndex = findIndex(encodings[i]);
301 /*
302 * The following is almost, but not entirely, the same as
303 * resultsManually[encIndex] =
304 * (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
305 * They might be different if the set contains strings,
306 * or if the utf8 string contains an illegal sequence.
307 *
308 * The UConverterSelector does not currently handle strings that can be
309 * converted, and it treats an illegal sequence as convertible
310 * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
311 */
312 resultsManually[encIndex] = TRUE;
313 while(offset<length) {
314 U8_NEXT(utf8, offset, length, cp);
315 if (cp >= 0 && !uset_contains(set, cp)) {
316 resultsManually[encIndex] = FALSE;
317 break;
318 }
319 }
320 uset_close(set);
321 ucnv_close(test_converter);
322 }
323 return resultsManually;
324 }
325
326 /* closes res but does not free resultsManually */
verifyResult(UEnumeration * res,const UBool * resultsManually)327 static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
328 UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
329 const char* name;
330 UErrorCode status = U_ZERO_ERROR;
331 int32_t i;
332
333 /* fill the bool for the selector results! */
334 uprv_memset(resultsFromSystem, 0, gCountAvailable);
335 while ((name = uenum_next(res,NULL, &status)) != NULL) {
336 resultsFromSystem[findIndex(name)] = TRUE;
337 }
338 for(i = 0 ; i < gCountAvailable; i++) {
339 if(resultsManually[i] != resultsFromSystem[i]) {
340 log_err("failure in converter selector\n"
341 "converter %s had conflicting results -- manual: %d, system %d\n",
342 gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
343 }
344 }
345 uprv_free(resultsFromSystem);
346 uenum_close(res);
347 }
348
349 static UConverterSelector *
serializeAndUnserialize(UConverterSelector * sel,char ** buffer,UErrorCode * status)350 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
351 char *new_buffer;
352 int32_t ser_len, ser_len2;
353 /* preflight */
354 ser_len = ucnvsel_serialize(sel, NULL, 0, status);
355 if (*status != U_BUFFER_OVERFLOW_ERROR) {
356 log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
357 return sel;
358 }
359 new_buffer = (char *)uprv_malloc(ser_len);
360 *status = U_ZERO_ERROR;
361 ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
362 if (U_FAILURE(*status) || ser_len != ser_len2) {
363 log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
364 uprv_free(new_buffer);
365 return sel;
366 }
367 ucnvsel_close(sel);
368 uprv_free(*buffer);
369 *buffer = new_buffer;
370 sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
371 if (U_FAILURE(*status)) {
372 log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
373 return NULL;
374 }
375 return sel;
376 }
377
TestSelector()378 static void TestSelector()
379 {
380 TestText text;
381 USet* excluded_sets[3] = { NULL };
382 int32_t i, testCaseIdx;
383
384 if (!getAvailableNames()) {
385 return;
386 }
387 if (!text_open(&text)) {
388 releaseAvailableNames();;
389 }
390
391 excluded_sets[0] = uset_openEmpty();
392 for(i = 1 ; i < 3 ; i++) {
393 excluded_sets[i] = uset_open(i*30, i*30+500);
394 }
395
396 for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx++)
397 {
398 int32_t excluded_set_id;
399 int32_t num_encodings;
400 const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
401 if (getTestOption(QUICK_OPTION) && num_encodings > 25) {
402 uprv_free((void *)encodings);
403 continue;
404 }
405
406 /*
407 * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
408 *
409 * This loop was replaced by the following statement because
410 * the loop made the test run longer without adding to the code coverage.
411 * The handling of the exclusion set is independent of the
412 * set of encodings, so there is no need to test every combination.
413 */
414 excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets);
415 {
416 UConverterSelector *sel_rt, *sel_fb;
417 char *buffer_fb = NULL;
418 UErrorCode status = U_ZERO_ERROR;
419 sel_rt = ucnvsel_open(encodings, num_encodings,
420 excluded_sets[excluded_set_id],
421 UCNV_ROUNDTRIP_SET, &status);
422 if (num_encodings == gCountAvailable) {
423 /* test the special "all converters" parameter values */
424 sel_fb = ucnvsel_open(NULL, 0,
425 excluded_sets[excluded_set_id],
426 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
427 } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
428 /* test that a NULL set gives the same results as an empty set */
429 sel_fb = ucnvsel_open(encodings, num_encodings,
430 NULL,
431 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
432 } else {
433 sel_fb = ucnvsel_open(encodings, num_encodings,
434 excluded_sets[excluded_set_id],
435 UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
436 }
437 if (U_FAILURE(status)) {
438 log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
439 ucnvsel_close(sel_rt);
440 uprv_free((void *)encodings);
441 continue;
442 }
443
444 text_reset(&text);
445 for (;;) {
446 UBool *manual_rt, *manual_fb;
447 static UChar utf16[10000];
448 char *s;
449 int32_t length8, length16;
450
451 s = text_nextString(&text, &length8);
452 if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) {
453 break;
454 }
455
456 manual_rt = getResultsManually(encodings, num_encodings,
457 s, length8,
458 excluded_sets[excluded_set_id],
459 UCNV_ROUNDTRIP_SET);
460 manual_fb = getResultsManually(encodings, num_encodings,
461 s, length8,
462 excluded_sets[excluded_set_id],
463 UCNV_ROUNDTRIP_AND_FALLBACK_SET);
464 /* UTF-8 with length */
465 status = U_ZERO_ERROR;
466 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
467 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
468 /* UTF-8 NUL-terminated */
469 verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
470 verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
471
472 u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &status);
473 if (U_FAILURE(status)) {
474 log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
475 (long)text.number, u_errorName(status));
476 } else {
477 if (text.number == 0) {
478 sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
479 }
480 if (U_SUCCESS(status)) {
481 /* UTF-16 with length */
482 verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
483 verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
484 /* UTF-16 NUL-terminated */
485 verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
486 verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
487 }
488 }
489
490 uprv_free(manual_rt);
491 uprv_free(manual_fb);
492 }
493 ucnvsel_close(sel_rt);
494 ucnvsel_close(sel_fb);
495 uprv_free(buffer_fb);
496 }
497 uprv_free((void *)encodings);
498 }
499
500 releaseAvailableNames();
501 text_close(&text);
502 for(i = 0 ; i < 3 ; i++) {
503 uset_close(excluded_sets[i]);
504 }
505 }
506
507 /* Improve code coverage of UPropsVectors */
TestUPropsVector()508 static void TestUPropsVector() {
509 UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR;
510 UPropsVectors *pv = upvec_open(100, &errorCode);
511 if (pv != NULL) {
512 log_err("Should have returned NULL if UErrorCode is an error.");
513 return;
514 }
515 errorCode = U_ZERO_ERROR;
516 pv = upvec_open(-1, &errorCode);
517 if (pv != NULL || U_SUCCESS(errorCode)) {
518 log_err("Should have returned NULL if column is less than 0.\n");
519 return;
520 }
521 errorCode = U_ZERO_ERROR;
522 pv = upvec_open(100, &errorCode);
523 if (pv == NULL || U_FAILURE(errorCode)) {
524 log_err("Unable to open UPropsVectors.\n");
525 return;
526 }
527
528 if (upvec_getValue(pv, 0, 1) != 0) {
529 log_err("upvec_getValue should return 0.\n");
530 }
531 if (upvec_getRow(pv, 0, NULL, NULL) == NULL) {
532 log_err("upvec_getRow should not return NULL.\n");
533 }
534 if (upvec_getArray(pv, NULL, NULL) != NULL) {
535 log_err("upvec_getArray should return NULL.\n");
536 }
537
538 upvec_close(pv);
539 }
540