• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * Copyright (c) 1997-2014, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  ********************************************************************
7  *
8  * File UCNVSELTST.C
9  *
10  * Modification History:
11  *        Name                     Description
12  *     MOHAMED ELDAWY               Creation
13  ********************************************************************
14  */
15 
16 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
17 
18 #include "ucnvseltst.h"
19 
20 #include <stdbool.h>
21 #include <stdio.h>
22 
23 #include "unicode/utypes.h"
24 #include "unicode/ucnvsel.h"
25 #include "unicode/ustring.h"
26 #include "unicode/utf8.h"
27 #include "cmemory.h"
28 #include "cstring.h"
29 #include "propsvec.h"
30 
31 #define FILENAME_BUFFER 1024
32 
33 #define TDSRCPATH  ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
34 
35 static void TestSelector(void);
36 static void TestUPropsVector(void);
37 void addCnvSelTest(TestNode** root);  /* Declaration required to suppress compiler warnings. */
38 
/*
 * Adds the two tests defined in this file to the test tree at root,
 * using addTest() with their "tsconv/ucnvseltst/..." paths.
 */
void addCnvSelTest(TestNode** root)
{
    /* A function designator converts to a function pointer; '&' is optional. */
    addTest(root, TestSelector, "tsconv/ucnvseltst/TestSelector");
    addTest(root, TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector");
}
44 
/* Number of entries in gAvailableNames; reset to 0 by releaseAvailableNames(). */
static int32_t gCountAvailable = 0;
/*
 * Heap-allocated array of converter names filled by getAvailableNames();
 * NULL while nothing is cached.
 */
static const char **gAvailableNames = NULL;
47 
48 static UBool
getAvailableNames()49 getAvailableNames() {
50   int32_t i;
51   if (gAvailableNames != NULL) {
52     return true;
53   }
54   gCountAvailable = ucnv_countAvailable();
55   if (gCountAvailable == 0) {
56     log_data_err("No converters available.\n");
57     return false;
58   }
59   gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
60   if (gAvailableNames == NULL) {
61     log_err("unable to allocate memory for %ld available converter names\n",
62             (long)gCountAvailable);
63     return false;
64   }
65   for (i = 0; i < gCountAvailable; ++i) {
66     gAvailableNames[i] = ucnv_getAvailableName(i);
67   }
68   return true;
69 }
70 
71 static void
releaseAvailableNames()72 releaseAvailableNames() {
73   uprv_free((void *)gAvailableNames);
74   gAvailableNames = NULL;
75   gCountAvailable = 0;
76 }
77 
78 static const char **
getEncodings(int32_t start,int32_t step,int32_t count,int32_t * pCount)79 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
80   const char **names;
81   int32_t i;
82 
83   *pCount = 0;
84   if (count <= 0) {
85     return NULL;
86   }
87   names = (const char **)uprv_malloc(count * sizeof(char *));
88   if (names == NULL) {
89     log_err("memory allocation error for %ld pointers\n", (long)count);
90     return NULL;
91   }
92   if (step == 0 && count > 0) {
93     step = 1;
94   }
95   for (i = 0; i < count; ++i) {
96     if (0 <= start && start < gCountAvailable) {
97       names[i] = gAvailableNames[start];
98       start += step;
99       ++*pCount;
100     }
101   }
102   return names;
103 }
104 
105 #if 0
106 /*
107  * ucnvsel_open() does not support "no encodings":
108  * Given 0 encodings it will open a selector for all available ones.
109  */
110 static const char **
111 getNoEncodings(int32_t *pCount) {
112   *pCount = 0;
113   return NULL;
114 }
115 #endif
116 
/* Selects a single name: the available converter at index 1. */
static const char **
getOneEncoding(int32_t *pCount) {
  const int32_t first = 1;
  const int32_t stride = 0;  /* getEncodings() treats a step of 0 as 1 */
  const int32_t wanted = 1;

  return getEncodings(first, stride, wanted, pCount);
}
121 
/* Selects up to 25 names at the even indexes 0, 2, 4, ... */
static const char **
getFirstEvenEncodings(int32_t *pCount) {
  const int32_t first = 0;
  const int32_t stride = 2;
  const int32_t wanted = 25;

  return getEncodings(first, stride, wanted, pCount);
}
126 
127 static const char **
getMiddleEncodings(int32_t * pCount)128 getMiddleEncodings(int32_t *pCount) {
129   return getEncodings(gCountAvailable - 12, 1, 22, pCount);
130 }
131 
132 static const char **
getLastEncodings(int32_t * pCount)133 getLastEncodings(int32_t *pCount) {
134   return getEncodings(gCountAvailable - 1, -1, 25, pCount);
135 }
136 
137 static const char **
getSomeEncodings(int32_t * pCount)138 getSomeEncodings(int32_t *pCount) {
139   /* 20 evenly distributed */
140   return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
141 }
142 
143 static const char **
getEveryThirdEncoding(int32_t * pCount)144 getEveryThirdEncoding(int32_t *pCount) {
145   return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
146 }
147 
148 static const char **
getAllEncodings(int32_t * pCount)149 getAllEncodings(int32_t *pCount) {
150   return getEncodings(0, 1, gCountAvailable, pCount);
151 }
152 
153 typedef const char **GetEncodingsFn(int32_t *);
154 
155 static GetEncodingsFn *const getEncodingsFns[] = {
156   getOneEncoding,
157   getFirstEvenEncodings,
158   getMiddleEncodings,
159   getLastEncodings,
160   getSomeEncodings,
161   getEveryThirdEncoding,
162   getAllEncodings
163 };
164 
fopenOrError(const char * filename)165 static FILE *fopenOrError(const char *filename) {
166     int32_t needLen;
167     FILE *f;
168     char fnbuf[FILENAME_BUFFER];
169     const char* directory = ctest_dataSrcDir();
170     needLen = (int32_t)(uprv_strlen(directory) + uprv_strlen(TDSRCPATH) + uprv_strlen(filename) + 1);
171     if(needLen > FILENAME_BUFFER) {
172         log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
173                 filename, needLen, FILENAME_BUFFER);
174         return NULL;
175     }
176 
177     strcpy(fnbuf, directory);
178     strcat(fnbuf, TDSRCPATH);
179     strcat(fnbuf, filename);
180 
181     f = fopen(fnbuf, "rb");
182 
183     if(f == NULL) {
184         log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
185     }
186     return f;
187 }
188 
/*
 * The test text: one buffer holding several NUL-separated strings,
 * plus the iteration state used by text_reset()/text_nextString().
 */
typedef struct TestText {
  char *text;       /* start of the buffer allocated by text_open() */
  char *textLimit;  /* end of the text; text_open() stores a NUL here */
  char *limit;      /* end of the string most recently returned by text_nextString() */
  int32_t number;   /* 0-based index of the string most recently returned */
} TestText;
194 
195 static void
text_reset(TestText * tt)196 text_reset(TestText *tt) {
197   tt->limit = tt->text;
198   tt->number = 0;
199 }
200 
201 static char *
text_nextString(TestText * tt,int32_t * pLength)202 text_nextString(TestText *tt, int32_t *pLength) {
203   char *s = tt->limit;
204   if (s == tt->textLimit) {
205     /* we already delivered the last string */
206     return NULL;
207   } else if (s == tt->text) {
208     /* first string */
209     if ((tt->textLimit - tt->text) >= 3 &&
210         s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
211     ) {
212       s += 3;  /* skip the UTF-8 signature byte sequence (U+FEFF) */
213     }
214   } else {
215     /* skip the string terminator */
216     ++s;
217     ++tt->number;
218   }
219 
220   /* find the end of this string */
221   tt->limit = uprv_strchr(s, 0);
222   *pLength = (int32_t)(tt->limit - s);
223   return s;
224 }
225 
226 static UBool
text_open(TestText * tt)227 text_open(TestText *tt) {
228   FILE *f;
229   char *s;
230   int32_t length;
231   uprv_memset(tt, 0, sizeof(TestText));
232   f = fopenOrError("ConverterSelectorTestUTF8.txt");
233   if(!f) {
234     return false;
235   }
236   fseek(f, 0, SEEK_END);
237   length = (int32_t)ftell(f);
238   fseek(f, 0, SEEK_SET);
239   tt->text = (char *)uprv_malloc(length + 1);
240   if (tt->text == NULL) {
241     fclose(f);
242     return false;
243   }
244   if (length != (int32_t)fread(tt->text, 1, length, f)) {
245     log_err("error reading %ld bytes from test text file\n", (long)length);
246     length = 0;
247     uprv_free(tt->text);
248   }
249   fclose(f);
250   tt->textLimit = tt->text + length;
251   *tt->textLimit = 0;
252   /* replace all Unicode '#' (U+0023) with NUL */
253   for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
254   text_reset(tt);
255   return true;
256 }
257 
258 static void
text_close(TestText * tt)259 text_close(TestText *tt) {
260   uprv_free(tt->text);
261 }
262 
findIndex(const char * converterName)263 static int32_t findIndex(const char* converterName) {
264   int32_t i;
265   for (i = 0 ; i < gCountAvailable; i++) {
266     if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
267       return i;
268     }
269   }
270   return -1;
271 }
272 
273 static UBool *
getResultsManually(const char ** encodings,int32_t num_encodings,const char * utf8,int32_t length,const USet * excludedCodePoints,const UConverterUnicodeSet whichSet)274 getResultsManually(const char** encodings, int32_t num_encodings,
275                    const char *utf8, int32_t length,
276                    const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
277   UBool* resultsManually;
278   int32_t i;
279 
280   resultsManually = (UBool*) uprv_malloc(gCountAvailable);
281   uprv_memset(resultsManually, 0, gCountAvailable);
282 
283   for(i = 0 ; i < num_encodings ; i++) {
284     UErrorCode status = U_ZERO_ERROR;
285     /* get unicode set for that converter */
286     USet* set;
287     UConverter* test_converter;
288     UChar32 cp;
289     int32_t encIndex, offset;
290 
291     set = uset_openEmpty();
292     test_converter = ucnv_open(encodings[i], &status);
293     ucnv_getUnicodeSet(test_converter, set,
294                        whichSet, &status);
295     if (excludedCodePoints != NULL) {
296       uset_addAll(set, excludedCodePoints);
297     }
298     uset_freeze(set);
299     offset = 0;
300     cp = 0;
301 
302     encIndex = findIndex(encodings[i]);
303     /*
304      * The following is almost, but not entirely, the same as
305      * resultsManually[encIndex] =
306      *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
307      * They might be different if the set contains strings,
308      * or if the utf8 string contains an illegal sequence.
309      *
310      * The UConverterSelector does not currently handle strings that can be
311      * converted, and it treats an illegal sequence as convertible
312      * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
313      */
314     resultsManually[encIndex] = true;
315     while(offset<length) {
316       U8_NEXT(utf8, offset, length, cp);
317       if (cp >= 0 && !uset_contains(set, cp)) {
318         resultsManually[encIndex] = false;
319         break;
320       }
321     }
322     uset_close(set);
323     ucnv_close(test_converter);
324   }
325   return resultsManually;
326 }
327 
328 /* closes res but does not free resultsManually */
verifyResult(UEnumeration * res,const UBool * resultsManually)329 static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
330   UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
331   const char* name;
332   UErrorCode status = U_ZERO_ERROR;
333   int32_t i;
334 
335   /* fill the bool for the selector results! */
336   uprv_memset(resultsFromSystem, 0, gCountAvailable);
337   while ((name = uenum_next(res,NULL, &status)) != NULL) {
338     resultsFromSystem[findIndex(name)] = true;
339   }
340   for(i = 0 ; i < gCountAvailable; i++) {
341     if(resultsManually[i] != resultsFromSystem[i]) {
342       log_err("failure in converter selector\n"
343               "converter %s had conflicting results -- manual: %d, system %d\n",
344               gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
345     }
346   }
347   uprv_free(resultsFromSystem);
348   uenum_close(res);
349 }
350 
351 static UConverterSelector *
serializeAndUnserialize(UConverterSelector * sel,char ** buffer,UErrorCode * status)352 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
353   char *new_buffer;
354   int32_t ser_len, ser_len2;
355   /* preflight */
356   ser_len = ucnvsel_serialize(sel, NULL, 0, status);
357   if (*status != U_BUFFER_OVERFLOW_ERROR) {
358     log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
359     return sel;
360   }
361   new_buffer = (char *)uprv_malloc(ser_len);
362   *status = U_ZERO_ERROR;
363   ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
364   if (U_FAILURE(*status) || ser_len != ser_len2) {
365     log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
366     uprv_free(new_buffer);
367     return sel;
368   }
369   ucnvsel_close(sel);
370   uprv_free(*buffer);
371   *buffer = new_buffer;
372   sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
373   if (U_FAILURE(*status)) {
374     log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
375     return NULL;
376   }
377   return sel;
378 }
379 
TestSelector()380 static void TestSelector()
381 {
382   TestText text;
383   USet* excluded_sets[3] = { NULL };
384   int32_t i, testCaseIdx;
385 
386   if (!getAvailableNames()) {
387     return;
388   }
389   if (!text_open(&text)) {
390     releaseAvailableNames();
391   }
392 
393   excluded_sets[0] = uset_openEmpty();
394   for(i = 1 ; i < 3 ; i++) {
395     excluded_sets[i] = uset_open(i*30, i*30+500);
396   }
397 
398   for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx++)
399   {
400     int32_t excluded_set_id;
401     int32_t num_encodings;
402     const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
403     if (getTestOption(QUICK_OPTION) && num_encodings > 25) {
404       uprv_free((void *)encodings);
405       continue;
406     }
407 
408     /*
409      * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
410      *
411      * This loop was replaced by the following statement because
412      * the loop made the test run longer without adding to the code coverage.
413      * The handling of the exclusion set is independent of the
414      * set of encodings, so there is no need to test every combination.
415      */
416     excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets);
417     {
418       UConverterSelector *sel_rt, *sel_fb;
419       char *buffer_fb = NULL;
420       UErrorCode status = U_ZERO_ERROR;
421       sel_rt = ucnvsel_open(encodings, num_encodings,
422                             excluded_sets[excluded_set_id],
423                             UCNV_ROUNDTRIP_SET, &status);
424       if (num_encodings == gCountAvailable) {
425         /* test the special "all converters" parameter values */
426         sel_fb = ucnvsel_open(NULL, 0,
427                               excluded_sets[excluded_set_id],
428                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
429       } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
430         /* test that a NULL set gives the same results as an empty set */
431         sel_fb = ucnvsel_open(encodings, num_encodings,
432                               NULL,
433                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
434       } else {
435         sel_fb = ucnvsel_open(encodings, num_encodings,
436                               excluded_sets[excluded_set_id],
437                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
438       }
439       if (U_FAILURE(status)) {
440         log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
441         ucnvsel_close(sel_rt);
442         uprv_free((void *)encodings);
443         continue;
444       }
445 
446       text_reset(&text);
447       for (;;) {
448         UBool *manual_rt, *manual_fb;
449         static UChar utf16[10000];
450         char *s;
451         int32_t length8, length16;
452 
453         s = text_nextString(&text, &length8);
454         if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) {
455           break;
456         }
457 
458         manual_rt = getResultsManually(encodings, num_encodings,
459                                        s, length8,
460                                        excluded_sets[excluded_set_id],
461                                        UCNV_ROUNDTRIP_SET);
462         manual_fb = getResultsManually(encodings, num_encodings,
463                                        s, length8,
464                                        excluded_sets[excluded_set_id],
465                                        UCNV_ROUNDTRIP_AND_FALLBACK_SET);
466         /* UTF-8 with length */
467         status = U_ZERO_ERROR;
468         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
469         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
470         /* UTF-8 NUL-terminated */
471         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
472         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
473 
474         u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &status);
475         if (U_FAILURE(status)) {
476           log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
477                   (long)text.number, u_errorName(status));
478         } else {
479           if (text.number == 0) {
480             sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
481           }
482           if (U_SUCCESS(status)) {
483             /* UTF-16 with length */
484             verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
485             verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
486             /* UTF-16 NUL-terminated */
487             verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
488             verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
489           }
490         }
491 
492         uprv_free(manual_rt);
493         uprv_free(manual_fb);
494       }
495       ucnvsel_close(sel_rt);
496       ucnvsel_close(sel_fb);
497       uprv_free(buffer_fb);
498     }
499     uprv_free((void *)encodings);
500   }
501 
502   releaseAvailableNames();
503   text_close(&text);
504   for(i = 0 ; i < 3 ; i++) {
505     uset_close(excluded_sets[i]);
506   }
507 }
508 
509 /* Improve code coverage of UPropsVectors */
TestUPropsVector()510 static void TestUPropsVector() {
511     UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR;
512     UPropsVectors *pv = upvec_open(100, &errorCode);
513     if (pv != NULL) {
514         log_err("Should have returned NULL if UErrorCode is an error.");
515         return;
516     }
517     errorCode = U_ZERO_ERROR;
518     pv = upvec_open(-1, &errorCode);
519     if (pv != NULL || U_SUCCESS(errorCode)) {
520         log_err("Should have returned NULL if column is less than 0.\n");
521         return;
522     }
523     errorCode = U_ZERO_ERROR;
524     pv = upvec_open(100, &errorCode);
525     if (pv == NULL || U_FAILURE(errorCode)) {
526         log_err("Unable to open UPropsVectors.\n");
527         return;
528     }
529 
530     if (upvec_getValue(pv, 0, 1) != 0) {
531         log_err("upvec_getValue should return 0.\n");
532     }
533     if (upvec_getRow(pv, 0, NULL, NULL) == NULL) {
534         log_err("upvec_getRow should not return NULL.\n");
535     }
536     if (upvec_getArray(pv, NULL, NULL) != NULL) {
537         log_err("upvec_getArray should return NULL.\n");
538     }
539 
540     upvec_close(pv);
541 }
542