• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /********************************************************************
2  * Copyright (c) 1997-2014, International Business Machines
3  * Corporation and others. All Rights Reserved.
4  ********************************************************************
5  *
6  * File UCNVSELTST.C
7  *
8  * Modification History:
9  *        Name                     Description
10  *     MOHAMED ELDAWY               Creation
11  ********************************************************************
12  */
13 
14 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
15 
16 #include "ucnvseltst.h"
17 
18 #include <stdio.h>
19 
20 #include "unicode/utypes.h"
21 #include "unicode/ucnvsel.h"
22 #include "unicode/ustring.h"
23 #include "cmemory.h"
24 #include "cstring.h"
25 #include "propsvec.h"
26 
27 #define FILENAME_BUFFER 1024
28 
29 #define TDSRCPATH  ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
30 
31 static void TestSelector(void);
32 static void TestUPropsVector(void);
33 void addCnvSelTest(TestNode** root);  /* Declaration required to suppress compiler warnings. */
34 
/* Adds the converter selector tests defined in this file to the test tree at root. */
void addCnvSelTest(TestNode** root)
{
    /* A function designator decays to a pointer, so no explicit '&' is needed. */
    addTest(root, TestSelector, "tsconv/ucnvseltst/TestSelector");
    addTest(root, TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector");
}
40 
41 static const char **gAvailableNames = NULL;
42 static int32_t gCountAvailable = 0;
43 
44 static UBool
getAvailableNames()45 getAvailableNames() {
46   int32_t i;
47   if (gAvailableNames != NULL) {
48     return TRUE;
49   }
50   gCountAvailable = ucnv_countAvailable();
51   if (gCountAvailable == 0) {
52     log_data_err("No converters available.\n");
53     return FALSE;
54   }
55   gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
56   if (gAvailableNames == NULL) {
57     log_err("unable to allocate memory for %ld available converter names\n",
58             (long)gCountAvailable);
59     return FALSE;
60   }
61   for (i = 0; i < gCountAvailable; ++i) {
62     gAvailableNames[i] = ucnv_getAvailableName(i);
63   }
64   return TRUE;
65 }
66 
67 static void
releaseAvailableNames()68 releaseAvailableNames() {
69   uprv_free((void *)gAvailableNames);
70   gAvailableNames = NULL;
71   gCountAvailable = 0;
72 }
73 
74 static const char **
getEncodings(int32_t start,int32_t step,int32_t count,int32_t * pCount)75 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
76   const char **names;
77   int32_t i;
78 
79   *pCount = 0;
80   if (count <= 0) {
81     return NULL;
82   }
83   names = (const char **)uprv_malloc(count * sizeof(char *));
84   if (names == NULL) {
85     log_err("memory allocation error for %ld pointers\n", (long)count);
86     return NULL;
87   }
88   if (step == 0 && count > 0) {
89     step = 1;
90   }
91   for (i = 0; i < count; ++i) {
92     if (0 <= start && start < gCountAvailable) {
93       names[i] = gAvailableNames[start];
94       start += step;
95       ++*pCount;
96     }
97   }
98   return names;
99 }
100 
#if 0
/*
 * Disabled because ucnvsel_open() cannot express "no encodings":
 * when it is given 0 encodings it opens a selector for all available ones.
 */
static const char **
getNoEncodings(int32_t *pCount) {
  *pCount = 0;
  return NULL;
}
#endif
112 
113 static const char **
getOneEncoding(int32_t * pCount)114 getOneEncoding(int32_t *pCount) {
115   return getEncodings(1, 0, 1, pCount);
116 }
117 
118 static const char **
getFirstEvenEncodings(int32_t * pCount)119 getFirstEvenEncodings(int32_t *pCount) {
120   return getEncodings(0, 2, 25, pCount);
121 }
122 
123 static const char **
getMiddleEncodings(int32_t * pCount)124 getMiddleEncodings(int32_t *pCount) {
125   return getEncodings(gCountAvailable - 12, 1, 22, pCount);
126 }
127 
128 static const char **
getLastEncodings(int32_t * pCount)129 getLastEncodings(int32_t *pCount) {
130   return getEncodings(gCountAvailable - 1, -1, 25, pCount);
131 }
132 
133 static const char **
getSomeEncodings(int32_t * pCount)134 getSomeEncodings(int32_t *pCount) {
135   /* 20 evenly distributed */
136   return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
137 }
138 
139 static const char **
getEveryThirdEncoding(int32_t * pCount)140 getEveryThirdEncoding(int32_t *pCount) {
141   return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
142 }
143 
144 static const char **
getAllEncodings(int32_t * pCount)145 getAllEncodings(int32_t *pCount) {
146   return getEncodings(0, 1, gCountAvailable, pCount);
147 }
148 
149 typedef const char **GetEncodingsFn(int32_t *);
150 
151 static GetEncodingsFn *const getEncodingsFns[] = {
152   getOneEncoding,
153   getFirstEvenEncodings,
154   getMiddleEncodings,
155   getLastEncodings,
156   getSomeEncodings,
157   getEveryThirdEncoding,
158   getAllEncodings
159 };
160 
fopenOrError(const char * filename)161 static FILE *fopenOrError(const char *filename) {
162     int32_t needLen;
163     FILE *f;
164     char fnbuf[FILENAME_BUFFER];
165     const char* directory= ctest_dataSrcDir();
166     needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1;
167     if(needLen > FILENAME_BUFFER) {
168         log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
169                 filename, needLen, FILENAME_BUFFER);
170         return NULL;
171     }
172 
173     strcpy(fnbuf, directory);
174     strcat(fnbuf, TDSRCPATH);
175     strcat(fnbuf, filename);
176 
177     f = fopen(fnbuf, "rb");
178 
179     if(f == NULL) {
180         log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
181     }
182     return f;
183 }
184 
185 typedef struct TestText {
186   char *text, *textLimit;
187   char *limit;
188   int32_t number;
189 } TestText;
190 
191 static void
text_reset(TestText * tt)192 text_reset(TestText *tt) {
193   tt->limit = tt->text;
194   tt->number = 0;
195 }
196 
197 static char *
text_nextString(TestText * tt,int32_t * pLength)198 text_nextString(TestText *tt, int32_t *pLength) {
199   char *s = tt->limit;
200   if (s == tt->textLimit) {
201     /* we already delivered the last string */
202     return NULL;
203   } else if (s == tt->text) {
204     /* first string */
205     if ((tt->textLimit - tt->text) >= 3 &&
206         s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
207     ) {
208       s += 3;  /* skip the UTF-8 signature byte sequence (U+FEFF) */
209     }
210   } else {
211     /* skip the string terminator */
212     ++s;
213     ++tt->number;
214   }
215 
216   /* find the end of this string */
217   tt->limit = uprv_strchr(s, 0);
218   *pLength = (int32_t)(tt->limit - s);
219   return s;
220 }
221 
222 static UBool
text_open(TestText * tt)223 text_open(TestText *tt) {
224   FILE *f;
225   char *s;
226   int32_t length;
227   uprv_memset(tt, 0, sizeof(TestText));
228   f = fopenOrError("ConverterSelectorTestUTF8.txt");
229   if(!f) {
230     return FALSE;
231   }
232   fseek(f, 0, SEEK_END);
233   length = (int32_t)ftell(f);
234   fseek(f, 0, SEEK_SET);
235   tt->text = (char *)uprv_malloc(length + 1);
236   if (tt->text == NULL) {
237     fclose(f);
238     return FALSE;
239   }
240   if (length != fread(tt->text, 1, length, f)) {
241     log_err("error reading %ld bytes from test text file\n", (long)length);
242     length = 0;
243     uprv_free(tt->text);
244   }
245   fclose(f);
246   tt->textLimit = tt->text + length;
247   *tt->textLimit = 0;
248   /* replace all Unicode '#' (U+0023) with NUL */
249   for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
250   text_reset(tt);
251   return TRUE;
252 }
253 
254 static void
text_close(TestText * tt)255 text_close(TestText *tt) {
256   uprv_free(tt->text);
257 }
258 
findIndex(const char * converterName)259 static int32_t findIndex(const char* converterName) {
260   int32_t i;
261   for (i = 0 ; i < gCountAvailable; i++) {
262     if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
263       return i;
264     }
265   }
266   return -1;
267 }
268 
269 static UBool *
getResultsManually(const char ** encodings,int32_t num_encodings,const char * utf8,int32_t length,const USet * excludedCodePoints,const UConverterUnicodeSet whichSet)270 getResultsManually(const char** encodings, int32_t num_encodings,
271                    const char *utf8, int32_t length,
272                    const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
273   UBool* resultsManually;
274   int32_t i;
275 
276   resultsManually = (UBool*) uprv_malloc(gCountAvailable);
277   uprv_memset(resultsManually, 0, gCountAvailable);
278 
279   for(i = 0 ; i < num_encodings ; i++) {
280     UErrorCode status = U_ZERO_ERROR;
281     /* get unicode set for that converter */
282     USet* set;
283     UConverter* test_converter;
284     UChar32 cp;
285     int32_t encIndex, offset;
286 
287     set = uset_openEmpty();
288     test_converter = ucnv_open(encodings[i], &status);
289     ucnv_getUnicodeSet(test_converter, set,
290                        whichSet, &status);
291     if (excludedCodePoints != NULL) {
292       uset_addAll(set, excludedCodePoints);
293     }
294     uset_freeze(set);
295     offset = 0;
296     cp = 0;
297 
298     encIndex = findIndex(encodings[i]);
299     /*
300      * The following is almost, but not entirely, the same as
301      * resultsManually[encIndex] =
302      *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
303      * They might be different if the set contains strings,
304      * or if the utf8 string contains an illegal sequence.
305      *
306      * The UConverterSelector does not currently handle strings that can be
307      * converted, and it treats an illegal sequence as convertible
308      * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
309      */
310     resultsManually[encIndex] = TRUE;
311     while(offset<length) {
312       U8_NEXT(utf8, offset, length, cp);
313       if (cp >= 0 && !uset_contains(set, cp)) {
314         resultsManually[encIndex] = FALSE;
315         break;
316       }
317     }
318     uset_close(set);
319     ucnv_close(test_converter);
320   }
321   return resultsManually;
322 }
323 
324 /* closes res but does not free resultsManually */
verifyResult(UEnumeration * res,const UBool * resultsManually)325 static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
326   UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
327   const char* name;
328   UErrorCode status = U_ZERO_ERROR;
329   int32_t i;
330 
331   /* fill the bool for the selector results! */
332   uprv_memset(resultsFromSystem, 0, gCountAvailable);
333   while ((name = uenum_next(res,NULL, &status)) != NULL) {
334     resultsFromSystem[findIndex(name)] = TRUE;
335   }
336   for(i = 0 ; i < gCountAvailable; i++) {
337     if(resultsManually[i] != resultsFromSystem[i]) {
338       log_err("failure in converter selector\n"
339               "converter %s had conflicting results -- manual: %d, system %d\n",
340               gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
341     }
342   }
343   uprv_free(resultsFromSystem);
344   uenum_close(res);
345 }
346 
347 static UConverterSelector *
serializeAndUnserialize(UConverterSelector * sel,char ** buffer,UErrorCode * status)348 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
349   char *new_buffer;
350   int32_t ser_len, ser_len2;
351   /* preflight */
352   ser_len = ucnvsel_serialize(sel, NULL, 0, status);
353   if (*status != U_BUFFER_OVERFLOW_ERROR) {
354     log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
355     return sel;
356   }
357   new_buffer = (char *)uprv_malloc(ser_len);
358   *status = U_ZERO_ERROR;
359   ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
360   if (U_FAILURE(*status) || ser_len != ser_len2) {
361     log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
362     uprv_free(new_buffer);
363     return sel;
364   }
365   ucnvsel_close(sel);
366   uprv_free(*buffer);
367   *buffer = new_buffer;
368   sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
369   if (U_FAILURE(*status)) {
370     log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
371     return NULL;
372   }
373   return sel;
374 }
375 
TestSelector()376 static void TestSelector()
377 {
378   TestText text;
379   USet* excluded_sets[3] = { NULL };
380   int32_t i, testCaseIdx;
381 
382   if (!getAvailableNames()) {
383     return;
384   }
385   if (!text_open(&text)) {
386     releaseAvailableNames();;
387   }
388 
389   excluded_sets[0] = uset_openEmpty();
390   for(i = 1 ; i < 3 ; i++) {
391     excluded_sets[i] = uset_open(i*30, i*30+500);
392   }
393 
394   for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx++)
395   {
396     int32_t excluded_set_id;
397     int32_t num_encodings;
398     const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
399     if (getTestOption(QUICK_OPTION) && num_encodings > 25) {
400       uprv_free((void *)encodings);
401       continue;
402     }
403 
404     /*
405      * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
406      *
407      * This loop was replaced by the following statement because
408      * the loop made the test run longer without adding to the code coverage.
409      * The handling of the exclusion set is independent of the
410      * set of encodings, so there is no need to test every combination.
411      */
412     excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets);
413     {
414       UConverterSelector *sel_rt, *sel_fb;
415       char *buffer_fb = NULL;
416       UErrorCode status = U_ZERO_ERROR;
417       sel_rt = ucnvsel_open(encodings, num_encodings,
418                             excluded_sets[excluded_set_id],
419                             UCNV_ROUNDTRIP_SET, &status);
420       if (num_encodings == gCountAvailable) {
421         /* test the special "all converters" parameter values */
422         sel_fb = ucnvsel_open(NULL, 0,
423                               excluded_sets[excluded_set_id],
424                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
425       } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
426         /* test that a NULL set gives the same results as an empty set */
427         sel_fb = ucnvsel_open(encodings, num_encodings,
428                               NULL,
429                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
430       } else {
431         sel_fb = ucnvsel_open(encodings, num_encodings,
432                               excluded_sets[excluded_set_id],
433                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
434       }
435       if (U_FAILURE(status)) {
436         log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
437         ucnvsel_close(sel_rt);
438         uprv_free((void *)encodings);
439         continue;
440       }
441 
442       text_reset(&text);
443       for (;;) {
444         UBool *manual_rt, *manual_fb;
445         static UChar utf16[10000];
446         char *s;
447         int32_t length8, length16;
448 
449         s = text_nextString(&text, &length8);
450         if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) {
451           break;
452         }
453 
454         manual_rt = getResultsManually(encodings, num_encodings,
455                                        s, length8,
456                                        excluded_sets[excluded_set_id],
457                                        UCNV_ROUNDTRIP_SET);
458         manual_fb = getResultsManually(encodings, num_encodings,
459                                        s, length8,
460                                        excluded_sets[excluded_set_id],
461                                        UCNV_ROUNDTRIP_AND_FALLBACK_SET);
462         /* UTF-8 with length */
463         status = U_ZERO_ERROR;
464         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
465         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
466         /* UTF-8 NUL-terminated */
467         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
468         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
469 
470         u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &status);
471         if (U_FAILURE(status)) {
472           log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
473                   (long)text.number, u_errorName(status));
474         } else {
475           if (text.number == 0) {
476             sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
477           }
478           if (U_SUCCESS(status)) {
479             /* UTF-16 with length */
480             verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
481             verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
482             /* UTF-16 NUL-terminated */
483             verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
484             verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
485           }
486         }
487 
488         uprv_free(manual_rt);
489         uprv_free(manual_fb);
490       }
491       ucnvsel_close(sel_rt);
492       ucnvsel_close(sel_fb);
493       uprv_free(buffer_fb);
494     }
495     uprv_free((void *)encodings);
496   }
497 
498   releaseAvailableNames();
499   text_close(&text);
500   for(i = 0 ; i < 3 ; i++) {
501     uset_close(excluded_sets[i]);
502   }
503 }
504 
505 /* Improve code coverage of UPropsVectors */
TestUPropsVector()506 static void TestUPropsVector() {
507     UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR;
508     UPropsVectors *pv = upvec_open(100, &errorCode);
509     if (pv != NULL) {
510         log_err("Should have returned NULL if UErrorCode is an error.");
511         return;
512     }
513     errorCode = U_ZERO_ERROR;
514     pv = upvec_open(-1, &errorCode);
515     if (pv != NULL || U_SUCCESS(errorCode)) {
516         log_err("Should have returned NULL if column is less than 0.\n");
517         return;
518     }
519     errorCode = U_ZERO_ERROR;
520     pv = upvec_open(100, &errorCode);
521     if (pv == NULL || U_FAILURE(errorCode)) {
522         log_err("Unable to open UPropsVectors.\n");
523         return;
524     }
525 
526     if (upvec_getValue(pv, 0, 1) != 0) {
527         log_err("upvec_getValue should return 0.\n");
528     }
529     if (upvec_getRow(pv, 0, NULL, NULL) == NULL) {
530         log_err("upvec_getRow should not return NULL.\n");
531     }
532     if (upvec_getArray(pv, NULL, NULL) != NULL) {
533         log_err("upvec_getArray should return NULL.\n");
534     }
535 
536     upvec_close(pv);
537 }
538