// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/********************************************************************
 * Copyright (c) 1997-2014, International Business Machines
 * Corporation and others. All Rights Reserved.
 ********************************************************************
 *
 * File UCNVSELTST.C
 *
 * Modification History:
 *        Name                     Description
 *     MOHAMED ELDAWY               Creation
 ********************************************************************
 */
15 
16 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
17 
18 #include "ucnvseltst.h"
19 
20 #include <stdio.h>
21 
22 #include "unicode/utypes.h"
23 #include "unicode/ucnvsel.h"
24 #include "unicode/ustring.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27 #include "propsvec.h"
28 
29 #define FILENAME_BUFFER 1024
30 
31 #define TDSRCPATH  ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
32 
33 static void TestSelector(void);
34 static void TestUPropsVector(void);
35 void addCnvSelTest(TestNode** root);  /* Declaration required to suppress compiler warnings. */
36 
/*
 * Adds the tests of this file to the test tree at root by calling addTest()
 * once per test function with its path.
 */
void addCnvSelTest(TestNode** root)
{
    addTest(root, TestSelector, "tsconv/ucnvseltst/TestSelector");
    addTest(root, TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector");
}
42 
/* Number of entries in gAvailableNames; 0 while the list is not loaded. */
static int32_t gCountAvailable = 0;
/* List of available converter names filled by getAvailableNames(); NULL until then. */
static const char **gAvailableNames = NULL;
45 
46 static UBool
getAvailableNames()47 getAvailableNames() {
48   int32_t i;
49   if (gAvailableNames != NULL) {
50     return TRUE;
51   }
52   gCountAvailable = ucnv_countAvailable();
53   if (gCountAvailable == 0) {
54     log_data_err("No converters available.\n");
55     return FALSE;
56   }
57   gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
58   if (gAvailableNames == NULL) {
59     log_err("unable to allocate memory for %ld available converter names\n",
60             (long)gCountAvailable);
61     return FALSE;
62   }
63   for (i = 0; i < gCountAvailable; ++i) {
64     gAvailableNames[i] = ucnv_getAvailableName(i);
65   }
66   return TRUE;
67 }
68 
69 static void
releaseAvailableNames()70 releaseAvailableNames() {
71   uprv_free((void *)gAvailableNames);
72   gAvailableNames = NULL;
73   gCountAvailable = 0;
74 }
75 
76 static const char **
getEncodings(int32_t start,int32_t step,int32_t count,int32_t * pCount)77 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
78   const char **names;
79   int32_t i;
80 
81   *pCount = 0;
82   if (count <= 0) {
83     return NULL;
84   }
85   names = (const char **)uprv_malloc(count * sizeof(char *));
86   if (names == NULL) {
87     log_err("memory allocation error for %ld pointers\n", (long)count);
88     return NULL;
89   }
90   if (step == 0 && count > 0) {
91     step = 1;
92   }
93   for (i = 0; i < count; ++i) {
94     if (0 <= start && start < gCountAvailable) {
95       names[i] = gAvailableNames[start];
96       start += step;
97       ++*pCount;
98     }
99   }
100   return names;
101 }
102 
#if 0
/*
 * Disabled: ucnvsel_open() does not support "no encodings".
 * Given 0 encodings it will open a selector for all available ones.
 */
static const char **
getNoEncodings(int32_t *pCount) {
  *pCount = 0;
  return NULL;
}
#endif
114 
115 static const char **
getOneEncoding(int32_t * pCount)116 getOneEncoding(int32_t *pCount) {
117   return getEncodings(1, 0, 1, pCount);
118 }
119 
120 static const char **
getFirstEvenEncodings(int32_t * pCount)121 getFirstEvenEncodings(int32_t *pCount) {
122   return getEncodings(0, 2, 25, pCount);
123 }
124 
125 static const char **
getMiddleEncodings(int32_t * pCount)126 getMiddleEncodings(int32_t *pCount) {
127   return getEncodings(gCountAvailable - 12, 1, 22, pCount);
128 }
129 
130 static const char **
getLastEncodings(int32_t * pCount)131 getLastEncodings(int32_t *pCount) {
132   return getEncodings(gCountAvailable - 1, -1, 25, pCount);
133 }
134 
135 static const char **
getSomeEncodings(int32_t * pCount)136 getSomeEncodings(int32_t *pCount) {
137   /* 20 evenly distributed */
138   return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
139 }
140 
141 static const char **
getEveryThirdEncoding(int32_t * pCount)142 getEveryThirdEncoding(int32_t *pCount) {
143   return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
144 }
145 
146 static const char **
getAllEncodings(int32_t * pCount)147 getAllEncodings(int32_t *pCount) {
148   return getEncodings(0, 1, gCountAvailable, pCount);
149 }
150 
151 typedef const char **GetEncodingsFn(int32_t *);
152 
153 static GetEncodingsFn *const getEncodingsFns[] = {
154   getOneEncoding,
155   getFirstEvenEncodings,
156   getMiddleEncodings,
157   getLastEncodings,
158   getSomeEncodings,
159   getEveryThirdEncoding,
160   getAllEncodings
161 };
162 
fopenOrError(const char * filename)163 static FILE *fopenOrError(const char *filename) {
164     int32_t needLen;
165     FILE *f;
166     char fnbuf[FILENAME_BUFFER];
167     const char* directory= ctest_dataSrcDir();
168     needLen = uprv_strlen(directory)+uprv_strlen(TDSRCPATH)+uprv_strlen(filename)+1;
169     if(needLen > FILENAME_BUFFER) {
170         log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
171                 filename, needLen, FILENAME_BUFFER);
172         return NULL;
173     }
174 
175     strcpy(fnbuf, directory);
176     strcat(fnbuf, TDSRCPATH);
177     strcat(fnbuf, filename);
178 
179     f = fopen(fnbuf, "rb");
180 
181     if(f == NULL) {
182         log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
183     }
184     return f;
185 }
186 
/* The test text, held in memory as a sequence of NUL-separated strings. */
typedef struct TestText {
  char *text;       /* start of the buffer */
  char *textLimit;  /* end of the buffer contents, where the final NUL is stored */
  char *limit;      /* end of the string delivered last; equals text before the first one */
  int32_t number;   /* zero-based index of the string delivered last */
} TestText;
192 
193 static void
text_reset(TestText * tt)194 text_reset(TestText *tt) {
195   tt->limit = tt->text;
196   tt->number = 0;
197 }
198 
199 static char *
text_nextString(TestText * tt,int32_t * pLength)200 text_nextString(TestText *tt, int32_t *pLength) {
201   char *s = tt->limit;
202   if (s == tt->textLimit) {
203     /* we already delivered the last string */
204     return NULL;
205   } else if (s == tt->text) {
206     /* first string */
207     if ((tt->textLimit - tt->text) >= 3 &&
208         s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
209     ) {
210       s += 3;  /* skip the UTF-8 signature byte sequence (U+FEFF) */
211     }
212   } else {
213     /* skip the string terminator */
214     ++s;
215     ++tt->number;
216   }
217 
218   /* find the end of this string */
219   tt->limit = uprv_strchr(s, 0);
220   *pLength = (int32_t)(tt->limit - s);
221   return s;
222 }
223 
224 static UBool
text_open(TestText * tt)225 text_open(TestText *tt) {
226   FILE *f;
227   char *s;
228   int32_t length;
229   uprv_memset(tt, 0, sizeof(TestText));
230   f = fopenOrError("ConverterSelectorTestUTF8.txt");
231   if(!f) {
232     return FALSE;
233   }
234   fseek(f, 0, SEEK_END);
235   length = (int32_t)ftell(f);
236   fseek(f, 0, SEEK_SET);
237   tt->text = (char *)uprv_malloc(length + 1);
238   if (tt->text == NULL) {
239     fclose(f);
240     return FALSE;
241   }
242   if (length != fread(tt->text, 1, length, f)) {
243     log_err("error reading %ld bytes from test text file\n", (long)length);
244     length = 0;
245     uprv_free(tt->text);
246   }
247   fclose(f);
248   tt->textLimit = tt->text + length;
249   *tt->textLimit = 0;
250   /* replace all Unicode '#' (U+0023) with NUL */
251   for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
252   text_reset(tt);
253   return TRUE;
254 }
255 
256 static void
text_close(TestText * tt)257 text_close(TestText *tt) {
258   uprv_free(tt->text);
259 }
260 
findIndex(const char * converterName)261 static int32_t findIndex(const char* converterName) {
262   int32_t i;
263   for (i = 0 ; i < gCountAvailable; i++) {
264     if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
265       return i;
266     }
267   }
268   return -1;
269 }
270 
271 static UBool *
getResultsManually(const char ** encodings,int32_t num_encodings,const char * utf8,int32_t length,const USet * excludedCodePoints,const UConverterUnicodeSet whichSet)272 getResultsManually(const char** encodings, int32_t num_encodings,
273                    const char *utf8, int32_t length,
274                    const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
275   UBool* resultsManually;
276   int32_t i;
277 
278   resultsManually = (UBool*) uprv_malloc(gCountAvailable);
279   uprv_memset(resultsManually, 0, gCountAvailable);
280 
281   for(i = 0 ; i < num_encodings ; i++) {
282     UErrorCode status = U_ZERO_ERROR;
283     /* get unicode set for that converter */
284     USet* set;
285     UConverter* test_converter;
286     UChar32 cp;
287     int32_t encIndex, offset;
288 
289     set = uset_openEmpty();
290     test_converter = ucnv_open(encodings[i], &status);
291     ucnv_getUnicodeSet(test_converter, set,
292                        whichSet, &status);
293     if (excludedCodePoints != NULL) {
294       uset_addAll(set, excludedCodePoints);
295     }
296     uset_freeze(set);
297     offset = 0;
298     cp = 0;
299 
300     encIndex = findIndex(encodings[i]);
301     /*
302      * The following is almost, but not entirely, the same as
303      * resultsManually[encIndex] =
304      *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
305      * They might be different if the set contains strings,
306      * or if the utf8 string contains an illegal sequence.
307      *
308      * The UConverterSelector does not currently handle strings that can be
309      * converted, and it treats an illegal sequence as convertible
310      * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
311      */
312     resultsManually[encIndex] = TRUE;
313     while(offset<length) {
314       U8_NEXT(utf8, offset, length, cp);
315       if (cp >= 0 && !uset_contains(set, cp)) {
316         resultsManually[encIndex] = FALSE;
317         break;
318       }
319     }
320     uset_close(set);
321     ucnv_close(test_converter);
322   }
323   return resultsManually;
324 }
325 
326 /* closes res but does not free resultsManually */
verifyResult(UEnumeration * res,const UBool * resultsManually)327 static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
328   UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
329   const char* name;
330   UErrorCode status = U_ZERO_ERROR;
331   int32_t i;
332 
333   /* fill the bool for the selector results! */
334   uprv_memset(resultsFromSystem, 0, gCountAvailable);
335   while ((name = uenum_next(res,NULL, &status)) != NULL) {
336     resultsFromSystem[findIndex(name)] = TRUE;
337   }
338   for(i = 0 ; i < gCountAvailable; i++) {
339     if(resultsManually[i] != resultsFromSystem[i]) {
340       log_err("failure in converter selector\n"
341               "converter %s had conflicting results -- manual: %d, system %d\n",
342               gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
343     }
344   }
345   uprv_free(resultsFromSystem);
346   uenum_close(res);
347 }
348 
349 static UConverterSelector *
serializeAndUnserialize(UConverterSelector * sel,char ** buffer,UErrorCode * status)350 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
351   char *new_buffer;
352   int32_t ser_len, ser_len2;
353   /* preflight */
354   ser_len = ucnvsel_serialize(sel, NULL, 0, status);
355   if (*status != U_BUFFER_OVERFLOW_ERROR) {
356     log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
357     return sel;
358   }
359   new_buffer = (char *)uprv_malloc(ser_len);
360   *status = U_ZERO_ERROR;
361   ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
362   if (U_FAILURE(*status) || ser_len != ser_len2) {
363     log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
364     uprv_free(new_buffer);
365     return sel;
366   }
367   ucnvsel_close(sel);
368   uprv_free(*buffer);
369   *buffer = new_buffer;
370   sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
371   if (U_FAILURE(*status)) {
372     log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
373     return NULL;
374   }
375   return sel;
376 }
377 
TestSelector()378 static void TestSelector()
379 {
380   TestText text;
381   USet* excluded_sets[3] = { NULL };
382   int32_t i, testCaseIdx;
383 
384   if (!getAvailableNames()) {
385     return;
386   }
387   if (!text_open(&text)) {
388     releaseAvailableNames();;
389   }
390 
391   excluded_sets[0] = uset_openEmpty();
392   for(i = 1 ; i < 3 ; i++) {
393     excluded_sets[i] = uset_open(i*30, i*30+500);
394   }
395 
396   for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx++)
397   {
398     int32_t excluded_set_id;
399     int32_t num_encodings;
400     const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
401     if (getTestOption(QUICK_OPTION) && num_encodings > 25) {
402       uprv_free((void *)encodings);
403       continue;
404     }
405 
406     /*
407      * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
408      *
409      * This loop was replaced by the following statement because
410      * the loop made the test run longer without adding to the code coverage.
411      * The handling of the exclusion set is independent of the
412      * set of encodings, so there is no need to test every combination.
413      */
414     excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets);
415     {
416       UConverterSelector *sel_rt, *sel_fb;
417       char *buffer_fb = NULL;
418       UErrorCode status = U_ZERO_ERROR;
419       sel_rt = ucnvsel_open(encodings, num_encodings,
420                             excluded_sets[excluded_set_id],
421                             UCNV_ROUNDTRIP_SET, &status);
422       if (num_encodings == gCountAvailable) {
423         /* test the special "all converters" parameter values */
424         sel_fb = ucnvsel_open(NULL, 0,
425                               excluded_sets[excluded_set_id],
426                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
427       } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
428         /* test that a NULL set gives the same results as an empty set */
429         sel_fb = ucnvsel_open(encodings, num_encodings,
430                               NULL,
431                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
432       } else {
433         sel_fb = ucnvsel_open(encodings, num_encodings,
434                               excluded_sets[excluded_set_id],
435                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
436       }
437       if (U_FAILURE(status)) {
438         log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
439         ucnvsel_close(sel_rt);
440         uprv_free((void *)encodings);
441         continue;
442       }
443 
444       text_reset(&text);
445       for (;;) {
446         UBool *manual_rt, *manual_fb;
447         static UChar utf16[10000];
448         char *s;
449         int32_t length8, length16;
450 
451         s = text_nextString(&text, &length8);
452         if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) {
453           break;
454         }
455 
456         manual_rt = getResultsManually(encodings, num_encodings,
457                                        s, length8,
458                                        excluded_sets[excluded_set_id],
459                                        UCNV_ROUNDTRIP_SET);
460         manual_fb = getResultsManually(encodings, num_encodings,
461                                        s, length8,
462                                        excluded_sets[excluded_set_id],
463                                        UCNV_ROUNDTRIP_AND_FALLBACK_SET);
464         /* UTF-8 with length */
465         status = U_ZERO_ERROR;
466         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
467         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
468         /* UTF-8 NUL-terminated */
469         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
470         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
471 
472         u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &status);
473         if (U_FAILURE(status)) {
474           log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
475                   (long)text.number, u_errorName(status));
476         } else {
477           if (text.number == 0) {
478             sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
479           }
480           if (U_SUCCESS(status)) {
481             /* UTF-16 with length */
482             verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
483             verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
484             /* UTF-16 NUL-terminated */
485             verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
486             verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
487           }
488         }
489 
490         uprv_free(manual_rt);
491         uprv_free(manual_fb);
492       }
493       ucnvsel_close(sel_rt);
494       ucnvsel_close(sel_fb);
495       uprv_free(buffer_fb);
496     }
497     uprv_free((void *)encodings);
498   }
499 
500   releaseAvailableNames();
501   text_close(&text);
502   for(i = 0 ; i < 3 ; i++) {
503     uset_close(excluded_sets[i]);
504   }
505 }
506 
507 /* Improve code coverage of UPropsVectors */
TestUPropsVector()508 static void TestUPropsVector() {
509     UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR;
510     UPropsVectors *pv = upvec_open(100, &errorCode);
511     if (pv != NULL) {
512         log_err("Should have returned NULL if UErrorCode is an error.");
513         return;
514     }
515     errorCode = U_ZERO_ERROR;
516     pv = upvec_open(-1, &errorCode);
517     if (pv != NULL || U_SUCCESS(errorCode)) {
518         log_err("Should have returned NULL if column is less than 0.\n");
519         return;
520     }
521     errorCode = U_ZERO_ERROR;
522     pv = upvec_open(100, &errorCode);
523     if (pv == NULL || U_FAILURE(errorCode)) {
524         log_err("Unable to open UPropsVectors.\n");
525         return;
526     }
527 
528     if (upvec_getValue(pv, 0, 1) != 0) {
529         log_err("upvec_getValue should return 0.\n");
530     }
531     if (upvec_getRow(pv, 0, NULL, NULL) == NULL) {
532         log_err("upvec_getRow should not return NULL.\n");
533     }
534     if (upvec_getArray(pv, NULL, NULL) != NULL) {
535         log_err("upvec_getArray should return NULL.\n");
536     }
537 
538     upvec_close(pv);
539 }
540