• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * Copyright (c) 1997-2014, International Business Machines
5  * Corporation and others. All Rights Reserved.
6  ********************************************************************
7  *
8  * File UCNVSELTST.C
9  *
10  * Modification History:
11  *        Name                     Description
12  *     MOHAMED ELDAWY               Creation
13  ********************************************************************
14  */
15 
16 /* C API AND FUNCTIONALITY TEST FOR CONVERTER SELECTOR (ucnvsel.h)*/
17 
18 #include "ucnvseltst.h"
19 
20 #include <stdio.h>
21 
22 #include "unicode/utypes.h"
23 #include "unicode/ucnvsel.h"
24 #include "unicode/ustring.h"
25 #include "unicode/utf8.h"
26 #include "cmemory.h"
27 #include "cstring.h"
28 #include "propsvec.h"
29 
30 #define FILENAME_BUFFER 1024
31 
32 #define TDSRCPATH  ".." U_FILE_SEP_STRING "test" U_FILE_SEP_STRING "testdata" U_FILE_SEP_STRING
33 
34 static void TestSelector(void);
35 static void TestUPropsVector(void);
36 void addCnvSelTest(TestNode** root);  /* Declaration required to suppress compiler warnings. */
37 
/*
 * Adds TestSelector and TestUPropsVector to the test tree via addTest(),
 * under their "tsconv/ucnvseltst/..." paths.
 */
void addCnvSelTest(TestNode** root)
{
    addTest(root, TestSelector, "tsconv/ucnvseltst/TestSelector");
    addTest(root, TestUPropsVector, "tsconv/ucnvseltst/TestUPropsVector");
}
43 
44 static const char **gAvailableNames = NULL;
45 static int32_t gCountAvailable = 0;
46 
47 static UBool
getAvailableNames()48 getAvailableNames() {
49   int32_t i;
50   if (gAvailableNames != NULL) {
51     return TRUE;
52   }
53   gCountAvailable = ucnv_countAvailable();
54   if (gCountAvailable == 0) {
55     log_data_err("No converters available.\n");
56     return FALSE;
57   }
58   gAvailableNames = (const char **)uprv_malloc(gCountAvailable * sizeof(const char *));
59   if (gAvailableNames == NULL) {
60     log_err("unable to allocate memory for %ld available converter names\n",
61             (long)gCountAvailable);
62     return FALSE;
63   }
64   for (i = 0; i < gCountAvailable; ++i) {
65     gAvailableNames[i] = ucnv_getAvailableName(i);
66   }
67   return TRUE;
68 }
69 
70 static void
releaseAvailableNames()71 releaseAvailableNames() {
72   uprv_free((void *)gAvailableNames);
73   gAvailableNames = NULL;
74   gCountAvailable = 0;
75 }
76 
77 static const char **
getEncodings(int32_t start,int32_t step,int32_t count,int32_t * pCount)78 getEncodings(int32_t start, int32_t step, int32_t count, int32_t *pCount) {
79   const char **names;
80   int32_t i;
81 
82   *pCount = 0;
83   if (count <= 0) {
84     return NULL;
85   }
86   names = (const char **)uprv_malloc(count * sizeof(char *));
87   if (names == NULL) {
88     log_err("memory allocation error for %ld pointers\n", (long)count);
89     return NULL;
90   }
91   if (step == 0 && count > 0) {
92     step = 1;
93   }
94   for (i = 0; i < count; ++i) {
95     if (0 <= start && start < gCountAvailable) {
96       names[i] = gAvailableNames[start];
97       start += step;
98       ++*pCount;
99     }
100   }
101   return names;
102 }
103 
#if 0
/*
 * Disabled: ucnvsel_open() has no "no encodings" mode.
 * When given 0 encodings it opens a selector for all available ones.
 */
static const char **
getNoEncodings(int32_t *pCount) {
  *pCount = 0;
  return NULL;
}
#endif
115 
/* Requests a single name: the available converter at index 1. */
static const char **
getOneEncoding(int32_t *pCount) {
  const int32_t first = 1;
  const int32_t stride = 0;  /* getEncodings() treats 0 as 1 */
  const int32_t wanted = 1;

  return getEncodings(first, stride, wanted, pCount);
}
120 
/* Requests up to 25 names at the even indexes 0, 2, 4, ... */
static const char **
getFirstEvenEncodings(int32_t *pCount) {
  const int32_t first = 0;
  const int32_t stride = 2;
  const int32_t wanted = 25;

  return getEncodings(first, stride, wanted, pCount);
}
125 
126 static const char **
getMiddleEncodings(int32_t * pCount)127 getMiddleEncodings(int32_t *pCount) {
128   return getEncodings(gCountAvailable - 12, 1, 22, pCount);
129 }
130 
131 static const char **
getLastEncodings(int32_t * pCount)132 getLastEncodings(int32_t *pCount) {
133   return getEncodings(gCountAvailable - 1, -1, 25, pCount);
134 }
135 
136 static const char **
getSomeEncodings(int32_t * pCount)137 getSomeEncodings(int32_t *pCount) {
138   /* 20 evenly distributed */
139   return getEncodings(5, (gCountAvailable + 19)/ 20, 20, pCount);
140 }
141 
142 static const char **
getEveryThirdEncoding(int32_t * pCount)143 getEveryThirdEncoding(int32_t *pCount) {
144   return getEncodings(2, 3, (gCountAvailable + 2 )/ 3, pCount);
145 }
146 
147 static const char **
getAllEncodings(int32_t * pCount)148 getAllEncodings(int32_t *pCount) {
149   return getEncodings(0, 1, gCountAvailable, pCount);
150 }
151 
152 typedef const char **GetEncodingsFn(int32_t *);
153 
154 static GetEncodingsFn *const getEncodingsFns[] = {
155   getOneEncoding,
156   getFirstEvenEncodings,
157   getMiddleEncodings,
158   getLastEncodings,
159   getSomeEncodings,
160   getEveryThirdEncoding,
161   getAllEncodings
162 };
163 
fopenOrError(const char * filename)164 static FILE *fopenOrError(const char *filename) {
165     int32_t needLen;
166     FILE *f;
167     char fnbuf[FILENAME_BUFFER];
168     const char* directory = ctest_dataSrcDir();
169     needLen = (int32_t)(uprv_strlen(directory) + uprv_strlen(TDSRCPATH) + uprv_strlen(filename) + 1);
170     if(needLen > FILENAME_BUFFER) {
171         log_err("FAIL: Could not load %s. Filename buffer overflow, needed %d but buffer is %d\n",
172                 filename, needLen, FILENAME_BUFFER);
173         return NULL;
174     }
175 
176     strcpy(fnbuf, directory);
177     strcat(fnbuf, TDSRCPATH);
178     strcat(fnbuf, filename);
179 
180     f = fopen(fnbuf, "rb");
181 
182     if(f == NULL) {
183         log_data_err("FAIL: Could not load %s [%s]\n", fnbuf, filename);
184     }
185     return f;
186 }
187 
/*
 * Test text loaded into memory as a sequence of NUL-terminated strings,
 * plus the state of the iteration done by text_nextString().
 */
typedef struct TestText {
  char *text;       /* start of the allocated buffer */
  char *textLimit;  /* end of the loaded text; text_open() stores a NUL here */
  char *limit;      /* terminator of the string delivered last; equals text after a reset */
  int32_t number;   /* zero-based index of the string delivered last */
} TestText;
193 
194 static void
text_reset(TestText * tt)195 text_reset(TestText *tt) {
196   tt->limit = tt->text;
197   tt->number = 0;
198 }
199 
200 static char *
text_nextString(TestText * tt,int32_t * pLength)201 text_nextString(TestText *tt, int32_t *pLength) {
202   char *s = tt->limit;
203   if (s == tt->textLimit) {
204     /* we already delivered the last string */
205     return NULL;
206   } else if (s == tt->text) {
207     /* first string */
208     if ((tt->textLimit - tt->text) >= 3 &&
209         s[0] == (char)0xef && s[1] == (char)0xbb && s[2] == (char)0xbf
210     ) {
211       s += 3;  /* skip the UTF-8 signature byte sequence (U+FEFF) */
212     }
213   } else {
214     /* skip the string terminator */
215     ++s;
216     ++tt->number;
217   }
218 
219   /* find the end of this string */
220   tt->limit = uprv_strchr(s, 0);
221   *pLength = (int32_t)(tt->limit - s);
222   return s;
223 }
224 
225 static UBool
text_open(TestText * tt)226 text_open(TestText *tt) {
227   FILE *f;
228   char *s;
229   int32_t length;
230   uprv_memset(tt, 0, sizeof(TestText));
231   f = fopenOrError("ConverterSelectorTestUTF8.txt");
232   if(!f) {
233     return FALSE;
234   }
235   fseek(f, 0, SEEK_END);
236   length = (int32_t)ftell(f);
237   fseek(f, 0, SEEK_SET);
238   tt->text = (char *)uprv_malloc(length + 1);
239   if (tt->text == NULL) {
240     fclose(f);
241     return FALSE;
242   }
243   if (length != (int32_t)fread(tt->text, 1, length, f)) {
244     log_err("error reading %ld bytes from test text file\n", (long)length);
245     length = 0;
246     uprv_free(tt->text);
247   }
248   fclose(f);
249   tt->textLimit = tt->text + length;
250   *tt->textLimit = 0;
251   /* replace all Unicode '#' (U+0023) with NUL */
252   for(s = tt->text; (s = uprv_strchr(s, 0x23)) != NULL; *s++ = 0) {}
253   text_reset(tt);
254   return TRUE;
255 }
256 
257 static void
text_close(TestText * tt)258 text_close(TestText *tt) {
259   uprv_free(tt->text);
260 }
261 
findIndex(const char * converterName)262 static int32_t findIndex(const char* converterName) {
263   int32_t i;
264   for (i = 0 ; i < gCountAvailable; i++) {
265     if(ucnv_compareNames(gAvailableNames[i], converterName) == 0) {
266       return i;
267     }
268   }
269   return -1;
270 }
271 
272 static UBool *
getResultsManually(const char ** encodings,int32_t num_encodings,const char * utf8,int32_t length,const USet * excludedCodePoints,const UConverterUnicodeSet whichSet)273 getResultsManually(const char** encodings, int32_t num_encodings,
274                    const char *utf8, int32_t length,
275                    const USet* excludedCodePoints, const UConverterUnicodeSet whichSet) {
276   UBool* resultsManually;
277   int32_t i;
278 
279   resultsManually = (UBool*) uprv_malloc(gCountAvailable);
280   uprv_memset(resultsManually, 0, gCountAvailable);
281 
282   for(i = 0 ; i < num_encodings ; i++) {
283     UErrorCode status = U_ZERO_ERROR;
284     /* get unicode set for that converter */
285     USet* set;
286     UConverter* test_converter;
287     UChar32 cp;
288     int32_t encIndex, offset;
289 
290     set = uset_openEmpty();
291     test_converter = ucnv_open(encodings[i], &status);
292     ucnv_getUnicodeSet(test_converter, set,
293                        whichSet, &status);
294     if (excludedCodePoints != NULL) {
295       uset_addAll(set, excludedCodePoints);
296     }
297     uset_freeze(set);
298     offset = 0;
299     cp = 0;
300 
301     encIndex = findIndex(encodings[i]);
302     /*
303      * The following is almost, but not entirely, the same as
304      * resultsManually[encIndex] =
305      *   (UBool)(uset_spanUTF8(set, utf8, length, USET_SPAN_SIMPLE) == length);
306      * They might be different if the set contains strings,
307      * or if the utf8 string contains an illegal sequence.
308      *
309      * The UConverterSelector does not currently handle strings that can be
310      * converted, and it treats an illegal sequence as convertible
311      * while uset_spanUTF8() treats it like U+FFFD which may not be convertible.
312      */
313     resultsManually[encIndex] = TRUE;
314     while(offset<length) {
315       U8_NEXT(utf8, offset, length, cp);
316       if (cp >= 0 && !uset_contains(set, cp)) {
317         resultsManually[encIndex] = FALSE;
318         break;
319       }
320     }
321     uset_close(set);
322     ucnv_close(test_converter);
323   }
324   return resultsManually;
325 }
326 
327 /* closes res but does not free resultsManually */
verifyResult(UEnumeration * res,const UBool * resultsManually)328 static void verifyResult(UEnumeration* res, const UBool *resultsManually) {
329   UBool* resultsFromSystem = (UBool*) uprv_malloc(gCountAvailable * sizeof(UBool));
330   const char* name;
331   UErrorCode status = U_ZERO_ERROR;
332   int32_t i;
333 
334   /* fill the bool for the selector results! */
335   uprv_memset(resultsFromSystem, 0, gCountAvailable);
336   while ((name = uenum_next(res,NULL, &status)) != NULL) {
337     resultsFromSystem[findIndex(name)] = TRUE;
338   }
339   for(i = 0 ; i < gCountAvailable; i++) {
340     if(resultsManually[i] != resultsFromSystem[i]) {
341       log_err("failure in converter selector\n"
342               "converter %s had conflicting results -- manual: %d, system %d\n",
343               gAvailableNames[i], resultsManually[i], resultsFromSystem[i]);
344     }
345   }
346   uprv_free(resultsFromSystem);
347   uenum_close(res);
348 }
349 
350 static UConverterSelector *
serializeAndUnserialize(UConverterSelector * sel,char ** buffer,UErrorCode * status)351 serializeAndUnserialize(UConverterSelector *sel, char **buffer, UErrorCode *status) {
352   char *new_buffer;
353   int32_t ser_len, ser_len2;
354   /* preflight */
355   ser_len = ucnvsel_serialize(sel, NULL, 0, status);
356   if (*status != U_BUFFER_OVERFLOW_ERROR) {
357     log_err("ucnvsel_serialize(preflighting) failed: %s\n", u_errorName(*status));
358     return sel;
359   }
360   new_buffer = (char *)uprv_malloc(ser_len);
361   *status = U_ZERO_ERROR;
362   ser_len2 = ucnvsel_serialize(sel, new_buffer, ser_len, status);
363   if (U_FAILURE(*status) || ser_len != ser_len2) {
364     log_err("ucnvsel_serialize() failed: %s\n", u_errorName(*status));
365     uprv_free(new_buffer);
366     return sel;
367   }
368   ucnvsel_close(sel);
369   uprv_free(*buffer);
370   *buffer = new_buffer;
371   sel = ucnvsel_openFromSerialized(new_buffer, ser_len, status);
372   if (U_FAILURE(*status)) {
373     log_err("ucnvsel_openFromSerialized() failed: %s\n", u_errorName(*status));
374     return NULL;
375   }
376   return sel;
377 }
378 
TestSelector()379 static void TestSelector()
380 {
381   TestText text;
382   USet* excluded_sets[3] = { NULL };
383   int32_t i, testCaseIdx;
384 
385   if (!getAvailableNames()) {
386     return;
387   }
388   if (!text_open(&text)) {
389     releaseAvailableNames();
390   }
391 
392   excluded_sets[0] = uset_openEmpty();
393   for(i = 1 ; i < 3 ; i++) {
394     excluded_sets[i] = uset_open(i*30, i*30+500);
395   }
396 
397   for(testCaseIdx = 0; testCaseIdx < UPRV_LENGTHOF(getEncodingsFns); testCaseIdx++)
398   {
399     int32_t excluded_set_id;
400     int32_t num_encodings;
401     const char **encodings = getEncodingsFns[testCaseIdx](&num_encodings);
402     if (getTestOption(QUICK_OPTION) && num_encodings > 25) {
403       uprv_free((void *)encodings);
404       continue;
405     }
406 
407     /*
408      * for(excluded_set_id = 0 ; excluded_set_id < 3 ; excluded_set_id++)
409      *
410      * This loop was replaced by the following statement because
411      * the loop made the test run longer without adding to the code coverage.
412      * The handling of the exclusion set is independent of the
413      * set of encodings, so there is no need to test every combination.
414      */
415     excluded_set_id = testCaseIdx % UPRV_LENGTHOF(excluded_sets);
416     {
417       UConverterSelector *sel_rt, *sel_fb;
418       char *buffer_fb = NULL;
419       UErrorCode status = U_ZERO_ERROR;
420       sel_rt = ucnvsel_open(encodings, num_encodings,
421                             excluded_sets[excluded_set_id],
422                             UCNV_ROUNDTRIP_SET, &status);
423       if (num_encodings == gCountAvailable) {
424         /* test the special "all converters" parameter values */
425         sel_fb = ucnvsel_open(NULL, 0,
426                               excluded_sets[excluded_set_id],
427                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
428       } else if (uset_isEmpty(excluded_sets[excluded_set_id])) {
429         /* test that a NULL set gives the same results as an empty set */
430         sel_fb = ucnvsel_open(encodings, num_encodings,
431                               NULL,
432                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
433       } else {
434         sel_fb = ucnvsel_open(encodings, num_encodings,
435                               excluded_sets[excluded_set_id],
436                               UCNV_ROUNDTRIP_AND_FALLBACK_SET, &status);
437       }
438       if (U_FAILURE(status)) {
439         log_err("ucnv_sel_open(encodings %ld) failed - %s\n", testCaseIdx, u_errorName(status));
440         ucnvsel_close(sel_rt);
441         uprv_free((void *)encodings);
442         continue;
443       }
444 
445       text_reset(&text);
446       for (;;) {
447         UBool *manual_rt, *manual_fb;
448         static UChar utf16[10000];
449         char *s;
450         int32_t length8, length16;
451 
452         s = text_nextString(&text, &length8);
453         if (s == NULL || (getTestOption(QUICK_OPTION) && text.number > 3)) {
454           break;
455         }
456 
457         manual_rt = getResultsManually(encodings, num_encodings,
458                                        s, length8,
459                                        excluded_sets[excluded_set_id],
460                                        UCNV_ROUNDTRIP_SET);
461         manual_fb = getResultsManually(encodings, num_encodings,
462                                        s, length8,
463                                        excluded_sets[excluded_set_id],
464                                        UCNV_ROUNDTRIP_AND_FALLBACK_SET);
465         /* UTF-8 with length */
466         status = U_ZERO_ERROR;
467         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, length8, &status), manual_rt);
468         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, length8, &status), manual_fb);
469         /* UTF-8 NUL-terminated */
470         verifyResult(ucnvsel_selectForUTF8(sel_rt, s, -1, &status), manual_rt);
471         verifyResult(ucnvsel_selectForUTF8(sel_fb, s, -1, &status), manual_fb);
472 
473         u_strFromUTF8(utf16, UPRV_LENGTHOF(utf16), &length16, s, length8, &status);
474         if (U_FAILURE(status)) {
475           log_err("error converting the test text (string %ld) to UTF-16 - %s\n",
476                   (long)text.number, u_errorName(status));
477         } else {
478           if (text.number == 0) {
479             sel_fb = serializeAndUnserialize(sel_fb, &buffer_fb, &status);
480           }
481           if (U_SUCCESS(status)) {
482             /* UTF-16 with length */
483             verifyResult(ucnvsel_selectForString(sel_rt, utf16, length16, &status), manual_rt);
484             verifyResult(ucnvsel_selectForString(sel_fb, utf16, length16, &status), manual_fb);
485             /* UTF-16 NUL-terminated */
486             verifyResult(ucnvsel_selectForString(sel_rt, utf16, -1, &status), manual_rt);
487             verifyResult(ucnvsel_selectForString(sel_fb, utf16, -1, &status), manual_fb);
488           }
489         }
490 
491         uprv_free(manual_rt);
492         uprv_free(manual_fb);
493       }
494       ucnvsel_close(sel_rt);
495       ucnvsel_close(sel_fb);
496       uprv_free(buffer_fb);
497     }
498     uprv_free((void *)encodings);
499   }
500 
501   releaseAvailableNames();
502   text_close(&text);
503   for(i = 0 ; i < 3 ; i++) {
504     uset_close(excluded_sets[i]);
505   }
506 }
507 
508 /* Improve code coverage of UPropsVectors */
TestUPropsVector()509 static void TestUPropsVector() {
510     UErrorCode errorCode = U_ILLEGAL_ARGUMENT_ERROR;
511     UPropsVectors *pv = upvec_open(100, &errorCode);
512     if (pv != NULL) {
513         log_err("Should have returned NULL if UErrorCode is an error.");
514         return;
515     }
516     errorCode = U_ZERO_ERROR;
517     pv = upvec_open(-1, &errorCode);
518     if (pv != NULL || U_SUCCESS(errorCode)) {
519         log_err("Should have returned NULL if column is less than 0.\n");
520         return;
521     }
522     errorCode = U_ZERO_ERROR;
523     pv = upvec_open(100, &errorCode);
524     if (pv == NULL || U_FAILURE(errorCode)) {
525         log_err("Unable to open UPropsVectors.\n");
526         return;
527     }
528 
529     if (upvec_getValue(pv, 0, 1) != 0) {
530         log_err("upvec_getValue should return 0.\n");
531     }
532     if (upvec_getRow(pv, 0, NULL, NULL) == NULL) {
533         log_err("upvec_getRow should not return NULL.\n");
534     }
535     if (upvec_getArray(pv, NULL, NULL) != NULL) {
536         log_err("upvec_getArray should return NULL.\n");
537     }
538 
539     upvec_close(pv);
540 }
541