• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4  * COPYRIGHT:
5  * Copyright (c) 1997-2016, International Business Machines Corporation and
6  * others. All Rights Reserved.
7  ********************************************************************/
8 /*****************************************************************************
9 *
10 * File ccapitst.c
11 *
12 * Modification History:
13 *        Name                      Description
14 *     Madhu Katragadda              Ported for C API
15 ******************************************************************************
16 */
17 
18 #include <stdalign.h>
19 #include <stdbool.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <ctype.h>
24 #include "unicode/uloc.h"
25 #include "unicode/ucnv.h"
26 #include "unicode/ucnv_err.h"
27 #include "unicode/putil.h"
28 #include "unicode/uset.h"
29 #include "unicode/ustring.h"
30 #include "unicode/utf8.h"
31 #include "ucnv_bld.h" /* for sizeof(UConverter) */
32 #include "cmemory.h"  /* for UAlignedMemory */
33 #include "cintltst.h"
34 #include "ccapitst.h"
35 #include "cstring.h"
36 
/* Number of code pages exercised by the per-codepage tables in TestConvert(). */
#define NUM_CODEPAGE 1
/*
 * Element count used for the conversion buffers allocated by the tests.
 * Parenthesized so the macro expands to a single value in any expression
 * (e.g. x / MAX_FILE_LEN), not just when multiplied.
 */
#define MAX_FILE_LEN (1024*20)
/* Size of the char buffer that holds the path of the Unicode test file. */
#define UCS_FILE_NAME_SIZE 512
40 
41 /*returns an action other than the one provided*/
42 #if !UCONFIG_NO_LEGACY_CONVERSION
43 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA);
44 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA);
45 #endif
46 
47 static UConverter *
cnv_open(const char * name,UErrorCode * pErrorCode)48 cnv_open(const char *name, UErrorCode *pErrorCode) {
49     if(name!=NULL && name[0]=='*') {
50         return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode);
51     } else {
52         return ucnv_open(name, pErrorCode);
53     }
54 }
55 
56 
57 static void ListNames(void);
58 static void TestFlushCache(void);
59 static void TestDuplicateAlias(void);
60 static void TestCCSID(void);
61 static void TestJ932(void);
62 static void TestJ1968(void);
63 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
64 static void TestLMBCSMaxChar(void);
65 #endif
66 
67 static void TestConvertClone(void);
68 #if !UCONFIG_NO_LEGACY_CONVERSION
69 static void TestConvertSafeCloneCallback(void);
70 #endif
71 
72 static void TestEBCDICSwapLFNL(void);
73 static void TestConvertEx(void);
74 static void TestConvertExFromUTF8(void);
75 static void TestConvertExFromUTF8_C5F0(void);
76 static void TestConvertAlgorithmic(void);
77        void TestDefaultConverterError(void);    /* defined in cctest.c */
78        void TestDefaultConverterSet(void);    /* defined in cctest.c */
79 static void TestToUCountPending(void);
80 static void TestFromUCountPending(void);
81 static void TestDefaultName(void);
82 static void TestCompareNames(void);
83 static void TestSubstString(void);
84 static void InvalidArguments(void);
85 static void TestGetName(void);
86 static void TestUTFBOM(void);
87 
88 void addTestConvert(TestNode** root);
89 
/*
 * Registers the converter C API tests of this file with the test tree at
 * root, each under a "tsconv/ccapitst/<name>" path.
 *
 * Some registrations are compiled in only when the matching UCONFIG_NO_*
 * build switch is off (legacy conversion and/or file I/O).
 */
void addTestConvert(TestNode** root)
{
    /* Name listing, basic conversion, cache and alias handling. */
    addTest(root, ListNames, "tsconv/ccapitst/ListNames");
    addTest(root, TestConvert, "tsconv/ccapitst/TestConvert");
    addTest(root, TestFlushCache, "tsconv/ccapitst/TestFlushCache");
    addTest(root, TestAlias, "tsconv/ccapitst/TestAlias");
    addTest(root, TestDuplicateAlias, "tsconv/ccapitst/TestDuplicateAlias");

    /* Converter cloning. */
    addTest(root, TestConvertSafeClone, "tsconv/ccapitst/TestConvertSafeClone");
    addTest(root, TestConvertClone, "tsconv/ccapitst/TestConvertClone");
#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestConvertSafeCloneCallback, "tsconv/ccapitst/TestConvertSafeCloneCallback");
#endif

    /* CCSID lookup and individual regression tests. */
    addTest(root, TestCCSID, "tsconv/ccapitst/TestCCSID");
    addTest(root, TestJ932, "tsconv/ccapitst/TestJ932");
    addTest(root, TestJ1968, "tsconv/ccapitst/TestJ1968");
#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestLMBCSMaxChar, "tsconv/ccapitst/TestLMBCSMaxChar");
#endif
    addTest(root, TestEBCDICSwapLFNL, "tsconv/ccapitst/TestEBCDICSwapLFNL");

    /* ucnv_convertEx() and algorithmic conversion. */
    addTest(root, TestConvertEx, "tsconv/ccapitst/TestConvertEx");
    addTest(root, TestConvertExFromUTF8, "tsconv/ccapitst/TestConvertExFromUTF8");
    addTest(root, TestConvertExFromUTF8_C5F0, "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
    addTest(root, TestConvertAlgorithmic, "tsconv/ccapitst/TestConvertAlgorithmic");

    /* Default converter tests; these two are defined in cctest.c. */
    addTest(root, TestDefaultConverterError, "tsconv/ccapitst/TestDefaultConverterError");
    addTest(root, TestDefaultConverterSet, "tsconv/ccapitst/TestDefaultConverterSet");
#if !UCONFIG_NO_FILE_IO
    addTest(root, TestToUCountPending, "tsconv/ccapitst/TestToUCountPending");
    addTest(root, TestFromUCountPending, "tsconv/ccapitst/TestFromUCountPending");
#endif

    /* Names, substitution strings, argument validation and BOM handling. */
    addTest(root, TestDefaultName, "tsconv/ccapitst/TestDefaultName");
    addTest(root, TestCompareNames, "tsconv/ccapitst/TestCompareNames");
    addTest(root, TestSubstString, "tsconv/ccapitst/TestSubstString");
    addTest(root, InvalidArguments, "tsconv/ccapitst/InvalidArguments");
    addTest(root, TestGetName, "tsconv/ccapitst/TestGetName");
    addTest(root, TestUTFBOM, "tsconv/ccapitst/TestUTFBOM");
}
126 
ListNames(void)127 static void ListNames(void) {
128     UErrorCode          err                 =   U_ZERO_ERROR;
129     int32_t             testLong1           =   0;
130     const char*            available_conv;
131     UEnumeration *allNamesEnum = NULL;
132     int32_t allNamesCount = 0;
133     uint16_t            count;
134 
135     log_verbose("Testing ucnv_openAllNames()...");
136     allNamesEnum = ucnv_openAllNames(&err);
137     if(U_FAILURE(err)) {
138         log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
139     }
140     else {
141         const char *string = NULL;
142         int32_t len = 0;
143         int32_t count1 = 0;
144         int32_t count2 = 0;
145         allNamesCount = uenum_count(allNamesEnum, &err);
146         while ((string = uenum_next(allNamesEnum, &len, &err))) {
147             count1++;
148             log_verbose("read \"%s\", length %i\n", string, len);
149         }
150         if (U_FAILURE(err)) {
151             log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err));
152             err = U_ZERO_ERROR;
153         }
154         uenum_reset(allNamesEnum, &err);
155         while ((string = uenum_next(allNamesEnum, &len, &err))) {
156             count2++;
157             ucnv_close(ucnv_open(string, &err));
158             log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable");
159             err = U_ZERO_ERROR;
160         }
161         if (count1 != count2) {
162             log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
163         }
164     }
165     uenum_close(allNamesEnum);
166     err = U_ZERO_ERROR;
167 
168     /*Tests ucnv_getAvailableName(), getAvailableCount()*/
169 
170     log_verbose("Testing ucnv_countAvailable()...");
171 
172     testLong1=ucnv_countAvailable();
173     log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount);
174 
175     log_verbose("\n---Testing ucnv_getAvailableName..");  /*need to check this out */
176 
177     available_conv = ucnv_getAvailableName(testLong1);
178        /*test ucnv_getAvailableName with err condition*/
179     log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
180     available_conv = ucnv_getAvailableName(-1);
181     if(available_conv != NULL){
182         log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
183     }
184 
185     /* Test ucnv_countAliases() etc. */
186     count = ucnv_countAliases("utf-8", &err);
187     if(U_FAILURE(err)) {
188         log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
189     } else if(count <= 0) {
190         log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count);
191     } else {
192         /* try to get the aliases individually */
193         const char *alias;
194         alias = ucnv_getAlias("utf-8", 0, &err);
195         if(U_FAILURE(err)) {
196             log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err));
197         } else if(strcmp("UTF-8", alias) != 0) {
198             log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias);
199         } else {
200             uint16_t aliasNum;
201             for(aliasNum = 0; aliasNum < count; ++aliasNum) {
202                 alias = ucnv_getAlias("utf-8", aliasNum, &err);
203                 if(U_FAILURE(err)) {
204                     log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
205                 } else if(strlen(alias) > 20) {
206                     /* sanity check */
207                     log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias);
208                 } else {
209                     log_verbose("alias %d for utf-8: %s\n", aliasNum, alias);
210                 }
211             }
212             if(U_SUCCESS(err)) {
213                 /* try to fill an array with all aliases */
214                 const char **aliases;
215                 aliases=(const char **)malloc(count * sizeof(const char *));
216                 if(aliases != 0) {
217                     ucnv_getAliases("utf-8", aliases, &err);
218                     if(U_FAILURE(err)) {
219                         log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err));
220                     } else {
221                         for(aliasNum = 0; aliasNum < count; ++aliasNum) {
222                             /* compare the pointers with the ones returned individually */
223                             alias = ucnv_getAlias("utf-8", aliasNum, &err);
224                             if(U_FAILURE(err)) {
225                                 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
226                             } else if(aliases[aliasNum] != alias) {
227                                 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum);
228                             }
229                         }
230                     }
231                     free((char **)aliases);
232                 }
233             }
234         }
235     }
236 }
237 
238 
TestConvert(void)239 static void TestConvert(void)
240 {
241 #if !UCONFIG_NO_LEGACY_CONVERSION
242     char                myptr[4];
243     char                save[4];
244     int32_t             testLong1           =   0;
245     uint16_t            rest                =   0;
246     int32_t             len                 =   0;
247     int32_t             x                   =   0;
248     FILE*               ucs_file_in         =   NULL;
249     UChar                BOM                 =   0x0000;
250     UChar                myUChar           =   0x0000;
251     char*               mytarget; /*    [MAX_FILE_LEN] */
252     char*               mytarget_1;
253     char*               mytarget_use;
254     UChar*                consumedUni         =   NULL;
255     char*               consumed            =   NULL;
256     char*                 output_cp_buffer; /*    [MAX_FILE_LEN] */
257     UChar*                ucs_file_buffer; /*    [MAX_FILE_LEN] */
258     UChar*                ucs_file_buffer_use;
259     UChar*                my_ucs_file_buffer; /*    [MAX_FILE_LEN] */
260     UChar*                my_ucs_file_buffer_1;
261     int8_t                ii                  =   0;
262     uint16_t            codepage_index      =   0;
263     int32_t             cp                  =   0;
264     UErrorCode          err                 =   U_ZERO_ERROR;
265     char                ucs_file_name[UCS_FILE_NAME_SIZE];
266     UConverterFromUCallback          MIA1, MIA1_2;
267     UConverterToUCallback              MIA2, MIA2_2;
268     const void         *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2;
269     UConverter*            someConverters[5];
270     UConverter*         myConverter = 0;
271     UChar*                displayname = 0;
272 
273     const char* locale;
274 
275     UChar* uchar1 = 0;
276     UChar* uchar2 = 0;
277     UChar* uchar3 = 0;
278     int32_t targetcapacity2;
279     int32_t targetcapacity;
280     int32_t targetsize;
281     int32_t disnamelen;
282 
283     const UChar* tmp_ucs_buf;
284     const UChar* tmp_consumedUni=NULL;
285     const char* tmp_mytarget_use;
286     const char* tmp_consumed;
287 
288     /******************************************************************
289                                 Checking Unicode -> ksc
290      ******************************************************************/
291 
292     const char*      CodePagesToTest[NUM_CODEPAGE]       =
293     {
294        "ibm-949_P110-1999"
295 
296 
297     };
298     const uint16_t CodePageNumberToTest[NUM_CODEPAGE]             =
299     {
300         949
301     };
302 
303 
304     const int8_t     CodePagesMinChars[NUM_CODEPAGE] =
305     {
306         1
307 
308     };
309 
310     const int8_t     CodePagesMaxChars[NUM_CODEPAGE] =
311     {
312         2
313 
314     };
315 
316     const uint16_t        CodePagesSubstitutionChars[NUM_CODEPAGE]    =
317     {
318         0xAFFE
319     };
320 
321     const char* CodePagesTestFiles[NUM_CODEPAGE]    =
322     {
323       "uni-text.bin"
324     };
325 
326 
327     const UConverterPlatform        CodePagesPlatform[NUM_CODEPAGE]    =
328     {
329         UCNV_IBM
330 
331     };
332 
333     const char* CodePagesLocale[NUM_CODEPAGE] =
334     {
335         "ko_KR"
336     };
337 
338     UConverterFromUCallback oldFromUAction = NULL;
339     UConverterToUCallback oldToUAction = NULL;
340     const void* oldFromUContext = NULL;
341     const void* oldToUContext = NULL;
342 
343     /* Allocate memory */
344     mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0]));
345     output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0]));
346     ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0]));
347     my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0]));
348 
349     ucs_file_buffer_use = ucs_file_buffer;
350     mytarget_1=mytarget;
351     mytarget_use        = mytarget;
352     my_ucs_file_buffer_1=my_ucs_file_buffer;
353 
354     /* flush the converter cache to get a consistent state before the flushing is tested */
355     ucnv_flushCache();
356 
357     /*Testing ucnv_openU()*/
358     {
359         UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
360         UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */
361         UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */
362         const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
363         UChar illegalName[100];
364         UConverter *converter=NULL;
365         err=U_ZERO_ERROR;
366         converter=ucnv_openU(converterName, &err);
367         if(U_FAILURE(err)){
368             log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err));
369         }
370         ucnv_close(converter);
371         err=U_ZERO_ERROR;
372         converter=ucnv_openU(NULL, &err);
373         if(U_FAILURE(err)){
374             log_err("FAILURE! ucnv_openU(NULL, err)  failed. %s\n", myErrorName(err));
375         }
376         ucnv_close(converter);
377         /*testing with error value*/
378         err=U_ILLEGAL_ARGUMENT_ERROR;
379         converter=ucnv_openU(converterName, &err);
380         if(!(converter == NULL)){
381             log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
382         }
383         ucnv_close(converter);
384         err=U_ZERO_ERROR;
385         u_uastrcpy(illegalName, "");
386         u_uastrcpy(illegalName, illegalNameChars);
387         ucnv_openU(illegalName, &err);
388         if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){
389             log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
390         }
391 
392         err=U_ZERO_ERROR;
393         ucnv_openU(firstSortedName, &err);
394         if(err!=U_FILE_ACCESS_ERROR){
395             log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
396         }
397 
398         err=U_ZERO_ERROR;
399         ucnv_openU(lastSortedName, &err);
400         if(err!=U_FILE_ACCESS_ERROR){
401             log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
402         }
403 
404         err=U_ZERO_ERROR;
405     }
406     log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
407     {
408          UConverter *cnv=NULL;
409          err=U_ZERO_ERROR;
410          cnv=ucnv_open("ibm-949,Madhu", &err);
411          if(U_FAILURE(err)){
412             log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err)  failed. %s\n", myErrorName(err));
413          }
414          ucnv_close(cnv);
415 
416     }
417       /*Testing ucnv_convert()*/
418     {
419         int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0;
420         const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
421         const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
422         char *target=0;
423         sourceLimit=UPRV_LENGTHOF(source);
424         err=U_ZERO_ERROR;
425         targetLimit=0;
426 
427         targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err);
428         if(err == U_BUFFER_OVERFLOW_ERROR){
429             err=U_ZERO_ERROR;
430             targetLimit=targetCapacity+1;
431             target=(char*)malloc(sizeof(char) * targetLimit);
432             targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
433         }
434         if(U_FAILURE(err)){
435             log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err));
436         }
437         else {
438             for(i=0; i<targetCapacity; i++){
439                 if(target[i] != expectedTarget[i]){
440                     log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d,  Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
441                 }
442             }
443 
444             i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err);
445             if(U_FAILURE(err) || i!=7){
446                 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
447                     u_errorName(err), i);
448             }
449 
450             /*Test error conditions*/
451             err=U_ZERO_ERROR;
452             i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err);
453             if(i !=0){
454                 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
455             }
456 
457             err=U_ILLEGAL_ARGUMENT_ERROR;
458             sourceLimit=UPRV_LENGTHOF(source);
459             i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
460             if(i !=0 ){
461                 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
462             }
463 
464             err=U_ZERO_ERROR;
465             sourceLimit=UPRV_LENGTHOF(source);
466             targetLimit=0;
467             i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
468             if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){
469                 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
470             }
471             err=U_ZERO_ERROR;
472             free(target);
473         }
474     }
475 
476     /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
477     log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
478     err=U_ILLEGAL_ARGUMENT_ERROR;
479     if(ucnv_open(NULL, &err) != NULL){
480         log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
481     }
482     if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){
483         log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
484     }
485     err=U_ZERO_ERROR;
486 
487     /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
488     log_verbose("\n---Testing ucnv_open default...\n");
489     someConverters[0] = ucnv_open(NULL,&err);
490     someConverters[1] = ucnv_open(NULL,&err);
491     someConverters[2] = ucnv_open("utf8", &err);
492     someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err);
493     ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */
494     if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));}
495 
496     /* Testing ucnv_getName()*/
497     /*default code page */
498     ucnv_getName(someConverters[0], &err);
499     if(U_FAILURE(err)) {
500         log_data_err("getName[0] failed\n");
501     } else {
502         log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err));
503     }
504     ucnv_getName(someConverters[1], &err);
505     if(U_FAILURE(err)) {
506         log_data_err("getName[1] failed\n");
507     } else {
508         log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err));
509     }
510 
511     ucnv_close(someConverters[0]);
512     ucnv_close(someConverters[1]);
513     ucnv_close(someConverters[2]);
514     ucnv_close(someConverters[3]);
515 
516 
517     for (codepage_index=0; codepage_index <  NUM_CODEPAGE; ++codepage_index)
518     {
519         int32_t i = 0;
520 
521         err = U_ZERO_ERROR;
522 #ifdef U_TOPSRCDIR
523         strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING);
524 #else
525         strcpy(ucs_file_name, loadTestData(&err));
526 
527         if(U_FAILURE(err)){
528             log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
529             return;
530         }
531 
532         {
533             char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
534 
535             if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
536                     *(index+1)=0;
537             }
538         }
539 
540         strcat(ucs_file_name,".."U_FILE_SEP_STRING);
541 #endif
542         strcat(ucs_file_name, CodePagesTestFiles[codepage_index]);
543 
544         ucs_file_in = fopen(ucs_file_name,"rb");
545         if (!ucs_file_in)
546         {
547             log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
548             return;
549         }
550 
551         /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/
552 
553         /*  myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
554         /*  ucnv_flushCache(); */
555         myConverter =ucnv_open( "ibm-949", &err);
556         if (!myConverter || U_FAILURE(err))
557         {
558             log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err));
559             fclose(ucs_file_in);
560             break;
561         }
562 
563         /*testing for ucnv_getName()  */
564         log_verbose("Testing ucnv_getName()...\n");
565         ucnv_getName(myConverter, &err);
566         if(U_FAILURE(err))
567             log_err("Error in getName\n");
568         else
569         {
570             log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err));
571         }
572         if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
573             log_err("getName failed\n");
574         else
575             log_verbose("getName ok\n");
576         /*Test getName with error condition*/
577         {
578             const char* name=0;
579             err=U_ILLEGAL_ARGUMENT_ERROR;
580             log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
581             name=ucnv_getName(myConverter, &err);
582             if(name != NULL){
583                 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
584             }
585             err=U_ZERO_ERROR;
586         }
587 
588 
589         /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
590 
591         log_verbose("Testing ucnv_getMaxCharSize()...\n");
592         if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index])
593             log_verbose("Max byte per character OK\n");
594         else
595             log_err("Max byte per character failed\n");
596 
597         log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
598         if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index])
599             log_verbose("Min byte per character OK\n");
600         else
601             log_err("Min byte per character failed\n");
602 
603 
604         /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
605         log_verbose("\n---Testing ucnv_getSubstChars...\n");
606         ii=4;
607         ucnv_getSubstChars(myConverter, myptr, &ii, &err);
608         if (ii <= 0) {
609             log_err("ucnv_getSubstChars returned a negative number %d\n", ii);
610         }
611 
612         for(x=0;x<ii;x++)
613             rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]);
614         if (rest==CodePagesSubstitutionChars[codepage_index])
615             log_verbose("Substitution character ok\n");
616         else
617             log_err("Substitution character failed.\n");
618 
619         log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
620         ucnv_setSubstChars(myConverter, myptr, ii, &err);
621         if (U_FAILURE(err))
622         {
623             log_err("FAILURE! %s\n", myErrorName(err));
624         }
625         ucnv_getSubstChars(myConverter,save, &ii, &err);
626         if (U_FAILURE(err))
627         {
628             log_err("FAILURE! %s\n", myErrorName(err));
629         }
630 
631         if (strncmp(save, myptr, ii))
632             log_err("Saved substitution character failed\n");
633         else
634             log_verbose("Saved substitution character ok\n");
635 
636         /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
637         log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
638         ii=1;
639         ucnv_getSubstChars(myConverter, myptr, &ii, &err);
640         if(err != U_INDEX_OUTOFBOUNDS_ERROR){
641             log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err));
642         }
643         err=U_ZERO_ERROR;
644         ii=4;
645         ucnv_getSubstChars(myConverter, myptr, &ii, &err);
646         log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
647         ucnv_setSubstChars(myConverter, myptr, 0, &err);
648         if(err != U_ILLEGAL_ARGUMENT_ERROR){
649             log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err));
650         }
651         log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
652         strcpy(myptr, "abc");
653         ucnv_setSubstChars(myConverter, myptr, ii, &err);
654         err=U_ZERO_ERROR;
655         ucnv_getSubstChars(myConverter, save, &ii, &err);
656         if(strncmp(save, myptr, ii) == 0){
657             log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
658         }
659         log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
660         err=U_ZERO_ERROR;
661         strcpy(myptr, "abc");
662         ucnv_setSubstChars(myConverter, myptr, ii, &err);
663         err=U_ILLEGAL_ARGUMENT_ERROR;
664         ucnv_getSubstChars(myConverter, save, &ii, &err);
665         if(strncmp(save, myptr, ii) == 0){
666             log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
667         }
668         err=U_ZERO_ERROR;
669         /*------*/
670 
671 #ifdef U_ENABLE_GENERIC_ISO_2022
672         /*resetState  ucnv_reset()*/
673         log_verbose("\n---Testing ucnv_reset()..\n");
674         ucnv_reset(myConverter);
675         {
676              UChar32 c;
677              const uint8_t in[]={  0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
678              const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
679              UConverter *cnv=ucnv_open("ISO_2022", &err);
680              if(U_FAILURE(err)) {
681                 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
682              }
683              c=ucnv_getNextUChar(cnv, &source, limit, &err);
684              if((U_FAILURE(err) || c != (UChar32)0x0031)) {
685                 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err));
686              }
687              ucnv_reset(cnv);
688              ucnv_close(cnv);
689 
690         }
691 #endif
692 
693         /*getDisplayName*/
694         log_verbose("\n---Testing ucnv_getDisplayName()...\n");
695         locale=CodePagesLocale[codepage_index];
696         len=0;
697         displayname=NULL;
698         disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err);
699         if(err==U_BUFFER_OVERFLOW_ERROR) {
700             err=U_ZERO_ERROR;
701             displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar));
702             ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err);
703             if(U_FAILURE(err)) {
704                 log_err("getDisplayName failed. The error is  %s\n", myErrorName(err));
705             }
706             else {
707                 log_verbose(" getDisplayName o.k.\n");
708             }
709             free(displayname);
710             displayname=NULL;
711         }
712         else {
713             log_err("getDisplayName preflight doesn't work. Error is  %s\n", myErrorName(err));
714         }
715         /*test ucnv_getDiaplayName with error condition*/
716         err= U_ILLEGAL_ARGUMENT_ERROR;
717         len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err);
718         if( len !=0 ){
719             log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
720         }
721         /*test ucnv_getDiaplayName with error condition*/
722         err=U_ZERO_ERROR;
723         len=ucnv_getDisplayName(NULL,locale,NULL,0, &err);
724         if( len !=0 || U_SUCCESS(err)){
725             log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
726         }
727         err=U_ZERO_ERROR;
728 
729         /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
730         ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context);
731 
732         log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
733         ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
734         if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context)
735         {
736             log_err("FAILURE! %s\n", myErrorName(err));
737         }
738 
739         ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
740         if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM)
741             log_err("get From UCallBack failed\n");
742         else
743             log_verbose("get From UCallBack ok\n");
744 
745         log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
746         ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err);
747         if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM)
748         {
749             log_err("FAILURE! %s\n", myErrorName(err));
750         }
751 
752         ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
753         if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context)
754             log_err("get From UCallBack action failed\n");
755         else
756             log_verbose("get From UCallBack action ok\n");
757 
758         /*testing ucnv_setFromUCallBack with error conditions*/
759         err=U_ILLEGAL_ARGUMENT_ERROR;
760         log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
761         ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
762         ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
763         if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){
764             log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
765         }
766         err=U_ZERO_ERROR;
767 
768 
769         /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
770         ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context);
771 
772         log_verbose("\n---Testing setTo UCallBack...\n");
773         ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err);
774         if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context)
775         {
776             log_err("FAILURE! %s\n", myErrorName(err));
777         }
778 
779         ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
780         if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM)
781             log_err("To UCallBack failed\n");
782         else
783             log_verbose("To UCallBack ok\n");
784 
785         log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
786         ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err);
787         if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM)
788         { log_err("FAILURE! %s\n", myErrorName(err));  }
789 
790         ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
791         if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context)
792             log_err("To UCallBack failed\n");
793         else
794             log_verbose("To UCallBack ok\n");
795 
796         /*testing ucnv_setToUCallBack with error conditions*/
797         err=U_ILLEGAL_ARGUMENT_ERROR;
798         log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
799         ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err);
800         ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
801         if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){
802             log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
803         }
804         err=U_ZERO_ERROR;
805 
806 
807         /*getcodepageid testing ucnv_getCCSID() */
808         log_verbose("\n----Testing getCCSID....\n");
809         cp =    ucnv_getCCSID(myConverter,&err);
810         if (U_FAILURE(err))
811         {
812             log_err("FAILURE!..... %s\n", myErrorName(err));
813         }
814         if (cp != CodePageNumberToTest[codepage_index])
815             log_err("Codepage number test failed\n");
816         else
817             log_verbose("Codepage number test OK\n");
818 
819         /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
820         err=U_ILLEGAL_ARGUMENT_ERROR;
821         if( ucnv_getCCSID(myConverter,&err) != -1){
822             log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
823         }
824         err=U_ZERO_ERROR;
825 
826         /*getCodepagePlatform testing ucnv_getPlatform()*/
827         log_verbose("\n---Testing getCodepagePlatform ..\n");
828         if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err))
829             log_err("Platform codepage test failed\n");
830         else
831             log_verbose("Platform codepage test ok\n");
832 
833         if (U_FAILURE(err))
834         {
835             log_err("FAILURE! %s\n", myErrorName(err));
836         }
837         /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
838         err= U_ILLEGAL_ARGUMENT_ERROR;
839         if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){
840             log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
841         }
842         err=U_ZERO_ERROR;
843 
844 
845         /*Reads the BOM*/
846         {
847             // Note: gcc produces a compile warning if the return value from fread() is ignored.
848             size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in);
849             (void)numRead;
850         }
851         if (BOM!=0xFEFF && BOM!=0xFFFE)
852         {
853             log_err("File Missing BOM...Bailing!\n");
854             fclose(ucs_file_in);
855             break;
856         }
857 
858 
859         /*Reads in the file*/
860         while(!feof(ucs_file_in)&&(i+=(int32_t)fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in)))
861         {
862             myUChar = ucs_file_buffer[i-1];
863 
864             ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/
865         }
866 
867         myUChar = ucs_file_buffer[i-1];
868         ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/
869 
870 
871         /*testing ucnv_fromUChars() and ucnv_toUChars() */
872         /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
873 
874         uchar1=(UChar*)malloc(sizeof(UChar) * (i+1));
875         u_uastrcpy(uchar1,"");
876         u_strncpy(uchar1,ucs_file_buffer,i);
877         uchar1[i] = 0;
878 
879         uchar3=(UChar*)malloc(sizeof(UChar)*(i+1));
880         u_uastrcpy(uchar3,"");
881         u_strncpy(uchar3,ucs_file_buffer,i);
882         uchar3[i] = 0;
883 
884         /*Calls the Conversion Routine */
885         testLong1 = MAX_FILE_LEN;
886         log_verbose("\n---Testing ucnv_fromUChars()\n");
887         targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1,  uchar1, -1, &err);
888         if (U_FAILURE(err))
889         {
890             log_err("\nFAILURE...%s\n", myErrorName(err));
891         }
892         else
893             log_verbose(" ucnv_fromUChars() o.k.\n");
894 
895         /*test the conversion routine */
896         log_verbose("\n---Testing ucnv_toUChars()\n");
897         /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
898         targetcapacity2=0;
899         targetsize = ucnv_toUChars(myConverter,
900                      NULL,
901                      targetcapacity2,
902                      output_cp_buffer,
903                      (int32_t)strlen(output_cp_buffer),
904                      &err);
905         /*if there is an buffer overflow then trap the values and pass them and make the actual call*/
906 
907         if(err==U_BUFFER_OVERFLOW_ERROR)
908         {
909             err=U_ZERO_ERROR;
910             uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar));
911             targetsize = ucnv_toUChars(myConverter,
912                    uchar2,
913                    targetsize+1,
914                    output_cp_buffer,
915                    (int32_t)strlen(output_cp_buffer),
916                    &err);
917 
918             if(U_FAILURE(err))
919                 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err));
920             else
921                 log_verbose(" ucnv_toUChars() o.k.\n");
922 
923             if(u_strcmp(uchar1,uchar2)!=0)
924                 log_err("equality test failed with conversion routine\n");
925         }
926         else
927         {
928             log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
929         }
930         /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
931         err=U_ILLEGAL_ARGUMENT_ERROR;
932         log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
933         targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1,  uchar1, -1, &err);
934         if (targetcapacity !=0) {
935             log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
936         }
937         err=U_ZERO_ERROR;
938         log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
939         targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1,  uchar1, -1, &err);
940         if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) {
941             log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
942         }
943         err=U_ZERO_ERROR;
944         log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
945         targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1,  uchar1, 0, &err);
946         if (targetcapacity !=0) {
947             log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
948         }
949         log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
950         targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0,  uchar1, -1, &err);
951         if (err != U_BUFFER_OVERFLOW_ERROR) {
952             log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
953         }
954         /*toUChars with error conditions*/
955         targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
956         if(targetsize != 0){
957             log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
958         }
959         err=U_ZERO_ERROR;
960         targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
961         if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){
962             log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
963         }
964         err=U_ZERO_ERROR;
965         targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err);
966         if (targetsize !=0) {
967             log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
968         }
969         targetcapacity2=0;
970         targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
971         if (err != U_STRING_NOT_TERMINATED_WARNING) {
972             log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
973                     u_errorName(err));
974         }
975         err=U_ZERO_ERROR;
976         /*-----*/
977 
978 
979         /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
980         /*Clean up re-usable vars*/
981         log_verbose("Testing ucnv_fromUnicode().....\n");
982         tmp_ucs_buf=ucs_file_buffer_use;
983         ucnv_fromUnicode(myConverter, &mytarget_1,
984                  mytarget + MAX_FILE_LEN,
985                  &tmp_ucs_buf,
986                  ucs_file_buffer_use+i,
987                  NULL,
988                  true,
989                  &err);
990         consumedUni = (UChar*)tmp_consumedUni;
991         (void)consumedUni;   /* Suppress set but not used warning. */
992 
993         if (U_FAILURE(err))
994         {
995             log_err("FAILURE! %s\n", myErrorName(err));
996         }
997         else
998             log_verbose("ucnv_fromUnicode()   o.k.\n");
999 
1000         /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
1001         log_verbose("Testing ucnv_toUnicode().....\n");
1002         tmp_mytarget_use=mytarget_use;
1003         tmp_consumed = consumed;
1004         ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1,
1005                 my_ucs_file_buffer + MAX_FILE_LEN,
1006                 &tmp_mytarget_use,
1007                 mytarget_use + (mytarget_1 - mytarget),
1008                 NULL,
1009                 false,
1010                 &err);
1011         consumed = (char*)tmp_consumed;
1012         if (U_FAILURE(err))
1013         {
1014             log_err("FAILURE! %s\n", myErrorName(err));
1015         }
1016         else
1017             log_verbose("ucnv_toUnicode()  o.k.\n");
1018 
1019 
1020         log_verbose("\n---Testing   RoundTrip ...\n");
1021 
1022 
1023         u_strncpy(uchar3, my_ucs_file_buffer,i);
1024         uchar3[i] = 0;
1025 
1026         if(u_strcmp(uchar1,uchar3)==0)
1027             log_verbose("Equality test o.k.\n");
1028         else
1029             log_err("Equality test failed\n");
1030 
1031         /*sanity compare */
1032         if(uchar2 == NULL)
1033         {
1034             log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__);
1035         }
1036         else
1037         {
1038             if(u_strcmp(uchar2, uchar3)==0)
1039                 log_verbose("Equality test o.k.\n");
1040             else
1041                 log_err("Equality test failed\n");
1042         }
1043 
1044         fclose(ucs_file_in);
1045         ucnv_close(myConverter);
1046         if (uchar1 != 0) free(uchar1);
1047         if (uchar2 != 0) free(uchar2);
1048         if (uchar3 != 0) free(uchar3);
1049     }
1050 
1051     free((void*)mytarget);
1052     free((void*)output_cp_buffer);
1053     free((void*)ucs_file_buffer);
1054     free((void*)my_ucs_file_buffer);
1055 #endif
1056 }
1057 
1058 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Returns a from-Unicode callback that is different from MIA:
 * the SUBSTITUTE callback when MIA is the STOP callback, and the STOP
 * callback for every other value of MIA.
 */
static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA)
{
    UConverterFromUCallback stopAction = (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP;

    if (MIA == stopAction) {
        return (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE;
    }
    return stopAction;
}
1063 
/*
 * Returns a to-Unicode callback that is different from MIA:
 * the SUBSTITUTE callback when MIA is the STOP callback, and the STOP
 * callback for every other value of MIA.
 */
static UConverterToUCallback otherCharAction(UConverterToUCallback MIA)
{
    UConverterToUCallback stopAction = (UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP;

    if (MIA == stopAction) {
        return (UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE;
    }
    return stopAction;
}
1068 #endif
1069 
TestFlushCache(void)1070 static void TestFlushCache(void) {
1071 #if !UCONFIG_NO_LEGACY_CONVERSION
1072     UErrorCode          err                 =   U_ZERO_ERROR;
1073     UConverter*            someConverters[5];
1074     int flushCount = 0;
1075 
1076     /* flush the converter cache to get a consistent state before the flushing is tested */
1077     ucnv_flushCache();
1078 
1079     /*Testing ucnv_open()*/
1080     /* Note: These converters have been chosen because they do NOT
1081        encode the Latin characters (U+0041, ...), and therefore are
1082        highly unlikely to be chosen as system default codepages */
1083 
1084     someConverters[0] = ucnv_open("ibm-1047", &err);
1085     if (U_FAILURE(err)) {
1086         log_data_err("FAILURE! %s\n", myErrorName(err));
1087     }
1088 
1089     someConverters[1] = ucnv_open("ibm-1047", &err);
1090     if (U_FAILURE(err)) {
1091         log_data_err("FAILURE! %s\n", myErrorName(err));
1092     }
1093 
1094     someConverters[2] = ucnv_open("ibm-1047", &err);
1095     if (U_FAILURE(err)) {
1096         log_data_err("FAILURE! %s\n", myErrorName(err));
1097     }
1098 
1099     someConverters[3] = ucnv_open("gb18030", &err);
1100     if (U_FAILURE(err)) {
1101         log_data_err("FAILURE! %s\n", myErrorName(err));
1102     }
1103 
1104     someConverters[4] = ucnv_open("ibm-954", &err);
1105     if (U_FAILURE(err)) {
1106         log_data_err("FAILURE! %s\n", myErrorName(err));
1107     }
1108 
1109 
1110     /* Testing ucnv_flushCache() */
1111     log_verbose("\n---Testing ucnv_flushCache...\n");
1112     if ((flushCount=ucnv_flushCache())==0)
1113         log_verbose("Flush cache ok\n");
1114     else
1115         log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1116 
1117     /*testing ucnv_close() and ucnv_flushCache() */
1118     ucnv_close(someConverters[0]);
1119     ucnv_close(someConverters[1]);
1120 
1121     if ((flushCount=ucnv_flushCache())==0)
1122         log_verbose("Flush cache ok\n");
1123     else
1124         log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1125 
1126     ucnv_close(someConverters[2]);
1127     ucnv_close(someConverters[3]);
1128 
1129     if ((flushCount=ucnv_flushCache())==2)
1130         log_verbose("Flush cache ok\n");  /*because first, second and third are same  */
1131     else
1132         log_data_err("Flush Cache failed  line %d, got %d expected 2 or there is an error in ucnv_close()\n",
1133             __LINE__,
1134             flushCount);
1135 
1136     ucnv_close(someConverters[4]);
1137     if ( (flushCount=ucnv_flushCache())==1)
1138         log_verbose("Flush cache ok\n");
1139     else
1140         log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount);
1141 #endif
1142 }
1143 
1144 /**
1145  * Test the converter alias API, specifically the fuzzy matching of
1146  * alias names and the alias table integrity.  Make sure each
1147  * converter has at least one alias (itself), and that its listed
1148  * aliases map back to itself.  Check some hard-coded UTF-8 and
1149  * ISO_2022 aliases to make sure they work.
1150  */
TestAlias(void)1151 static void TestAlias(void) {
1152     int32_t i, ncnv;
1153     UErrorCode status = U_ZERO_ERROR;
1154 
1155     /* Predetermined aliases that we expect to map back to ISO_2022
1156      * and UTF-8.  UPDATE THIS DATA AS NECESSARY. */
1157     const char* ISO_2022_NAMES[] =
1158         {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1159          "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1160     int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES);
1161     const char *UTF8_NAMES[] =
1162         { "UTF-8", "utf-8", "utf8", "ibm-1208",
1163           "utf_8", "ibm1208", "cp1208" };
1164     int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES);
1165 
1166     struct {
1167         const char *name;
1168         const char *alias;
1169     } CONVERTERS_NAMES[] = {
1170         { "UTF-32BE", "UTF32_BigEndian" },
1171         { "UTF-32LE", "UTF32_LittleEndian" },
1172         { "UTF-32",   "ISO-10646-UCS-4" },
1173         { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1174         { "UTF-32",   "ucs-4" }
1175     };
1176     int32_t CONVERTERS_NAMES_LENGTH = UPRV_LENGTHOF(CONVERTERS_NAMES);
1177 
1178     /* When there are bugs in gencnval or in ucnv_io, converters can
1179        appear to have no aliases. */
1180     ncnv = ucnv_countAvailable();
1181     log_verbose("%d converters\n", ncnv);
1182     for (i=0; i<ncnv; ++i) {
1183         const char *name = ucnv_getAvailableName(i);
1184         const char *alias0;
1185         uint16_t na = ucnv_countAliases(name, &status);
1186         uint16_t j;
1187         UConverter *cnv;
1188 
1189         if (na == 0) {
1190             log_err("FAIL: Converter \"%s\" (i=%d)"
1191                     " has no aliases; expect at least one\n",
1192                     name, i);
1193             continue;
1194         }
1195         cnv = ucnv_open(name, &status);
1196         if (U_FAILURE(status)) {
1197             log_data_err("FAIL: Converter \"%s\" (i=%d)"
1198                     " can't be opened.\n",
1199                     name, i);
1200         }
1201         else {
1202             if (strcmp(ucnv_getName(cnv, &status), name) != 0
1203                 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) {
1204                 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1205                         "They should be the same\n",
1206                         name, ucnv_getName(cnv, &status));
1207             }
1208         }
1209         ucnv_close(cnv);
1210 
1211         status = U_ZERO_ERROR;
1212         alias0 = ucnv_getAlias(name, 0, &status);
1213         for (j=1; j<na; ++j) {
1214             const char *alias;
1215             /* Make sure each alias maps back to the the same list of
1216                aliases.  Assume that if alias 0 is the same, the whole
1217                list is the same (this should always be true). */
1218             const char *mapBack;
1219 
1220             status = U_ZERO_ERROR;
1221             alias = ucnv_getAlias(name, j, &status);
1222             if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1223                 log_err("FAIL: Converter \"%s\"is ambiguous\n", name);
1224             }
1225 
1226             if (alias == NULL) {
1227                 log_err("FAIL: Converter \"%s\" -> "
1228                         "alias[%d]=NULL\n",
1229                         name, j);
1230                 continue;
1231             }
1232 
1233             mapBack = ucnv_getAlias(alias, 0, &status);
1234 
1235             if (mapBack == NULL) {
1236                 log_err("FAIL: Converter \"%s\" -> "
1237                         "alias[%d]=\"%s\" -> "
1238                         "alias[0]=NULL, exp. \"%s\"\n",
1239                         name, j, alias, alias0);
1240                 continue;
1241             }
1242 
1243             if (0 != strcmp(alias0, mapBack)) {
1244                 int32_t idx;
1245                 UBool foundAlias = false;
1246                 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1247                     /* Make sure that we only get this mismapping when there is
1248                        an ambiguous alias, and the other converter has this alias too. */
1249                     for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) {
1250                         if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) {
1251                             foundAlias = true;
1252                             break;
1253                         }
1254                     }
1255                 }
1256                 /* else not ambiguous, and this is a real problem. foundAlias = false */
1257 
1258                 if (!foundAlias) {
1259                     log_err("FAIL: Converter \"%s\" -> "
1260                             "alias[%d]=\"%s\" -> "
1261                             "alias[0]=\"%s\", exp. \"%s\"\n",
1262                             name, j, alias, mapBack, alias0);
1263                 }
1264             }
1265         }
1266     }
1267 
1268 
1269     /* Check a list of predetermined aliases that we expect to map
1270      * back to ISO_2022 and UTF-8. */
1271     for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) {
1272         const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status);
1273         if(!mapBack) {
1274           log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]);
1275           continue;
1276         }
1277         if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) {
1278             log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1279                     ISO_2022_NAMES[i], mapBack);
1280         }
1281     }
1282 
1283 
1284     for (i=1; i<UTF8_NAMES_LENGTH; ++i) {
1285         const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status);
1286         if(!mapBack) {
1287           log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]);
1288           continue;
1289         }
1290         if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) {
1291             log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1292                     UTF8_NAMES[i], mapBack);
1293         }
1294     }
1295 
1296     /*
1297      * Check a list of predetermined aliases that we expect to map
1298      * back to predermined converter names.
1299      */
1300 
1301     for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) {
1302         const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status);
1303         if(!mapBack) {
1304           log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name);
1305           continue;
1306         }
1307         if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) {
1308             log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1309                     CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name);
1310         }
1311     }
1312 
1313 }
1314 
TestDuplicateAlias(void)1315 static void TestDuplicateAlias(void) {
1316     const char *alias;
1317     UErrorCode status = U_ZERO_ERROR;
1318 
1319     status = U_ZERO_ERROR;
1320     alias = ucnv_getStandardName("Shift_JIS", "IBM", &status);
1321     if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1322         log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias);
1323     }
1324     status = U_ZERO_ERROR;
1325     alias = ucnv_getStandardName("ibm-943", "IANA", &status);
1326     if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1327         log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias);
1328     }
1329     status = U_ZERO_ERROR;
1330     alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status);
1331     if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) {
1332         log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias);
1333     }
1334 }
1335 
1336 
1337 /* Test safe clone callback */
1338 
/*
 * Hands out serial numbers for TSCCContext objects: 1 on the first call,
 * then one more on each later call. The counter lives in a function-local
 * static variable.
 */
static uint32_t TSCC_nextSerial(void)
{
    static uint32_t nextValue = 1;
    uint32_t issued = nextValue;

    nextValue += 1;
    return issued;
}
1345 
1346 typedef struct
1347 {
1348     uint32_t       magic;      /* 0xC0FFEE to identify that the object is OK */
1349     uint32_t       serial;     /* minted from nextSerial, above */
1350     UBool          wasClosed;  /* close happened on the object */
1351 } TSCCContext;
1352 
/*
 * Creates a new heap-allocated TSCCContext with a fresh serial number,
 * the 0xC0FFEE magic value and wasClosed cleared.
 *
 * ctx is the context being "cloned"; it is only read for the log message,
 * none of its fields are copied into the new object.
 *
 * Returns the new context (allocated with malloc; this function does not
 * free it), or NULL when the allocation fails. No serial number is consumed
 * in the failure case.
 */
static TSCCContext *TSCC_clone(TSCCContext *ctx)
{
    TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext));

    if(newCtx == NULL) {
        /* Do not touch the fields of a failed allocation; the caller reports it. */
        return NULL;
    }

    newCtx->serial = TSCC_nextSerial();
    newCtx->wasClosed = 0;
    newCtx->magic = 0xC0FFEE;

    log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial);

    return newCtx;
}
1365 
1366 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * From-Unicode callback used by the safe-clone callback test.
 *
 * context   - the TSCCContext registered with the converter
 * fromUArgs - callback arguments; only fromUArgs->converter is used
 * codeUnits, length, codePoint - unused
 * reason    - why the callback was invoked
 * err       - receives an error code when re-registering the callback on
 *             the converter fails or when no new context could be created
 *
 * Every call is logged. If the context's magic value is wrong, an error is
 * logged and nothing else happens. Otherwise:
 *  - UCNV_CLONE: a new context is created with TSCC_clone() and installed
 *    on fromUArgs->converter together with the callback function that is
 *    currently set there. If no context could be created, the converter's
 *    callback registration is left as it is and *err is set to
 *    U_MEMORY_ALLOCATION_ERROR.
 *  - UCNV_CLOSE: the context is marked as closed.
 */
static void TSCC_fromU(const void *context,
                        UConverterFromUnicodeArgs *fromUArgs,
                        const UChar* codeUnits,
                        int32_t length,
                        UChar32 codePoint,
                        UConverterCallbackReason reason,
                        UErrorCode * err)
{
    // suppress compiler warnings about unused variables
    (void)codeUnits;
    (void)length;
    (void)codePoint;

    TSCCContext *ctx = (TSCCContext*)context;
    UConverterFromUCallback junkFrom;

    log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter);

    if(ctx->magic != 0xC0FFEE) {
        log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
        return;
    }

    if(reason == UCNV_CLONE) {
        UErrorCode subErr = U_ZERO_ERROR;
        TSCCContext *newCtx;
        TSCCContext *junkCtx;
        TSCCContext **pjunkCtx = &junkCtx;

        /* "recreate" it */
        log_verbose("TSCC_fromU: cloning..\n");
        newCtx = TSCC_clone(ctx);

        if(newCtx == NULL) {
            log_err("TSCC_fromU: internal clone failed on %p\n", ctx);
            /* Installing a NULL context would make the next callback
               invocation dereference it, so stop here instead. */
            *err = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        /* now, SET it: fetch the current callback function only to hand it
           back with the new context; the old context pointer is discarded */
        ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx);
        ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);

        if(U_FAILURE(subErr)) {
            *err = subErr;
        }
    }

    if(reason == UCNV_CLOSE) {
        log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial);
        ctx->wasClosed = true;
    }
}
1418 
/*
 * To-Unicode callback used by the safe-clone callback test.
 *
 * context  - the TSCCContext registered with the converter
 * toUArgs  - callback arguments; only toUArgs->converter is used
 * codeUnits, length - unused
 * reason   - why the callback was invoked
 * err      - receives an error code when re-registering the callback on
 *            the converter fails or when no new context could be created
 *
 * Every call is logged. If the context's magic value is wrong, an error is
 * logged and nothing else happens. Otherwise:
 *  - UCNV_CLONE: a new context is created with TSCC_clone() and installed
 *    on toUArgs->converter together with the callback function that is
 *    currently set there. If no context could be created, the converter's
 *    callback registration is left as it is and *err is set to
 *    U_MEMORY_ALLOCATION_ERROR.
 *  - UCNV_CLOSE: the context is marked as closed.
 */
static void TSCC_toU(const void *context,
                        UConverterToUnicodeArgs *toUArgs,
                        const char* codeUnits,
                        int32_t length,
                        UConverterCallbackReason reason,
                        UErrorCode * err)
{
    // suppress compiler warnings about unused variables
    (void)codeUnits;
    (void)length;

    TSCCContext *ctx = (TSCCContext*)context;
    UConverterToUCallback junkTo;

    log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter);

    if(ctx->magic != 0xC0FFEE) {
        log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
        return;
    }

    if(reason == UCNV_CLONE) {
        UErrorCode subErr = U_ZERO_ERROR;
        TSCCContext *newCtx;
        TSCCContext *junkCtx;
        TSCCContext **pjunkCtx = &junkCtx;

        /* "recreate" it */
        log_verbose("TSCC_toU: cloning..\n");
        newCtx = TSCC_clone(ctx);

        if(newCtx == NULL) {
            log_err("TSCC_toU: internal clone failed on %p\n", ctx);
            /* Installing a NULL context would make the next callback
               invocation dereference it, so stop here instead. */
            *err = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        /* now, SET it: fetch the current callback function only to hand it
           back with the new context; the old context pointer is discarded */
        ucnv_getToUCallBack(toUArgs->converter, &junkTo, (const void**)pjunkCtx);
        ucnv_setToUCallBack(toUArgs->converter, junkTo, newCtx, NULL, NULL, &subErr);

        if(U_FAILURE(subErr)) {
            *err = subErr;
        }
    }

    if(reason == UCNV_CLOSE) {
        log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial);
        ctx->wasClosed = true;
    }
}
1468 
/*
 * Puts the caller-provided context q into its initial state: not closed,
 * a newly minted serial number, and the 0xC0FFEE magic value.
 */
static void TSCC_init(TSCCContext *q)
{
    q->wasClosed = 0;
    q->serial = TSCC_nextSerial();
    q->magic = 0xC0FFEE;
}
1475 
/*
 * Logs the state of a context under the label `name`.
 * A NULL context is only mentioned in the verbose log; a context whose
 * magic is not 0xC0FFEE is additionally reported as an error.
 */
static void TSCC_print_log(TSCCContext *q, const char *name)
{
    if(q == NULL) {
        log_verbose("TSCContext: %s is NULL!!\n", name);
        return;
    }

    if(q->magic != 0xC0FFEE) {
        log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
                q,q->serial, q->magic);
    }

    const char *state = q->wasClosed ? "CLOSED" : "open";
    log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
                q, q->serial, name, q->magic, state);
}
1489 
TestConvertSafeCloneCallback(void)1490 static void TestConvertSafeCloneCallback(void)
1491 {
1492     UErrorCode err = U_ZERO_ERROR;
1493     TSCCContext from1, to1;
1494     TSCCContext *from2, *from3, *to2, *to3;
1495     TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3;
1496     char hunk[8192];
1497     int32_t hunkSize = 8192;
1498     UConverterFromUCallback junkFrom;
1499     UConverterToUCallback junkTo;
1500     UConverter *conv1, *conv2 = NULL;
1501 
1502     conv1 = ucnv_open("iso-8859-3", &err);
1503 
1504     if(U_FAILURE(err)) {
1505         log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err));
1506         return;
1507     }
1508 
1509     log_verbose("Opened conv1=%p\n", conv1);
1510 
1511     TSCC_init(&from1);
1512     TSCC_init(&to1);
1513 
1514     TSCC_print_log(&from1, "from1");
1515     TSCC_print_log(&to1, "to1");
1516 
1517     ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err);
1518     log_verbose("Set from1 on conv1\n");
1519     TSCC_print_log(&from1, "from1");
1520 
1521     ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err);
1522     log_verbose("Set to1 on conv1\n");
1523     TSCC_print_log(&to1, "to1");
1524 
1525     conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err);
1526     if(U_FAILURE(err)) {
1527         log_err("safeClone failed: %s\n", u_errorName(err));
1528         return;
1529     }
1530     log_verbose("Cloned to conv2=%p.\n", conv2);
1531 
1532 /**********   from *********************/
1533     ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2);
1534     ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3);
1535 
1536     TSCC_print_log(from2, "from2");
1537     TSCC_print_log(from3, "from3(==from1)");
1538 
1539     if(from2 == NULL) {
1540         log_err("FAIL! from2 is null \n");
1541         return;
1542     }
1543 
1544     if(from3 == NULL) {
1545         log_err("FAIL! from3 is null \n");
1546         return;
1547     }
1548 
1549     if(from3 != (&from1) ) {
1550         log_err("FAIL! conv1's FROM context changed!\n");
1551     }
1552 
1553     if(from2 == (&from1) ) {
1554         log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1555     }
1556 
1557     if(from1.wasClosed) {
1558         log_err("FAIL! from1 is closed \n");
1559     }
1560 
1561     if(from2->wasClosed) {
1562         log_err("FAIL! from2 was closed\n");
1563     }
1564 
1565 /**********   to *********************/
1566     ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2);
1567     ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3);
1568 
1569     TSCC_print_log(to2, "to2");
1570     TSCC_print_log(to3, "to3(==to1)");
1571 
1572     if(to2 == NULL) {
1573         log_err("FAIL! to2 is null \n");
1574         return;
1575     }
1576 
1577     if(to3 == NULL) {
1578         log_err("FAIL! to3 is null \n");
1579         return;
1580     }
1581 
1582     if(to3 != (&to1) ) {
1583         log_err("FAIL! conv1's TO context changed!\n");
1584     }
1585 
1586     if(to2 == (&to1) ) {
1587         log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1588     }
1589 
1590     if(to1.wasClosed) {
1591         log_err("FAIL! to1 is closed \n");
1592     }
1593 
1594     if(to2->wasClosed) {
1595         log_err("FAIL! to2 was closed\n");
1596     }
1597 
1598 /*************************************/
1599 
1600     ucnv_close(conv1);
1601     log_verbose("ucnv_closed (conv1)\n");
1602     TSCC_print_log(&from1, "from1");
1603     TSCC_print_log(from2, "from2");
1604     TSCC_print_log(&to1, "to1");
1605     TSCC_print_log(to2, "to2");
1606 
1607     if(from1.wasClosed == false) {
1608         log_err("FAIL! from1 is NOT closed \n");
1609     }
1610 
1611     if(from2->wasClosed) {
1612         log_err("FAIL! from2 was closed\n");
1613     }
1614 
1615     if(to1.wasClosed == false) {
1616         log_err("FAIL! to1 is NOT closed \n");
1617     }
1618 
1619     if(to2->wasClosed) {
1620         log_err("FAIL! to2 was closed\n");
1621     }
1622 
1623     ucnv_close(conv2);
1624     log_verbose("ucnv_closed (conv2)\n");
1625 
1626     TSCC_print_log(&from1, "from1");
1627     TSCC_print_log(from2, "from2");
1628 
1629     if(from1.wasClosed == false) {
1630         log_err("FAIL! from1 is NOT closed \n");
1631     }
1632 
1633     if(from2->wasClosed == false) {
1634         log_err("FAIL! from2 was NOT closed\n");
1635     }
1636 
1637     TSCC_print_log(&to1, "to1");
1638     TSCC_print_log(to2, "to2");
1639 
1640     if(to1.wasClosed == false) {
1641         log_err("FAIL! to1 is NOT closed \n");
1642     }
1643 
1644     if(to2->wasClosed == false) {
1645         log_err("FAIL! to2 was NOT closed\n");
1646     }
1647 
1648     if(to2 != (&to1)) {
1649         free(to2); /* to1 is stack based */
1650     }
1651     if(from2 != (&from1)) {
1652         free(from2); /* from1 is stack based */
1653     }
1654 }
1655 #endif
1656 
1657 static UBool
containsAnyOtherByte(uint8_t * p,int32_t length,uint8_t b)1658 containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) {
1659     while(length>0) {
1660         if(*p!=b) {
1661             return true;
1662         }
1663         ++p;
1664         --length;
1665     }
1666     return false;
1667 }
1668 
TestConvertSafeClone(void)1669 static void TestConvertSafeClone(void)
1670 {
1671     /* one 'regular' & all the 'private stateful' converters */
1672     static const char *const names[] = {
1673 #if !UCONFIG_NO_LEGACY_CONVERSION
1674         "ibm-1047",
1675         "ISO_2022,locale=zh,version=1",
1676 #endif
1677         "SCSU",
1678 #if !UCONFIG_NO_LEGACY_CONVERSION
1679         "HZ",
1680         "lmbcs",
1681         "ISCII,version=0",
1682         "ISO_2022,locale=kr,version=1",
1683         "ISO_2022,locale=jp,version=2",
1684 #endif
1685         "BOCU-1",
1686         "UTF-7",
1687 #if !UCONFIG_NO_LEGACY_CONVERSION
1688         "IMAP-mailbox-name",
1689         "ibm-1047-s390"
1690 #else
1691         "IMAP=mailbox-name"
1692 #endif
1693     };
1694 
1695     /* store the actual sizes of each converter */
1696     int32_t actualSizes[UPRV_LENGTHOF(names)];
1697 
1698     static const int32_t bufferSizes[] = {
1699         U_CNV_SAFECLONE_BUFFERSIZE,
1700         (int32_t)(3*sizeof(UConverter))/2,  /* 1.5*sizeof(UConverter) */
1701         (int32_t)sizeof(UConverter)/2       /* 0.5*sizeof(UConverter) */
1702     };
1703 
1704     char charBuffer[21];   /* Leave at an odd number for alignment testing */
1705     uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE];
1706     int32_t bufferSize, maxBufferSize;
1707     const char *maxName;
1708     UConverter * cnv, *cnv2;
1709     UErrorCode err;
1710 
1711     char *pCharBuffer;
1712     const char *pConstCharBuffer;
1713     const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer);
1714     UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1715     UChar uniCharBuffer[20];
1716     char  charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
1717     const char *pCharSource = charSourceBuffer;
1718     const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
1719     UChar *pUCharTarget = uniCharBuffer;
1720     UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer);
1721     const UChar * pUniBuffer;
1722     const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer);
1723     int32_t idx, j;
1724 
1725     err = U_ZERO_ERROR;
1726     cnv = ucnv_open(names[0], &err);
1727     if(U_SUCCESS(err)) {
1728         /* Check the various error & informational states: */
1729 
1730         /* Null status - just returns NULL */
1731         bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1732         if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL))
1733         {
1734             log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1735         }
1736         /* error status - should return 0 & keep error the same */
1737         err = U_MEMORY_ALLOCATION_ERROR;
1738         if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
1739         {
1740             log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1741         }
1742         err = U_ZERO_ERROR;
1743 
1744         /* Null buffer size pointer is ok */
1745         if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err))
1746         {
1747             log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1748         }
1749         ucnv_close(cnv2);
1750         err = U_ZERO_ERROR;
1751 
1752         /* buffer size pointer is 0 - fill in pbufferSize with a size */
1753         bufferSize = 0;
1754         if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
1755         {
1756             log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1757         }
1758         /* Verify our define is large enough  */
1759         if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
1760         {
1761             log_err("FAIL: Pre-calculated buffer size is too small\n");
1762         }
1763         /* Verify we can use this run-time calculated size */
1764         if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err))
1765         {
1766             log_err("FAIL: Converter can't be cloned with run-time size\n");
1767         }
1768         if (cnv2) {
1769             ucnv_close(cnv2);
1770         }
1771 
1772         /* size one byte too small - should allocate & let us know */
1773         --bufferSize;
1774         if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1775         {
1776             log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1777         }
1778         if (cnv2) {
1779             ucnv_close(cnv2);
1780         }
1781 
1782         err = U_ZERO_ERROR;
1783         bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1784 
1785         /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */
1786         if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1787         {
1788             log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1789         }
1790         if (cnv2) {
1791             ucnv_close(cnv2);
1792         }
1793 
1794         err = U_ZERO_ERROR;
1795 
1796         /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1797         if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1798         {
1799             log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1800         }
1801 
1802         ucnv_close(cnv);
1803     }
1804 
1805     maxBufferSize = 0;
1806     maxName = "";
1807 
1808     /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1809 
1810     for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) {
1811         for (idx = 0; idx < UPRV_LENGTHOF(names); idx++)
1812         {
1813             err = U_ZERO_ERROR;
1814             cnv = ucnv_open(names[idx], &err);
1815             if(U_FAILURE(err)) {
1816                 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
1817                 continue;
1818             }
1819 
1820             if(j == 0) {
1821                 /* preflight to get maxBufferSize */
1822                 actualSizes[idx] = 0;
1823                 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err);
1824                 if(actualSizes[idx] > maxBufferSize) {
1825                     maxBufferSize = actualSizes[idx];
1826                     maxName = names[idx];
1827                 }
1828             }
1829 
1830             memset(buffer, 0xaa, sizeof(buffer));
1831 
1832             bufferSize = bufferSizes[j];
1833             cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err);
1834 
1835             /* close the original immediately to make sure that the clone works by itself */
1836             ucnv_close(cnv);
1837 
1838             if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)alignof(UConverter)) &&
1839                 err == U_SAFECLONE_ALLOCATED_WARNING
1840             ) {
1841                 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]);
1842             }
1843 
1844             /* check if the clone function overwrote any bytes that it is not supposed to touch */
1845             if(bufferSize <= bufferSizes[j]) {
1846                 /* used the stack buffer */
1847                 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) ||
1848                     containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa)
1849                 ) {
1850                     log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1851                         names[idx], bufferSize, bufferSizes[j]);
1852                 }
1853             } else {
1854                 /* heap-allocated the clone */
1855                 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) {
1856                     log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1857                         names[idx], bufferSize, bufferSizes[j]);
1858                 }
1859             }
1860 
1861             pCharBuffer = charBuffer;
1862             pUniBuffer = uniBuffer;
1863 
1864             ucnv_fromUnicode(cnv2,
1865                             &pCharBuffer,
1866                             charBufferLimit,
1867                             &pUniBuffer,
1868                             uniBufferLimit,
1869                             NULL,
1870                             true,
1871                             &err);
1872             if(U_FAILURE(err)){
1873                 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
1874             }
1875             ucnv_toUnicode(cnv2,
1876                            &pUCharTarget,
1877                            pUCharTargetLimit,
1878                            &pCharSource,
1879                            pCharSourceLimit,
1880                            NULL,
1881                            true,
1882                            &err
1883                            );
1884 
1885             if(U_FAILURE(err)){
1886                 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
1887             }
1888 
1889             pConstCharBuffer = charBuffer;
1890             if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
1891             {
1892                 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
1893             }
1894             ucnv_close(cnv2);
1895         }
1896     }
1897 
1898     log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu  max preflighted clone size=%d (%s)  U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1899         sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1900     if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) {
1901         log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1902             maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1903     }
1904 }
1905 
1906 
TestConvertClone(void)1907 static void TestConvertClone(void)
1908 {
1909     /* one 'regular' & all the 'private stateful' converters */
1910     static const char *const names[] = {
1911 #if !UCONFIG_NO_LEGACY_CONVERSION
1912         "ibm-1047",
1913         "ISO_2022,locale=zh,version=1",
1914 #endif
1915         "SCSU",
1916 #if !UCONFIG_NO_LEGACY_CONVERSION
1917         "HZ",
1918         "lmbcs",
1919         "ISCII,version=0",
1920         "ISO_2022,locale=kr,version=1",
1921         "ISO_2022,locale=jp,version=2",
1922 #endif
1923         "BOCU-1",
1924         "UTF-7",
1925 #if !UCONFIG_NO_LEGACY_CONVERSION
1926         "IMAP-mailbox-name",
1927         "ibm-1047-s390"
1928 #else
1929         "IMAP=mailbox-name"
1930 #endif
1931     };
1932 
1933     char charBuffer[21];   /* Leave at an odd number for alignment testing */
1934     UConverter * cnv, *cnv2;
1935     UErrorCode err;
1936 
1937     char *pCharBuffer;
1938     const char *pConstCharBuffer;
1939     const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer);
1940     UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1941     UChar uniCharBuffer[20];
1942     char  charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
1943     const char *pCharSource = charSourceBuffer;
1944     const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
1945     UChar *pUCharTarget = uniCharBuffer;
1946     UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer);
1947     const UChar * pUniBuffer;
1948     const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer);
1949     int32_t idx;
1950 
1951     err = U_ZERO_ERROR;
1952     cnv = ucnv_open(names[0], &err);
1953     if(U_SUCCESS(err)) {
1954         /* Check the various error & informational states: */
1955 
1956         /* Null status - just returns NULL */
1957         if (NULL != ucnv_clone(cnv, NULL))
1958         {
1959             log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1960         }
1961         /* error status - should return 0 & keep error the same */
1962         err = U_MEMORY_ALLOCATION_ERROR;
1963         if (NULL != ucnv_clone(cnv, &err) || err != U_MEMORY_ALLOCATION_ERROR)
1964         {
1965             log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1966         }
1967         err = U_ZERO_ERROR;
1968 
1969         /* Null buffer size pointer is ok */
1970         if (NULL == (cnv2 = ucnv_clone(cnv, &err)) || U_FAILURE(err))
1971         {
1972             log_err("FAIL: Failed to clone.\n");
1973         }
1974         ucnv_close(cnv2);
1975         err = U_ZERO_ERROR;
1976 
1977         /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1978         if (NULL != ucnv_clone(NULL, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1979         {
1980             log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1981         }
1982 
1983         ucnv_close(cnv);
1984     }
1985 
1986     /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1987     for (idx = 0; idx < UPRV_LENGTHOF(names); idx++)
1988     {
1989         err = U_ZERO_ERROR;
1990         cnv = ucnv_open(names[idx], &err);
1991         if(U_FAILURE(err)) {
1992             log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
1993             continue;
1994         }
1995 
1996         cnv2 = ucnv_clone(cnv, &err);
1997 
1998         /* close the original immediately to make sure that the clone works by itself */
1999         ucnv_close(cnv);
2000 
2001         pCharBuffer = charBuffer;
2002         pUniBuffer = uniBuffer;
2003 
2004         ucnv_fromUnicode(cnv2,
2005                         &pCharBuffer,
2006                         charBufferLimit,
2007                         &pUniBuffer,
2008                         uniBufferLimit,
2009                         NULL,
2010                         true,
2011                         &err);
2012         if(U_FAILURE(err)){
2013             log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
2014         }
2015         ucnv_toUnicode(cnv2,
2016                         &pUCharTarget,
2017                         pUCharTargetLimit,
2018                         &pCharSource,
2019                         pCharSourceLimit,
2020                         NULL,
2021                         true,
2022                         &err
2023                         );
2024 
2025         if(U_FAILURE(err)){
2026             log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
2027         }
2028 
2029         pConstCharBuffer = charBuffer;
2030         if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
2031         {
2032             log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
2033         }
2034         ucnv_close(cnv2);
2035     }
2036 }
2037 
TestCCSID(void)2038 static void TestCCSID(void) {
2039 #if !UCONFIG_NO_LEGACY_CONVERSION
2040     UConverter *cnv;
2041     UErrorCode errorCode;
2042     int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
2043     int32_t i, ccsid;
2044 
2045     for(i=0; i<UPRV_LENGTHOF(ccsids); ++i) {
2046         ccsid=ccsids[i];
2047 
2048         errorCode=U_ZERO_ERROR;
2049         cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode);
2050         if(U_FAILURE(errorCode)) {
2051         log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode));
2052             continue;
2053         }
2054 
2055         if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) {
2056             log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode));
2057         }
2058 
2059         /* skip gb18030(ccsid 1392) */
2060         if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) {
2061             log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode));
2062         }
2063 
2064         ucnv_close(cnv);
2065     }
2066 #endif
2067 }
2068 
2069 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
2070 
2071 /* CHUNK_SIZE defined in common\ucnv.c: */
2072 #define CHUNK_SIZE 1024
2073 
2074 static void bug1(void);
2075 static void bug2(void);
2076 static void bug3(void);
2077 
2078 static void
TestJ932(void)2079 TestJ932(void)
2080 {
2081    bug1(); /* Unicode intermediate buffer straddle bug */
2082    bug2(); /* pre-flighting size incorrect caused by simple overflow */
2083    bug3(); /* pre-flighting size incorrect caused by expansion overflow */
2084 }
2085 
2086 /*
2087  * jitterbug 932: test chunking boundary conditions in
2088 
2089     int32_t  ucnv_convert(const char *toConverterName,
2090                           const char *fromConverterName,
2091                           char *target,
2092                           int32_t targetSize,
2093                           const char *source,
2094                           int32_t sourceSize,
2095                           UErrorCode * err)
2096 
2097  * See discussions on the icu mailing list in
2098  * 2001-April with the subject "converter 'flush' question".
2099  *
2100  * Bug report and test code provided by Edward J. Batutis.
2101  */
bug1(void)2102 static void bug1(void)
2103 {
2104 #if !UCONFIG_NO_LEGACY_CONVERSION
2105    char char_in[CHUNK_SIZE+32];
2106    char char_out[CHUNK_SIZE*2];
2107 
2108    /* GB 18030 equivalent of U+10000 is 90308130 */
2109    static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 };
2110 
2111    UErrorCode err = U_ZERO_ERROR;
2112    int32_t i, test_seq_len = sizeof(test_seq);
2113 
2114    /*
2115     * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
2116     * until the straddle bug appears. I didn't want to hard-code everything so this test could
2117     * be expanded - however this is the only type of straddle bug I can think of at the moment -
2118     * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
2119     * other Unicode sequences cause a bug since combining sequences are not supported by the
2120     * converters.
2121     */
2122 
2123    for (i = test_seq_len; i >= 0; i--) {
2124       /* put character sequence into input buffer */
2125       memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */
2126       memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len);
2127 
2128       /* do the conversion */
2129       ucnv_convert("us-ascii", /* out */
2130                    "gb18030",  /* in */
2131                    char_out,
2132                    sizeof(char_out),
2133                    char_in,
2134                    sizeof(char_in),
2135                    &err);
2136 
2137       /* bug1: */
2138       if (err == U_TRUNCATED_CHAR_FOUND) {
2139          /* this happens when surrogate pair straddles the intermediate buffer in
2140           * T_UConverter_fromCodepageToCodepage */
2141          log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
2142       }
2143    }
2144 #endif
2145 }
2146 
2147 /* bug2: pre-flighting loop bug: simple overflow causes bug */
bug2(void)2148 static void bug2(void)
2149 {
2150     /* US-ASCII "1234567890" */
2151     static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
2152 #if !UCONFIG_ONLY_HTML_CONVERSION
2153     static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2154     static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30,
2155                                       0x00, 0x00, 0x00, 0x31,
2156                                       0x00, 0x00, 0x00, 0x32,
2157                                       0x00, 0x00, 0x00, 0x33,
2158                                       0x00, 0x00, 0x00, 0x34,
2159                                       0x00, 0x00, 0x00, 0x35,
2160                                       0x00, 0x00, 0x00, 0x36,
2161                                       0x00, 0x00, 0x00, 0x37,
2162                                       0x00, 0x00, 0x00, 0x38,
2163                                       0x00, 0x00, (char)0xf0, 0x00};
2164 #endif
2165 
2166     static char target[5];
2167 
2168     UErrorCode err = U_ZERO_ERROR;
2169     int32_t size;
2170 
2171     /* do the conversion */
2172     size = ucnv_convert("iso-8859-1", /* out */
2173                         "us-ascii",  /* in */
2174                         target,
2175                         sizeof(target),
2176                         source,
2177                         sizeof(source),
2178                         &err);
2179 
2180     if ( size != 10 ) {
2181         /* bug2: size is 5, should be 10 */
2182         log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size);
2183     }
2184 
2185 #if !UCONFIG_ONLY_HTML_CONVERSION
2186     err = U_ZERO_ERROR;
2187     /* do the conversion */
2188     size = ucnv_convert("UTF-32BE", /* out */
2189                         "UTF-8",  /* in */
2190                         target,
2191                         sizeof(target),
2192                         sourceUTF8,
2193                         sizeof(sourceUTF8),
2194                         &err);
2195 
2196     if ( size != 32 ) {
2197         /* bug2: size is 5, should be 32 */
2198         log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size);
2199     }
2200 
2201     err = U_ZERO_ERROR;
2202     /* do the conversion */
2203     size = ucnv_convert("UTF-8", /* out */
2204                         "UTF-32BE",  /* in */
2205                         target,
2206                         sizeof(target),
2207                         sourceUTF32,
2208                         sizeof(sourceUTF32),
2209                         &err);
2210 
2211     if ( size != 12 ) {
2212         /* bug2: size is 5, should be 12 */
2213         log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size);
2214     }
2215 #endif
2216 }
2217 
2218 /*
2219  * bug3: when the characters expand going from source to target codepage
2220  *       you get bug3 in addition to bug2
2221  */
bug3(void)2222 static void bug3(void)
2223 {
2224 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
2225     char char_in[CHUNK_SIZE*4];
2226     char target[5];
2227     UErrorCode err = U_ZERO_ERROR;
2228     int32_t size;
2229 
2230     /*
2231      * first get the buggy size from bug2 then
2232      * compare it to buggy size with an expansion
2233      */
2234     memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */
2235 
2236     /* do the conversion */
2237     size = ucnv_convert("lmbcs",     /* out */
2238                         "us-ascii",  /* in */
2239                         target,
2240                         sizeof(target),
2241                         char_in,
2242                         sizeof(char_in),
2243                         &err);
2244 
2245     if ( size != sizeof(char_in) ) {
2246         /*
2247          * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2248          * in the converter?), should be CHUNK_SIZE*4
2249          *
2250          * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2251          */
2252         log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size);
2253     }
2254 
2255     /*
2256      * now do the conversion with expansion
2257      * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2258      */
2259     memset(char_in, 8, sizeof(char_in));
2260     err = U_ZERO_ERROR;
2261 
2262     /* do the conversion */
2263     size = ucnv_convert("lmbcs", /* out */
2264                         "us-ascii",  /* in */
2265                         target,
2266                         sizeof(target),
2267                         char_in,
2268                         sizeof(char_in),
2269                         &err);
2270 
2271     /* expect 2X expansion */
2272     if ( size != sizeof(char_in) * 2 ) {
2273         /*
2274          * bug3:
2275          * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2276          */
2277         log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size);
2278     }
2279 #endif
2280 }
2281 
2282 static void
convertExStreaming(UConverter * srcCnv,UConverter * targetCnv,const char * src,int32_t srcLength,const char * expectTarget,int32_t expectTargetLength,int32_t chunkSize,const char * testName,UErrorCode expectCode)2283 convertExStreaming(UConverter *srcCnv, UConverter *targetCnv,
2284                    const char *src, int32_t srcLength,
2285                    const char *expectTarget, int32_t expectTargetLength,
2286                    int32_t chunkSize,
2287                    const char *testName,
2288                    UErrorCode expectCode) {
2289     UChar pivotBuffer[CHUNK_SIZE];
2290     UChar *pivotSource, *pivotTarget;
2291     const UChar *pivotLimit;
2292 
2293     char targetBuffer[CHUNK_SIZE];
2294     char *target;
2295     const char *srcLimit, *finalSrcLimit, *targetLimit;
2296 
2297     int32_t targetLength;
2298 
2299     UBool flush;
2300 
2301     UErrorCode errorCode;
2302 
2303     /* setup */
2304     if(chunkSize>CHUNK_SIZE) {
2305         chunkSize=CHUNK_SIZE;
2306     }
2307 
2308     pivotSource=pivotTarget=pivotBuffer;
2309     pivotLimit=pivotBuffer+chunkSize;
2310 
2311     finalSrcLimit=src+srcLength;
2312     target=targetBuffer;
2313     targetLimit=targetBuffer+chunkSize;
2314 
2315     ucnv_resetToUnicode(srcCnv);
2316     ucnv_resetFromUnicode(targetCnv);
2317 
2318     errorCode=U_ZERO_ERROR;
2319     flush=false;
2320 
2321     /* convert, streaming-style (both converters and pivot keep state) */
2322     for(;;) {
2323         /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2324         if(src+chunkSize<=finalSrcLimit) {
2325             srcLimit=src+chunkSize;
2326         } else {
2327             srcLimit=finalSrcLimit;
2328         }
2329         ucnv_convertEx(targetCnv, srcCnv,
2330                        &target, targetLimit,
2331                        &src, srcLimit,
2332                        pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
2333                        false, flush, &errorCode);
2334         targetLength=(int32_t)(target-targetBuffer);
2335         if(target>targetLimit) {
2336             log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2337                     testName, chunkSize, target, targetLimit);
2338             break; /* TODO: major problem! */
2339         }
2340         if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
2341             /* continue converting another chunk */
2342             errorCode=U_ZERO_ERROR;
2343             if(targetLength+chunkSize<=(int32_t)sizeof(targetBuffer)) {
2344                 targetLimit=target+chunkSize;
2345             } else {
2346                 targetLimit=targetBuffer+(int32_t)sizeof(targetBuffer);
2347             }
2348         } else if(U_FAILURE(errorCode)) {
2349             /* failure */
2350             break;
2351         } else if(flush) {
2352             /* all done */
2353             break;
2354         } else if(src==finalSrcLimit && pivotSource==pivotTarget) {
2355             /* all consumed, now flush without input (separate from conversion for testing) */
2356             flush=true;
2357         }
2358     }
2359 
2360     if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) {
2361         log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2362                 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode));
2363     } else if(targetLength!=expectTargetLength) {
2364         log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2365                 testName, chunkSize, targetLength, expectTargetLength);
2366     } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) {
2367         log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2368                 testName, chunkSize);
2369     }
2370 }
2371 
2372 static void
convertExMultiStreaming(UConverter * srcCnv,UConverter * targetCnv,const char * src,int32_t srcLength,const char * expectTarget,int32_t expectTargetLength,const char * testName,UErrorCode expectCode)2373 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv,
2374                         const char *src, int32_t srcLength,
2375                         const char *expectTarget, int32_t expectTargetLength,
2376                         const char *testName,
2377                         UErrorCode expectCode) {
2378     convertExStreaming(srcCnv, targetCnv,
2379                        src, srcLength,
2380                        expectTarget, expectTargetLength,
2381                        1, testName, expectCode);
2382     convertExStreaming(srcCnv, targetCnv,
2383                        src, srcLength,
2384                        expectTarget, expectTargetLength,
2385                        3, testName, expectCode);
2386     convertExStreaming(srcCnv, targetCnv,
2387                        src, srcLength,
2388                        expectTarget, expectTargetLength,
2389                        7, testName, expectCode);
2390 }
2391 
TestConvertEx(void)2392 static void TestConvertEx(void) {
2393 #if !UCONFIG_NO_LEGACY_CONVERSION
2394     static const uint8_t
2395     utf8[]={
2396         /* 4e00           30a1              ff61              0410 */
2397         0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2398     },
2399     shiftJIS[]={
2400         0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2401     },
2402     errorTarget[]={
2403         /*
2404          * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2405          * SUB, SUB, 0x40, SUB, SUB, 0x40
2406          */
2407         0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2408     };
2409 
2410     char srcBuffer[100], targetBuffer[100];
2411 
2412     const char *src;
2413     char *target;
2414 
2415     UChar pivotBuffer[100];
2416     UChar *pivotSource, *pivotTarget;
2417 
2418     UConverter *cnv1, *cnv2;
2419     UErrorCode errorCode;
2420 
2421     errorCode=U_ZERO_ERROR;
2422     cnv1=ucnv_open("UTF-8", &errorCode);
2423     if(U_FAILURE(errorCode)) {
2424         log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode));
2425         return;
2426     }
2427 
2428     cnv2=ucnv_open("Shift-JIS", &errorCode);
2429     if(U_FAILURE(errorCode)) {
2430         log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2431         ucnv_close(cnv1);
2432         return;
2433     }
2434 
2435     /* test ucnv_convertEx() with streaming conversion style */
2436     convertExMultiStreaming(cnv1, cnv2,
2437         (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS),
2438         "UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2439 
2440     convertExMultiStreaming(cnv2, cnv1,
2441         (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8),
2442         "Shift-JIS -> UTF-8", U_ZERO_ERROR);
2443 
2444     /* U_ZERO_ERROR because by default the SUB callbacks are set */
2445     convertExMultiStreaming(cnv1, cnv2,
2446         (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget),
2447         "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2448 
2449     /* test some simple conversions */
2450 
2451     /* NUL-terminated source and target */
2452     errorCode=U_STRING_NOT_TERMINATED_WARNING;
2453     memcpy(srcBuffer, utf8, sizeof(utf8));
2454     srcBuffer[sizeof(utf8)]=0;
2455     src=srcBuffer;
2456     target=targetBuffer;
2457     ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2458                    NULL, NULL, NULL, NULL, true, true, &errorCode);
2459     if( errorCode!=U_ZERO_ERROR ||
2460         target-targetBuffer!=sizeof(shiftJIS) ||
2461         *target!=0 ||
2462         memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2463     ) {
2464         log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2465                 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS));
2466     }
2467 
2468     /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2469     errorCode=U_AMBIGUOUS_ALIAS_WARNING;
2470     memset(targetBuffer, 0xff, sizeof(targetBuffer));
2471     src=srcBuffer;
2472     target=targetBuffer;
2473     ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL,
2474                    NULL, NULL, NULL, NULL, true, true, &errorCode);
2475     if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2476         target-targetBuffer!=sizeof(shiftJIS) ||
2477         *target!=(char)0xff ||
2478         memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2479     ) {
2480         log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2481                 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS));
2482     }
2483 
2484     /* bad arguments */
2485     errorCode=U_MESSAGE_PARSE_ERROR;
2486     src=srcBuffer;
2487     target=targetBuffer;
2488     ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2489                    NULL, NULL, NULL, NULL, true, true, &errorCode);
2490     if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2491         log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
2492     }
2493 
2494     /* pivotLimit==pivotStart */
2495     errorCode=U_ZERO_ERROR;
2496     pivotSource=pivotTarget=pivotBuffer;
2497     ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2498                    pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, true, true, &errorCode);
2499     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2500         log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode));
2501     }
2502 
2503     /* *pivotSource==NULL */
2504     errorCode=U_ZERO_ERROR;
2505     pivotSource=NULL;
2506     ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2507                    pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, true, true, &errorCode);
2508     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2509         log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode));
2510     }
2511 
2512     /* *source==NULL */
2513     errorCode=U_ZERO_ERROR;
2514     src=NULL;
2515     pivotSource=pivotBuffer;
2516     ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2517                    pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, true, true, &errorCode);
2518     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2519         log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode));
2520     }
2521 
2522     /* streaming conversion without a pivot buffer */
2523     errorCode=U_ZERO_ERROR;
2524     src=srcBuffer;
2525     pivotSource=pivotBuffer;
2526     ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2527                    NULL, &pivotSource, &pivotTarget, pivotBuffer+1, true, false, &errorCode);
2528     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2529         log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode));
2530     }
2531 
2532     ucnv_close(cnv1);
2533     ucnv_close(cnv2);
2534 #endif
2535 }
2536 
/*
 * Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8().
 *
 * Each entry is one NUL-terminated byte sequence that is not well-formed
 * UTF-8. The order of the entries matters: failure messages report the index.
 */
static const char *const badUTF8[]={
    /* a lone trail byte */
    "\x80",

    /* incomplete sequences: one byte */
    "\xd0", "\xe0", "\xe1", "\xed", "\xee",
    "\xf0", "\xf1", "\xf4", "\xf8", "\xfc",

    /* incomplete sequences: two bytes */
    "\xe0\x80", "\xe0\xa0", "\xe1\x80", "\xed\x80", "\xed\xa0", "\xee\x80",
    "\xf0\x80", "\xf0\x90", "\xf1\x80", "\xf4\x80", "\xf4\x90",
    "\xf8\x80", "\xfc\x80",

    /* incomplete sequences: three bytes */
    "\xf0\x80\x80", "\xf0\x90\x80", "\xf1\x80\x80", "\xf4\x80\x80", "\xf4\x90\x80",
    "\xf8\x80\x80", "\xfc\x80\x80",

    /* incomplete sequences: four bytes */
    "\xf8\x80\x80\x80", "\xfc\x80\x80\x80",

    /* incomplete sequences: five bytes */
    "\xfc\x80\x80\x80\x80",

    /* complete sequences but non-shortest forms or out of range etc. */
    "\xc0\x80",
    "\xe0\x80\x80",
    "\xed\xa0\x80",
    "\xf0\x80\x80\x80",
    "\xf4\x90\x80\x80",
    "\xf8\x80\x80\x80\x80",
    "\xfc\x80\x80\x80\x80\x80",
    "\xfe",
    "\xff"
};
2592 
2593 #define ARG_CHAR_ARR_SIZE 8
2594 
2595 /* get some character that can be converted and convert it */
getTestChar(UConverter * cnv,const char * converterName,char charUTF8[4],int32_t * pCharUTF8Length,char char0[ARG_CHAR_ARR_SIZE],int32_t * pChar0Length,char char1[ARG_CHAR_ARR_SIZE],int32_t * pChar1Length)2596 static UBool getTestChar(UConverter *cnv, const char *converterName,
2597                          char charUTF8[4], int32_t *pCharUTF8Length,
2598                          char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length,
2599                          char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) {
2600     UChar utf16[U16_MAX_LENGTH];
2601     int32_t utf16Length;
2602 
2603     const UChar *utf16Source;
2604     char *target;
2605 
2606     USet *set;
2607     UChar32 c;
2608     UErrorCode errorCode;
2609 
2610     errorCode=U_ZERO_ERROR;
2611     set=uset_open(1, 0);
2612     ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2613     c=uset_charAt(set, uset_size(set)/2);
2614     uset_close(set);
2615 
2616     utf16Length=0;
2617     U16_APPEND_UNSAFE(utf16, utf16Length, c);
2618     *pCharUTF8Length=0;
2619     U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c);
2620 
2621     utf16Source=utf16;
2622     target=char0;
2623     ucnv_fromUnicode(cnv,
2624                      &target, char0+ARG_CHAR_ARR_SIZE,
2625                      &utf16Source, utf16+utf16Length,
2626                      NULL, false, &errorCode);
2627     *pChar0Length=(int32_t)(target-char0);
2628 
2629     utf16Source=utf16;
2630     target=char1;
2631     ucnv_fromUnicode(cnv,
2632                      &target, char1+ARG_CHAR_ARR_SIZE,
2633                      &utf16Source, utf16+utf16Length,
2634                      NULL, false, &errorCode);
2635     *pChar1Length=(int32_t)(target-char1);
2636 
2637     if(U_FAILURE(errorCode)) {
2638         log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode));
2639         return false;
2640     }
2641     return true;
2642 }
2643 
isOneTruncatedUTF8(const char * s,int32_t length)2644 static UBool isOneTruncatedUTF8(const char *s, int32_t length) {
2645     if(length==0) {
2646         return false;
2647     } else if(length==1) {
2648         return U8_IS_LEAD(s[0]);
2649     } else {
2650         int32_t count=U8_COUNT_TRAIL_BYTES(s[0]);
2651         if(length<=count) {
2652             // 2 or more bytes, but fewer than the lead byte indicates.
2653             int32_t oneLength=0;
2654             U8_FWD_1(s, oneLength, length);
2655             // Truncated if we reach the end of the string.
2656             // Not true if the lead byte and first trail byte do not start a valid sequence,
2657             // e.g., E0 80 -> oneLength=1.
2658             return oneLength==length;
2659         }
2660         return false;
2661     }
2662 }
2663 
/*
 * For every badUTF8[] entry that isOneTruncatedUTF8() accepts, converts
 * "<test character><truncated sequence>" from UTF-8 to cnv's charset with
 * ucnv_convertEx() (reset and flush set, STOP callback on utf8Cnv).
 * Expects U_TRUNCATED_CHAR_FOUND with pivotSource still at the start of the
 * pivot buffer, and expects ucnv_getInvalidChars(utf8Cnv) to return exactly
 * the truncated bytes. Deviations are reported with log_err().
 */
static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
                                  char charUTF8[4], int32_t charUTF8Length,
                                  char char0[8], int32_t char0Length,
                                  char char1[8], int32_t char1Length) {
    char input[16];
    char invalidChars[8];
    UChar pivotBuffer[8];
    UErrorCode errorCode=U_ZERO_ERROR;
    int32_t i;

    // suppress compiler warnings about unused variables
    (void)char0;
    (void)char0Length;
    (void)char1;
    (void)char1Length;

    /* test truncated sequences */
    ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);

    /* the test character is the common prefix of every input string */
    memcpy(input, charUTF8, charUTF8Length);

    for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) {
        const char *bad=badUTF8[i];
        int32_t badLength=(int32_t)strlen(bad);
        int32_t inputLength;
        char output[16];
        const char *source;
        char *target;
        UChar *pivotSource, *pivotTarget;
        int8_t invalidLength;

        /* only truncated sequences are of interest here */
        if(!isOneTruncatedUTF8(bad, badLength)) {
            continue;
        }

        /* append the truncated sequence after the test character */
        memcpy(input+charUTF8Length, bad, badLength);
        inputLength=charUTF8Length+badLength;

        /* convert */
        source=input;
        target=output;
        pivotSource=pivotTarget=pivotBuffer;
        errorCode=U_ZERO_ERROR;
        ucnv_convertEx(cnv, utf8Cnv,
                       &target, output+sizeof(output),
                       &source, input+inputLength,
                       pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer),
                       true, true, /* reset & flush */
                       &errorCode);

        if(errorCode==U_TRUNCATED_CHAR_FOUND && pivotSource==pivotBuffer) {
            /* check the invalidChars */
            errorCode=U_ZERO_ERROR;
            invalidLength=(int8_t)sizeof(invalidChars);
            ucnv_getInvalidChars(utf8Cnv, invalidChars, &invalidLength, &errorCode);
            if(invalidLength!=badLength || 0!=memcmp(invalidChars, bad, badLength)) {
                log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)i);
            }
        } else {
            log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(errorCode), converterName, (long)i);
        }
    }
}
2735 
/*
 * Builds one long UTF-8 input in which the test character (charUTF8) is
 * placed before, between and after all badUTF8[] sequences, and the matching
 * expected output: char0 once, followed by char1 once per badUTF8[] entry.
 * With the SKIP to-Unicode callback set on utf8Cnv, the input is then run
 * through convertExMultiStreaming() from utf8Cnv to cnv, expecting
 * U_ZERO_ERROR and exactly the expected bytes.
 * converterName is only used to label log output.
 */
static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
                            char charUTF8[4], int32_t charUTF8Length,
                            char char0[8], int32_t char0Length,
                            char char1[8], int32_t char1Length) {
    char utf8[600], expect[600];
    int32_t utf8Length, expectLength;

    char testName[32];

    UErrorCode errorCode;
    int32_t i;

    errorCode=U_ZERO_ERROR;
    ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode);

    /*
     * assemble an input string with the test character between each
     * bad sequence,
     * and an expected string with repeated test character output
     */
    memcpy(utf8, charUTF8, charUTF8Length);
    utf8Length=charUTF8Length;

    memcpy(expect, char0, char0Length);
    expectLength=char0Length;

    for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) {
        int32_t length = (int32_t)strlen(badUTF8[i]);
        memcpy(utf8+utf8Length, badUTF8[i], length);
        utf8Length+=length;

        memcpy(utf8+utf8Length, charUTF8, charUTF8Length);
        utf8Length+=charUTF8Length;

        memcpy(expect+expectLength, char1, char1Length);
        expectLength+=char1Length;
    }

    /*
     * expect that each bad UTF-8 sequence is detected and skipped
     *
     * The label is built with a bounded snprintf so that a longer converter
     * name is truncated instead of overrunning testName[].
     */
    snprintf(testName, sizeof(testName), "from bad UTF-8 to %s", converterName);

    convertExMultiStreaming(utf8Cnv, cnv,
                            utf8, utf8Length,
                            expect, expectLength,
                            testName,
                            U_ZERO_ERROR);
}
2784 
2785 /* Test illegal UTF-8 input. */
TestConvertExFromUTF8(void)2786 static void TestConvertExFromUTF8(void) {
2787     static const char *const converterNames[]={
2788 #if !UCONFIG_NO_LEGACY_CONVERSION
2789         "windows-1252",
2790         "shift-jis",
2791 #endif
2792         "us-ascii",
2793         "iso-8859-1",
2794         "utf-8"
2795     };
2796 
2797     UConverter *utf8Cnv, *cnv;
2798     UErrorCode errorCode;
2799     int32_t i;
2800 
2801     /* fromUnicode versions of some character, from initial state and later */
2802     char charUTF8[4], char0[8], char1[8];
2803     int32_t charUTF8Length, char0Length, char1Length;
2804 
2805     errorCode=U_ZERO_ERROR;
2806     utf8Cnv=ucnv_open("UTF-8", &errorCode);
2807     if(U_FAILURE(errorCode)) {
2808         log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2809         return;
2810     }
2811 
2812     for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) {
2813         errorCode=U_ZERO_ERROR;
2814         cnv=ucnv_open(converterNames[i], &errorCode);
2815         if(U_FAILURE(errorCode)) {
2816             log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode));
2817             continue;
2818         }
2819         if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) {
2820             continue;
2821         }
2822         testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2823         testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2824         ucnv_close(cnv);
2825     }
2826     ucnv_close(utf8Cnv);
2827 }
2828 
TestConvertExFromUTF8_C5F0(void)2829 static void TestConvertExFromUTF8_C5F0(void) {
2830     static const char *const converterNames[]={
2831 #if !UCONFIG_NO_LEGACY_CONVERSION
2832         "windows-1251",
2833         "shift-jis",
2834 #endif
2835         "us-ascii",
2836         "iso-8859-1",
2837         "utf-8"
2838     };
2839 
2840     UConverter *utf8Cnv, *cnv;
2841     UErrorCode errorCode;
2842     int32_t i;
2843 
2844     static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 };
2845     /* Expect "&#65533;&#65533;" (2x U+FFFD as decimal NCRs) */
2846     static const char twoNCRs[16]={
2847         0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2848         0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
2849     };
2850     static const char twoFFFD[6]={
2851         (char)0xef, (char)0xbf, (char)0xbd,
2852         (char)0xef, (char)0xbf, (char)0xbd
2853     };
2854     const char *expected;
2855     int32_t expectedLength;
2856     char dest[20];  /* longer than longest expectedLength */
2857 
2858     const char *src;
2859     char *target;
2860 
2861     UChar pivotBuffer[128];
2862     UChar *pivotSource, *pivotTarget;
2863 
2864     errorCode=U_ZERO_ERROR;
2865     utf8Cnv=ucnv_open("UTF-8", &errorCode);
2866     if(U_FAILURE(errorCode)) {
2867         log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2868         return;
2869     }
2870 
2871     for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) {
2872         errorCode=U_ZERO_ERROR;
2873         cnv=ucnv_open(converterNames[i], &errorCode);
2874         ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
2875                               NULL, NULL, &errorCode);
2876         if(U_FAILURE(errorCode)) {
2877             log_data_err("unable to open %s converter - %s\n",
2878                          converterNames[i], u_errorName(errorCode));
2879             continue;
2880         }
2881         src=bad_utf8;
2882         target=dest;
2883         uprv_memset(dest, 9, sizeof(dest));
2884         if(i==UPRV_LENGTHOF(converterNames)-1) {
2885             /* conversion to UTF-8 yields two U+FFFD directly */
2886             expected=twoFFFD;
2887             expectedLength=6;
2888         } else {
2889             /* conversion to a non-Unicode charset yields two NCRs */
2890             expected=twoNCRs;
2891             expectedLength=16;
2892         }
2893         pivotBuffer[0]=0;
2894         pivotBuffer[1]=1;
2895         pivotBuffer[2]=2;
2896         pivotSource=pivotTarget=pivotBuffer;
2897         ucnv_convertEx(
2898             cnv, utf8Cnv,
2899             &target, dest+expectedLength,
2900             &src, bad_utf8+sizeof(bad_utf8),
2901             pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer),
2902             true, true, &errorCode);
2903         if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 ||
2904             target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) ||
2905             dest[expectedLength]!=9
2906         ) {
2907             log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]);
2908         }
2909         ucnv_close(cnv);
2910     }
2911     ucnv_close(utf8Cnv);
2912 }
2913 
2914 static void
TestConvertAlgorithmic(void)2915 TestConvertAlgorithmic(void) {
2916 #if !UCONFIG_NO_LEGACY_CONVERSION
2917     static const uint8_t
2918     utf8[]={
2919         /* 4e00           30a1              ff61              0410 */
2920         0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2921     },
2922     shiftJIS[]={
2923         0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2924     },
2925   /*errorTarget[]={*/
2926         /*
2927          * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2928          * SUB, SUB, 0x40, SUB, SUB, 0x40
2929          */
2930   /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2931   /*},*/
2932     utf16[]={
2933         0xfe, 0xff /* BOM only, no text */
2934     };
2935 #if !UCONFIG_ONLY_HTML_CONVERSION
2936     static const uint8_t utf32[]={
2937         0xff, 0xfe, 0, 0 /* BOM only, no text */
2938     };
2939 #endif
2940 
2941     char target[100], utf8NUL[100], shiftJISNUL[100];
2942 
2943     UConverter *cnv;
2944     UErrorCode errorCode;
2945 
2946     int32_t length;
2947 
2948     errorCode=U_ZERO_ERROR;
2949     cnv=ucnv_open("Shift-JIS", &errorCode);
2950     if(U_FAILURE(errorCode)) {
2951         log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2952         ucnv_close(cnv);
2953         return;
2954     }
2955 
2956     memcpy(utf8NUL, utf8, sizeof(utf8));
2957     utf8NUL[sizeof(utf8)]=0;
2958     memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS));
2959     shiftJISNUL[sizeof(shiftJIS)]=0;
2960 
2961     /*
2962      * The to/from algorithmic convenience functions share a common implementation,
2963      * so we need not test all permutations of them.
2964      */
2965 
2966     /* length in, not terminated out */
2967     errorCode=U_ZERO_ERROR;
2968     length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode);
2969     if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2970         length!=sizeof(shiftJIS) ||
2971         memcmp(target, shiftJIS, length)!=0
2972     ) {
2973         log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2974                 u_errorName(errorCode), length, sizeof(shiftJIS));
2975     }
2976 
2977     /* terminated in and out */
2978     memset(target, 0x55, sizeof(target));
2979     errorCode=U_STRING_NOT_TERMINATED_WARNING;
2980     length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode);
2981     if( errorCode!=U_ZERO_ERROR ||
2982         length!=sizeof(utf8) ||
2983         memcmp(target, utf8, length)!=0
2984     ) {
2985         log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2986                 u_errorName(errorCode), length, sizeof(shiftJIS));
2987     }
2988 
2989     /* empty string, some target buffer */
2990     errorCode=U_STRING_NOT_TERMINATED_WARNING;
2991     length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode);
2992     if( errorCode!=U_ZERO_ERROR ||
2993         length!=0
2994     ) {
2995         log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2996                 u_errorName(errorCode), length);
2997     }
2998 
2999     /* pseudo-empty string, no target buffer */
3000     errorCode=U_ZERO_ERROR;
3001     length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
3002     if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
3003         length!=0
3004     ) {
3005         log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
3006                 u_errorName(errorCode), length);
3007     }
3008 
3009 #if !UCONFIG_ONLY_HTML_CONVERSION
3010     errorCode=U_ZERO_ERROR;
3011     length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode);
3012     if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
3013         length!=0
3014     ) {
3015         log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
3016                 u_errorName(errorCode), length);
3017     }
3018 #endif
3019 
3020     /* bad arguments */
3021     errorCode=U_MESSAGE_PARSE_ERROR;
3022     length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
3023     if(errorCode!=U_MESSAGE_PARSE_ERROR) {
3024         log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
3025     }
3026 
3027     /* source==NULL */
3028     errorCode=U_ZERO_ERROR;
3029     length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode);
3030     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
3031         log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode));
3032     }
3033 
3034     /* illegal alg. type */
3035     errorCode=U_ZERO_ERROR;
3036     length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode);
3037     if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
3038         log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode));
3039     }
3040 ucnv_close(cnv);
3041 #endif
3042 }
3043 
3044 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
TestLMBCSMaxChar(void)3045 static void TestLMBCSMaxChar(void) {
3046     static const struct {
3047         int8_t maxSize;
3048         const char *name;
3049     } converter[] = {
3050         /* some non-LMBCS converters - perfect test setup here */
3051         { 1, "US-ASCII"},
3052         { 1, "ISO-8859-1"},
3053 
3054         { 2, "UTF-16"},
3055         { 2, "UTF-16BE"},
3056         { 3, "UTF-8"},
3057         { 3, "CESU-8"},
3058         { 3, "SCSU"},
3059         { 4, "UTF-32"},
3060         { 4, "UTF-7"},
3061         { 4, "IMAP-mailbox-name"},
3062         { 4, "BOCU-1"},
3063 
3064         { 1, "windows-1256"},
3065         { 2, "Shift-JIS"},
3066         { 2, "ibm-16684"},
3067         { 3, "ibm-930"},
3068         { 3, "ibm-1390"},
3069         { 4, "*test3"},
3070         { 16,"*test4"},
3071 
3072         { 4, "ISCII"},
3073         { 4, "HZ"},
3074 
3075         { 3, "ISO-2022"},
3076         { 8, "ISO-2022-KR"},
3077         { 6, "ISO-2022-JP"},
3078         { 8, "ISO-2022-CN"},
3079 
3080         /* LMBCS */
3081         { 3, "LMBCS-1"},
3082         { 3, "LMBCS-2"},
3083         { 3, "LMBCS-3"},
3084         { 3, "LMBCS-4"},
3085         { 3, "LMBCS-5"},
3086         { 3, "LMBCS-6"},
3087         { 3, "LMBCS-8"},
3088         { 3, "LMBCS-11"},
3089         { 3, "LMBCS-16"},
3090         { 3, "LMBCS-17"},
3091         { 3, "LMBCS-18"},
3092         { 3, "LMBCS-19"}
3093     };
3094     int32_t idx;
3095 
3096     for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) {
3097         UErrorCode status = U_ZERO_ERROR;
3098         UConverter *cnv = cnv_open(converter[idx].name, &status);
3099         if (U_FAILURE(status)) {
3100             continue;
3101         }
3102         if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) {
3103             log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
3104                 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv));
3105         }
3106         ucnv_close(cnv);
3107     }
3108 
3109     /* mostly test that the macro compiles */
3110     if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
3111         log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
3112     }
3113 }
3114 #endif
3115 
TestJ1968(void)3116 static void TestJ1968(void) {
3117     UErrorCode err = U_ZERO_ERROR;
3118     UConverter *cnv;
3119     char myConvName[] = "My really really really really really really really really really really really"
3120                           " really really really really really really really really really really really"
3121                           " really really really really really really really really long converter name";
3122     UChar myConvNameU[sizeof(myConvName)];
3123 
3124     u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName));
3125 
3126     err = U_ZERO_ERROR;
3127     myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0;
3128     cnv = ucnv_openU(myConvNameU, &err);
3129     if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3130         log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3131     }
3132 
3133     err = U_ZERO_ERROR;
3134     myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
3135     cnv = ucnv_openU(myConvNameU, &err);
3136     if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3137         log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3138     }
3139 
3140     err = U_ZERO_ERROR;
3141     myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
3142     cnv = ucnv_openU(myConvNameU, &err);
3143     if (cnv || err != U_FILE_ACCESS_ERROR) {
3144         log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3145     }
3146 
3147 
3148 
3149 
3150     err = U_ZERO_ERROR;
3151     cnv = ucnv_open(myConvName, &err);
3152     if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3153         log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3154     }
3155 
3156     err = U_ZERO_ERROR;
3157     myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ',';
3158     cnv = ucnv_open(myConvName, &err);
3159     if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3160         log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3161     }
3162 
3163     err = U_ZERO_ERROR;
3164     myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
3165     cnv = ucnv_open(myConvName, &err);
3166     if (cnv || err != U_FILE_ACCESS_ERROR) {
3167         log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3168     }
3169 
3170     err = U_ZERO_ERROR;
3171     myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
3172     memcpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7);
3173     cnv = ucnv_open(myConvName, &err);
3174     if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3175         log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3176     }
3177 
3178     /* The comma isn't really a part of the converter name. */
3179     err = U_ZERO_ERROR;
3180     myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
3181     cnv = ucnv_open(myConvName, &err);
3182     if (cnv || err != U_FILE_ACCESS_ERROR) {
3183         log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3184     }
3185 
3186     err = U_ZERO_ERROR;
3187     myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' ';
3188     cnv = ucnv_open(myConvName, &err);
3189     if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3190         log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3191     }
3192 
3193     err = U_ZERO_ERROR;
3194     myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
3195     cnv = ucnv_open(myConvName, &err);
3196     if (cnv || err != U_FILE_ACCESS_ERROR) {
3197         log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3198     }
3199 
3200 }
3201 
3202 #if !UCONFIG_NO_LEGACY_CONVERSION
3203 static void
testSwap(const char * name,UBool swap)3204 testSwap(const char *name, UBool swap) {
3205     /*
3206      * Test Unicode text.
3207      * Contains characters that are the highest for some of the
3208      * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3209      * tables copies the entire tables.
3210      */
3211     static const UChar text[]={
3212         0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3213     };
3214 
3215     UChar uNormal[32], uSwapped[32];
3216     char normal[32], swapped[32];
3217     const UChar *pcu;
3218     UChar *pu;
3219     char *pc;
3220     int32_t i, normalLength, swappedLength;
3221     UChar u;
3222     char c;
3223 
3224     const char *swappedName;
3225     UConverter *cnv, *swapCnv;
3226     UErrorCode errorCode;
3227 
3228     /* if the swap flag is false, then the test encoding is not EBCDIC and must not swap */
3229 
3230     /* open both the normal and the LF/NL-swapping converters */
3231     strcpy(swapped, name);
3232     strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING);
3233 
3234     errorCode=U_ZERO_ERROR;
3235     swapCnv=ucnv_open(swapped, &errorCode);
3236     cnv=ucnv_open(name, &errorCode);
3237     if(U_FAILURE(errorCode)) {
3238         log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode));
3239         goto cleanup;
3240     }
3241 
3242     /* the name must contain the swap option if and only if we expect the converter to swap */
3243     swappedName=ucnv_getName(swapCnv, &errorCode);
3244     if(U_FAILURE(errorCode)) {
3245         log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode));
3246         goto cleanup;
3247     }
3248 
3249     pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING);
3250     if(swap != (pc!=NULL)) {
3251         log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap);
3252         goto cleanup;
3253     }
3254 
3255     /* convert to EBCDIC */
3256     pcu=text;
3257     pc=normal;
3258     ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, true, &errorCode);
3259     normalLength=(int32_t)(pc-normal);
3260 
3261     pcu=text;
3262     pc=swapped;
3263     ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, true, &errorCode);
3264     swappedLength=(int32_t)(pc-swapped);
3265 
3266     if(U_FAILURE(errorCode)) {
3267         log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode));
3268         goto cleanup;
3269     }
3270 
3271     /* compare EBCDIC output */
3272     if(normalLength!=swappedLength) {
3273         log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3274         goto cleanup;
3275     }
3276     for(i=0; i<normalLength; ++i) {
3277         /* swap EBCDIC LF/NL for comparison */
3278         c=normal[i];
3279         if(swap) {
3280             if(c==0x15) {
3281                 c=0x25;
3282             } else if(c==0x25) {
3283                 c=0x15;
3284             }
3285         }
3286 
3287         if(c!=swapped[i]) {
3288             log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]);
3289             goto cleanup;
3290         }
3291     }
3292 
3293     /* convert back to Unicode (may not roundtrip) */
3294     pc=normal;
3295     pu=uNormal;
3296     ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, true, &errorCode);
3297     normalLength=(int32_t)(pu-uNormal);
3298 
3299     pc=normal;
3300     pu=uSwapped;
3301     ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, true, &errorCode);
3302     swappedLength=(int32_t)(pu-uSwapped);
3303 
3304     if(U_FAILURE(errorCode)) {
3305         log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode));
3306         goto cleanup;
3307     }
3308 
3309     /* compare EBCDIC output */
3310     if(normalLength!=swappedLength) {
3311         log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3312         goto cleanup;
3313     }
3314     for(i=0; i<normalLength; ++i) {
3315         /* swap EBCDIC LF/NL for comparison */
3316         u=uNormal[i];
3317         if(swap) {
3318             if(u==0xa) {
3319                 u=0x85;
3320             } else if(u==0x85) {
3321                 u=0xa;
3322             }
3323         }
3324 
3325         if(u!=uSwapped[i]) {
3326             log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]);
3327             goto cleanup;
3328         }
3329     }
3330 
3331     /* clean up */
3332 cleanup:
3333     ucnv_close(cnv);
3334     ucnv_close(swapCnv);
3335 }
3336 
3337 static void
TestEBCDICSwapLFNL(void)3338 TestEBCDICSwapLFNL(void) {
3339     static const struct {
3340         const char *name;
3341         UBool swap;
3342     } tests[]={
3343         { "ibm-37", true },
3344         { "ibm-1047", true },
3345         { "ibm-1140", true },
3346         { "ibm-930", true },
3347         { "iso-8859-3", false }
3348     };
3349 
3350     int i;
3351 
3352     for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
3353         testSwap(tests[i].name, tests[i].swap);
3354     }
3355 }
3356 #else
/*
 * Stand-in used when legacy conversion is configured out: there is nothing
 * to test, but the function must still exist for the test registration.
 * Declared with (void) so that it is a proper prototype, matching the
 * full implementation in the other preprocessor branch.
 */
static void
TestEBCDICSwapLFNL(void) {
  /* test nothing... */
}
3361 #endif
3362 
TestFromUCountPending(void)3363 static void TestFromUCountPending(void){
3364 #if !UCONFIG_NO_LEGACY_CONVERSION
3365     UErrorCode status = U_ZERO_ERROR;
3366 /*       const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3367     static const struct {
3368         UChar input[6];
3369         int32_t len;
3370         int32_t exp;
3371     }fromUnicodeTests[] = {
3372         /*m:n conversion*/
3373         {{0xdbc4},1,1},
3374         {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3375         {{ 0xdbc4, 0xde34, 0xd900},3,3},
3376     };
3377     int i;
3378     UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3379     if(U_FAILURE(status)){
3380         log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3381         return;
3382     }
3383     for(i=0; i<UPRV_LENGTHOF(fromUnicodeTests); ++i) {
3384         char tgt[10];
3385         char* target = tgt;
3386         char* targetLimit = target + 10;
3387         const UChar* source = fromUnicodeTests[i].input;
3388         const UChar* sourceLimit = source + fromUnicodeTests[i].len;
3389         int32_t len = 0;
3390         ucnv_reset(cnv);
3391         ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3392         len = ucnv_fromUCountPending(cnv, &status);
3393         if(U_FAILURE(status)){
3394             log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3395             status = U_ZERO_ERROR;
3396             continue;
3397         }
3398         if(len != fromUnicodeTests[i].exp){
3399             log_err("Did not get the expected output for ucnv_fromUInputConsumed.\n");
3400         }
3401     }
3402     status = U_ZERO_ERROR;
3403     {
3404         /*
3405          * The converter has to read the tail before it knows that
3406          * only head alone matches.
3407          * At the end, the output for head will overflow the target,
3408          * middle will be pending, and tail will not have been consumed.
3409          */
3410         /*
3411         \U00101234  -> x (<U101234>   \x07 |0)
3412         \U00101234\U00050005 -> y (<U101234>+<U50005>          \x07+\x00+\x01\x02\x0e+\x05 |0)
3413         \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3414         \U00060007 -> unassigned
3415         */
3416         static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3417         static const UChar middle[] = {0xD940,0x0000};     /* first half of \U00060006 or \U00060007 */
3418         static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */
3419         char tgt[10];
3420         char* target = tgt;
3421         char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */
3422         const UChar* source = head;
3423         const UChar* sourceLimit = source + u_strlen(head);
3424         int32_t len = 0;
3425         ucnv_reset(cnv);
3426         ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3427         len = ucnv_fromUCountPending(cnv, &status);
3428         if(U_FAILURE(status)){
3429             log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3430             status = U_ZERO_ERROR;
3431         }
3432         if(len!=4){
3433             log_err("ucnv_fromUInputHeld did not return correct length for head\n");
3434         }
3435         source = middle;
3436         sourceLimit = source + u_strlen(middle);
3437         ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3438         len = ucnv_fromUCountPending(cnv, &status);
3439         if(U_FAILURE(status)){
3440             log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3441             status = U_ZERO_ERROR;
3442         }
3443         if(len!=5){
3444             log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
3445         }
3446         source = tail;
3447         sourceLimit = source + u_strlen(tail);
3448         ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3449         if(status != U_BUFFER_OVERFLOW_ERROR){
3450             log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3451         }
3452         status = U_ZERO_ERROR;
3453         len = ucnv_fromUCountPending(cnv, &status);
3454         /* middle[1] is pending, tail has not been consumed */
3455         if(U_FAILURE(status)){
3456             log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status));
3457         }
3458         if(len!=1){
3459             log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
3460         }
3461     }
3462     ucnv_close(cnv);
3463 #endif
3464 }
3465 
3466 static void
TestToUCountPending(void)3467 TestToUCountPending(void){
3468 #if !UCONFIG_NO_LEGACY_CONVERSION
3469     UErrorCode status = U_ZERO_ERROR;
3470     static const struct {
3471         char input[6];
3472         int32_t len;
3473         int32_t exp;
3474     }toUnicodeTests[] = {
3475         /*m:n conversion*/
3476         {{0x05, 0x01, 0x02},3,3},
3477         {{0x01, 0x02},2,2},
3478         {{0x07,  0x00, 0x01, 0x02},4,4},
3479     };
3480 
3481     int i;
3482     UConverterToUCallback *oldToUAction= NULL;
3483     UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3484     if(U_FAILURE(status)){
3485         log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3486         return;
3487     }
3488     ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3489     for(i=0; i<UPRV_LENGTHOF(toUnicodeTests); ++i) {
3490         UChar tgt[20];
3491         UChar* target = tgt;
3492         UChar* targetLimit = target + 20;
3493         const char* source = toUnicodeTests[i].input;
3494         const char* sourceLimit = source + toUnicodeTests[i].len;
3495         int32_t len = 0;
3496         ucnv_reset(cnv);
3497         ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, false, &status);
3498         len = ucnv_toUCountPending(cnv,&status);
3499         if(U_FAILURE(status)){
3500             log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3501             status = U_ZERO_ERROR;
3502             continue;
3503         }
3504         if(len != toUnicodeTests[i].exp){
3505             log_err("Did not get the expected output for ucnv_toUInputConsumed.\n");
3506         }
3507     }
3508     status = U_ZERO_ERROR;
3509     ucnv_close(cnv);
3510 
3511     {
3512         /*
3513          * The converter has to read the tail before it knows that
3514          * only head alone matches.
3515          * At the end, the output for head will overflow the target,
3516          * mid will be pending, and tail will not have been consumed.
3517          */
3518         char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3519         char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3520         char tail[] = {  0x01, 0x02, 0x03, 0x0d, 0x00 };
3521         /*
3522         0x01, 0x02, 0x03, 0x0a  -> x (<U23456>    \x01\x02\x03\x0a |0)
3523         0x01, 0x02, 0x03, 0x0b  -> y (<U000b>     \x01\x02\x03\x0b |0)
3524         0x01, 0x02, 0x03, 0x0d  -> z (<U34567>    \x01\x02\x03\x0d |3)
3525         0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
3526         */
3527         UChar tgt[10];
3528         UChar* target = tgt;
3529         UChar* targetLimit = target + 1; /* expect overflow from converting */
3530         const char* source = head;
3531         const char* sourceLimit = source + strlen(head);
3532         int32_t len = 0;
3533         cnv = ucnv_openPackage(loadTestData(&status), "test4", &status);
3534         if(U_FAILURE(status)){
3535             log_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3536             return;
3537         }
3538         ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3539         ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3540         len = ucnv_toUCountPending(cnv,&status);
3541         if(U_FAILURE(status)){
3542             log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3543         }
3544         if(len != 4){
3545             log_err("Did not get the expected len for head.\n");
3546         }
3547         source=mid;
3548         sourceLimit = source+strlen(mid);
3549         ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3550         len = ucnv_toUCountPending(cnv,&status);
3551         if(U_FAILURE(status)){
3552             log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3553         }
3554         if(len != 8){
3555             log_err("Did not get the expected len for mid.\n");
3556         }
3557 
3558         source=tail;
3559         sourceLimit = source+strlen(tail);
3560         targetLimit = target;
3561         ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3562         if(status != U_BUFFER_OVERFLOW_ERROR){
3563             log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3564         }
3565         status = U_ZERO_ERROR;
3566         len = ucnv_toUCountPending(cnv,&status);
3567         /* mid[4] is pending, tail has not been consumed */
3568         if(U_FAILURE(status)){
3569             log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status));
3570         }
3571         if(len != 4){
3572             log_err("Did not get the expected len for tail.\n");
3573         }
3574         ucnv_close(cnv);
3575     }
3576 #endif
3577 }
3578 
TestOneDefaultNameChange(const char * name,const char * expected)3579 static void TestOneDefaultNameChange(const char *name, const char *expected) {
3580     UErrorCode status = U_ZERO_ERROR;
3581     UConverter *cnv;
3582     ucnv_setDefaultName(name);
3583     if(strcmp(ucnv_getDefaultName(), expected)==0)
3584         log_verbose("setDefaultName of %s works.\n", name);
3585     else
3586         log_err("setDefaultName of %s failed\n", name);
3587     cnv=ucnv_open(NULL, &status);
3588     if (U_FAILURE(status) || cnv == NULL) {
3589         log_err("opening the default converter of %s failed\n", name);
3590         return;
3591     }
3592     if(strcmp(ucnv_getName(cnv, &status), expected)==0)
3593         log_verbose("ucnv_getName of %s works.\n", name);
3594     else
3595         log_err("ucnv_getName of %s failed\n", name);
3596     ucnv_close(cnv);
3597 }
3598 
TestDefaultName(void)3599 static void TestDefaultName(void) {
3600     /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/
3601     static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1];
3602     strcpy(defaultName, ucnv_getDefaultName());
3603 
3604     log_verbose("getDefaultName returned %s\n", defaultName);
3605 
3606     /*change the default name by setting it */
3607     TestOneDefaultNameChange("UTF-8", "UTF-8");
3608 #if U_CHARSET_IS_UTF8
3609     TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3610     TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3611     TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
3612 #else
3613 # if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
3614     TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3615     TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3616 # endif
3617     TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3618 #endif
3619 
3620     /*set the default name back*/
3621     ucnv_setDefaultName(defaultName);
3622 }
3623 
3624 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
3625 
/*
 * Reduces n to its sign.
 *
 * @return -1 for negative n, 0 for zero, 1 for positive n
 */
static int
sign(int n) {
    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (n>0) - (n<0);
}
3636 
/*
 * Checks ucnv_compareNames() on every adjacent pair of a NULL-terminated
 * name list.
 *
 * @param names names[0] is a relation string whose first character selects
 *              the expected result sign: '=' for 0, '<' for negative,
 *              anything else for positive. names[1..] are the converter
 *              names, followed by a terminating NULL.
 */
static void
compareNames(const char **names) {
    const char relationChar=names[0][0];
    const char **pair=names+1;
    int expectedSign, result;

    if(relationChar=='=') {
        expectedSign = 0;
    } else {
        expectedSign = (relationChar=='<') ? -1 : 1;
    }

    /* nothing to compare if the list holds no names at all */
    if(pair[0]==NULL) {
        return;
    }

    /* slide a two-name window over the list until its second slot is the terminator */
    for(; pair[1]!=NULL; ++pair) {
        result=ucnv_compareNames(pair[0], pair[1]);
        if(sign(result)!=expectedSign) {
            log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", pair[0], pair[1], result, expectedSign);
        }
    }
}
3663 
/*
 * Feeds several name lists to compareNames(). The first entry of each list
 * is the relation expected between neighbouring names: "=" for names that
 * must compare equal, "<" for names listed in ascending order.
 */
static void
TestCompareNames(void) {
    static const char *utf8Equal[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL };
    static const char *ibmEqual[]={ "=", "ibm-37", "IBM037", "i-B-m  00037", "ibm-0037", "IBM00037", NULL };
    static const char *macAscending[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL };
    static const char *utfAscending[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL };
    static const char **const lists[]={ utf8Equal, ibmEqual, macAscending, utfAscending };
    int32_t listIdx;

    for(listIdx=0; listIdx<UPRV_LENGTHOF(lists); ++listIdx) {
        compareNames(lists[listIdx]);
    }
}
3676 
3677 static void
TestSubstString(void)3678 TestSubstString(void) {
3679     static const UChar surrogate[1]={ 0xd900 };
3680     char buffer[16];
3681 
3682     static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3683     static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3684     UConverter *cnv;
3685     UErrorCode errorCode;
3686     int32_t length;
3687     int8_t len8;
3688 
3689     /* UTF-16/32: test that the BOM is output before the sub character */
3690     errorCode=U_ZERO_ERROR;
3691     cnv=ucnv_open("UTF-16", &errorCode);
3692     if(U_FAILURE(errorCode)) {
3693         log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
3694         return;
3695     }
3696     length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3697     ucnv_close(cnv);
3698     if(U_FAILURE(errorCode) ||
3699         length!=4 ||
3700         NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3701     ) {
3702         log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
3703     }
3704 
3705     errorCode=U_ZERO_ERROR;
3706     cnv=ucnv_open("UTF-32", &errorCode);
3707     if(U_FAILURE(errorCode)) {
3708         log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
3709         return;
3710     }
3711     length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3712     ucnv_close(cnv);
3713     if(U_FAILURE(errorCode) ||
3714         length!=8 ||
3715         NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3716     ) {
3717         log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3718     }
3719 
3720     /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3721     errorCode=U_ZERO_ERROR;
3722     cnv=ucnv_open("ISO-8859-1", &errorCode);
3723     if(U_FAILURE(errorCode)) {
3724         log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
3725         return;
3726     }
3727     ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode);
3728     if(U_FAILURE(errorCode)) {
3729         log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode));
3730     } else {
3731         len8 = sizeof(buffer);
3732         ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3733         /* Stateless converter, we expect the string converted to charset bytes. */
3734         if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) {
3735             log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode));
3736         }
3737     }
3738     ucnv_close(cnv);
3739 
3740 #if !UCONFIG_NO_LEGACY_CONVERSION
3741     errorCode=U_ZERO_ERROR;
3742     cnv=ucnv_open("HZ", &errorCode);
3743     if(U_FAILURE(errorCode)) {
3744         log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
3745         return;
3746     }
3747     ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode);
3748     if(U_FAILURE(errorCode)) {
3749         log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode));
3750     } else {
3751         len8 = sizeof(buffer);
3752         ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3753         /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3754         if(U_FAILURE(errorCode) || len8!=0) {
3755             log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode));
3756         }
3757     }
3758     ucnv_close(cnv);
3759 #endif
3760     /*
3761      * Further testing of ucnv_setSubstString() is done via intltest convert.
3762      * We do not test edge cases of illegal arguments and similar because the
3763      * function implementation uses all of its parameters in calls to other
3764      * functions with UErrorCode parameters.
3765      */
3766 }
3767 
3768 static void
InvalidArguments(void)3769 InvalidArguments(void) {
3770     UConverter *cnv;
3771     UErrorCode errorCode;
3772     char charBuffer[2] = {1, 1};
3773     char ucharAsCharBuffer[2] = {2, 2};
3774     char *charsPtr = charBuffer;
3775     UChar *ucharsPtr = (UChar *)ucharAsCharBuffer;
3776     UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1);
3777 
3778     errorCode=U_ZERO_ERROR;
3779     cnv=ucnv_open("UTF-8", &errorCode);
3780     if(U_FAILURE(errorCode)) {
3781         log_err("ucnv_open() failed - %s\n", u_errorName(errorCode));
3782         return;
3783     }
3784 
3785     errorCode=U_ZERO_ERROR;
3786     /* This one should fail because an incomplete UChar is being passed in */
3787     ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, true, &errorCode);
3788     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3789         log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3790     }
3791 
3792     errorCode=U_ZERO_ERROR;
3793     /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3794     ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, true, &errorCode);
3795     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3796         log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3797     }
3798 
3799     errorCode=U_ZERO_ERROR;
3800     /* This one should fail because an incomplete UChar is being passed in */
3801     ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, true, &errorCode);
3802     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3803         log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3804     }
3805 
3806     errorCode=U_ZERO_ERROR;
3807     /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3808     ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, true, &errorCode);
3809     if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3810         log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3811     }
3812 
3813     if (charBuffer[0] != 1 || charBuffer[1] != 1
3814         || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2)
3815     {
3816         log_err("Data was incorrectly written to buffers\n");
3817     }
3818 
3819     ucnv_close(cnv);
3820 }
3821 
TestGetName(void)3822 static void TestGetName(void) {
3823     static const char *const names[] = {
3824         "Unicode",                  "UTF-16",
3825         "UnicodeBigUnmarked",       "UTF-16BE",
3826         "UnicodeBig",               "UTF-16BE,version=1",
3827         "UnicodeLittleUnmarked",    "UTF-16LE",
3828         "UnicodeLittle",            "UTF-16LE,version=1",
3829         "x-UTF-16LE-BOM",           "UTF-16LE,version=1"
3830     };
3831     int32_t i;
3832     for(i = 0; i < UPRV_LENGTHOF(names); i += 2) {
3833         UErrorCode errorCode = U_ZERO_ERROR;
3834         UConverter *cnv = ucnv_open(names[i], &errorCode);
3835         if(U_SUCCESS(errorCode)) {
3836             const char *name = ucnv_getName(cnv, &errorCode);
3837             if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) {
3838                 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3839                         names[i], name, names[i+1], u_errorName(errorCode));
3840             }
3841             ucnv_close(cnv);
3842         }
3843     }
3844 }
3845 
TestUTFBOM(void)3846 static void TestUTFBOM(void) {
3847     static const UChar a16[] = { 0x61 };
3848     static const char *const names[] = {
3849         "UTF-16",
3850         "UTF-16,version=1",
3851         "UTF-16BE",
3852         "UnicodeBig",
3853         "UTF-16LE",
3854         "UnicodeLittle"
3855     };
3856     static const uint8_t expected[][5] = {
3857 #if U_IS_BIG_ENDIAN
3858         { 4, 0xfe, 0xff, 0, 0x61 },
3859         { 4, 0xfe, 0xff, 0, 0x61 },
3860 #else
3861         { 4, 0xff, 0xfe, 0x61, 0 },
3862         { 4, 0xff, 0xfe, 0x61, 0 },
3863 #endif
3864 
3865         { 2, 0, 0x61 },
3866         { 4, 0xfe, 0xff, 0, 0x61 },
3867 
3868         { 2, 0x61, 0 },
3869         { 4, 0xff, 0xfe, 0x61, 0 }
3870     };
3871 
3872     char bytes[10];
3873     int32_t i;
3874 
3875     for(i = 0; i < UPRV_LENGTHOF(names); ++i) {
3876         UErrorCode errorCode = U_ZERO_ERROR;
3877         UConverter *cnv = ucnv_open(names[i], &errorCode);
3878         int32_t length = 0;
3879         const uint8_t *exp = expected[i];
3880         if (U_FAILURE(errorCode)) {
3881            log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode));
3882            continue;
3883         }
3884         length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode);
3885 
3886         if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) {
3887             log_err("unexpected %s BOM writing behavior -- %s\n",
3888                     names[i], u_errorName(errorCode));
3889         }
3890         ucnv_close(cnv);
3891     }
3892 }
3893