1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*****************************************************************************
9 *
10 * File ccapitst.c
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda Ported for C API
15 ******************************************************************************
16 */
17
18 #include <stdalign.h>
19 #include <stdbool.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <ctype.h>
24 #include "unicode/uloc.h"
25 #include "unicode/ucnv.h"
26 #include "unicode/ucnv_err.h"
27 #include "unicode/putil.h"
28 #include "unicode/uset.h"
29 #include "unicode/ustring.h"
30 #include "unicode/utf8.h"
31 #include "ucnv_bld.h" /* for sizeof(UConverter) */
32 #include "cmemory.h" /* for UAlignedMemory */
33 #include "cintltst.h"
34 #include "ccapitst.h"
35 #include "cstring.h"
36
/* Number of code pages exercised by TestConvert(); sizes its per-codepage tables. */
#define NUM_CODEPAGE 1
/*
 * Capacity, in elements, of the conversion buffers allocated by TestConvert().
 * Parenthesized so the macro behaves as a single value in any expression
 * (for example as a divisor or a modulus operand).
 */
#define MAX_FILE_LEN (1024*20)
/* Size, in bytes, of the buffer that holds the path of the Unicode test file. */
#define UCS_FILE_NAME_SIZE 512
40
41 /*returns an action other than the one provided*/
42 #if !UCONFIG_NO_LEGACY_CONVERSION
43 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA);
44 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA);
45 #endif
46
47 static UConverter *
cnv_open(const char * name,UErrorCode * pErrorCode)48 cnv_open(const char *name, UErrorCode *pErrorCode) {
49 if(name!=NULL && name[0]=='*') {
50 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode);
51 } else {
52 return ucnv_open(name, pErrorCode);
53 }
54 }
55
56
57 static void ListNames(void);
58 static void TestFlushCache(void);
59 static void TestDuplicateAlias(void);
60 static void TestCCSID(void);
61 static void TestJ932(void);
62 static void TestJ1968(void);
63 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
64 static void TestLMBCSMaxChar(void);
65 #endif
66
67 static void TestConvertClone(void);
68 #if !UCONFIG_NO_LEGACY_CONVERSION
69 static void TestConvertSafeCloneCallback(void);
70 #endif
71
72 static void TestEBCDICSwapLFNL(void);
73 static void TestConvertEx(void);
74 static void TestConvertExFromUTF8(void);
75 static void TestConvertExFromUTF8_C5F0(void);
76 static void TestConvertAlgorithmic(void);
77 void TestDefaultConverterError(void); /* defined in cctest.c */
78 void TestDefaultConverterSet(void); /* defined in cctest.c */
79 static void TestToUCountPending(void);
80 static void TestFromUCountPending(void);
81 static void TestDefaultName(void);
82 static void TestCompareNames(void);
83 static void TestSubstString(void);
84 static void InvalidArguments(void);
85 static void TestGetName(void);
86 static void TestUTFBOM(void);
87
88 void addTestConvert(TestNode** root);
89
/*
 * Registers every converter API test of this file with the test tree,
 * each under the path "tsconv/ccapitst/<TestName>".
 * Registration order is significant only in that it is the order in which
 * addTest() is called; it is kept unchanged here.
 *
 * @param root  root of the test tree, passed through to addTest()
 */
void addTestConvert(TestNode** root)
{
    /* names, basic conversion, cache and aliases */
    addTest(root, ListNames,                    "tsconv/ccapitst/ListNames");
    addTest(root, TestConvert,                  "tsconv/ccapitst/TestConvert");
    addTest(root, TestFlushCache,               "tsconv/ccapitst/TestFlushCache");
    addTest(root, TestAlias,                    "tsconv/ccapitst/TestAlias");
    addTest(root, TestDuplicateAlias,           "tsconv/ccapitst/TestDuplicateAlias");

    /* cloning */
    addTest(root, TestConvertSafeClone,         "tsconv/ccapitst/TestConvertSafeClone");
    addTest(root, TestConvertClone,             "tsconv/ccapitst/TestConvertClone");
#if !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestConvertSafeCloneCallback, "tsconv/ccapitst/TestConvertSafeCloneCallback");
#endif

    /* CCSID handling and bug-specific regression tests */
    addTest(root, TestCCSID,                    "tsconv/ccapitst/TestCCSID");
    addTest(root, TestJ932,                     "tsconv/ccapitst/TestJ932");
    addTest(root, TestJ1968,                    "tsconv/ccapitst/TestJ1968");
#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
    addTest(root, TestLMBCSMaxChar,             "tsconv/ccapitst/TestLMBCSMaxChar");
#endif
    addTest(root, TestEBCDICSwapLFNL,           "tsconv/ccapitst/TestEBCDICSwapLFNL");

    /* ucnv_convertEx() and algorithmic conversion */
    addTest(root, TestConvertEx,                "tsconv/ccapitst/TestConvertEx");
    addTest(root, TestConvertExFromUTF8,        "tsconv/ccapitst/TestConvertExFromUTF8");
    addTest(root, TestConvertExFromUTF8_C5F0,   "tsconv/ccapitst/TestConvertExFromUTF8_C5F0");
    addTest(root, TestConvertAlgorithmic,       "tsconv/ccapitst/TestConvertAlgorithmic");

    /* default converter */
    addTest(root, TestDefaultConverterError,    "tsconv/ccapitst/TestDefaultConverterError");
    addTest(root, TestDefaultConverterSet,      "tsconv/ccapitst/TestDefaultConverterSet");

    /* pending-count tests are registered only when file I/O is available */
#if !UCONFIG_NO_FILE_IO
    addTest(root, TestToUCountPending,          "tsconv/ccapitst/TestToUCountPending");
    addTest(root, TestFromUCountPending,        "tsconv/ccapitst/TestFromUCountPending");
#endif

    /* naming, substitution strings, argument checking and BOM handling */
    addTest(root, TestDefaultName,              "tsconv/ccapitst/TestDefaultName");
    addTest(root, TestCompareNames,             "tsconv/ccapitst/TestCompareNames");
    addTest(root, TestSubstString,              "tsconv/ccapitst/TestSubstString");
    addTest(root, InvalidArguments,             "tsconv/ccapitst/InvalidArguments");
    addTest(root, TestGetName,                  "tsconv/ccapitst/TestGetName");
    addTest(root, TestUTFBOM,                   "tsconv/ccapitst/TestUTFBOM");
}
126
ListNames(void)127 static void ListNames(void) {
128 UErrorCode err = U_ZERO_ERROR;
129 int32_t testLong1 = 0;
130 const char* available_conv;
131 UEnumeration *allNamesEnum = NULL;
132 int32_t allNamesCount = 0;
133 uint16_t count;
134
135 log_verbose("Testing ucnv_openAllNames()...");
136 allNamesEnum = ucnv_openAllNames(&err);
137 if(U_FAILURE(err)) {
138 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
139 }
140 else {
141 const char *string = NULL;
142 int32_t len = 0;
143 int32_t count1 = 0;
144 int32_t count2 = 0;
145 allNamesCount = uenum_count(allNamesEnum, &err);
146 while ((string = uenum_next(allNamesEnum, &len, &err))) {
147 count1++;
148 log_verbose("read \"%s\", length %i\n", string, len);
149 }
150 if (U_FAILURE(err)) {
151 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err));
152 err = U_ZERO_ERROR;
153 }
154 uenum_reset(allNamesEnum, &err);
155 while ((string = uenum_next(allNamesEnum, &len, &err))) {
156 count2++;
157 ucnv_close(ucnv_open(string, &err));
158 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable");
159 err = U_ZERO_ERROR;
160 }
161 if (count1 != count2) {
162 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
163 }
164 }
165 uenum_close(allNamesEnum);
166 err = U_ZERO_ERROR;
167
168 /*Tests ucnv_getAvailableName(), getAvailableCount()*/
169
170 log_verbose("Testing ucnv_countAvailable()...");
171
172 testLong1=ucnv_countAvailable();
173 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount);
174
175 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
176
177 available_conv = ucnv_getAvailableName(testLong1);
178 /*test ucnv_getAvailableName with err condition*/
179 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
180 available_conv = ucnv_getAvailableName(-1);
181 if(available_conv != NULL){
182 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
183 }
184
185 /* Test ucnv_countAliases() etc. */
186 count = ucnv_countAliases("utf-8", &err);
187 if(U_FAILURE(err)) {
188 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
189 } else if(count <= 0) {
190 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count);
191 } else {
192 /* try to get the aliases individually */
193 const char *alias;
194 alias = ucnv_getAlias("utf-8", 0, &err);
195 if(U_FAILURE(err)) {
196 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err));
197 } else if(strcmp("UTF-8", alias) != 0) {
198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias);
199 } else {
200 uint16_t aliasNum;
201 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
202 alias = ucnv_getAlias("utf-8", aliasNum, &err);
203 if(U_FAILURE(err)) {
204 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
205 } else if(strlen(alias) > 20) {
206 /* sanity check */
207 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias);
208 } else {
209 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias);
210 }
211 }
212 if(U_SUCCESS(err)) {
213 /* try to fill an array with all aliases */
214 const char **aliases;
215 aliases=(const char **)malloc(count * sizeof(const char *));
216 if(aliases != 0) {
217 ucnv_getAliases("utf-8", aliases, &err);
218 if(U_FAILURE(err)) {
219 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err));
220 } else {
221 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
222 /* compare the pointers with the ones returned individually */
223 alias = ucnv_getAlias("utf-8", aliasNum, &err);
224 if(U_FAILURE(err)) {
225 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
226 } else if(aliases[aliasNum] != alias) {
227 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum);
228 }
229 }
230 }
231 free((char **)aliases);
232 }
233 }
234 }
235 }
236 }
237
238
TestConvert(void)239 static void TestConvert(void)
240 {
241 #if !UCONFIG_NO_LEGACY_CONVERSION
242 char myptr[4];
243 char save[4];
244 int32_t testLong1 = 0;
245 uint16_t rest = 0;
246 int32_t len = 0;
247 int32_t x = 0;
248 FILE* ucs_file_in = NULL;
249 UChar BOM = 0x0000;
250 UChar myUChar = 0x0000;
251 char* mytarget; /* [MAX_FILE_LEN] */
252 char* mytarget_1;
253 char* mytarget_use;
254 UChar* consumedUni = NULL;
255 char* consumed = NULL;
256 char* output_cp_buffer; /* [MAX_FILE_LEN] */
257 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */
258 UChar* ucs_file_buffer_use;
259 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */
260 UChar* my_ucs_file_buffer_1;
261 int8_t ii = 0;
262 uint16_t codepage_index = 0;
263 int32_t cp = 0;
264 UErrorCode err = U_ZERO_ERROR;
265 char ucs_file_name[UCS_FILE_NAME_SIZE];
266 UConverterFromUCallback MIA1, MIA1_2;
267 UConverterToUCallback MIA2, MIA2_2;
268 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2;
269 UConverter* someConverters[5];
270 UConverter* myConverter = 0;
271 UChar* displayname = 0;
272
273 const char* locale;
274
275 UChar* uchar1 = 0;
276 UChar* uchar2 = 0;
277 UChar* uchar3 = 0;
278 int32_t targetcapacity2;
279 int32_t targetcapacity;
280 int32_t targetsize;
281 int32_t disnamelen;
282
283 const UChar* tmp_ucs_buf;
284 const UChar* tmp_consumedUni=NULL;
285 const char* tmp_mytarget_use;
286 const char* tmp_consumed;
287
288 /******************************************************************
289 Checking Unicode -> ksc
290 ******************************************************************/
291
292 const char* CodePagesToTest[NUM_CODEPAGE] =
293 {
294 "ibm-949_P110-1999"
295
296
297 };
298 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] =
299 {
300 949
301 };
302
303
304 const int8_t CodePagesMinChars[NUM_CODEPAGE] =
305 {
306 1
307
308 };
309
310 const int8_t CodePagesMaxChars[NUM_CODEPAGE] =
311 {
312 2
313
314 };
315
316 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] =
317 {
318 0xAFFE
319 };
320
321 const char* CodePagesTestFiles[NUM_CODEPAGE] =
322 {
323 "uni-text.bin"
324 };
325
326
327 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] =
328 {
329 UCNV_IBM
330
331 };
332
333 const char* CodePagesLocale[NUM_CODEPAGE] =
334 {
335 "ko_KR"
336 };
337
338 UConverterFromUCallback oldFromUAction = NULL;
339 UConverterToUCallback oldToUAction = NULL;
340 const void* oldFromUContext = NULL;
341 const void* oldToUContext = NULL;
342
343 /* Allocate memory */
344 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0]));
345 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0]));
346 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0]));
347 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0]));
348
349 ucs_file_buffer_use = ucs_file_buffer;
350 mytarget_1=mytarget;
351 mytarget_use = mytarget;
352 my_ucs_file_buffer_1=my_ucs_file_buffer;
353
354 /* flush the converter cache to get a consistent state before the flushing is tested */
355 ucnv_flushCache();
356
357 /*Testing ucnv_openU()*/
358 {
359 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
360 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */
361 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */
362 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
363 UChar illegalName[100];
364 UConverter *converter=NULL;
365 err=U_ZERO_ERROR;
366 converter=ucnv_openU(converterName, &err);
367 if(U_FAILURE(err)){
368 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err));
369 }
370 ucnv_close(converter);
371 err=U_ZERO_ERROR;
372 converter=ucnv_openU(NULL, &err);
373 if(U_FAILURE(err)){
374 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err));
375 }
376 ucnv_close(converter);
377 /*testing with error value*/
378 err=U_ILLEGAL_ARGUMENT_ERROR;
379 converter=ucnv_openU(converterName, &err);
380 if(!(converter == NULL)){
381 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
382 }
383 ucnv_close(converter);
384 err=U_ZERO_ERROR;
385 u_uastrcpy(illegalName, "");
386 u_uastrcpy(illegalName, illegalNameChars);
387 ucnv_openU(illegalName, &err);
388 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){
389 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
390 }
391
392 err=U_ZERO_ERROR;
393 ucnv_openU(firstSortedName, &err);
394 if(err!=U_FILE_ACCESS_ERROR){
395 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
396 }
397
398 err=U_ZERO_ERROR;
399 ucnv_openU(lastSortedName, &err);
400 if(err!=U_FILE_ACCESS_ERROR){
401 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
402 }
403
404 err=U_ZERO_ERROR;
405 }
406 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
407 {
408 UConverter *cnv=NULL;
409 err=U_ZERO_ERROR;
410 cnv=ucnv_open("ibm-949,Madhu", &err);
411 if(U_FAILURE(err)){
412 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err));
413 }
414 ucnv_close(cnv);
415
416 }
417 /*Testing ucnv_convert()*/
418 {
419 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0;
420 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
421 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
422 char *target=0;
423 sourceLimit=UPRV_LENGTHOF(source);
424 err=U_ZERO_ERROR;
425 targetLimit=0;
426
427 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err);
428 if(err == U_BUFFER_OVERFLOW_ERROR){
429 err=U_ZERO_ERROR;
430 targetLimit=targetCapacity+1;
431 target=(char*)malloc(sizeof(char) * targetLimit);
432 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
433 }
434 if(U_FAILURE(err)){
435 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err));
436 }
437 else {
438 for(i=0; i<targetCapacity; i++){
439 if(target[i] != expectedTarget[i]){
440 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
441 }
442 }
443
444 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err);
445 if(U_FAILURE(err) || i!=7){
446 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
447 u_errorName(err), i);
448 }
449
450 /*Test error conditions*/
451 err=U_ZERO_ERROR;
452 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err);
453 if(i !=0){
454 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
455 }
456
457 err=U_ILLEGAL_ARGUMENT_ERROR;
458 sourceLimit=UPRV_LENGTHOF(source);
459 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
460 if(i !=0 ){
461 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
462 }
463
464 err=U_ZERO_ERROR;
465 sourceLimit=UPRV_LENGTHOF(source);
466 targetLimit=0;
467 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
468 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){
469 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
470 }
471 err=U_ZERO_ERROR;
472 free(target);
473 }
474 }
475
476 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
477 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
478 err=U_ILLEGAL_ARGUMENT_ERROR;
479 if(ucnv_open(NULL, &err) != NULL){
480 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
481 }
482 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){
483 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
484 }
485 err=U_ZERO_ERROR;
486
487 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
488 log_verbose("\n---Testing ucnv_open default...\n");
489 someConverters[0] = ucnv_open(NULL,&err);
490 someConverters[1] = ucnv_open(NULL,&err);
491 someConverters[2] = ucnv_open("utf8", &err);
492 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err);
493 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */
494 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));}
495
496 /* Testing ucnv_getName()*/
497 /*default code page */
498 ucnv_getName(someConverters[0], &err);
499 if(U_FAILURE(err)) {
500 log_data_err("getName[0] failed\n");
501 } else {
502 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err));
503 }
504 ucnv_getName(someConverters[1], &err);
505 if(U_FAILURE(err)) {
506 log_data_err("getName[1] failed\n");
507 } else {
508 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err));
509 }
510
511 ucnv_close(someConverters[0]);
512 ucnv_close(someConverters[1]);
513 ucnv_close(someConverters[2]);
514 ucnv_close(someConverters[3]);
515
516
517 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index)
518 {
519 int32_t i = 0;
520
521 err = U_ZERO_ERROR;
522 #ifdef U_TOPSRCDIR
523 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING);
524 #else
525 strcpy(ucs_file_name, loadTestData(&err));
526
527 if(U_FAILURE(err)){
528 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
529 return;
530 }
531
532 {
533 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
534
535 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
536 *(index+1)=0;
537 }
538 }
539
540 strcat(ucs_file_name,".."U_FILE_SEP_STRING);
541 #endif
542 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]);
543
544 ucs_file_in = fopen(ucs_file_name,"rb");
545 if (!ucs_file_in)
546 {
547 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
548 return;
549 }
550
551 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/
552
553 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
554 /* ucnv_flushCache(); */
555 myConverter =ucnv_open( "ibm-949", &err);
556 if (!myConverter || U_FAILURE(err))
557 {
558 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err));
559 fclose(ucs_file_in);
560 break;
561 }
562
563 /*testing for ucnv_getName() */
564 log_verbose("Testing ucnv_getName()...\n");
565 ucnv_getName(myConverter, &err);
566 if(U_FAILURE(err))
567 log_err("Error in getName\n");
568 else
569 {
570 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err));
571 }
572 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
573 log_err("getName failed\n");
574 else
575 log_verbose("getName ok\n");
576 /*Test getName with error condition*/
577 {
578 const char* name=0;
579 err=U_ILLEGAL_ARGUMENT_ERROR;
580 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
581 name=ucnv_getName(myConverter, &err);
582 if(name != NULL){
583 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
584 }
585 err=U_ZERO_ERROR;
586 }
587
588
589 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
590
591 log_verbose("Testing ucnv_getMaxCharSize()...\n");
592 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index])
593 log_verbose("Max byte per character OK\n");
594 else
595 log_err("Max byte per character failed\n");
596
597 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
598 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index])
599 log_verbose("Min byte per character OK\n");
600 else
601 log_err("Min byte per character failed\n");
602
603
604 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
605 log_verbose("\n---Testing ucnv_getSubstChars...\n");
606 ii=4;
607 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
608 if (ii <= 0) {
609 log_err("ucnv_getSubstChars returned a negative number %d\n", ii);
610 }
611
612 for(x=0;x<ii;x++)
613 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]);
614 if (rest==CodePagesSubstitutionChars[codepage_index])
615 log_verbose("Substitution character ok\n");
616 else
617 log_err("Substitution character failed.\n");
618
619 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
620 ucnv_setSubstChars(myConverter, myptr, ii, &err);
621 if (U_FAILURE(err))
622 {
623 log_err("FAILURE! %s\n", myErrorName(err));
624 }
625 ucnv_getSubstChars(myConverter,save, &ii, &err);
626 if (U_FAILURE(err))
627 {
628 log_err("FAILURE! %s\n", myErrorName(err));
629 }
630
631 if (strncmp(save, myptr, ii))
632 log_err("Saved substitution character failed\n");
633 else
634 log_verbose("Saved substitution character ok\n");
635
636 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
637 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
638 ii=1;
639 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
640 if(err != U_INDEX_OUTOFBOUNDS_ERROR){
641 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err));
642 }
643 err=U_ZERO_ERROR;
644 ii=4;
645 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
646 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
647 ucnv_setSubstChars(myConverter, myptr, 0, &err);
648 if(err != U_ILLEGAL_ARGUMENT_ERROR){
649 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err));
650 }
651 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
652 strcpy(myptr, "abc");
653 ucnv_setSubstChars(myConverter, myptr, ii, &err);
654 err=U_ZERO_ERROR;
655 ucnv_getSubstChars(myConverter, save, &ii, &err);
656 if(strncmp(save, myptr, ii) == 0){
657 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
658 }
659 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
660 err=U_ZERO_ERROR;
661 strcpy(myptr, "abc");
662 ucnv_setSubstChars(myConverter, myptr, ii, &err);
663 err=U_ILLEGAL_ARGUMENT_ERROR;
664 ucnv_getSubstChars(myConverter, save, &ii, &err);
665 if(strncmp(save, myptr, ii) == 0){
666 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
667 }
668 err=U_ZERO_ERROR;
669 /*------*/
670
671 #ifdef U_ENABLE_GENERIC_ISO_2022
672 /*resetState ucnv_reset()*/
673 log_verbose("\n---Testing ucnv_reset()..\n");
674 ucnv_reset(myConverter);
675 {
676 UChar32 c;
677 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
678 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
679 UConverter *cnv=ucnv_open("ISO_2022", &err);
680 if(U_FAILURE(err)) {
681 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
682 }
683 c=ucnv_getNextUChar(cnv, &source, limit, &err);
684 if((U_FAILURE(err) || c != (UChar32)0x0031)) {
685 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err));
686 }
687 ucnv_reset(cnv);
688 ucnv_close(cnv);
689
690 }
691 #endif
692
693 /*getDisplayName*/
694 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
695 locale=CodePagesLocale[codepage_index];
696 len=0;
697 displayname=NULL;
698 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err);
699 if(err==U_BUFFER_OVERFLOW_ERROR) {
700 err=U_ZERO_ERROR;
701 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar));
702 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err);
703 if(U_FAILURE(err)) {
704 log_err("getDisplayName failed. The error is %s\n", myErrorName(err));
705 }
706 else {
707 log_verbose(" getDisplayName o.k.\n");
708 }
709 free(displayname);
710 displayname=NULL;
711 }
712 else {
713 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err));
714 }
715 /*test ucnv_getDiaplayName with error condition*/
716 err= U_ILLEGAL_ARGUMENT_ERROR;
717 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err);
718 if( len !=0 ){
719 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
720 }
721 /*test ucnv_getDiaplayName with error condition*/
722 err=U_ZERO_ERROR;
723 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err);
724 if( len !=0 || U_SUCCESS(err)){
725 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
726 }
727 err=U_ZERO_ERROR;
728
729 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
730 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context);
731
732 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
733 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
734 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context)
735 {
736 log_err("FAILURE! %s\n", myErrorName(err));
737 }
738
739 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
740 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM)
741 log_err("get From UCallBack failed\n");
742 else
743 log_verbose("get From UCallBack ok\n");
744
745 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
746 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err);
747 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM)
748 {
749 log_err("FAILURE! %s\n", myErrorName(err));
750 }
751
752 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
753 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context)
754 log_err("get From UCallBack action failed\n");
755 else
756 log_verbose("get From UCallBack action ok\n");
757
758 /*testing ucnv_setToUCallBack with error conditions*/
759 err=U_ILLEGAL_ARGUMENT_ERROR;
760 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
761 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
762 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
763 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){
764 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
765 }
766 err=U_ZERO_ERROR;
767
768
769 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
770 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context);
771
772 log_verbose("\n---Testing setTo UCallBack...\n");
773 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err);
774 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context)
775 {
776 log_err("FAILURE! %s\n", myErrorName(err));
777 }
778
779 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
780 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM)
781 log_err("To UCallBack failed\n");
782 else
783 log_verbose("To UCallBack ok\n");
784
785 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
786 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err);
787 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM)
788 { log_err("FAILURE! %s\n", myErrorName(err)); }
789
790 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
791 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context)
792 log_err("To UCallBack failed\n");
793 else
794 log_verbose("To UCallBack ok\n");
795
796 /*testing ucnv_setToUCallBack with error conditions*/
797 err=U_ILLEGAL_ARGUMENT_ERROR;
798 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
799 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err);
800 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
801 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){
802 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
803 }
804 err=U_ZERO_ERROR;
805
806
807 /*getcodepageid testing ucnv_getCCSID() */
808 log_verbose("\n----Testing getCCSID....\n");
809 cp = ucnv_getCCSID(myConverter,&err);
810 if (U_FAILURE(err))
811 {
812 log_err("FAILURE!..... %s\n", myErrorName(err));
813 }
814 if (cp != CodePageNumberToTest[codepage_index])
815 log_err("Codepage number test failed\n");
816 else
817 log_verbose("Codepage number test OK\n");
818
819 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
820 err=U_ILLEGAL_ARGUMENT_ERROR;
821 if( ucnv_getCCSID(myConverter,&err) != -1){
822 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
823 }
824 err=U_ZERO_ERROR;
825
826 /*getCodepagePlatform testing ucnv_getPlatform()*/
827 log_verbose("\n---Testing getCodepagePlatform ..\n");
828 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err))
829 log_err("Platform codepage test failed\n");
830 else
831 log_verbose("Platform codepage test ok\n");
832
833 if (U_FAILURE(err))
834 {
835 log_err("FAILURE! %s\n", myErrorName(err));
836 }
837 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
838 err= U_ILLEGAL_ARGUMENT_ERROR;
839 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){
840 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
841 }
842 err=U_ZERO_ERROR;
843
844
845 /*Reads the BOM*/
846 {
847 // Note: gcc produces a compile warning if the return value from fread() is ignored.
848 size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in);
849 (void)numRead;
850 }
851 if (BOM!=0xFEFF && BOM!=0xFFFE)
852 {
853 log_err("File Missing BOM...Bailing!\n");
854 fclose(ucs_file_in);
855 break;
856 }
857
858
859 /*Reads in the file*/
860 while(!feof(ucs_file_in)&&(i+=(int32_t)fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in)))
861 {
862 myUChar = ucs_file_buffer[i-1];
863
864 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/
865 }
866
867 myUChar = ucs_file_buffer[i-1];
868 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/
869
870
871 /*testing ucnv_fromUChars() and ucnv_toUChars() */
872 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
873
874 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1));
875 u_uastrcpy(uchar1,"");
876 u_strncpy(uchar1,ucs_file_buffer,i);
877 uchar1[i] = 0;
878
879 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1));
880 u_uastrcpy(uchar3,"");
881 u_strncpy(uchar3,ucs_file_buffer,i);
882 uchar3[i] = 0;
883
884 /*Calls the Conversion Routine */
885 testLong1 = MAX_FILE_LEN;
886 log_verbose("\n---Testing ucnv_fromUChars()\n");
887 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
888 if (U_FAILURE(err))
889 {
890 log_err("\nFAILURE...%s\n", myErrorName(err));
891 }
892 else
893 log_verbose(" ucnv_fromUChars() o.k.\n");
894
895 /*test the conversion routine */
896 log_verbose("\n---Testing ucnv_toUChars()\n");
897 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
898 targetcapacity2=0;
899 targetsize = ucnv_toUChars(myConverter,
900 NULL,
901 targetcapacity2,
902 output_cp_buffer,
903 (int32_t)strlen(output_cp_buffer),
904 &err);
905 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/
906
907 if(err==U_BUFFER_OVERFLOW_ERROR)
908 {
909 err=U_ZERO_ERROR;
910 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar));
911 targetsize = ucnv_toUChars(myConverter,
912 uchar2,
913 targetsize+1,
914 output_cp_buffer,
915 (int32_t)strlen(output_cp_buffer),
916 &err);
917
918 if(U_FAILURE(err))
919 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err));
920 else
921 log_verbose(" ucnv_toUChars() o.k.\n");
922
923 if(u_strcmp(uchar1,uchar2)!=0)
924 log_err("equality test failed with conversion routine\n");
925 }
926 else
927 {
928 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
929 }
930 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
931 err=U_ILLEGAL_ARGUMENT_ERROR;
932 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
933 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
934 if (targetcapacity !=0) {
935 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
936 }
937 err=U_ZERO_ERROR;
938 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
939 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err);
940 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) {
941 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
942 }
943 err=U_ZERO_ERROR;
944 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
945 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err);
946 if (targetcapacity !=0) {
947 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
948 }
949 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
950 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err);
951 if (err != U_BUFFER_OVERFLOW_ERROR) {
952 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
953 }
954 /*toUChars with error conditions*/
955 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
956 if(targetsize != 0){
957 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
958 }
959 err=U_ZERO_ERROR;
960 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
961 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){
962 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
963 }
964 err=U_ZERO_ERROR;
965 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err);
966 if (targetsize !=0) {
967 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
968 }
969 targetcapacity2=0;
970 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
971 if (err != U_STRING_NOT_TERMINATED_WARNING) {
972 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
973 u_errorName(err));
974 }
975 err=U_ZERO_ERROR;
976 /*-----*/
977
978
979 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
980 /*Clean up re-usable vars*/
981 log_verbose("Testing ucnv_fromUnicode().....\n");
982 tmp_ucs_buf=ucs_file_buffer_use;
983 ucnv_fromUnicode(myConverter, &mytarget_1,
984 mytarget + MAX_FILE_LEN,
985 &tmp_ucs_buf,
986 ucs_file_buffer_use+i,
987 NULL,
988 true,
989 &err);
990 consumedUni = (UChar*)tmp_consumedUni;
991 (void)consumedUni; /* Suppress set but not used warning. */
992
993 if (U_FAILURE(err))
994 {
995 log_err("FAILURE! %s\n", myErrorName(err));
996 }
997 else
998 log_verbose("ucnv_fromUnicode() o.k.\n");
999
1000 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
1001 log_verbose("Testing ucnv_toUnicode().....\n");
1002 tmp_mytarget_use=mytarget_use;
1003 tmp_consumed = consumed;
1004 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1,
1005 my_ucs_file_buffer + MAX_FILE_LEN,
1006 &tmp_mytarget_use,
1007 mytarget_use + (mytarget_1 - mytarget),
1008 NULL,
1009 false,
1010 &err);
1011 consumed = (char*)tmp_consumed;
1012 if (U_FAILURE(err))
1013 {
1014 log_err("FAILURE! %s\n", myErrorName(err));
1015 }
1016 else
1017 log_verbose("ucnv_toUnicode() o.k.\n");
1018
1019
1020 log_verbose("\n---Testing RoundTrip ...\n");
1021
1022
1023 u_strncpy(uchar3, my_ucs_file_buffer,i);
1024 uchar3[i] = 0;
1025
1026 if(u_strcmp(uchar1,uchar3)==0)
1027 log_verbose("Equality test o.k.\n");
1028 else
1029 log_err("Equality test failed\n");
1030
1031 /*sanity compare */
1032 if(uchar2 == NULL)
1033 {
1034 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__);
1035 }
1036 else
1037 {
1038 if(u_strcmp(uchar2, uchar3)==0)
1039 log_verbose("Equality test o.k.\n");
1040 else
1041 log_err("Equality test failed\n");
1042 }
1043
1044 fclose(ucs_file_in);
1045 ucnv_close(myConverter);
1046 if (uchar1 != 0) free(uchar1);
1047 if (uchar2 != 0) free(uchar2);
1048 if (uchar3 != 0) free(uchar3);
1049 }
1050
1051 free((void*)mytarget);
1052 free((void*)output_cp_buffer);
1053 free((void*)ucs_file_buffer);
1054 free((void*)my_ucs_file_buffer);
1055 #endif
1056 }
1057
1058 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Returns a from-Unicode callback that is different from MIA:
 * the SUBSTITUTE callback when MIA is the STOP callback, and the
 * STOP callback for every other input.
 */
static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA)
{
    const UConverterFromUCallback stopAction = (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP;

    if (MIA == stopAction) {
        return (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE;
    }
    return stopAction;
}
1063
/*
 * Returns a to-Unicode callback that is different from MIA:
 * the SUBSTITUTE callback when MIA is the STOP callback, and the
 * STOP callback for every other input.
 */
static UConverterToUCallback otherCharAction(UConverterToUCallback MIA)
{
    const UConverterToUCallback stopAction = (UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP;

    if (MIA == stopAction) {
        return (UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE;
    }
    return stopAction;
}
1068 #endif
1069
TestFlushCache(void)1070 static void TestFlushCache(void) {
1071 #if !UCONFIG_NO_LEGACY_CONVERSION
1072 UErrorCode err = U_ZERO_ERROR;
1073 UConverter* someConverters[5];
1074 int flushCount = 0;
1075
1076 /* flush the converter cache to get a consistent state before the flushing is tested */
1077 ucnv_flushCache();
1078
1079 /*Testing ucnv_open()*/
1080 /* Note: These converters have been chosen because they do NOT
1081 encode the Latin characters (U+0041, ...), and therefore are
1082 highly unlikely to be chosen as system default codepages */
1083
1084 someConverters[0] = ucnv_open("ibm-1047", &err);
1085 if (U_FAILURE(err)) {
1086 log_data_err("FAILURE! %s\n", myErrorName(err));
1087 }
1088
1089 someConverters[1] = ucnv_open("ibm-1047", &err);
1090 if (U_FAILURE(err)) {
1091 log_data_err("FAILURE! %s\n", myErrorName(err));
1092 }
1093
1094 someConverters[2] = ucnv_open("ibm-1047", &err);
1095 if (U_FAILURE(err)) {
1096 log_data_err("FAILURE! %s\n", myErrorName(err));
1097 }
1098
1099 someConverters[3] = ucnv_open("gb18030", &err);
1100 if (U_FAILURE(err)) {
1101 log_data_err("FAILURE! %s\n", myErrorName(err));
1102 }
1103
1104 someConverters[4] = ucnv_open("ibm-954", &err);
1105 if (U_FAILURE(err)) {
1106 log_data_err("FAILURE! %s\n", myErrorName(err));
1107 }
1108
1109
1110 /* Testing ucnv_flushCache() */
1111 log_verbose("\n---Testing ucnv_flushCache...\n");
1112 if ((flushCount=ucnv_flushCache())==0)
1113 log_verbose("Flush cache ok\n");
1114 else
1115 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1116
1117 /*testing ucnv_close() and ucnv_flushCache() */
1118 ucnv_close(someConverters[0]);
1119 ucnv_close(someConverters[1]);
1120
1121 if ((flushCount=ucnv_flushCache())==0)
1122 log_verbose("Flush cache ok\n");
1123 else
1124 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1125
1126 ucnv_close(someConverters[2]);
1127 ucnv_close(someConverters[3]);
1128
1129 if ((flushCount=ucnv_flushCache())==2)
1130 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1131 else
1132 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
1133 __LINE__,
1134 flushCount);
1135
1136 ucnv_close(someConverters[4]);
1137 if ( (flushCount=ucnv_flushCache())==1)
1138 log_verbose("Flush cache ok\n");
1139 else
1140 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount);
1141 #endif
1142 }
1143
1144 /**
1145 * Test the converter alias API, specifically the fuzzy matching of
1146 * alias names and the alias table integrity. Make sure each
1147 * converter has at least one alias (itself), and that its listed
1148 * aliases map back to itself. Check some hard-coded UTF-8 and
1149 * ISO_2022 aliases to make sure they work.
1150 */
TestAlias(void)1151 static void TestAlias(void) {
1152 int32_t i, ncnv;
1153 UErrorCode status = U_ZERO_ERROR;
1154
1155 /* Predetermined aliases that we expect to map back to ISO_2022
1156 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
1157 const char* ISO_2022_NAMES[] =
1158 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1159 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1160 int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES);
1161 const char *UTF8_NAMES[] =
1162 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1163 "utf_8", "ibm1208", "cp1208" };
1164 int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES);
1165
1166 struct {
1167 const char *name;
1168 const char *alias;
1169 } CONVERTERS_NAMES[] = {
1170 { "UTF-32BE", "UTF32_BigEndian" },
1171 { "UTF-32LE", "UTF32_LittleEndian" },
1172 { "UTF-32", "ISO-10646-UCS-4" },
1173 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1174 { "UTF-32", "ucs-4" }
1175 };
1176 int32_t CONVERTERS_NAMES_LENGTH = UPRV_LENGTHOF(CONVERTERS_NAMES);
1177
1178 /* When there are bugs in gencnval or in ucnv_io, converters can
1179 appear to have no aliases. */
1180 ncnv = ucnv_countAvailable();
1181 log_verbose("%d converters\n", ncnv);
1182 for (i=0; i<ncnv; ++i) {
1183 const char *name = ucnv_getAvailableName(i);
1184 const char *alias0;
1185 uint16_t na = ucnv_countAliases(name, &status);
1186 uint16_t j;
1187 UConverter *cnv;
1188
1189 if (na == 0) {
1190 log_err("FAIL: Converter \"%s\" (i=%d)"
1191 " has no aliases; expect at least one\n",
1192 name, i);
1193 continue;
1194 }
1195 cnv = ucnv_open(name, &status);
1196 if (U_FAILURE(status)) {
1197 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1198 " can't be opened.\n",
1199 name, i);
1200 }
1201 else {
1202 if (strcmp(ucnv_getName(cnv, &status), name) != 0
1203 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) {
1204 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1205 "They should be the same\n",
1206 name, ucnv_getName(cnv, &status));
1207 }
1208 }
1209 ucnv_close(cnv);
1210
1211 status = U_ZERO_ERROR;
1212 alias0 = ucnv_getAlias(name, 0, &status);
1213 for (j=1; j<na; ++j) {
1214 const char *alias;
1215 /* Make sure each alias maps back to the the same list of
1216 aliases. Assume that if alias 0 is the same, the whole
1217 list is the same (this should always be true). */
1218 const char *mapBack;
1219
1220 status = U_ZERO_ERROR;
1221 alias = ucnv_getAlias(name, j, &status);
1222 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1223 log_err("FAIL: Converter \"%s\"is ambiguous\n", name);
1224 }
1225
1226 if (alias == NULL) {
1227 log_err("FAIL: Converter \"%s\" -> "
1228 "alias[%d]=NULL\n",
1229 name, j);
1230 continue;
1231 }
1232
1233 mapBack = ucnv_getAlias(alias, 0, &status);
1234
1235 if (mapBack == NULL) {
1236 log_err("FAIL: Converter \"%s\" -> "
1237 "alias[%d]=\"%s\" -> "
1238 "alias[0]=NULL, exp. \"%s\"\n",
1239 name, j, alias, alias0);
1240 continue;
1241 }
1242
1243 if (0 != strcmp(alias0, mapBack)) {
1244 int32_t idx;
1245 UBool foundAlias = false;
1246 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1247 /* Make sure that we only get this mismapping when there is
1248 an ambiguous alias, and the other converter has this alias too. */
1249 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) {
1250 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) {
1251 foundAlias = true;
1252 break;
1253 }
1254 }
1255 }
1256 /* else not ambiguous, and this is a real problem. foundAlias = false */
1257
1258 if (!foundAlias) {
1259 log_err("FAIL: Converter \"%s\" -> "
1260 "alias[%d]=\"%s\" -> "
1261 "alias[0]=\"%s\", exp. \"%s\"\n",
1262 name, j, alias, mapBack, alias0);
1263 }
1264 }
1265 }
1266 }
1267
1268
1269 /* Check a list of predetermined aliases that we expect to map
1270 * back to ISO_2022 and UTF-8. */
1271 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) {
1272 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status);
1273 if(!mapBack) {
1274 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]);
1275 continue;
1276 }
1277 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) {
1278 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1279 ISO_2022_NAMES[i], mapBack);
1280 }
1281 }
1282
1283
1284 for (i=1; i<UTF8_NAMES_LENGTH; ++i) {
1285 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status);
1286 if(!mapBack) {
1287 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]);
1288 continue;
1289 }
1290 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) {
1291 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1292 UTF8_NAMES[i], mapBack);
1293 }
1294 }
1295
1296 /*
1297 * Check a list of predetermined aliases that we expect to map
1298 * back to predermined converter names.
1299 */
1300
1301 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) {
1302 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status);
1303 if(!mapBack) {
1304 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name);
1305 continue;
1306 }
1307 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) {
1308 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1309 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name);
1310 }
1311 }
1312
1313 }
1314
TestDuplicateAlias(void)1315 static void TestDuplicateAlias(void) {
1316 const char *alias;
1317 UErrorCode status = U_ZERO_ERROR;
1318
1319 status = U_ZERO_ERROR;
1320 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status);
1321 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1322 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias);
1323 }
1324 status = U_ZERO_ERROR;
1325 alias = ucnv_getStandardName("ibm-943", "IANA", &status);
1326 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1327 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias);
1328 }
1329 status = U_ZERO_ERROR;
1330 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status);
1331 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) {
1332 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias);
1333 }
1334 }
1335
1336
1337 /* Test safe clone callback */
1338
/*
 * Hands out serial numbers for TSCCContext objects: 1 on the first call,
 * then 2, 3, ... on each following call. The counter is a function-local
 * static, so it persists for the lifetime of the program.
 */
static uint32_t TSCC_nextSerial(void)
{
    static uint32_t counter = 1;
    uint32_t issued = counter;

    counter += 1;
    return issued;
}
1345
/*
 * Context object passed to the TSCC_fromU / TSCC_toU test callbacks.
 * It carries no conversion state; it only lets the test observe which
 * context a converter holds and whether the callback saw UCNV_CLOSE.
 */
typedef struct
{
    uint32_t      magic;      /* 0xC0FFEE to identify that the object is OK; the callbacks and TSCC_print_log log an error on any other value */
    uint32_t      serial;     /* minted from nextSerial, above; printed in log output to tell contexts apart */
    UBool         wasClosed;  /* close happened on the object: set to true by the callbacks when called with UCNV_CLOSE */
} TSCCContext;
1352
/*
 * Creates the context for a cloned converter.
 *
 * Nothing is copied from ctx; the new object gets a fresh serial number,
 * the 0xC0FFEE magic value and wasClosed cleared. ctx is only read for
 * the log message.
 *
 * Returns the malloc()ed context, which the caller must release with
 * free(), or NULL when the allocation fails.
 */
static TSCCContext *TSCC_clone(TSCCContext *ctx)
{
    TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext));

    if (newCtx == NULL) {
        /* Both callers test the result for NULL, so report failure
           instead of writing through a null pointer. */
        return NULL;
    }

    newCtx->serial = TSCC_nextSerial();
    newCtx->wasClosed = 0;
    newCtx->magic = 0xC0FFEE;

    log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial);

    return newCtx;
}
1365
1366 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * From-Unicode test callback that tracks the life cycle of its context.
 *
 * context   - the TSCCContext registered with the converter
 * fromUArgs - callback arguments; only fromUArgs->converter is used
 * reason    - UCNV_CLONE: allocate a new context with TSCC_clone() and
 *             install it on fromUArgs->converter, keeping the callback
 *             function that is currently set;
 *             UCNV_CLOSE: mark the context as closed;
 *             any other reason: only the log line and the magic check run
 * err       - set to U_MEMORY_ALLOCATION_ERROR when the clone cannot be
 *             allocated, or to the error from ucnv_setFromUCallBack()
 *
 * codeUnits, length and codePoint are ignored. Returns early, after logging
 * an error, when the context does not carry the 0xC0FFEE magic value.
 */
static void TSCC_fromU(const void *context,
                        UConverterFromUnicodeArgs *fromUArgs,
                        const UChar* codeUnits,
                        int32_t length,
                        UChar32 codePoint,
                        UConverterCallbackReason reason,
                        UErrorCode * err)
{
    // suppress compiler warnings about unused variables
    (void)codeUnits;
    (void)length;
    (void)codePoint;

    TSCCContext *ctx = (TSCCContext*)context;
    UConverterFromUCallback junkFrom;

    log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter);

    if(ctx->magic != 0xC0FFEE) {
        log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
        return;
    }

    if(reason == UCNV_CLONE) {
        UErrorCode subErr = U_ZERO_ERROR;
        TSCCContext *newCtx;
        TSCCContext *junkCtx;
        TSCCContext **pjunkCtx = &junkCtx;

        /* "recreate" it */
        log_verbose("TSCC_fromU: cloning..\n");
        newCtx = TSCC_clone(ctx);

        if(newCtx == NULL) {
            log_err("TSCC_fromU: internal clone failed on %p\n", ctx);
            /* Do not install a NULL context: this callback dereferences its
               context unconditionally on every later invocation. */
            *err = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        /* now, SET it: fetch the current callback function so that only the context changes */
        ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, (const void**)pjunkCtx);
        ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);

        if(U_FAILURE(subErr)) {
            *err = subErr;
        }
    }

    if(reason == UCNV_CLOSE) {
        log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial);
        ctx->wasClosed = true;
    }
}
1418
/*
 * To-Unicode test callback that tracks the life cycle of its context.
 *
 * context - the TSCCContext registered with the converter
 * toUArgs - callback arguments; only toUArgs->converter is used
 * reason  - UCNV_CLONE: allocate a new context with TSCC_clone() and
 *           install it on toUArgs->converter, keeping the callback
 *           function that is currently set;
 *           UCNV_CLOSE: mark the context as closed;
 *           any other reason: only the log line and the magic check run
 * err     - set to U_MEMORY_ALLOCATION_ERROR when the clone cannot be
 *           allocated, or to the error from ucnv_setToUCallBack()
 *
 * codeUnits and length are ignored. Returns early, after logging an error,
 * when the context does not carry the 0xC0FFEE magic value.
 */
static void TSCC_toU(const void *context,
                        UConverterToUnicodeArgs *toUArgs,
                        const char* codeUnits,
                        int32_t length,
                        UConverterCallbackReason reason,
                        UErrorCode * err)
{
    // suppress compiler warnings about unused variables
    (void)codeUnits;
    (void)length;

    TSCCContext *ctx = (TSCCContext*)context;
    UConverterToUCallback junkTo;

    log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter);

    if(ctx->magic != 0xC0FFEE) {
        log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
        return;
    }

    if(reason == UCNV_CLONE) {
        UErrorCode subErr = U_ZERO_ERROR;
        TSCCContext *newCtx;
        TSCCContext *junkCtx;
        TSCCContext **pjunkCtx = &junkCtx;

        /* "recreate" it */
        log_verbose("TSCC_toU: cloning..\n");
        newCtx = TSCC_clone(ctx);

        if(newCtx == NULL) {
            log_err("TSCC_toU: internal clone failed on %p\n", ctx);
            /* Do not install a NULL context: this callback dereferences its
               context unconditionally on every later invocation. */
            *err = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        /* now, SET it: fetch the current callback function so that only the context changes */
        ucnv_getToUCallBack(toUArgs->converter, &junkTo, (const void**)pjunkCtx);
        ucnv_setToUCallBack(toUArgs->converter, junkTo, newCtx, NULL, NULL, &subErr);

        if(U_FAILURE(subErr)) {
            *err = subErr;
        }
    }

    if(reason == UCNV_CLOSE) {
        log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial);
        ctx->wasClosed = true;
    }
}
1468
/*
 * Puts a caller-provided context into its initial state: not closed,
 * a newly issued serial number, and the 0xC0FFEE validity marker.
 */
static void TSCC_init(TSCCContext *q)
{
    q->wasClosed = 0;
    q->serial    = TSCC_nextSerial();
    q->magic     = 0xC0FFEE;
}
1475
/*
 * Writes a verbose log line describing context q under the label name.
 * A NULL q is only reported in the verbose log; a context whose magic is
 * not 0xC0FFEE is additionally reported as a test error.
 */
static void TSCC_print_log(TSCCContext *q, const char *name)
{
    const char *state;

    if (q == NULL) {
        log_verbose("TSCContext: %s is NULL!!\n", name);
        return;
    }

    if (q->magic != 0xC0FFEE) {
        log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
                q,q->serial, q->magic);
    }

    state = q->wasClosed ? "CLOSED" : "open";
    log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
                q, q->serial, name, q->magic, state);
}
1489
TestConvertSafeCloneCallback(void)1490 static void TestConvertSafeCloneCallback(void)
1491 {
1492 UErrorCode err = U_ZERO_ERROR;
1493 TSCCContext from1, to1;
1494 TSCCContext *from2, *from3, *to2, *to3;
1495 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3;
1496 char hunk[8192];
1497 int32_t hunkSize = 8192;
1498 UConverterFromUCallback junkFrom;
1499 UConverterToUCallback junkTo;
1500 UConverter *conv1, *conv2 = NULL;
1501
1502 conv1 = ucnv_open("iso-8859-3", &err);
1503
1504 if(U_FAILURE(err)) {
1505 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err));
1506 return;
1507 }
1508
1509 log_verbose("Opened conv1=%p\n", conv1);
1510
1511 TSCC_init(&from1);
1512 TSCC_init(&to1);
1513
1514 TSCC_print_log(&from1, "from1");
1515 TSCC_print_log(&to1, "to1");
1516
1517 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err);
1518 log_verbose("Set from1 on conv1\n");
1519 TSCC_print_log(&from1, "from1");
1520
1521 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err);
1522 log_verbose("Set to1 on conv1\n");
1523 TSCC_print_log(&to1, "to1");
1524
1525 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err);
1526 if(U_FAILURE(err)) {
1527 log_err("safeClone failed: %s\n", u_errorName(err));
1528 return;
1529 }
1530 log_verbose("Cloned to conv2=%p.\n", conv2);
1531
1532 /********** from *********************/
1533 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2);
1534 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3);
1535
1536 TSCC_print_log(from2, "from2");
1537 TSCC_print_log(from3, "from3(==from1)");
1538
1539 if(from2 == NULL) {
1540 log_err("FAIL! from2 is null \n");
1541 return;
1542 }
1543
1544 if(from3 == NULL) {
1545 log_err("FAIL! from3 is null \n");
1546 return;
1547 }
1548
1549 if(from3 != (&from1) ) {
1550 log_err("FAIL! conv1's FROM context changed!\n");
1551 }
1552
1553 if(from2 == (&from1) ) {
1554 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1555 }
1556
1557 if(from1.wasClosed) {
1558 log_err("FAIL! from1 is closed \n");
1559 }
1560
1561 if(from2->wasClosed) {
1562 log_err("FAIL! from2 was closed\n");
1563 }
1564
1565 /********** to *********************/
1566 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2);
1567 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3);
1568
1569 TSCC_print_log(to2, "to2");
1570 TSCC_print_log(to3, "to3(==to1)");
1571
1572 if(to2 == NULL) {
1573 log_err("FAIL! to2 is null \n");
1574 return;
1575 }
1576
1577 if(to3 == NULL) {
1578 log_err("FAIL! to3 is null \n");
1579 return;
1580 }
1581
1582 if(to3 != (&to1) ) {
1583 log_err("FAIL! conv1's TO context changed!\n");
1584 }
1585
1586 if(to2 == (&to1) ) {
1587 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1588 }
1589
1590 if(to1.wasClosed) {
1591 log_err("FAIL! to1 is closed \n");
1592 }
1593
1594 if(to2->wasClosed) {
1595 log_err("FAIL! to2 was closed\n");
1596 }
1597
1598 /*************************************/
1599
1600 ucnv_close(conv1);
1601 log_verbose("ucnv_closed (conv1)\n");
1602 TSCC_print_log(&from1, "from1");
1603 TSCC_print_log(from2, "from2");
1604 TSCC_print_log(&to1, "to1");
1605 TSCC_print_log(to2, "to2");
1606
1607 if(from1.wasClosed == false) {
1608 log_err("FAIL! from1 is NOT closed \n");
1609 }
1610
1611 if(from2->wasClosed) {
1612 log_err("FAIL! from2 was closed\n");
1613 }
1614
1615 if(to1.wasClosed == false) {
1616 log_err("FAIL! to1 is NOT closed \n");
1617 }
1618
1619 if(to2->wasClosed) {
1620 log_err("FAIL! to2 was closed\n");
1621 }
1622
1623 ucnv_close(conv2);
1624 log_verbose("ucnv_closed (conv2)\n");
1625
1626 TSCC_print_log(&from1, "from1");
1627 TSCC_print_log(from2, "from2");
1628
1629 if(from1.wasClosed == false) {
1630 log_err("FAIL! from1 is NOT closed \n");
1631 }
1632
1633 if(from2->wasClosed == false) {
1634 log_err("FAIL! from2 was NOT closed\n");
1635 }
1636
1637 TSCC_print_log(&to1, "to1");
1638 TSCC_print_log(to2, "to2");
1639
1640 if(to1.wasClosed == false) {
1641 log_err("FAIL! to1 is NOT closed \n");
1642 }
1643
1644 if(to2->wasClosed == false) {
1645 log_err("FAIL! to2 was NOT closed\n");
1646 }
1647
1648 if(to2 != (&to1)) {
1649 free(to2); /* to1 is stack based */
1650 }
1651 if(from2 != (&from1)) {
1652 free(from2); /* from1 is stack based */
1653 }
1654 }
1655 #endif
1656
1657 static UBool
containsAnyOtherByte(uint8_t * p,int32_t length,uint8_t b)1658 containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) {
1659 while(length>0) {
1660 if(*p!=b) {
1661 return true;
1662 }
1663 ++p;
1664 --length;
1665 }
1666 return false;
1667 }
1668
TestConvertSafeClone(void)1669 static void TestConvertSafeClone(void)
1670 {
1671 /* one 'regular' & all the 'private stateful' converters */
1672 static const char *const names[] = {
1673 #if !UCONFIG_NO_LEGACY_CONVERSION
1674 "ibm-1047",
1675 "ISO_2022,locale=zh,version=1",
1676 #endif
1677 "SCSU",
1678 #if !UCONFIG_NO_LEGACY_CONVERSION
1679 "HZ",
1680 "lmbcs",
1681 "ISCII,version=0",
1682 "ISO_2022,locale=kr,version=1",
1683 "ISO_2022,locale=jp,version=2",
1684 #endif
1685 "BOCU-1",
1686 "UTF-7",
1687 #if !UCONFIG_NO_LEGACY_CONVERSION
1688 "IMAP-mailbox-name",
1689 "ibm-1047-s390"
1690 #else
1691 "IMAP=mailbox-name"
1692 #endif
1693 };
1694
1695 /* store the actual sizes of each converter */
1696 int32_t actualSizes[UPRV_LENGTHOF(names)];
1697
1698 static const int32_t bufferSizes[] = {
1699 U_CNV_SAFECLONE_BUFFERSIZE,
1700 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */
1701 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */
1702 };
1703
1704 char charBuffer[21]; /* Leave at an odd number for alignment testing */
1705 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE];
1706 int32_t bufferSize, maxBufferSize;
1707 const char *maxName;
1708 UConverter * cnv, *cnv2;
1709 UErrorCode err;
1710
1711 char *pCharBuffer;
1712 const char *pConstCharBuffer;
1713 const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer);
1714 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1715 UChar uniCharBuffer[20];
1716 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
1717 const char *pCharSource = charSourceBuffer;
1718 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
1719 UChar *pUCharTarget = uniCharBuffer;
1720 UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer);
1721 const UChar * pUniBuffer;
1722 const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer);
1723 int32_t idx, j;
1724
1725 err = U_ZERO_ERROR;
1726 cnv = ucnv_open(names[0], &err);
1727 if(U_SUCCESS(err)) {
1728 /* Check the various error & informational states: */
1729
1730 /* Null status - just returns NULL */
1731 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1732 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL))
1733 {
1734 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1735 }
1736 /* error status - should return 0 & keep error the same */
1737 err = U_MEMORY_ALLOCATION_ERROR;
1738 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
1739 {
1740 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1741 }
1742 err = U_ZERO_ERROR;
1743
1744 /* Null buffer size pointer is ok */
1745 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err))
1746 {
1747 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1748 }
1749 ucnv_close(cnv2);
1750 err = U_ZERO_ERROR;
1751
1752 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1753 bufferSize = 0;
1754 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
1755 {
1756 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1757 }
1758 /* Verify our define is large enough */
1759 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
1760 {
1761 log_err("FAIL: Pre-calculated buffer size is too small\n");
1762 }
1763 /* Verify we can use this run-time calculated size */
1764 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err))
1765 {
1766 log_err("FAIL: Converter can't be cloned with run-time size\n");
1767 }
1768 if (cnv2) {
1769 ucnv_close(cnv2);
1770 }
1771
1772 /* size one byte too small - should allocate & let us know */
1773 --bufferSize;
1774 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1775 {
1776 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1777 }
1778 if (cnv2) {
1779 ucnv_close(cnv2);
1780 }
1781
1782 err = U_ZERO_ERROR;
1783 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1784
1785 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */
1786 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1787 {
1788 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1789 }
1790 if (cnv2) {
1791 ucnv_close(cnv2);
1792 }
1793
1794 err = U_ZERO_ERROR;
1795
1796 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1797 if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1798 {
1799 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1800 }
1801
1802 ucnv_close(cnv);
1803 }
1804
1805 maxBufferSize = 0;
1806 maxName = "";
1807
1808 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1809
1810 for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) {
1811 for (idx = 0; idx < UPRV_LENGTHOF(names); idx++)
1812 {
1813 err = U_ZERO_ERROR;
1814 cnv = ucnv_open(names[idx], &err);
1815 if(U_FAILURE(err)) {
1816 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
1817 continue;
1818 }
1819
1820 if(j == 0) {
1821 /* preflight to get maxBufferSize */
1822 actualSizes[idx] = 0;
1823 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err);
1824 if(actualSizes[idx] > maxBufferSize) {
1825 maxBufferSize = actualSizes[idx];
1826 maxName = names[idx];
1827 }
1828 }
1829
1830 memset(buffer, 0xaa, sizeof(buffer));
1831
1832 bufferSize = bufferSizes[j];
1833 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err);
1834
1835 /* close the original immediately to make sure that the clone works by itself */
1836 ucnv_close(cnv);
1837
1838 if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)alignof(UConverter)) &&
1839 err == U_SAFECLONE_ALLOCATED_WARNING
1840 ) {
1841 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]);
1842 }
1843
1844 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1845 if(bufferSize <= bufferSizes[j]) {
1846 /* used the stack buffer */
1847 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) ||
1848 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa)
1849 ) {
1850 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1851 names[idx], bufferSize, bufferSizes[j]);
1852 }
1853 } else {
1854 /* heap-allocated the clone */
1855 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) {
1856 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1857 names[idx], bufferSize, bufferSizes[j]);
1858 }
1859 }
1860
1861 pCharBuffer = charBuffer;
1862 pUniBuffer = uniBuffer;
1863
1864 ucnv_fromUnicode(cnv2,
1865 &pCharBuffer,
1866 charBufferLimit,
1867 &pUniBuffer,
1868 uniBufferLimit,
1869 NULL,
1870 true,
1871 &err);
1872 if(U_FAILURE(err)){
1873 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
1874 }
1875 ucnv_toUnicode(cnv2,
1876 &pUCharTarget,
1877 pUCharTargetLimit,
1878 &pCharSource,
1879 pCharSourceLimit,
1880 NULL,
1881 true,
1882 &err
1883 );
1884
1885 if(U_FAILURE(err)){
1886 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
1887 }
1888
1889 pConstCharBuffer = charBuffer;
1890 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
1891 {
1892 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
1893 }
1894 ucnv_close(cnv2);
1895 }
1896 }
1897
1898 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1899 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1900 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) {
1901 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1902 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1903 }
1904 }
1905
1906
TestConvertClone(void)1907 static void TestConvertClone(void)
1908 {
1909 /* one 'regular' & all the 'private stateful' converters */
1910 static const char *const names[] = {
1911 #if !UCONFIG_NO_LEGACY_CONVERSION
1912 "ibm-1047",
1913 "ISO_2022,locale=zh,version=1",
1914 #endif
1915 "SCSU",
1916 #if !UCONFIG_NO_LEGACY_CONVERSION
1917 "HZ",
1918 "lmbcs",
1919 "ISCII,version=0",
1920 "ISO_2022,locale=kr,version=1",
1921 "ISO_2022,locale=jp,version=2",
1922 #endif
1923 "BOCU-1",
1924 "UTF-7",
1925 #if !UCONFIG_NO_LEGACY_CONVERSION
1926 "IMAP-mailbox-name",
1927 "ibm-1047-s390"
1928 #else
1929 "IMAP=mailbox-name"
1930 #endif
1931 };
1932
1933 char charBuffer[21]; /* Leave at an odd number for alignment testing */
1934 UConverter * cnv, *cnv2;
1935 UErrorCode err;
1936
1937 char *pCharBuffer;
1938 const char *pConstCharBuffer;
1939 const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer);
1940 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1941 UChar uniCharBuffer[20];
1942 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
1943 const char *pCharSource = charSourceBuffer;
1944 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
1945 UChar *pUCharTarget = uniCharBuffer;
1946 UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer);
1947 const UChar * pUniBuffer;
1948 const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer);
1949 int32_t idx;
1950
1951 err = U_ZERO_ERROR;
1952 cnv = ucnv_open(names[0], &err);
1953 if(U_SUCCESS(err)) {
1954 /* Check the various error & informational states: */
1955
1956 /* Null status - just returns NULL */
1957 if (NULL != ucnv_clone(cnv, NULL))
1958 {
1959 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1960 }
1961 /* error status - should return 0 & keep error the same */
1962 err = U_MEMORY_ALLOCATION_ERROR;
1963 if (NULL != ucnv_clone(cnv, &err) || err != U_MEMORY_ALLOCATION_ERROR)
1964 {
1965 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1966 }
1967 err = U_ZERO_ERROR;
1968
1969 /* Null buffer size pointer is ok */
1970 if (NULL == (cnv2 = ucnv_clone(cnv, &err)) || U_FAILURE(err))
1971 {
1972 log_err("FAIL: Failed to clone.\n");
1973 }
1974 ucnv_close(cnv2);
1975 err = U_ZERO_ERROR;
1976
1977 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1978 if (NULL != ucnv_clone(NULL, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1979 {
1980 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1981 }
1982
1983 ucnv_close(cnv);
1984 }
1985
1986 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1987 for (idx = 0; idx < UPRV_LENGTHOF(names); idx++)
1988 {
1989 err = U_ZERO_ERROR;
1990 cnv = ucnv_open(names[idx], &err);
1991 if(U_FAILURE(err)) {
1992 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
1993 continue;
1994 }
1995
1996 cnv2 = ucnv_clone(cnv, &err);
1997
1998 /* close the original immediately to make sure that the clone works by itself */
1999 ucnv_close(cnv);
2000
2001 pCharBuffer = charBuffer;
2002 pUniBuffer = uniBuffer;
2003
2004 ucnv_fromUnicode(cnv2,
2005 &pCharBuffer,
2006 charBufferLimit,
2007 &pUniBuffer,
2008 uniBufferLimit,
2009 NULL,
2010 true,
2011 &err);
2012 if(U_FAILURE(err)){
2013 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
2014 }
2015 ucnv_toUnicode(cnv2,
2016 &pUCharTarget,
2017 pUCharTargetLimit,
2018 &pCharSource,
2019 pCharSourceLimit,
2020 NULL,
2021 true,
2022 &err
2023 );
2024
2025 if(U_FAILURE(err)){
2026 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
2027 }
2028
2029 pConstCharBuffer = charBuffer;
2030 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
2031 {
2032 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
2033 }
2034 ucnv_close(cnv2);
2035 }
2036 }
2037
TestCCSID(void)2038 static void TestCCSID(void) {
2039 #if !UCONFIG_NO_LEGACY_CONVERSION
2040 UConverter *cnv;
2041 UErrorCode errorCode;
2042 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
2043 int32_t i, ccsid;
2044
2045 for(i=0; i<UPRV_LENGTHOF(ccsids); ++i) {
2046 ccsid=ccsids[i];
2047
2048 errorCode=U_ZERO_ERROR;
2049 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode);
2050 if(U_FAILURE(errorCode)) {
2051 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode));
2052 continue;
2053 }
2054
2055 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) {
2056 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode));
2057 }
2058
2059 /* skip gb18030(ccsid 1392) */
2060 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) {
2061 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode));
2062 }
2063
2064 ucnv_close(cnv);
2065 }
2066 #endif
2067 }
2068
2069 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
2070
2071 /* CHUNK_SIZE defined in common\ucnv.c: */
2072 #define CHUNK_SIZE 1024
2073
2074 static void bug1(void);
2075 static void bug2(void);
2076 static void bug3(void);
2077
2078 static void
TestJ932(void)2079 TestJ932(void)
2080 {
2081 bug1(); /* Unicode intermediate buffer straddle bug */
2082 bug2(); /* pre-flighting size incorrect caused by simple overflow */
2083 bug3(); /* pre-flighting size incorrect caused by expansion overflow */
2084 }
2085
2086 /*
2087 * jitterbug 932: test chunking boundary conditions in
2088
2089 int32_t ucnv_convert(const char *toConverterName,
2090 const char *fromConverterName,
2091 char *target,
2092 int32_t targetSize,
2093 const char *source,
2094 int32_t sourceSize,
2095 UErrorCode * err)
2096
2097 * See discussions on the icu mailing list in
2098 * 2001-April with the subject "converter 'flush' question".
2099 *
2100 * Bug report and test code provided by Edward J. Batutis.
2101 */
bug1(void)2102 static void bug1(void)
2103 {
2104 #if !UCONFIG_NO_LEGACY_CONVERSION
2105 char char_in[CHUNK_SIZE+32];
2106 char char_out[CHUNK_SIZE*2];
2107
2108 /* GB 18030 equivalent of U+10000 is 90308130 */
2109 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 };
2110
2111 UErrorCode err = U_ZERO_ERROR;
2112 int32_t i, test_seq_len = sizeof(test_seq);
2113
2114 /*
2115 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
2116 * until the straddle bug appears. I didn't want to hard-code everything so this test could
2117 * be expanded - however this is the only type of straddle bug I can think of at the moment -
2118 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
2119 * other Unicode sequences cause a bug since combining sequences are not supported by the
2120 * converters.
2121 */
2122
2123 for (i = test_seq_len; i >= 0; i--) {
2124 /* put character sequence into input buffer */
2125 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */
2126 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len);
2127
2128 /* do the conversion */
2129 ucnv_convert("us-ascii", /* out */
2130 "gb18030", /* in */
2131 char_out,
2132 sizeof(char_out),
2133 char_in,
2134 sizeof(char_in),
2135 &err);
2136
2137 /* bug1: */
2138 if (err == U_TRUNCATED_CHAR_FOUND) {
2139 /* this happens when surrogate pair straddles the intermediate buffer in
2140 * T_UConverter_fromCodepageToCodepage */
2141 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
2142 }
2143 }
2144 #endif
2145 }
2146
2147 /* bug2: pre-flighting loop bug: simple overflow causes bug */
bug2(void)2148 static void bug2(void)
2149 {
2150 /* US-ASCII "1234567890" */
2151 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
2152 #if !UCONFIG_ONLY_HTML_CONVERSION
2153 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2154 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30,
2155 0x00, 0x00, 0x00, 0x31,
2156 0x00, 0x00, 0x00, 0x32,
2157 0x00, 0x00, 0x00, 0x33,
2158 0x00, 0x00, 0x00, 0x34,
2159 0x00, 0x00, 0x00, 0x35,
2160 0x00, 0x00, 0x00, 0x36,
2161 0x00, 0x00, 0x00, 0x37,
2162 0x00, 0x00, 0x00, 0x38,
2163 0x00, 0x00, (char)0xf0, 0x00};
2164 #endif
2165
2166 static char target[5];
2167
2168 UErrorCode err = U_ZERO_ERROR;
2169 int32_t size;
2170
2171 /* do the conversion */
2172 size = ucnv_convert("iso-8859-1", /* out */
2173 "us-ascii", /* in */
2174 target,
2175 sizeof(target),
2176 source,
2177 sizeof(source),
2178 &err);
2179
2180 if ( size != 10 ) {
2181 /* bug2: size is 5, should be 10 */
2182 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size);
2183 }
2184
2185 #if !UCONFIG_ONLY_HTML_CONVERSION
2186 err = U_ZERO_ERROR;
2187 /* do the conversion */
2188 size = ucnv_convert("UTF-32BE", /* out */
2189 "UTF-8", /* in */
2190 target,
2191 sizeof(target),
2192 sourceUTF8,
2193 sizeof(sourceUTF8),
2194 &err);
2195
2196 if ( size != 32 ) {
2197 /* bug2: size is 5, should be 32 */
2198 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size);
2199 }
2200
2201 err = U_ZERO_ERROR;
2202 /* do the conversion */
2203 size = ucnv_convert("UTF-8", /* out */
2204 "UTF-32BE", /* in */
2205 target,
2206 sizeof(target),
2207 sourceUTF32,
2208 sizeof(sourceUTF32),
2209 &err);
2210
2211 if ( size != 12 ) {
2212 /* bug2: size is 5, should be 12 */
2213 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size);
2214 }
2215 #endif
2216 }
2217
2218 /*
2219 * bug3: when the characters expand going from source to target codepage
2220 * you get bug3 in addition to bug2
2221 */
bug3(void)2222 static void bug3(void)
2223 {
2224 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
2225 char char_in[CHUNK_SIZE*4];
2226 char target[5];
2227 UErrorCode err = U_ZERO_ERROR;
2228 int32_t size;
2229
2230 /*
2231 * first get the buggy size from bug2 then
2232 * compare it to buggy size with an expansion
2233 */
2234 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */
2235
2236 /* do the conversion */
2237 size = ucnv_convert("lmbcs", /* out */
2238 "us-ascii", /* in */
2239 target,
2240 sizeof(target),
2241 char_in,
2242 sizeof(char_in),
2243 &err);
2244
2245 if ( size != sizeof(char_in) ) {
2246 /*
2247 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2248 * in the converter?), should be CHUNK_SIZE*4
2249 *
2250 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2251 */
2252 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size);
2253 }
2254
2255 /*
2256 * now do the conversion with expansion
2257 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2258 */
2259 memset(char_in, 8, sizeof(char_in));
2260 err = U_ZERO_ERROR;
2261
2262 /* do the conversion */
2263 size = ucnv_convert("lmbcs", /* out */
2264 "us-ascii", /* in */
2265 target,
2266 sizeof(target),
2267 char_in,
2268 sizeof(char_in),
2269 &err);
2270
2271 /* expect 2X expansion */
2272 if ( size != sizeof(char_in) * 2 ) {
2273 /*
2274 * bug3:
2275 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2276 */
2277 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size);
2278 }
2279 #endif
2280 }
2281
2282 static void
convertExStreaming(UConverter * srcCnv,UConverter * targetCnv,const char * src,int32_t srcLength,const char * expectTarget,int32_t expectTargetLength,int32_t chunkSize,const char * testName,UErrorCode expectCode)2283 convertExStreaming(UConverter *srcCnv, UConverter *targetCnv,
2284 const char *src, int32_t srcLength,
2285 const char *expectTarget, int32_t expectTargetLength,
2286 int32_t chunkSize,
2287 const char *testName,
2288 UErrorCode expectCode) {
2289 UChar pivotBuffer[CHUNK_SIZE];
2290 UChar *pivotSource, *pivotTarget;
2291 const UChar *pivotLimit;
2292
2293 char targetBuffer[CHUNK_SIZE];
2294 char *target;
2295 const char *srcLimit, *finalSrcLimit, *targetLimit;
2296
2297 int32_t targetLength;
2298
2299 UBool flush;
2300
2301 UErrorCode errorCode;
2302
2303 /* setup */
2304 if(chunkSize>CHUNK_SIZE) {
2305 chunkSize=CHUNK_SIZE;
2306 }
2307
2308 pivotSource=pivotTarget=pivotBuffer;
2309 pivotLimit=pivotBuffer+chunkSize;
2310
2311 finalSrcLimit=src+srcLength;
2312 target=targetBuffer;
2313 targetLimit=targetBuffer+chunkSize;
2314
2315 ucnv_resetToUnicode(srcCnv);
2316 ucnv_resetFromUnicode(targetCnv);
2317
2318 errorCode=U_ZERO_ERROR;
2319 flush=false;
2320
2321 /* convert, streaming-style (both converters and pivot keep state) */
2322 for(;;) {
2323 /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
2324 if(src+chunkSize<=finalSrcLimit) {
2325 srcLimit=src+chunkSize;
2326 } else {
2327 srcLimit=finalSrcLimit;
2328 }
2329 ucnv_convertEx(targetCnv, srcCnv,
2330 &target, targetLimit,
2331 &src, srcLimit,
2332 pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
2333 false, flush, &errorCode);
2334 targetLength=(int32_t)(target-targetBuffer);
2335 if(target>targetLimit) {
2336 log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
2337 testName, chunkSize, target, targetLimit);
2338 break; /* TODO: major problem! */
2339 }
2340 if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
2341 /* continue converting another chunk */
2342 errorCode=U_ZERO_ERROR;
2343 if(targetLength+chunkSize<=(int32_t)sizeof(targetBuffer)) {
2344 targetLimit=target+chunkSize;
2345 } else {
2346 targetLimit=targetBuffer+(int32_t)sizeof(targetBuffer);
2347 }
2348 } else if(U_FAILURE(errorCode)) {
2349 /* failure */
2350 break;
2351 } else if(flush) {
2352 /* all done */
2353 break;
2354 } else if(src==finalSrcLimit && pivotSource==pivotTarget) {
2355 /* all consumed, now flush without input (separate from conversion for testing) */
2356 flush=true;
2357 }
2358 }
2359
2360 if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) {
2361 log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
2362 testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode));
2363 } else if(targetLength!=expectTargetLength) {
2364 log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
2365 testName, chunkSize, targetLength, expectTargetLength);
2366 } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) {
2367 log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
2368 testName, chunkSize);
2369 }
2370 }
2371
2372 static void
convertExMultiStreaming(UConverter * srcCnv,UConverter * targetCnv,const char * src,int32_t srcLength,const char * expectTarget,int32_t expectTargetLength,const char * testName,UErrorCode expectCode)2373 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv,
2374 const char *src, int32_t srcLength,
2375 const char *expectTarget, int32_t expectTargetLength,
2376 const char *testName,
2377 UErrorCode expectCode) {
2378 convertExStreaming(srcCnv, targetCnv,
2379 src, srcLength,
2380 expectTarget, expectTargetLength,
2381 1, testName, expectCode);
2382 convertExStreaming(srcCnv, targetCnv,
2383 src, srcLength,
2384 expectTarget, expectTargetLength,
2385 3, testName, expectCode);
2386 convertExStreaming(srcCnv, targetCnv,
2387 src, srcLength,
2388 expectTarget, expectTargetLength,
2389 7, testName, expectCode);
2390 }
2391
TestConvertEx(void)2392 static void TestConvertEx(void) {
2393 #if !UCONFIG_NO_LEGACY_CONVERSION
2394 static const uint8_t
2395 utf8[]={
2396 /* 4e00 30a1 ff61 0410 */
2397 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2398 },
2399 shiftJIS[]={
2400 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2401 },
2402 errorTarget[]={
2403 /*
2404 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2405 * SUB, SUB, 0x40, SUB, SUB, 0x40
2406 */
2407 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2408 };
2409
2410 char srcBuffer[100], targetBuffer[100];
2411
2412 const char *src;
2413 char *target;
2414
2415 UChar pivotBuffer[100];
2416 UChar *pivotSource, *pivotTarget;
2417
2418 UConverter *cnv1, *cnv2;
2419 UErrorCode errorCode;
2420
2421 errorCode=U_ZERO_ERROR;
2422 cnv1=ucnv_open("UTF-8", &errorCode);
2423 if(U_FAILURE(errorCode)) {
2424 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode));
2425 return;
2426 }
2427
2428 cnv2=ucnv_open("Shift-JIS", &errorCode);
2429 if(U_FAILURE(errorCode)) {
2430 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2431 ucnv_close(cnv1);
2432 return;
2433 }
2434
2435 /* test ucnv_convertEx() with streaming conversion style */
2436 convertExMultiStreaming(cnv1, cnv2,
2437 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS),
2438 "UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2439
2440 convertExMultiStreaming(cnv2, cnv1,
2441 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8),
2442 "Shift-JIS -> UTF-8", U_ZERO_ERROR);
2443
2444 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2445 convertExMultiStreaming(cnv1, cnv2,
2446 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget),
2447 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2448
2449 /* test some simple conversions */
2450
2451 /* NUL-terminated source and target */
2452 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2453 memcpy(srcBuffer, utf8, sizeof(utf8));
2454 srcBuffer[sizeof(utf8)]=0;
2455 src=srcBuffer;
2456 target=targetBuffer;
2457 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2458 NULL, NULL, NULL, NULL, true, true, &errorCode);
2459 if( errorCode!=U_ZERO_ERROR ||
2460 target-targetBuffer!=sizeof(shiftJIS) ||
2461 *target!=0 ||
2462 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2463 ) {
2464 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2465 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS));
2466 }
2467
2468 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2469 errorCode=U_AMBIGUOUS_ALIAS_WARNING;
2470 memset(targetBuffer, 0xff, sizeof(targetBuffer));
2471 src=srcBuffer;
2472 target=targetBuffer;
2473 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL,
2474 NULL, NULL, NULL, NULL, true, true, &errorCode);
2475 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2476 target-targetBuffer!=sizeof(shiftJIS) ||
2477 *target!=(char)0xff ||
2478 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2479 ) {
2480 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2481 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS));
2482 }
2483
2484 /* bad arguments */
2485 errorCode=U_MESSAGE_PARSE_ERROR;
2486 src=srcBuffer;
2487 target=targetBuffer;
2488 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2489 NULL, NULL, NULL, NULL, true, true, &errorCode);
2490 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2491 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
2492 }
2493
2494 /* pivotLimit==pivotStart */
2495 errorCode=U_ZERO_ERROR;
2496 pivotSource=pivotTarget=pivotBuffer;
2497 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2498 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, true, true, &errorCode);
2499 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2500 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode));
2501 }
2502
2503 /* *pivotSource==NULL */
2504 errorCode=U_ZERO_ERROR;
2505 pivotSource=NULL;
2506 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2507 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, true, true, &errorCode);
2508 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2509 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode));
2510 }
2511
2512 /* *source==NULL */
2513 errorCode=U_ZERO_ERROR;
2514 src=NULL;
2515 pivotSource=pivotBuffer;
2516 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2517 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, true, true, &errorCode);
2518 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2519 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode));
2520 }
2521
2522 /* streaming conversion without a pivot buffer */
2523 errorCode=U_ZERO_ERROR;
2524 src=srcBuffer;
2525 pivotSource=pivotBuffer;
2526 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2527 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, true, false, &errorCode);
2528 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2529 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode));
2530 }
2531
2532 ucnv_close(cnv1);
2533 ucnv_close(cnv2);
2534 #endif
2535 }
2536
/*
 * Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8().
 *
 * Table of ill-formed UTF-8 byte sequences, each stored as a NUL-terminated
 * string (none of them contains a 00 byte). The order of the entries is
 * significant because failure messages report the index into this table.
 */
static const char *const badUTF8[]={
    /* a lone trail byte */
    "\x80",

    /* truncated multi-byte sequences: lead byte only */
    "\xd0", "\xe0", "\xe1", "\xed", "\xee",
    "\xf0", "\xf1", "\xf4", "\xf8", "\xfc",

    /* truncated: lead byte plus one more byte */
    "\xe0\x80", "\xe0\xa0", "\xe1\x80", "\xed\x80", "\xed\xa0", "\xee\x80",
    "\xf0\x80", "\xf0\x90", "\xf1\x80", "\xf4\x80", "\xf4\x90",
    "\xf8\x80", "\xfc\x80",

    /* truncated: lead byte plus two more bytes */
    "\xf0\x80\x80", "\xf0\x90\x80", "\xf1\x80\x80",
    "\xf4\x80\x80", "\xf4\x90\x80",
    "\xf8\x80\x80", "\xfc\x80\x80",

    /* truncated: lead byte plus three more bytes */
    "\xf8\x80\x80\x80", "\xfc\x80\x80\x80",

    /* truncated: lead byte plus four more bytes */
    "\xfc\x80\x80\x80\x80",

    /* complete sequences but non-shortest forms or out of range etc. */
    "\xc0\x80",
    "\xe0\x80\x80",
    "\xed\xa0\x80",
    "\xf0\x80\x80\x80",
    "\xf4\x90\x80\x80",
    "\xf8\x80\x80\x80\x80",
    "\xfc\x80\x80\x80\x80\x80",
    "\xfe",
    "\xff"
};
2592
2593 #define ARG_CHAR_ARR_SIZE 8
2594
2595 /* get some character that can be converted and convert it */
getTestChar(UConverter * cnv,const char * converterName,char charUTF8[4],int32_t * pCharUTF8Length,char char0[ARG_CHAR_ARR_SIZE],int32_t * pChar0Length,char char1[ARG_CHAR_ARR_SIZE],int32_t * pChar1Length)2596 static UBool getTestChar(UConverter *cnv, const char *converterName,
2597 char charUTF8[4], int32_t *pCharUTF8Length,
2598 char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length,
2599 char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) {
2600 UChar utf16[U16_MAX_LENGTH];
2601 int32_t utf16Length;
2602
2603 const UChar *utf16Source;
2604 char *target;
2605
2606 USet *set;
2607 UChar32 c;
2608 UErrorCode errorCode;
2609
2610 errorCode=U_ZERO_ERROR;
2611 set=uset_open(1, 0);
2612 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2613 c=uset_charAt(set, uset_size(set)/2);
2614 uset_close(set);
2615
2616 utf16Length=0;
2617 U16_APPEND_UNSAFE(utf16, utf16Length, c);
2618 *pCharUTF8Length=0;
2619 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c);
2620
2621 utf16Source=utf16;
2622 target=char0;
2623 ucnv_fromUnicode(cnv,
2624 &target, char0+ARG_CHAR_ARR_SIZE,
2625 &utf16Source, utf16+utf16Length,
2626 NULL, false, &errorCode);
2627 *pChar0Length=(int32_t)(target-char0);
2628
2629 utf16Source=utf16;
2630 target=char1;
2631 ucnv_fromUnicode(cnv,
2632 &target, char1+ARG_CHAR_ARR_SIZE,
2633 &utf16Source, utf16+utf16Length,
2634 NULL, false, &errorCode);
2635 *pChar1Length=(int32_t)(target-char1);
2636
2637 if(U_FAILURE(errorCode)) {
2638 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode));
2639 return false;
2640 }
2641 return true;
2642 }
2643
isOneTruncatedUTF8(const char * s,int32_t length)2644 static UBool isOneTruncatedUTF8(const char *s, int32_t length) {
2645 if(length==0) {
2646 return false;
2647 } else if(length==1) {
2648 return U8_IS_LEAD(s[0]);
2649 } else {
2650 int32_t count=U8_COUNT_TRAIL_BYTES(s[0]);
2651 if(length<=count) {
2652 // 2 or more bytes, but fewer than the lead byte indicates.
2653 int32_t oneLength=0;
2654 U8_FWD_1(s, oneLength, length);
2655 // Truncated if we reach the end of the string.
2656 // Not true if the lead byte and first trail byte do not start a valid sequence,
2657 // e.g., E0 80 -> oneLength=1.
2658 return oneLength==length;
2659 }
2660 return false;
2661 }
2662 }
2663
/*
 * For every truncated sequence in badUTF8[]: convert "test character +
 * truncated sequence" from UTF-8 to cnv with the STOP callback and flush
 * set, expect U_TRUNCATED_CHAR_FOUND with a fully drained pivot, and check
 * that ucnv_getInvalidChars() hands back exactly the truncated bytes.
 * char0/char1 and their lengths are accepted for signature symmetry with
 * testFromBadUTF8() and are not used.
 */
static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
                                  char charUTF8[4], int32_t charUTF8Length,
                                  char char0[8], int32_t char0Length,
                                  char char1[8], int32_t char1Length) {
    // suppress compiler warnings about unused variables
    (void)char0;
    (void)char0Length;
    (void)char1;
    (void)char1Length;

    char input[16];         /* test character followed by one bad sequence */
    char converted[16];
    char rejected[8];
    UChar pivot[8];

    UErrorCode status=U_ZERO_ERROR;
    int32_t n;

    /* test truncated sequences */
    ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);

    /* the test character is the common prefix of every input string */
    memcpy(input, charUTF8, charUTF8Length);

    for(n=0; n<UPRV_LENGTHOF(badUTF8); ++n) {
        const char *bad=badUTF8[n];
        int32_t badLength=(int32_t)strlen(bad);
        const char *in;
        char *out;
        UChar *pivotIn, *pivotOut;
        int8_t rejectedLength;

        /* truncated sequence? */
        if(!isOneTruncatedUTF8(bad, badLength)) {
            continue;
        }

        /* assemble a string with the test character and the truncated sequence */
        memcpy(input+charUTF8Length, bad, badLength);

        /* convert and check the invalidChars */
        in=input;
        out=converted;
        pivotIn=pivotOut=pivot;
        status=U_ZERO_ERROR;
        ucnv_convertEx(cnv, utf8Cnv,
                       &out, converted+sizeof(converted),
                       &in, input+charUTF8Length+badLength,
                       pivot, &pivotIn, &pivotOut, pivot+UPRV_LENGTHOF(pivot),
                       true, true, /* reset & flush */
                       &status);

        if(status==U_TRUNCATED_CHAR_FOUND && pivotIn==pivot) {
            status=U_ZERO_ERROR;
            rejectedLength=(int8_t)sizeof(rejected);
            ucnv_getInvalidChars(utf8Cnv, rejected, &rejectedLength, &status);
            if(rejectedLength!=badLength || 0!=memcmp(rejected, bad, badLength)) {
                log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)n);
            }
        } else {
            log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(status), converterName, (long)n);
        }
    }
}
2735
/*
 * Convert one long UTF-8 string that alternates the test character with
 * every entry of badUTF8[] and expect, with the SKIP callback installed on
 * utf8Cnv, nothing but the converted test character repeated in the output.
 *
 * @param utf8Cnv        UTF-8 converter (source side); its toU callback is changed to SKIP
 * @param cnv            target converter
 * @param converterName  name of cnv, used to build the test label
 * @param charUTF8, charUTF8Length  the test character in UTF-8
 * @param char0, char0Length        expected output for the first test character
 * @param char1, char1Length        expected output for each following test character
 */
static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
                            char charUTF8[4], int32_t charUTF8Length,
                            char char0[8], int32_t char0Length,
                            char char1[8], int32_t char1Length) {
    char utf8[600], expect[600];
    int32_t utf8Length, expectLength;

    char testName[64];

    UErrorCode errorCode;
    int32_t i;

    errorCode=U_ZERO_ERROR;
    ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode);

    /*
     * assemble an input string with the test character between each
     * bad sequence,
     * and an expected string with repeated test character output
     */
    memcpy(utf8, charUTF8, charUTF8Length);
    utf8Length=charUTF8Length;

    memcpy(expect, char0, char0Length);
    expectLength=char0Length;

    for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) {
        int32_t length = (int32_t)strlen(badUTF8[i]);
        memcpy(utf8+utf8Length, badUTF8[i], length);
        utf8Length+=length;

        memcpy(utf8+utf8Length, charUTF8, charUTF8Length);
        utf8Length+=charUTF8Length;

        memcpy(expect+expectLength, char1, char1Length);
        expectLength+=char1Length;
    }

    /* expect that each bad UTF-8 sequence is detected and skipped */
    /*
     * Bounded formatting: a long converter name is truncated in the label
     * instead of overflowing testName as strcpy()+strcat() would.
     */
    snprintf(testName, sizeof(testName), "from bad UTF-8 to %s", converterName);

    convertExMultiStreaming(utf8Cnv, cnv,
                            utf8, utf8Length,
                            expect, expectLength,
                            testName,
                            U_ZERO_ERROR);
}
2784
2785 /* Test illegal UTF-8 input. */
TestConvertExFromUTF8(void)2786 static void TestConvertExFromUTF8(void) {
2787 static const char *const converterNames[]={
2788 #if !UCONFIG_NO_LEGACY_CONVERSION
2789 "windows-1252",
2790 "shift-jis",
2791 #endif
2792 "us-ascii",
2793 "iso-8859-1",
2794 "utf-8"
2795 };
2796
2797 UConverter *utf8Cnv, *cnv;
2798 UErrorCode errorCode;
2799 int32_t i;
2800
2801 /* fromUnicode versions of some character, from initial state and later */
2802 char charUTF8[4], char0[8], char1[8];
2803 int32_t charUTF8Length, char0Length, char1Length;
2804
2805 errorCode=U_ZERO_ERROR;
2806 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2807 if(U_FAILURE(errorCode)) {
2808 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2809 return;
2810 }
2811
2812 for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) {
2813 errorCode=U_ZERO_ERROR;
2814 cnv=ucnv_open(converterNames[i], &errorCode);
2815 if(U_FAILURE(errorCode)) {
2816 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode));
2817 continue;
2818 }
2819 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) {
2820 continue;
2821 }
2822 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2823 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2824 ucnv_close(cnv);
2825 }
2826 ucnv_close(utf8Cnv);
2827 }
2828
TestConvertExFromUTF8_C5F0(void)2829 static void TestConvertExFromUTF8_C5F0(void) {
2830 static const char *const converterNames[]={
2831 #if !UCONFIG_NO_LEGACY_CONVERSION
2832 "windows-1251",
2833 "shift-jis",
2834 #endif
2835 "us-ascii",
2836 "iso-8859-1",
2837 "utf-8"
2838 };
2839
2840 UConverter *utf8Cnv, *cnv;
2841 UErrorCode errorCode;
2842 int32_t i;
2843
2844 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 };
2845 /* Expect "��" (2x U+FFFD as decimal NCRs) */
2846 static const char twoNCRs[16]={
2847 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2848 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
2849 };
2850 static const char twoFFFD[6]={
2851 (char)0xef, (char)0xbf, (char)0xbd,
2852 (char)0xef, (char)0xbf, (char)0xbd
2853 };
2854 const char *expected;
2855 int32_t expectedLength;
2856 char dest[20]; /* longer than longest expectedLength */
2857
2858 const char *src;
2859 char *target;
2860
2861 UChar pivotBuffer[128];
2862 UChar *pivotSource, *pivotTarget;
2863
2864 errorCode=U_ZERO_ERROR;
2865 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2866 if(U_FAILURE(errorCode)) {
2867 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2868 return;
2869 }
2870
2871 for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) {
2872 errorCode=U_ZERO_ERROR;
2873 cnv=ucnv_open(converterNames[i], &errorCode);
2874 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
2875 NULL, NULL, &errorCode);
2876 if(U_FAILURE(errorCode)) {
2877 log_data_err("unable to open %s converter - %s\n",
2878 converterNames[i], u_errorName(errorCode));
2879 continue;
2880 }
2881 src=bad_utf8;
2882 target=dest;
2883 uprv_memset(dest, 9, sizeof(dest));
2884 if(i==UPRV_LENGTHOF(converterNames)-1) {
2885 /* conversion to UTF-8 yields two U+FFFD directly */
2886 expected=twoFFFD;
2887 expectedLength=6;
2888 } else {
2889 /* conversion to a non-Unicode charset yields two NCRs */
2890 expected=twoNCRs;
2891 expectedLength=16;
2892 }
2893 pivotBuffer[0]=0;
2894 pivotBuffer[1]=1;
2895 pivotBuffer[2]=2;
2896 pivotSource=pivotTarget=pivotBuffer;
2897 ucnv_convertEx(
2898 cnv, utf8Cnv,
2899 &target, dest+expectedLength,
2900 &src, bad_utf8+sizeof(bad_utf8),
2901 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer),
2902 true, true, &errorCode);
2903 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 ||
2904 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) ||
2905 dest[expectedLength]!=9
2906 ) {
2907 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]);
2908 }
2909 ucnv_close(cnv);
2910 }
2911 ucnv_close(utf8Cnv);
2912 }
2913
2914 static void
TestConvertAlgorithmic(void)2915 TestConvertAlgorithmic(void) {
2916 #if !UCONFIG_NO_LEGACY_CONVERSION
2917 static const uint8_t
2918 utf8[]={
2919 /* 4e00 30a1 ff61 0410 */
2920 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2921 },
2922 shiftJIS[]={
2923 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2924 },
2925 /*errorTarget[]={*/
2926 /*
2927 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2928 * SUB, SUB, 0x40, SUB, SUB, 0x40
2929 */
2930 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2931 /*},*/
2932 utf16[]={
2933 0xfe, 0xff /* BOM only, no text */
2934 };
2935 #if !UCONFIG_ONLY_HTML_CONVERSION
2936 static const uint8_t utf32[]={
2937 0xff, 0xfe, 0, 0 /* BOM only, no text */
2938 };
2939 #endif
2940
2941 char target[100], utf8NUL[100], shiftJISNUL[100];
2942
2943 UConverter *cnv;
2944 UErrorCode errorCode;
2945
2946 int32_t length;
2947
2948 errorCode=U_ZERO_ERROR;
2949 cnv=ucnv_open("Shift-JIS", &errorCode);
2950 if(U_FAILURE(errorCode)) {
2951 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2952 ucnv_close(cnv);
2953 return;
2954 }
2955
2956 memcpy(utf8NUL, utf8, sizeof(utf8));
2957 utf8NUL[sizeof(utf8)]=0;
2958 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS));
2959 shiftJISNUL[sizeof(shiftJIS)]=0;
2960
2961 /*
2962 * The to/from algorithmic convenience functions share a common implementation,
2963 * so we need not test all permutations of them.
2964 */
2965
2966 /* length in, not terminated out */
2967 errorCode=U_ZERO_ERROR;
2968 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode);
2969 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2970 length!=sizeof(shiftJIS) ||
2971 memcmp(target, shiftJIS, length)!=0
2972 ) {
2973 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2974 u_errorName(errorCode), length, sizeof(shiftJIS));
2975 }
2976
2977 /* terminated in and out */
2978 memset(target, 0x55, sizeof(target));
2979 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2980 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode);
2981 if( errorCode!=U_ZERO_ERROR ||
2982 length!=sizeof(utf8) ||
2983 memcmp(target, utf8, length)!=0
2984 ) {
2985 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2986 u_errorName(errorCode), length, sizeof(shiftJIS));
2987 }
2988
2989 /* empty string, some target buffer */
2990 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2991 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode);
2992 if( errorCode!=U_ZERO_ERROR ||
2993 length!=0
2994 ) {
2995 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2996 u_errorName(errorCode), length);
2997 }
2998
2999 /* pseudo-empty string, no target buffer */
3000 errorCode=U_ZERO_ERROR;
3001 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
3002 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
3003 length!=0
3004 ) {
3005 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
3006 u_errorName(errorCode), length);
3007 }
3008
3009 #if !UCONFIG_ONLY_HTML_CONVERSION
3010 errorCode=U_ZERO_ERROR;
3011 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode);
3012 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
3013 length!=0
3014 ) {
3015 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
3016 u_errorName(errorCode), length);
3017 }
3018 #endif
3019
3020 /* bad arguments */
3021 errorCode=U_MESSAGE_PARSE_ERROR;
3022 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
3023 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
3024 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
3025 }
3026
3027 /* source==NULL */
3028 errorCode=U_ZERO_ERROR;
3029 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode);
3030 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
3031 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode));
3032 }
3033
3034 /* illegal alg. type */
3035 errorCode=U_ZERO_ERROR;
3036 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode);
3037 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
3038 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode));
3039 }
3040 ucnv_close(cnv);
3041 #endif
3042 }
3043
3044 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
TestLMBCSMaxChar(void)3045 static void TestLMBCSMaxChar(void) {
3046 static const struct {
3047 int8_t maxSize;
3048 const char *name;
3049 } converter[] = {
3050 /* some non-LMBCS converters - perfect test setup here */
3051 { 1, "US-ASCII"},
3052 { 1, "ISO-8859-1"},
3053
3054 { 2, "UTF-16"},
3055 { 2, "UTF-16BE"},
3056 { 3, "UTF-8"},
3057 { 3, "CESU-8"},
3058 { 3, "SCSU"},
3059 { 4, "UTF-32"},
3060 { 4, "UTF-7"},
3061 { 4, "IMAP-mailbox-name"},
3062 { 4, "BOCU-1"},
3063
3064 { 1, "windows-1256"},
3065 { 2, "Shift-JIS"},
3066 { 2, "ibm-16684"},
3067 { 3, "ibm-930"},
3068 { 3, "ibm-1390"},
3069 { 4, "*test3"},
3070 { 16,"*test4"},
3071
3072 { 4, "ISCII"},
3073 { 4, "HZ"},
3074
3075 { 3, "ISO-2022"},
3076 { 8, "ISO-2022-KR"},
3077 { 6, "ISO-2022-JP"},
3078 { 8, "ISO-2022-CN"},
3079
3080 /* LMBCS */
3081 { 3, "LMBCS-1"},
3082 { 3, "LMBCS-2"},
3083 { 3, "LMBCS-3"},
3084 { 3, "LMBCS-4"},
3085 { 3, "LMBCS-5"},
3086 { 3, "LMBCS-6"},
3087 { 3, "LMBCS-8"},
3088 { 3, "LMBCS-11"},
3089 { 3, "LMBCS-16"},
3090 { 3, "LMBCS-17"},
3091 { 3, "LMBCS-18"},
3092 { 3, "LMBCS-19"}
3093 };
3094 int32_t idx;
3095
3096 for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) {
3097 UErrorCode status = U_ZERO_ERROR;
3098 UConverter *cnv = cnv_open(converter[idx].name, &status);
3099 if (U_FAILURE(status)) {
3100 continue;
3101 }
3102 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) {
3103 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
3104 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv));
3105 }
3106 ucnv_close(cnv);
3107 }
3108
3109 /* mostly test that the macro compiles */
3110 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
3111 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
3112 }
3113 }
3114 #endif
3115
TestJ1968(void)3116 static void TestJ1968(void) {
3117 UErrorCode err = U_ZERO_ERROR;
3118 UConverter *cnv;
3119 char myConvName[] = "My really really really really really really really really really really really"
3120 " really really really really really really really really really really really"
3121 " really really really really really really really really long converter name";
3122 UChar myConvNameU[sizeof(myConvName)];
3123
3124 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName));
3125
3126 err = U_ZERO_ERROR;
3127 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0;
3128 cnv = ucnv_openU(myConvNameU, &err);
3129 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3130 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3131 }
3132
3133 err = U_ZERO_ERROR;
3134 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
3135 cnv = ucnv_openU(myConvNameU, &err);
3136 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3137 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3138 }
3139
3140 err = U_ZERO_ERROR;
3141 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
3142 cnv = ucnv_openU(myConvNameU, &err);
3143 if (cnv || err != U_FILE_ACCESS_ERROR) {
3144 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3145 }
3146
3147
3148
3149
3150 err = U_ZERO_ERROR;
3151 cnv = ucnv_open(myConvName, &err);
3152 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3153 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3154 }
3155
3156 err = U_ZERO_ERROR;
3157 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ',';
3158 cnv = ucnv_open(myConvName, &err);
3159 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3160 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3161 }
3162
3163 err = U_ZERO_ERROR;
3164 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
3165 cnv = ucnv_open(myConvName, &err);
3166 if (cnv || err != U_FILE_ACCESS_ERROR) {
3167 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3168 }
3169
3170 err = U_ZERO_ERROR;
3171 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
3172 memcpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7);
3173 cnv = ucnv_open(myConvName, &err);
3174 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3175 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3176 }
3177
3178 /* The comma isn't really a part of the converter name. */
3179 err = U_ZERO_ERROR;
3180 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
3181 cnv = ucnv_open(myConvName, &err);
3182 if (cnv || err != U_FILE_ACCESS_ERROR) {
3183 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3184 }
3185
3186 err = U_ZERO_ERROR;
3187 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' ';
3188 cnv = ucnv_open(myConvName, &err);
3189 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3190 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3191 }
3192
3193 err = U_ZERO_ERROR;
3194 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
3195 cnv = ucnv_open(myConvName, &err);
3196 if (cnv || err != U_FILE_ACCESS_ERROR) {
3197 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3198 }
3199
3200 }
3201
3202 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Compares a converter opened as `name` with one opened as `name` plus
 * UCNV_SWAP_LFNL_OPTION_STRING.
 *
 * name: converter name to test.
 * swap: whether the converter is expected to honor the swap option. When true,
 *       the two converters' outputs must differ exactly by the LF/NL exchange
 *       (bytes 0x15/0x25, code points U+000A/U+0085); when false they must be
 *       identical.
 *
 * All failures are reported through log_err()/log_data_err(); both converters
 * are closed on every exit path via the cleanup label.
 */
static void
testSwap(const char *name, UBool swap) {
    /*
     * Test Unicode text.
     * Contains characters that are the highest for some of the
     * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
     * tables copies the entire tables.
     */
    static const UChar text[]={
        0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
    };

    UChar uNormal[32], uSwapped[32];
    /* swapped[] first holds the option-suffixed converter name, later the converted bytes */
    char normal[32], swapped[32];
    const UChar *pcu;
    UChar *pu;
    char *pc;
    int32_t i, normalLength, swappedLength;
    UChar u;
    char c;

    const char *swappedName;
    UConverter *cnv, *swapCnv;
    UErrorCode errorCode;

    /* if the swap flag is false, then the test encoding is not EBCDIC and must not swap */

    /* open both the normal and the LF/NL-swapping converters */
    strcpy(swapped, name);
    strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING);

    errorCode=U_ZERO_ERROR;
    swapCnv=ucnv_open(swapped, &errorCode);
    cnv=ucnv_open(name, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode));
        goto cleanup;
    }

    /* the name must contain the swap option if and only if we expect the converter to swap */
    swappedName=ucnv_getName(swapCnv, &errorCode);
    if(U_FAILURE(errorCode)) {
        log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode));
        goto cleanup;
    }

    pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING);
    if(swap != (pc!=NULL)) {
        log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap);
        goto cleanup;
    }

    /* convert to EBCDIC */
    pcu=text;
    pc=normal;
    ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, true, &errorCode);
    normalLength=(int32_t)(pc-normal);

    pcu=text;
    pc=swapped;
    ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, true, &errorCode);
    swappedLength=(int32_t)(pc-swapped);

    /* one check covers both conversions above */
    if(U_FAILURE(errorCode)) {
        log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode));
        goto cleanup;
    }

    /* compare EBCDIC output */
    if(normalLength!=swappedLength) {
        log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
        goto cleanup;
    }
    for(i=0; i<normalLength; ++i) {
        /* swap EBCDIC LF/NL for comparison */
        c=normal[i];
        if(swap) {
            if(c==0x15) {
                c=0x25;
            } else if(c==0x25) {
                c=0x15;
            }
        }

        if(c!=swapped[i]) {
            log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]);
            goto cleanup;
        }
    }

    /* convert back to Unicode (may not roundtrip) */
    pc=normal;
    pu=uNormal;
    ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, true, &errorCode);
    /* from here on normalLength counts UChars, no longer bytes */
    normalLength=(int32_t)(pu-uNormal);

    /*
     * The swapping converter reads the same normal[] bytes as the normal one.
     * swappedLength still holds the byte length, which was verified above to
     * equal the byte length of normal[].
     */
    pc=normal;
    pu=uSwapped;
    ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, true, &errorCode);
    swappedLength=(int32_t)(pu-uSwapped);

    if(U_FAILURE(errorCode)) {
        log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode));
        goto cleanup;
    }

    /* compare Unicode output */
    if(normalLength!=swappedLength) {
        log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
        goto cleanup;
    }
    for(i=0; i<normalLength; ++i) {
        /* swap Unicode LF/NL (U+000A/U+0085) for comparison */
        u=uNormal[i];
        if(swap) {
            if(u==0xa) {
                u=0x85;
            } else if(u==0x85) {
                u=0xa;
            }
        }

        if(u!=uSwapped[i]) {
            log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]);
            goto cleanup;
        }
    }

    /* clean up */
cleanup:
    ucnv_close(cnv);
    ucnv_close(swapCnv);
}
3336
3337 static void
TestEBCDICSwapLFNL(void)3338 TestEBCDICSwapLFNL(void) {
3339 static const struct {
3340 const char *name;
3341 UBool swap;
3342 } tests[]={
3343 { "ibm-37", true },
3344 { "ibm-1047", true },
3345 { "ibm-1140", true },
3346 { "ibm-930", true },
3347 { "iso-8859-3", false }
3348 };
3349
3350 int i;
3351
3352 for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
3353 testSwap(tests[i].name, tests[i].swap);
3354 }
3355 }
3356 #else
/*
 * Stand-in used when legacy conversion is compiled out: there is nothing to
 * test. Declared with an explicit (void) parameter list, like the other
 * variant of this function, so that the definition is a proper prototype.
 */
static void
TestEBCDICSwapLFNL(void) {
    /* test nothing... */
}
3361 #endif
3362
TestFromUCountPending(void)3363 static void TestFromUCountPending(void){
3364 #if !UCONFIG_NO_LEGACY_CONVERSION
3365 UErrorCode status = U_ZERO_ERROR;
3366 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3367 static const struct {
3368 UChar input[6];
3369 int32_t len;
3370 int32_t exp;
3371 }fromUnicodeTests[] = {
3372 /*m:n conversion*/
3373 {{0xdbc4},1,1},
3374 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3375 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3376 };
3377 int i;
3378 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3379 if(U_FAILURE(status)){
3380 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3381 return;
3382 }
3383 for(i=0; i<UPRV_LENGTHOF(fromUnicodeTests); ++i) {
3384 char tgt[10];
3385 char* target = tgt;
3386 char* targetLimit = target + 10;
3387 const UChar* source = fromUnicodeTests[i].input;
3388 const UChar* sourceLimit = source + fromUnicodeTests[i].len;
3389 int32_t len = 0;
3390 ucnv_reset(cnv);
3391 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3392 len = ucnv_fromUCountPending(cnv, &status);
3393 if(U_FAILURE(status)){
3394 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3395 status = U_ZERO_ERROR;
3396 continue;
3397 }
3398 if(len != fromUnicodeTests[i].exp){
3399 log_err("Did not get the expected output for ucnv_fromUInputConsumed.\n");
3400 }
3401 }
3402 status = U_ZERO_ERROR;
3403 {
3404 /*
3405 * The converter has to read the tail before it knows that
3406 * only head alone matches.
3407 * At the end, the output for head will overflow the target,
3408 * middle will be pending, and tail will not have been consumed.
3409 */
3410 /*
3411 \U00101234 -> x (<U101234> \x07 |0)
3412 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3413 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3414 \U00060007 -> unassigned
3415 */
3416 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3417 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3418 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */
3419 char tgt[10];
3420 char* target = tgt;
3421 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */
3422 const UChar* source = head;
3423 const UChar* sourceLimit = source + u_strlen(head);
3424 int32_t len = 0;
3425 ucnv_reset(cnv);
3426 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3427 len = ucnv_fromUCountPending(cnv, &status);
3428 if(U_FAILURE(status)){
3429 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3430 status = U_ZERO_ERROR;
3431 }
3432 if(len!=4){
3433 log_err("ucnv_fromUInputHeld did not return correct length for head\n");
3434 }
3435 source = middle;
3436 sourceLimit = source + u_strlen(middle);
3437 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3438 len = ucnv_fromUCountPending(cnv, &status);
3439 if(U_FAILURE(status)){
3440 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3441 status = U_ZERO_ERROR;
3442 }
3443 if(len!=5){
3444 log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
3445 }
3446 source = tail;
3447 sourceLimit = source + u_strlen(tail);
3448 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3449 if(status != U_BUFFER_OVERFLOW_ERROR){
3450 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3451 }
3452 status = U_ZERO_ERROR;
3453 len = ucnv_fromUCountPending(cnv, &status);
3454 /* middle[1] is pending, tail has not been consumed */
3455 if(U_FAILURE(status)){
3456 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status));
3457 }
3458 if(len!=1){
3459 log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
3460 }
3461 }
3462 ucnv_close(cnv);
3463 #endif
3464 }
3465
3466 static void
TestToUCountPending(void)3467 TestToUCountPending(void){
3468 #if !UCONFIG_NO_LEGACY_CONVERSION
3469 UErrorCode status = U_ZERO_ERROR;
3470 static const struct {
3471 char input[6];
3472 int32_t len;
3473 int32_t exp;
3474 }toUnicodeTests[] = {
3475 /*m:n conversion*/
3476 {{0x05, 0x01, 0x02},3,3},
3477 {{0x01, 0x02},2,2},
3478 {{0x07, 0x00, 0x01, 0x02},4,4},
3479 };
3480
3481 int i;
3482 UConverterToUCallback *oldToUAction= NULL;
3483 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3484 if(U_FAILURE(status)){
3485 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3486 return;
3487 }
3488 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3489 for(i=0; i<UPRV_LENGTHOF(toUnicodeTests); ++i) {
3490 UChar tgt[20];
3491 UChar* target = tgt;
3492 UChar* targetLimit = target + 20;
3493 const char* source = toUnicodeTests[i].input;
3494 const char* sourceLimit = source + toUnicodeTests[i].len;
3495 int32_t len = 0;
3496 ucnv_reset(cnv);
3497 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, false, &status);
3498 len = ucnv_toUCountPending(cnv,&status);
3499 if(U_FAILURE(status)){
3500 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3501 status = U_ZERO_ERROR;
3502 continue;
3503 }
3504 if(len != toUnicodeTests[i].exp){
3505 log_err("Did not get the expected output for ucnv_toUInputConsumed.\n");
3506 }
3507 }
3508 status = U_ZERO_ERROR;
3509 ucnv_close(cnv);
3510
3511 {
3512 /*
3513 * The converter has to read the tail before it knows that
3514 * only head alone matches.
3515 * At the end, the output for head will overflow the target,
3516 * mid will be pending, and tail will not have been consumed.
3517 */
3518 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3519 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3520 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3521 /*
3522 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3523 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3524 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3525 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
3526 */
3527 UChar tgt[10];
3528 UChar* target = tgt;
3529 UChar* targetLimit = target + 1; /* expect overflow from converting */
3530 const char* source = head;
3531 const char* sourceLimit = source + strlen(head);
3532 int32_t len = 0;
3533 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status);
3534 if(U_FAILURE(status)){
3535 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3536 return;
3537 }
3538 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3539 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3540 len = ucnv_toUCountPending(cnv,&status);
3541 if(U_FAILURE(status)){
3542 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3543 }
3544 if(len != 4){
3545 log_err("Did not get the expected len for head.\n");
3546 }
3547 source=mid;
3548 sourceLimit = source+strlen(mid);
3549 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3550 len = ucnv_toUCountPending(cnv,&status);
3551 if(U_FAILURE(status)){
3552 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3553 }
3554 if(len != 8){
3555 log_err("Did not get the expected len for mid.\n");
3556 }
3557
3558 source=tail;
3559 sourceLimit = source+strlen(tail);
3560 targetLimit = target;
3561 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3562 if(status != U_BUFFER_OVERFLOW_ERROR){
3563 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3564 }
3565 status = U_ZERO_ERROR;
3566 len = ucnv_toUCountPending(cnv,&status);
3567 /* mid[4] is pending, tail has not been consumed */
3568 if(U_FAILURE(status)){
3569 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status));
3570 }
3571 if(len != 4){
3572 log_err("Did not get the expected len for tail.\n");
3573 }
3574 ucnv_close(cnv);
3575 }
3576 #endif
3577 }
3578
TestOneDefaultNameChange(const char * name,const char * expected)3579 static void TestOneDefaultNameChange(const char *name, const char *expected) {
3580 UErrorCode status = U_ZERO_ERROR;
3581 UConverter *cnv;
3582 ucnv_setDefaultName(name);
3583 if(strcmp(ucnv_getDefaultName(), expected)==0)
3584 log_verbose("setDefaultName of %s works.\n", name);
3585 else
3586 log_err("setDefaultName of %s failed\n", name);
3587 cnv=ucnv_open(NULL, &status);
3588 if (U_FAILURE(status) || cnv == NULL) {
3589 log_err("opening the default converter of %s failed\n", name);
3590 return;
3591 }
3592 if(strcmp(ucnv_getName(cnv, &status), expected)==0)
3593 log_verbose("ucnv_getName of %s works.\n", name);
3594 else
3595 log_err("ucnv_getName of %s failed\n", name);
3596 ucnv_close(cnv);
3597 }
3598
TestDefaultName(void)3599 static void TestDefaultName(void) {
3600 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/
3601 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1];
3602 strcpy(defaultName, ucnv_getDefaultName());
3603
3604 log_verbose("getDefaultName returned %s\n", defaultName);
3605
3606 /*change the default name by setting it */
3607 TestOneDefaultNameChange("UTF-8", "UTF-8");
3608 #if U_CHARSET_IS_UTF8
3609 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3610 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3611 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
3612 #else
3613 # if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
3614 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3615 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3616 # endif
3617 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3618 #endif
3619
3620 /*set the default name back*/
3621 ucnv_setDefaultName(defaultName);
3622 }
3623
3624 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
3625
/* Reduces n to -1, 0 or 1 according to whether it is negative, zero or positive. */
static int
sign(int n) {
    /* each comparison yields 0 or 1, so the difference is exactly -1, 0 or 1 */
    return (n>0)-(n<0);
}
3636
/*
 * Checks ucnv_compareNames() on every adjacent pair of a NULL-terminated
 * name list.
 *
 * names[0] is a relation string whose first character selects the expected
 * sign of each comparison: '=' for 0, '<' for -1, anything else for 1.
 * names[1..] are the names; each one is compared with its successor.
 */
static void
compareNames(const char **names) {
    const char *left, *right;
    int expectedSign, cmp;
    int32_t idx;

    switch(*names[0]) {
    case '=':
        expectedSign = 0;
        break;
    case '<':
        expectedSign = -1;
        break;
    default:
        expectedSign = 1;
        break;
    }

    if(names[1]==NULL) {
        /* no names at all: nothing to compare */
        return;
    }
    for(idx=2; names[idx]!=NULL; ++idx) {
        left=names[idx-1];
        right=names[idx];
        cmp=ucnv_compareNames(left, right);
        if(sign(cmp)!=expectedSign) {
            log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", left, right, cmp, expectedSign);
        }
    }
}
3663
/*
 * Feeds compareNames() four name lists: two whose entries must all compare
 * equal and two whose entries must compare in strictly ascending order.
 */
static void
TestCompareNames(void) {
    static const char *equalUTF8[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL };
    static const char *equalIBM[]={ "=", "ibm-37", "IBM037", "i-B-m 00037", "ibm-0037", "IBM00037", NULL };
    static const char *lessMac[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL };
    static const char *lessUTF080[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL };

    /* the lists are processed in this order */
    const char **const lists[]={ equalUTF8, equalIBM, lessMac, lessUTF080 };
    int32_t which;

    for(which=0; which<UPRV_LENGTHOF(lists); ++which) {
        compareNames(lists[which]);
    }
}
3676
3677 static void
TestSubstString(void)3678 TestSubstString(void) {
3679 static const UChar surrogate[1]={ 0xd900 };
3680 char buffer[16];
3681
3682 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3683 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3684 UConverter *cnv;
3685 UErrorCode errorCode;
3686 int32_t length;
3687 int8_t len8;
3688
3689 /* UTF-16/32: test that the BOM is output before the sub character */
3690 errorCode=U_ZERO_ERROR;
3691 cnv=ucnv_open("UTF-16", &errorCode);
3692 if(U_FAILURE(errorCode)) {
3693 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
3694 return;
3695 }
3696 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3697 ucnv_close(cnv);
3698 if(U_FAILURE(errorCode) ||
3699 length!=4 ||
3700 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3701 ) {
3702 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
3703 }
3704
3705 errorCode=U_ZERO_ERROR;
3706 cnv=ucnv_open("UTF-32", &errorCode);
3707 if(U_FAILURE(errorCode)) {
3708 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
3709 return;
3710 }
3711 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3712 ucnv_close(cnv);
3713 if(U_FAILURE(errorCode) ||
3714 length!=8 ||
3715 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3716 ) {
3717 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3718 }
3719
3720 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3721 errorCode=U_ZERO_ERROR;
3722 cnv=ucnv_open("ISO-8859-1", &errorCode);
3723 if(U_FAILURE(errorCode)) {
3724 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
3725 return;
3726 }
3727 ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode);
3728 if(U_FAILURE(errorCode)) {
3729 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode));
3730 } else {
3731 len8 = sizeof(buffer);
3732 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3733 /* Stateless converter, we expect the string converted to charset bytes. */
3734 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) {
3735 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode));
3736 }
3737 }
3738 ucnv_close(cnv);
3739
3740 #if !UCONFIG_NO_LEGACY_CONVERSION
3741 errorCode=U_ZERO_ERROR;
3742 cnv=ucnv_open("HZ", &errorCode);
3743 if(U_FAILURE(errorCode)) {
3744 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
3745 return;
3746 }
3747 ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode);
3748 if(U_FAILURE(errorCode)) {
3749 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode));
3750 } else {
3751 len8 = sizeof(buffer);
3752 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3753 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3754 if(U_FAILURE(errorCode) || len8!=0) {
3755 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode));
3756 }
3757 }
3758 ucnv_close(cnv);
3759 #endif
3760 /*
3761 * Further testing of ucnv_setSubstString() is done via intltest convert.
3762 * We do not test edge cases of illegal arguments and similar because the
3763 * function implementation uses all of its parameters in calls to other
3764 * functions with UErrorCode parameters.
3765 */
3766 }
3767
3768 static void
InvalidArguments(void)3769 InvalidArguments(void) {
3770 UConverter *cnv;
3771 UErrorCode errorCode;
3772 char charBuffer[2] = {1, 1};
3773 char ucharAsCharBuffer[2] = {2, 2};
3774 char *charsPtr = charBuffer;
3775 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer;
3776 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1);
3777
3778 errorCode=U_ZERO_ERROR;
3779 cnv=ucnv_open("UTF-8", &errorCode);
3780 if(U_FAILURE(errorCode)) {
3781 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode));
3782 return;
3783 }
3784
3785 errorCode=U_ZERO_ERROR;
3786 /* This one should fail because an incomplete UChar is being passed in */
3787 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, true, &errorCode);
3788 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3789 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3790 }
3791
3792 errorCode=U_ZERO_ERROR;
3793 /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3794 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, true, &errorCode);
3795 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3796 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3797 }
3798
3799 errorCode=U_ZERO_ERROR;
3800 /* This one should fail because an incomplete UChar is being passed in */
3801 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, true, &errorCode);
3802 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3803 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3804 }
3805
3806 errorCode=U_ZERO_ERROR;
3807 /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3808 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, true, &errorCode);
3809 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3810 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3811 }
3812
3813 if (charBuffer[0] != 1 || charBuffer[1] != 1
3814 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2)
3815 {
3816 log_err("Data was incorrectly written to buffers\n");
3817 }
3818
3819 ucnv_close(cnv);
3820 }
3821
TestGetName(void)3822 static void TestGetName(void) {
3823 static const char *const names[] = {
3824 "Unicode", "UTF-16",
3825 "UnicodeBigUnmarked", "UTF-16BE",
3826 "UnicodeBig", "UTF-16BE,version=1",
3827 "UnicodeLittleUnmarked", "UTF-16LE",
3828 "UnicodeLittle", "UTF-16LE,version=1",
3829 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
3830 };
3831 int32_t i;
3832 for(i = 0; i < UPRV_LENGTHOF(names); i += 2) {
3833 UErrorCode errorCode = U_ZERO_ERROR;
3834 UConverter *cnv = ucnv_open(names[i], &errorCode);
3835 if(U_SUCCESS(errorCode)) {
3836 const char *name = ucnv_getName(cnv, &errorCode);
3837 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) {
3838 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3839 names[i], name, names[i+1], u_errorName(errorCode));
3840 }
3841 ucnv_close(cnv);
3842 }
3843 }
3844 }
3845
TestUTFBOM(void)3846 static void TestUTFBOM(void) {
3847 static const UChar a16[] = { 0x61 };
3848 static const char *const names[] = {
3849 "UTF-16",
3850 "UTF-16,version=1",
3851 "UTF-16BE",
3852 "UnicodeBig",
3853 "UTF-16LE",
3854 "UnicodeLittle"
3855 };
3856 static const uint8_t expected[][5] = {
3857 #if U_IS_BIG_ENDIAN
3858 { 4, 0xfe, 0xff, 0, 0x61 },
3859 { 4, 0xfe, 0xff, 0, 0x61 },
3860 #else
3861 { 4, 0xff, 0xfe, 0x61, 0 },
3862 { 4, 0xff, 0xfe, 0x61, 0 },
3863 #endif
3864
3865 { 2, 0, 0x61 },
3866 { 4, 0xfe, 0xff, 0, 0x61 },
3867
3868 { 2, 0x61, 0 },
3869 { 4, 0xff, 0xfe, 0x61, 0 }
3870 };
3871
3872 char bytes[10];
3873 int32_t i;
3874
3875 for(i = 0; i < UPRV_LENGTHOF(names); ++i) {
3876 UErrorCode errorCode = U_ZERO_ERROR;
3877 UConverter *cnv = ucnv_open(names[i], &errorCode);
3878 int32_t length = 0;
3879 const uint8_t *exp = expected[i];
3880 if (U_FAILURE(errorCode)) {
3881 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode));
3882 continue;
3883 }
3884 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode);
3885
3886 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) {
3887 log_err("unexpected %s BOM writing behavior -- %s\n",
3888 names[i], u_errorName(errorCode));
3889 }
3890 ucnv_close(cnv);
3891 }
3892 }
3893