1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*****************************************************************************
9 *
10 * File ccapitst.c
11 *
12 * Modification History:
13 * Name Description
14 * Madhu Katragadda Ported for C API
15 ******************************************************************************
16 */
17
18 #include <stdbool.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <ctype.h>
23 #include "unicode/uloc.h"
24 #include "unicode/ucnv.h"
25 #include "unicode/ucnv_err.h"
26 #include "unicode/putil.h"
27 #include "unicode/uset.h"
28 #include "unicode/ustring.h"
29 #include "unicode/utf8.h"
30 #include "ucnv_bld.h" /* for sizeof(UConverter) */
31 #include "cmemory.h" /* for UAlignedMemory */
32 #include "cintltst.h"
33 #include "ccapitst.h"
34 #include "cstring.h"
35
/* Number of entries in each per-codepage table used by TestConvert(). */
#define NUM_CODEPAGE 1
/*
 * Element count of the conversion buffers allocated in TestConvert().
 * The replacement list is parenthesized so that the product stays a single
 * operand no matter which operators surround the macro at the point of use.
 */
#define MAX_FILE_LEN (1024*20)
/* Size in chars of the buffer that holds the test data file name. */
#define UCS_FILE_NAME_SIZE 512

/* Similar to C++ alignof(type) */
#define ALIGNOF(type) offsetof (struct { char c; type member; }, member)
42
43 /*returns an action other than the one provided*/
44 #if !UCONFIG_NO_LEGACY_CONVERSION
45 static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA);
46 static UConverterToUCallback otherCharAction(UConverterToUCallback MIA);
47 #endif
48
49 static UConverter *
cnv_open(const char * name,UErrorCode * pErrorCode)50 cnv_open(const char *name, UErrorCode *pErrorCode) {
51 if(name!=NULL && name[0]=='*') {
52 return ucnv_openPackage(loadTestData(pErrorCode), name+1, pErrorCode);
53 } else {
54 return ucnv_open(name, pErrorCode);
55 }
56 }
57
58
59 static void ListNames(void);
60 static void TestFlushCache(void);
61 static void TestDuplicateAlias(void);
62 static void TestCCSID(void);
63 static void TestJ932(void);
64 static void TestJ1968(void);
65 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
66 static void TestLMBCSMaxChar(void);
67 #endif
68
69 static void TestConvertClone(void);
70 #if !UCONFIG_NO_LEGACY_CONVERSION
71 static void TestConvertSafeCloneCallback(void);
72 #endif
73
74 static void TestEBCDICSwapLFNL(void);
75 static void TestConvertEx(void);
76 static void TestConvertExFromUTF8(void);
77 static void TestConvertExFromUTF8_C5F0(void);
78 static void TestConvertAlgorithmic(void);
79 void TestDefaultConverterError(void); /* defined in cctest.c */
80 void TestDefaultConverterSet(void); /* defined in cctest.c */
81 static void TestToUCountPending(void);
82 static void TestFromUCountPending(void);
83 static void TestDefaultName(void);
84 static void TestCompareNames(void);
85 static void TestSubstString(void);
86 static void InvalidArguments(void);
87 static void TestGetName(void);
88 static void TestUTFBOM(void);
89
90 void addTestConvert(TestNode** root);
91
/*
 * Registers the converter C API tests of this file with the test tree.
 *
 * Each test function is added below the "tsconv/ccapitst/" path under its
 * own function name. Tests that depend on legacy conversion data or on file
 * I/O are only registered when the corresponding UCONFIG_NO_* switch is off.
 *
 * root  root of the test tree, passed through to addTest()
 */
void addTestConvert(TestNode** root)
{
/* Adds test function fn under the path "tsconv/ccapitst/<fn>". */
#define ADD_CCAPI_TEST(fn) addTest(root, &fn, "tsconv/ccapitst/" #fn)

    ADD_CCAPI_TEST(ListNames);
    ADD_CCAPI_TEST(TestConvert);
    ADD_CCAPI_TEST(TestFlushCache);
    ADD_CCAPI_TEST(TestAlias);
    ADD_CCAPI_TEST(TestDuplicateAlias);
    ADD_CCAPI_TEST(TestConvertSafeClone);
    ADD_CCAPI_TEST(TestConvertClone);
#if !UCONFIG_NO_LEGACY_CONVERSION
    ADD_CCAPI_TEST(TestConvertSafeCloneCallback);
#endif
    ADD_CCAPI_TEST(TestCCSID);
    ADD_CCAPI_TEST(TestJ932);
    ADD_CCAPI_TEST(TestJ1968);
#if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
    ADD_CCAPI_TEST(TestLMBCSMaxChar);
#endif
    ADD_CCAPI_TEST(TestEBCDICSwapLFNL);
    ADD_CCAPI_TEST(TestConvertEx);
    ADD_CCAPI_TEST(TestConvertExFromUTF8);
    ADD_CCAPI_TEST(TestConvertExFromUTF8_C5F0);
    ADD_CCAPI_TEST(TestConvertAlgorithmic);
    ADD_CCAPI_TEST(TestDefaultConverterError);
    ADD_CCAPI_TEST(TestDefaultConverterSet);
#if !UCONFIG_NO_FILE_IO
    ADD_CCAPI_TEST(TestToUCountPending);
    ADD_CCAPI_TEST(TestFromUCountPending);
#endif
    ADD_CCAPI_TEST(TestDefaultName);
    ADD_CCAPI_TEST(TestCompareNames);
    ADD_CCAPI_TEST(TestSubstString);
    ADD_CCAPI_TEST(InvalidArguments);
    ADD_CCAPI_TEST(TestGetName);
    ADD_CCAPI_TEST(TestUTFBOM);

#undef ADD_CCAPI_TEST
}
128
ListNames(void)129 static void ListNames(void) {
130 UErrorCode err = U_ZERO_ERROR;
131 int32_t testLong1 = 0;
132 const char* available_conv;
133 UEnumeration *allNamesEnum = NULL;
134 int32_t allNamesCount = 0;
135 uint16_t count;
136
137 log_verbose("Testing ucnv_openAllNames()...");
138 allNamesEnum = ucnv_openAllNames(&err);
139 if(U_FAILURE(err)) {
140 log_data_err("FAILURE! ucnv_openAllNames() -> %s\n", myErrorName(err));
141 }
142 else {
143 const char *string = NULL;
144 int32_t len = 0;
145 int32_t count1 = 0;
146 int32_t count2 = 0;
147 allNamesCount = uenum_count(allNamesEnum, &err);
148 while ((string = uenum_next(allNamesEnum, &len, &err))) {
149 count1++;
150 log_verbose("read \"%s\", length %i\n", string, len);
151 }
152 if (U_FAILURE(err)) {
153 log_err("FAILURE! uenum_next(allNamesEnum...) set an error: %s\n", u_errorName(err));
154 err = U_ZERO_ERROR;
155 }
156 uenum_reset(allNamesEnum, &err);
157 while ((string = uenum_next(allNamesEnum, &len, &err))) {
158 count2++;
159 ucnv_close(ucnv_open(string, &err));
160 log_verbose("read \"%s\", length %i (%s)\n", string, len, U_SUCCESS(err) ? "available" : "unavailable");
161 err = U_ZERO_ERROR;
162 }
163 if (count1 != count2) {
164 log_err("FAILURE! uenum_reset(allNamesEnum, &err); doesn't work\n");
165 }
166 }
167 uenum_close(allNamesEnum);
168 err = U_ZERO_ERROR;
169
170 /*Tests ucnv_getAvailableName(), getAvailableCount()*/
171
172 log_verbose("Testing ucnv_countAvailable()...");
173
174 testLong1=ucnv_countAvailable();
175 log_info("Number of available codepages: %d/%d\n", testLong1, allNamesCount);
176
177 log_verbose("\n---Testing ucnv_getAvailableName.."); /*need to check this out */
178
179 available_conv = ucnv_getAvailableName(testLong1);
180 /*test ucnv_getAvailableName with err condition*/
181 log_verbose("\n---Testing ucnv_getAvailableName..with index < 0 ");
182 available_conv = ucnv_getAvailableName(-1);
183 if(available_conv != NULL){
184 log_err("ucnv_getAvailableName() with index < 0) should return NULL\n");
185 }
186
187 /* Test ucnv_countAliases() etc. */
188 count = ucnv_countAliases("utf-8", &err);
189 if(U_FAILURE(err)) {
190 log_data_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %s\n", myErrorName(err));
191 } else if(count <= 0) {
192 log_err("FAILURE! ucnv_countAliases(\"utf-8\") -> %d aliases\n", count);
193 } else {
194 /* try to get the aliases individually */
195 const char *alias;
196 alias = ucnv_getAlias("utf-8", 0, &err);
197 if(U_FAILURE(err)) {
198 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s\n", myErrorName(err));
199 } else if(strcmp("UTF-8", alias) != 0) {
200 log_err("FAILURE! ucnv_getAlias(\"utf-8\", 0) -> %s instead of UTF-8\n", alias);
201 } else {
202 uint16_t aliasNum;
203 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
204 alias = ucnv_getAlias("utf-8", aliasNum, &err);
205 if(U_FAILURE(err)) {
206 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
207 } else if(strlen(alias) > 20) {
208 /* sanity check */
209 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> alias %s insanely long, corrupt?!\n", aliasNum, alias);
210 } else {
211 log_verbose("alias %d for utf-8: %s\n", aliasNum, alias);
212 }
213 }
214 if(U_SUCCESS(err)) {
215 /* try to fill an array with all aliases */
216 const char **aliases;
217 aliases=(const char **)malloc(count * sizeof(const char *));
218 if(aliases != 0) {
219 ucnv_getAliases("utf-8", aliases, &err);
220 if(U_FAILURE(err)) {
221 log_err("FAILURE! ucnv_getAliases(\"utf-8\") -> %s\n", myErrorName(err));
222 } else {
223 for(aliasNum = 0; aliasNum < count; ++aliasNum) {
224 /* compare the pointers with the ones returned individually */
225 alias = ucnv_getAlias("utf-8", aliasNum, &err);
226 if(U_FAILURE(err)) {
227 log_err("FAILURE! ucnv_getAlias(\"utf-8\", %d) -> %s\n", aliasNum, myErrorName(err));
228 } else if(aliases[aliasNum] != alias) {
229 log_err("FAILURE! ucnv_getAliases(\"utf-8\")[%d] != ucnv_getAlias(\"utf-8\", %d)\n", aliasNum, aliasNum);
230 }
231 }
232 }
233 free((char **)aliases);
234 }
235 }
236 }
237 }
238 }
239
240
TestConvert()241 static void TestConvert()
242 {
243 #if !UCONFIG_NO_LEGACY_CONVERSION
244 char myptr[4];
245 char save[4];
246 int32_t testLong1 = 0;
247 uint16_t rest = 0;
248 int32_t len = 0;
249 int32_t x = 0;
250 FILE* ucs_file_in = NULL;
251 UChar BOM = 0x0000;
252 UChar myUChar = 0x0000;
253 char* mytarget; /* [MAX_FILE_LEN] */
254 char* mytarget_1;
255 char* mytarget_use;
256 UChar* consumedUni = NULL;
257 char* consumed = NULL;
258 char* output_cp_buffer; /* [MAX_FILE_LEN] */
259 UChar* ucs_file_buffer; /* [MAX_FILE_LEN] */
260 UChar* ucs_file_buffer_use;
261 UChar* my_ucs_file_buffer; /* [MAX_FILE_LEN] */
262 UChar* my_ucs_file_buffer_1;
263 int8_t ii = 0;
264 uint16_t codepage_index = 0;
265 int32_t cp = 0;
266 UErrorCode err = U_ZERO_ERROR;
267 char ucs_file_name[UCS_FILE_NAME_SIZE];
268 UConverterFromUCallback MIA1, MIA1_2;
269 UConverterToUCallback MIA2, MIA2_2;
270 const void *MIA1Context, *MIA1Context2, *MIA2Context, *MIA2Context2;
271 UConverter* someConverters[5];
272 UConverter* myConverter = 0;
273 UChar* displayname = 0;
274
275 const char* locale;
276
277 UChar* uchar1 = 0;
278 UChar* uchar2 = 0;
279 UChar* uchar3 = 0;
280 int32_t targetcapacity2;
281 int32_t targetcapacity;
282 int32_t targetsize;
283 int32_t disnamelen;
284
285 const UChar* tmp_ucs_buf;
286 const UChar* tmp_consumedUni=NULL;
287 const char* tmp_mytarget_use;
288 const char* tmp_consumed;
289
290 /******************************************************************
291 Checking Unicode -> ksc
292 ******************************************************************/
293
294 const char* CodePagesToTest[NUM_CODEPAGE] =
295 {
296 "ibm-949_P110-1999"
297
298
299 };
300 const uint16_t CodePageNumberToTest[NUM_CODEPAGE] =
301 {
302 949
303 };
304
305
306 const int8_t CodePagesMinChars[NUM_CODEPAGE] =
307 {
308 1
309
310 };
311
312 const int8_t CodePagesMaxChars[NUM_CODEPAGE] =
313 {
314 2
315
316 };
317
318 const uint16_t CodePagesSubstitutionChars[NUM_CODEPAGE] =
319 {
320 0xAFFE
321 };
322
323 const char* CodePagesTestFiles[NUM_CODEPAGE] =
324 {
325 "uni-text.bin"
326 };
327
328
329 const UConverterPlatform CodePagesPlatform[NUM_CODEPAGE] =
330 {
331 UCNV_IBM
332
333 };
334
335 const char* CodePagesLocale[NUM_CODEPAGE] =
336 {
337 "ko_KR"
338 };
339
340 UConverterFromUCallback oldFromUAction = NULL;
341 UConverterToUCallback oldToUAction = NULL;
342 const void* oldFromUContext = NULL;
343 const void* oldToUContext = NULL;
344
345 /* Allocate memory */
346 mytarget = (char*) malloc(MAX_FILE_LEN * sizeof(mytarget[0]));
347 output_cp_buffer = (char*) malloc(MAX_FILE_LEN * sizeof(output_cp_buffer[0]));
348 ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(ucs_file_buffer[0]));
349 my_ucs_file_buffer = (UChar*) malloc(MAX_FILE_LEN * sizeof(my_ucs_file_buffer[0]));
350
351 ucs_file_buffer_use = ucs_file_buffer;
352 mytarget_1=mytarget;
353 mytarget_use = mytarget;
354 my_ucs_file_buffer_1=my_ucs_file_buffer;
355
356 /* flush the converter cache to get a consistent state before the flushing is tested */
357 ucnv_flushCache();
358
359 /*Testing ucnv_openU()*/
360 {
361 UChar converterName[]={ 0x0069, 0x0062, 0x006d, 0x002d, 0x0039, 0x0034, 0x0033, 0x0000}; /*ibm-943*/
362 UChar firstSortedName[]={ 0x0021, 0x0000}; /* ! */
363 UChar lastSortedName[]={ 0x007E, 0x0000}; /* ~ */
364 const char *illegalNameChars={ "ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943 ibm-943"};
365 UChar illegalName[100];
366 UConverter *converter=NULL;
367 err=U_ZERO_ERROR;
368 converter=ucnv_openU(converterName, &err);
369 if(U_FAILURE(err)){
370 log_data_err("FAILURE! ucnv_openU(ibm-943, err) failed. %s\n", myErrorName(err));
371 }
372 ucnv_close(converter);
373 err=U_ZERO_ERROR;
374 converter=ucnv_openU(NULL, &err);
375 if(U_FAILURE(err)){
376 log_err("FAILURE! ucnv_openU(NULL, err) failed. %s\n", myErrorName(err));
377 }
378 ucnv_close(converter);
379 /*testing with error value*/
380 err=U_ILLEGAL_ARGUMENT_ERROR;
381 converter=ucnv_openU(converterName, &err);
382 if(!(converter == NULL)){
383 log_data_err("FAILURE! ucnv_openU(ibm-943, U_ILLEGAL_ARGUMENT_ERROR) is expected to fail\n");
384 }
385 ucnv_close(converter);
386 err=U_ZERO_ERROR;
387 u_uastrcpy(illegalName, "");
388 u_uastrcpy(illegalName, illegalNameChars);
389 ucnv_openU(illegalName, &err);
390 if(!(err==U_ILLEGAL_ARGUMENT_ERROR)){
391 log_err("FAILURE! ucnv_openU(illegalName, err) is expected to fail\n");
392 }
393
394 err=U_ZERO_ERROR;
395 ucnv_openU(firstSortedName, &err);
396 if(err!=U_FILE_ACCESS_ERROR){
397 log_err("FAILURE! ucnv_openU(firstSortedName, err) is expected to fail\n");
398 }
399
400 err=U_ZERO_ERROR;
401 ucnv_openU(lastSortedName, &err);
402 if(err!=U_FILE_ACCESS_ERROR){
403 log_err("FAILURE! ucnv_openU(lastSortedName, err) is expected to fail\n");
404 }
405
406 err=U_ZERO_ERROR;
407 }
408 log_verbose("Testing ucnv_open() with converter name greater than 7 characters\n");
409 {
410 UConverter *cnv=NULL;
411 err=U_ZERO_ERROR;
412 cnv=ucnv_open("ibm-949,Madhu", &err);
413 if(U_FAILURE(err)){
414 log_data_err("FAILURE! ucnv_open(\"ibm-949,Madhu\", err) failed. %s\n", myErrorName(err));
415 }
416 ucnv_close(cnv);
417
418 }
419 /*Testing ucnv_convert()*/
420 {
421 int32_t targetLimit=0, sourceLimit=0, i=0, targetCapacity=0;
422 const uint8_t source[]={ 0x00, 0x04, 0x05, 0x06, 0xa2, 0xb4, 0x00};
423 const uint8_t expectedTarget[]={ 0x00, 0x37, 0x2d, 0x2e, 0x0e, 0x49, 0x62, 0x0f, 0x00};
424 char *target=0;
425 sourceLimit=UPRV_LENGTHOF(source);
426 err=U_ZERO_ERROR;
427 targetLimit=0;
428
429 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", NULL, targetLimit , (const char*)source, sourceLimit, &err);
430 if(err == U_BUFFER_OVERFLOW_ERROR){
431 err=U_ZERO_ERROR;
432 targetLimit=targetCapacity+1;
433 target=(char*)malloc(sizeof(char) * targetLimit);
434 targetCapacity=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
435 }
436 if(U_FAILURE(err)){
437 log_data_err("FAILURE! ucnv_convert(ibm-1363->ibm-1364) failed. %s\n", myErrorName(err));
438 }
439 else {
440 for(i=0; i<targetCapacity; i++){
441 if(target[i] != expectedTarget[i]){
442 log_err("FAIL: ucnv_convert(ibm-1363->ibm-1364) failed.at index \n i=%d, Expected: %lx Got: %lx\n", i, (UChar)expectedTarget[i], (uint8_t)target[i]);
443 }
444 }
445
446 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source+1, -1, &err);
447 if(U_FAILURE(err) || i!=7){
448 log_err("FAILURE! ucnv_convert() with sourceLimit=-1 failed: %s, returned %d instead of 7\n",
449 u_errorName(err), i);
450 }
451
452 /*Test error conditions*/
453 err=U_ZERO_ERROR;
454 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, 0, &err);
455 if(i !=0){
456 log_err("FAILURE! ucnv_convert() with sourceLimit=0 is expected to return 0\n");
457 }
458
459 err=U_ILLEGAL_ARGUMENT_ERROR;
460 sourceLimit=UPRV_LENGTHOF(source);
461 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
462 if(i !=0 ){
463 log_err("FAILURE! ucnv_convert() with err=U_ILLEGAL_ARGUMENT_ERROR is expected to return 0\n");
464 }
465
466 err=U_ZERO_ERROR;
467 sourceLimit=UPRV_LENGTHOF(source);
468 targetLimit=0;
469 i=ucnv_convert("ibm-1364", "ibm-1363", target, targetLimit , (const char*)source, sourceLimit, &err);
470 if(!(U_FAILURE(err) && err==U_BUFFER_OVERFLOW_ERROR)){
471 log_err("FAILURE! ucnv_convert() with targetLimit=0 is expected to throw U_BUFFER_OVERFLOW_ERROR\n");
472 }
473 err=U_ZERO_ERROR;
474 free(target);
475 }
476 }
477
478 /*Testing ucnv_openCCSID and ucnv_open with error conditions*/
479 log_verbose("\n---Testing ucnv_open with err ! = U_ZERO_ERROR...\n");
480 err=U_ILLEGAL_ARGUMENT_ERROR;
481 if(ucnv_open(NULL, &err) != NULL){
482 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
483 }
484 if(ucnv_openCCSID(1051, UCNV_IBM, &err) != NULL){
485 log_err("ucnv_open with err != U_ZERO_ERROR is supposed to fail\n");
486 }
487 err=U_ZERO_ERROR;
488
489 /* Testing ucnv_openCCSID(), ucnv_open(), ucnv_getName() */
490 log_verbose("\n---Testing ucnv_open default...\n");
491 someConverters[0] = ucnv_open(NULL,&err);
492 someConverters[1] = ucnv_open(NULL,&err);
493 someConverters[2] = ucnv_open("utf8", &err);
494 someConverters[3] = ucnv_openCCSID(949,UCNV_IBM,&err);
495 ucnv_close(ucnv_openCCSID(1051, UCNV_IBM, &err)); /* test for j350; ucnv_close(NULL) is safe */
496 if (U_FAILURE(err)){ log_data_err("FAILURE! %s\n", myErrorName(err));}
497
498 /* Testing ucnv_getName()*/
499 /*default code page */
500 ucnv_getName(someConverters[0], &err);
501 if(U_FAILURE(err)) {
502 log_data_err("getName[0] failed\n");
503 } else {
504 log_verbose("getName(someConverters[0]) returned %s\n", ucnv_getName(someConverters[0], &err));
505 }
506 ucnv_getName(someConverters[1], &err);
507 if(U_FAILURE(err)) {
508 log_data_err("getName[1] failed\n");
509 } else {
510 log_verbose("getName(someConverters[1]) returned %s\n", ucnv_getName(someConverters[1], &err));
511 }
512
513 ucnv_close(someConverters[0]);
514 ucnv_close(someConverters[1]);
515 ucnv_close(someConverters[2]);
516 ucnv_close(someConverters[3]);
517
518
519 for (codepage_index=0; codepage_index < NUM_CODEPAGE; ++codepage_index)
520 {
521 int32_t i = 0;
522
523 err = U_ZERO_ERROR;
524 #ifdef U_TOPSRCDIR
525 strcpy(ucs_file_name, U_TOPSRCDIR U_FILE_SEP_STRING"test"U_FILE_SEP_STRING"testdata"U_FILE_SEP_STRING);
526 #else
527 strcpy(ucs_file_name, loadTestData(&err));
528
529 if(U_FAILURE(err)){
530 log_err("\nCouldn't get the test data directory... Exiting...Error:%s\n", u_errorName(err));
531 return;
532 }
533
534 {
535 char* index = strrchr(ucs_file_name,(char)U_FILE_SEP_CHAR);
536
537 if((unsigned int)(index-ucs_file_name) != (strlen(ucs_file_name)-1)){
538 *(index+1)=0;
539 }
540 }
541
542 strcat(ucs_file_name,".."U_FILE_SEP_STRING);
543 #endif
544 strcat(ucs_file_name, CodePagesTestFiles[codepage_index]);
545
546 ucs_file_in = fopen(ucs_file_name,"rb");
547 if (!ucs_file_in)
548 {
549 log_data_err("Couldn't open the Unicode file [%s]... Exiting...\n", ucs_file_name);
550 return;
551 }
552
553 /*Creates a converter and testing ucnv_openCCSID(u_int code_page, platform, errstatus*/
554
555 /* myConverter =ucnv_openCCSID(CodePageNumberToTest[codepage_index],UCNV_IBM, &err); */
556 /* ucnv_flushCache(); */
557 myConverter =ucnv_open( "ibm-949", &err);
558 if (!myConverter || U_FAILURE(err))
559 {
560 log_data_err("Error creating the ibm-949 converter - %s \n", u_errorName(err));
561 fclose(ucs_file_in);
562 break;
563 }
564
565 /*testing for ucnv_getName() */
566 log_verbose("Testing ucnv_getName()...\n");
567 ucnv_getName(myConverter, &err);
568 if(U_FAILURE(err))
569 log_err("Error in getName\n");
570 else
571 {
572 log_verbose("getName o.k. %s\n", ucnv_getName(myConverter, &err));
573 }
574 if (uprv_stricmp(ucnv_getName(myConverter, &err), CodePagesToTest[codepage_index]))
575 log_err("getName failed\n");
576 else
577 log_verbose("getName ok\n");
578 /*Test getName with error condition*/
579 {
580 const char* name=0;
581 err=U_ILLEGAL_ARGUMENT_ERROR;
582 log_verbose("Testing ucnv_getName with err != U_ZERO_ERROR");
583 name=ucnv_getName(myConverter, &err);
584 if(name != NULL){
585 log_err("ucnv_getName() with err != U_ZERO_ERROR is expected to fail");
586 }
587 err=U_ZERO_ERROR;
588 }
589
590
591 /*Tests ucnv_getMaxCharSize() and ucnv_getMinCharSize()*/
592
593 log_verbose("Testing ucnv_getMaxCharSize()...\n");
594 if (ucnv_getMaxCharSize(myConverter)==CodePagesMaxChars[codepage_index])
595 log_verbose("Max byte per character OK\n");
596 else
597 log_err("Max byte per character failed\n");
598
599 log_verbose("\n---Testing ucnv_getMinCharSize()...\n");
600 if (ucnv_getMinCharSize(myConverter)==CodePagesMinChars[codepage_index])
601 log_verbose("Min byte per character OK\n");
602 else
603 log_err("Min byte per character failed\n");
604
605
606 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars()*/
607 log_verbose("\n---Testing ucnv_getSubstChars...\n");
608 ii=4;
609 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
610 if (ii <= 0) {
611 log_err("ucnv_getSubstChars returned a negative number %d\n", ii);
612 }
613
614 for(x=0;x<ii;x++)
615 rest = (uint16_t)(((unsigned char)rest << 8) + (unsigned char)myptr[x]);
616 if (rest==CodePagesSubstitutionChars[codepage_index])
617 log_verbose("Substitution character ok\n");
618 else
619 log_err("Substitution character failed.\n");
620
621 log_verbose("\n---Testing ucnv_setSubstChars RoundTrip Test ...\n");
622 ucnv_setSubstChars(myConverter, myptr, ii, &err);
623 if (U_FAILURE(err))
624 {
625 log_err("FAILURE! %s\n", myErrorName(err));
626 }
627 ucnv_getSubstChars(myConverter,save, &ii, &err);
628 if (U_FAILURE(err))
629 {
630 log_err("FAILURE! %s\n", myErrorName(err));
631 }
632
633 if (strncmp(save, myptr, ii))
634 log_err("Saved substitution character failed\n");
635 else
636 log_verbose("Saved substitution character ok\n");
637
638 /*Testing for ucnv_getSubstChars() and ucnv_setSubstChars() with error conditions*/
639 log_verbose("\n---Testing ucnv_getSubstChars.. with len < minBytesPerChar\n");
640 ii=1;
641 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
642 if(err != U_INDEX_OUTOFBOUNDS_ERROR){
643 log_err("ucnv_getSubstChars() with len < minBytesPerChar should throw U_INDEX_OUTOFBOUNDS_ERROR Got %s\n", myErrorName(err));
644 }
645 err=U_ZERO_ERROR;
646 ii=4;
647 ucnv_getSubstChars(myConverter, myptr, &ii, &err);
648 log_verbose("\n---Testing ucnv_setSubstChars.. with len < minBytesPerChar\n");
649 ucnv_setSubstChars(myConverter, myptr, 0, &err);
650 if(err != U_ILLEGAL_ARGUMENT_ERROR){
651 log_err("ucnv_setSubstChars() with len < minBytesPerChar should throw U_ILLEGAL_ARGUMENT_ERROR Got %s\n", myErrorName(err));
652 }
653 log_verbose("\n---Testing ucnv_setSubstChars.. with err != U_ZERO_ERROR \n");
654 strcpy(myptr, "abc");
655 ucnv_setSubstChars(myConverter, myptr, ii, &err);
656 err=U_ZERO_ERROR;
657 ucnv_getSubstChars(myConverter, save, &ii, &err);
658 if(strncmp(save, myptr, ii) == 0){
659 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't set the SubstChars and just return\n");
660 }
661 log_verbose("\n---Testing ucnv_getSubstChars.. with err != U_ZERO_ERROR \n");
662 err=U_ZERO_ERROR;
663 strcpy(myptr, "abc");
664 ucnv_setSubstChars(myConverter, myptr, ii, &err);
665 err=U_ILLEGAL_ARGUMENT_ERROR;
666 ucnv_getSubstChars(myConverter, save, &ii, &err);
667 if(strncmp(save, myptr, ii) == 0){
668 log_err("uncv_setSubstChars() with err != U_ZERO_ERROR shouldn't fill the SubstChars in the buffer, it just returns\n");
669 }
670 err=U_ZERO_ERROR;
671 /*------*/
672
673 #ifdef U_ENABLE_GENERIC_ISO_2022
674 /*resetState ucnv_reset()*/
675 log_verbose("\n---Testing ucnv_reset()..\n");
676 ucnv_reset(myConverter);
677 {
678 UChar32 c;
679 const uint8_t in[]={ 0x1b, 0x25, 0x42, 0x31, 0x32, 0x61, 0xc0, 0x80, 0xe0, 0x80, 0x80, 0xf0, 0x80, 0x80, 0x80};
680 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
681 UConverter *cnv=ucnv_open("ISO_2022", &err);
682 if(U_FAILURE(err)) {
683 log_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
684 }
685 c=ucnv_getNextUChar(cnv, &source, limit, &err);
686 if((U_FAILURE(err) || c != (UChar32)0x0031)) {
687 log_err("ucnv_getNextUChar() failed: %s\n", u_errorName(err));
688 }
689 ucnv_reset(cnv);
690 ucnv_close(cnv);
691
692 }
693 #endif
694
695 /*getDisplayName*/
696 log_verbose("\n---Testing ucnv_getDisplayName()...\n");
697 locale=CodePagesLocale[codepage_index];
698 len=0;
699 displayname=NULL;
700 disnamelen = ucnv_getDisplayName(myConverter, locale, displayname, len, &err);
701 if(err==U_BUFFER_OVERFLOW_ERROR) {
702 err=U_ZERO_ERROR;
703 displayname=(UChar*)malloc((disnamelen+1) * sizeof(UChar));
704 ucnv_getDisplayName(myConverter,locale,displayname,disnamelen+1, &err);
705 if(U_FAILURE(err)) {
706 log_err("getDisplayName failed. The error is %s\n", myErrorName(err));
707 }
708 else {
709 log_verbose(" getDisplayName o.k.\n");
710 }
711 free(displayname);
712 displayname=NULL;
713 }
714 else {
715 log_err("getDisplayName preflight doesn't work. Error is %s\n", myErrorName(err));
716 }
717 /*test ucnv_getDiaplayName with error condition*/
718 err= U_ILLEGAL_ARGUMENT_ERROR;
719 len=ucnv_getDisplayName(myConverter,locale,NULL,0, &err);
720 if( len !=0 ){
721 log_err("ucnv_getDisplayName() with err != U_ZERO_ERROR is supposed to return 0\n");
722 }
723 /*test ucnv_getDiaplayName with error condition*/
724 err=U_ZERO_ERROR;
725 len=ucnv_getDisplayName(NULL,locale,NULL,0, &err);
726 if( len !=0 || U_SUCCESS(err)){
727 log_err("ucnv_getDisplayName(NULL) with cnv == NULL is supposed to return 0\n");
728 }
729 err=U_ZERO_ERROR;
730
731 /* testing ucnv_setFromUCallBack() and ucnv_getFromUCallBack()*/
732 ucnv_getFromUCallBack(myConverter, &MIA1, &MIA1Context);
733
734 log_verbose("\n---Testing ucnv_setFromUCallBack...\n");
735 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
736 if (U_FAILURE(err) || oldFromUAction != MIA1 || oldFromUContext != MIA1Context)
737 {
738 log_err("FAILURE! %s\n", myErrorName(err));
739 }
740
741 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
742 if (MIA1_2 != otherUnicodeAction(MIA1) || MIA1Context2 != &BOM)
743 log_err("get From UCallBack failed\n");
744 else
745 log_verbose("get From UCallBack ok\n");
746
747 log_verbose("\n---Testing getFromUCallBack Roundtrip...\n");
748 ucnv_setFromUCallBack(myConverter,MIA1, MIA1Context, &oldFromUAction, &oldFromUContext, &err);
749 if (U_FAILURE(err) || oldFromUAction != otherUnicodeAction(MIA1) || oldFromUContext != &BOM)
750 {
751 log_err("FAILURE! %s\n", myErrorName(err));
752 }
753
754 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
755 if (MIA1_2 != MIA1 || MIA1Context2 != MIA1Context)
756 log_err("get From UCallBack action failed\n");
757 else
758 log_verbose("get From UCallBack action ok\n");
759
760 /*testing ucnv_setToUCallBack with error conditions*/
761 err=U_ILLEGAL_ARGUMENT_ERROR;
762 log_verbose("\n---Testing setFromUCallBack. with err != U_ZERO_ERROR..\n");
763 ucnv_setFromUCallBack(myConverter, otherUnicodeAction(MIA1), &BOM, &oldFromUAction, &oldFromUContext, &err);
764 ucnv_getFromUCallBack(myConverter, &MIA1_2, &MIA1Context2);
765 if(MIA1_2 == otherUnicodeAction(MIA1) || MIA1Context2 == &BOM){
766 log_err("To setFromUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
767 }
768 err=U_ZERO_ERROR;
769
770
771 /*testing ucnv_setToUCallBack() and ucnv_getToUCallBack()*/
772 ucnv_getToUCallBack(myConverter, &MIA2, &MIA2Context);
773
774 log_verbose("\n---Testing setTo UCallBack...\n");
775 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), &BOM, &oldToUAction, &oldToUContext, &err);
776 if (U_FAILURE(err) || oldToUAction != MIA2 || oldToUContext != MIA2Context)
777 {
778 log_err("FAILURE! %s\n", myErrorName(err));
779 }
780
781 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
782 if (MIA2_2 != otherCharAction(MIA2) || MIA2Context2 != &BOM)
783 log_err("To UCallBack failed\n");
784 else
785 log_verbose("To UCallBack ok\n");
786
787 log_verbose("\n---Testing setTo UCallBack Roundtrip...\n");
788 ucnv_setToUCallBack(myConverter,MIA2, MIA2Context, &oldToUAction, &oldToUContext, &err);
789 if (U_FAILURE(err) || oldToUAction != otherCharAction(MIA2) || oldToUContext != &BOM)
790 { log_err("FAILURE! %s\n", myErrorName(err)); }
791
792 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
793 if (MIA2_2 != MIA2 || MIA2Context2 != MIA2Context)
794 log_err("To UCallBack failed\n");
795 else
796 log_verbose("To UCallBack ok\n");
797
798 /*testing ucnv_setToUCallBack with error conditions*/
799 err=U_ILLEGAL_ARGUMENT_ERROR;
800 log_verbose("\n---Testing setToUCallBack. with err != U_ZERO_ERROR..\n");
801 ucnv_setToUCallBack(myConverter,otherCharAction(MIA2), NULL, &oldToUAction, &oldToUContext, &err);
802 ucnv_getToUCallBack(myConverter, &MIA2_2, &MIA2Context2);
803 if (MIA2_2 == otherCharAction(MIA2) || MIA2Context2 == &BOM){
804 log_err("To setToUCallBack with err != U_ZERO_ERROR is supposed to fail\n");
805 }
806 err=U_ZERO_ERROR;
807
808
809 /*getcodepageid testing ucnv_getCCSID() */
810 log_verbose("\n----Testing getCCSID....\n");
811 cp = ucnv_getCCSID(myConverter,&err);
812 if (U_FAILURE(err))
813 {
814 log_err("FAILURE!..... %s\n", myErrorName(err));
815 }
816 if (cp != CodePageNumberToTest[codepage_index])
817 log_err("Codepage number test failed\n");
818 else
819 log_verbose("Codepage number test OK\n");
820
821 /*testing ucnv_getCCSID() with err != U_ZERO_ERROR*/
822 err=U_ILLEGAL_ARGUMENT_ERROR;
823 if( ucnv_getCCSID(myConverter,&err) != -1){
824 log_err("ucnv_getCCSID() with err != U_ZERO_ERROR is supposed to fail\n");
825 }
826 err=U_ZERO_ERROR;
827
828 /*getCodepagePlatform testing ucnv_getPlatform()*/
829 log_verbose("\n---Testing getCodepagePlatform ..\n");
830 if (CodePagesPlatform[codepage_index]!=ucnv_getPlatform(myConverter, &err))
831 log_err("Platform codepage test failed\n");
832 else
833 log_verbose("Platform codepage test ok\n");
834
835 if (U_FAILURE(err))
836 {
837 log_err("FAILURE! %s\n", myErrorName(err));
838 }
839 /*testing ucnv_getPlatform() with err != U_ZERO_ERROR*/
840 err= U_ILLEGAL_ARGUMENT_ERROR;
841 if(ucnv_getPlatform(myConverter, &err) != UCNV_UNKNOWN){
842 log_err("ucnv)getPlatform with err != U_ZERO_ERROR is supposed to fail\n");
843 }
844 err=U_ZERO_ERROR;
845
846
847 /*Reads the BOM*/
848 {
849 // Note: gcc produces a compile warning if the return value from fread() is ignored.
850 size_t numRead = fread(&BOM, sizeof(UChar), 1, ucs_file_in);
851 (void)numRead;
852 }
853 if (BOM!=0xFEFF && BOM!=0xFFFE)
854 {
855 log_err("File Missing BOM...Bailing!\n");
856 fclose(ucs_file_in);
857 break;
858 }
859
860
861 /*Reads in the file*/
862 while(!feof(ucs_file_in)&&(i+=(int32_t)fread(ucs_file_buffer+i, sizeof(UChar), 1, ucs_file_in)))
863 {
864 myUChar = ucs_file_buffer[i-1];
865
866 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN*/
867 }
868
869 myUChar = ucs_file_buffer[i-1];
870 ucs_file_buffer[i-1] = (UChar)((BOM==0xFEFF)?myUChar:((myUChar >> 8) | (myUChar << 8))); /*adjust if BIG_ENDIAN Corner Case*/
871
872
873 /*testing ucnv_fromUChars() and ucnv_toUChars() */
874 /*uchar1---fromUChar--->output_cp_buffer --toUChar--->uchar2*/
875
876 uchar1=(UChar*)malloc(sizeof(UChar) * (i+1));
877 u_uastrcpy(uchar1,"");
878 u_strncpy(uchar1,ucs_file_buffer,i);
879 uchar1[i] = 0;
880
881 uchar3=(UChar*)malloc(sizeof(UChar)*(i+1));
882 u_uastrcpy(uchar3,"");
883 u_strncpy(uchar3,ucs_file_buffer,i);
884 uchar3[i] = 0;
885
886 /*Calls the Conversion Routine */
887 testLong1 = MAX_FILE_LEN;
888 log_verbose("\n---Testing ucnv_fromUChars()\n");
889 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
890 if (U_FAILURE(err))
891 {
892 log_err("\nFAILURE...%s\n", myErrorName(err));
893 }
894 else
895 log_verbose(" ucnv_fromUChars() o.k.\n");
896
897 /*test the conversion routine */
898 log_verbose("\n---Testing ucnv_toUChars()\n");
899 /*call it first time for trapping the targetcapacity and size needed to allocate memory for the buffer uchar2 */
900 targetcapacity2=0;
901 targetsize = ucnv_toUChars(myConverter,
902 NULL,
903 targetcapacity2,
904 output_cp_buffer,
905 (int32_t)strlen(output_cp_buffer),
906 &err);
907 /*if there is an buffer overflow then trap the values and pass them and make the actual call*/
908
909 if(err==U_BUFFER_OVERFLOW_ERROR)
910 {
911 err=U_ZERO_ERROR;
912 uchar2=(UChar*)malloc((targetsize+1) * sizeof(UChar));
913 targetsize = ucnv_toUChars(myConverter,
914 uchar2,
915 targetsize+1,
916 output_cp_buffer,
917 (int32_t)strlen(output_cp_buffer),
918 &err);
919
920 if(U_FAILURE(err))
921 log_err("ucnv_toUChars() FAILED %s\n", myErrorName(err));
922 else
923 log_verbose(" ucnv_toUChars() o.k.\n");
924
925 if(u_strcmp(uchar1,uchar2)!=0)
926 log_err("equality test failed with conversion routine\n");
927 }
928 else
929 {
930 log_err("ERR: calling toUChars: Didn't get U_BUFFER_OVERFLOW .. expected it.\n");
931 }
932 /*Testing ucnv_fromUChars and ucnv_toUChars with error conditions*/
933 err=U_ILLEGAL_ARGUMENT_ERROR;
934 log_verbose("\n---Testing ucnv_fromUChars() with err != U_ZERO_ERROR\n");
935 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, -1, &err);
936 if (targetcapacity !=0) {
937 log_err("\nFAILURE: ucnv_fromUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
938 }
939 err=U_ZERO_ERROR;
940 log_verbose("\n---Testing ucnv_fromUChars() with converter=NULL\n");
941 targetcapacity = ucnv_fromUChars(NULL, output_cp_buffer, testLong1, uchar1, -1, &err);
942 if (targetcapacity !=0 || err != U_ILLEGAL_ARGUMENT_ERROR) {
943 log_err("\nFAILURE: ucnv_fromUChars with converter=NULL is expected to fail\n");
944 }
945 err=U_ZERO_ERROR;
946 log_verbose("\n---Testing ucnv_fromUChars() with sourceLength = 0\n");
947 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, testLong1, uchar1, 0, &err);
948 if (targetcapacity !=0) {
949 log_err("\nFAILURE: ucnv_fromUChars with sourceLength 0 is expected to return 0\n");
950 }
951 log_verbose("\n---Testing ucnv_fromUChars() with targetLength = 0\n");
952 targetcapacity = ucnv_fromUChars(myConverter, output_cp_buffer, 0, uchar1, -1, &err);
953 if (err != U_BUFFER_OVERFLOW_ERROR) {
954 log_err("\nFAILURE: ucnv_fromUChars with targetLength 0 is expected to fail and throw U_BUFFER_OVERFLOW_ERROR\n");
955 }
956 /*toUChars with error conditions*/
957 targetsize = ucnv_toUChars(myConverter, uchar2, targetsize, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
958 if(targetsize != 0){
959 log_err("\nFAILURE: ucnv_toUChars with err != U_ZERO_ERROR is expected to fail and return 0\n");
960 }
961 err=U_ZERO_ERROR;
962 targetsize = ucnv_toUChars(myConverter, uchar2, -1, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
963 if(targetsize != 0 || err != U_ILLEGAL_ARGUMENT_ERROR){
964 log_err("\nFAILURE: ucnv_toUChars with targetsize < 0 is expected to throw U_ILLEGAL_ARGUMENT_ERROR and return 0\n");
965 }
966 err=U_ZERO_ERROR;
967 targetsize = ucnv_toUChars(myConverter, uchar2, 0, output_cp_buffer, 0, &err);
968 if (targetsize !=0) {
969 log_err("\nFAILURE: ucnv_toUChars with sourceLength 0 is expected to return 0\n");
970 }
971 targetcapacity2=0;
972 targetsize = ucnv_toUChars(myConverter, NULL, targetcapacity2, output_cp_buffer, (int32_t)strlen(output_cp_buffer), &err);
973 if (err != U_STRING_NOT_TERMINATED_WARNING) {
974 log_err("\nFAILURE: ucnv_toUChars(targetLength)->%s instead of U_STRING_NOT_TERMINATED_WARNING\n",
975 u_errorName(err));
976 }
977 err=U_ZERO_ERROR;
978 /*-----*/
979
980
981 /*testing for ucnv_fromUnicode() and ucnv_toUnicode() */
982 /*Clean up re-usable vars*/
983 log_verbose("Testing ucnv_fromUnicode().....\n");
984 tmp_ucs_buf=ucs_file_buffer_use;
985 ucnv_fromUnicode(myConverter, &mytarget_1,
986 mytarget + MAX_FILE_LEN,
987 &tmp_ucs_buf,
988 ucs_file_buffer_use+i,
989 NULL,
990 true,
991 &err);
992 consumedUni = (UChar*)tmp_consumedUni;
993 (void)consumedUni; /* Suppress set but not used warning. */
994
995 if (U_FAILURE(err))
996 {
997 log_err("FAILURE! %s\n", myErrorName(err));
998 }
999 else
1000 log_verbose("ucnv_fromUnicode() o.k.\n");
1001
1002 /*Uni1 ----ToUnicode----> Cp2 ----FromUnicode---->Uni3 */
1003 log_verbose("Testing ucnv_toUnicode().....\n");
1004 tmp_mytarget_use=mytarget_use;
1005 tmp_consumed = consumed;
1006 ucnv_toUnicode(myConverter, &my_ucs_file_buffer_1,
1007 my_ucs_file_buffer + MAX_FILE_LEN,
1008 &tmp_mytarget_use,
1009 mytarget_use + (mytarget_1 - mytarget),
1010 NULL,
1011 false,
1012 &err);
1013 consumed = (char*)tmp_consumed;
1014 if (U_FAILURE(err))
1015 {
1016 log_err("FAILURE! %s\n", myErrorName(err));
1017 }
1018 else
1019 log_verbose("ucnv_toUnicode() o.k.\n");
1020
1021
1022 log_verbose("\n---Testing RoundTrip ...\n");
1023
1024
1025 u_strncpy(uchar3, my_ucs_file_buffer,i);
1026 uchar3[i] = 0;
1027
1028 if(u_strcmp(uchar1,uchar3)==0)
1029 log_verbose("Equality test o.k.\n");
1030 else
1031 log_err("Equality test failed\n");
1032
1033 /*sanity compare */
1034 if(uchar2 == NULL)
1035 {
1036 log_err("uchar2 was NULL (ccapitst.c line %d), couldn't do sanity check\n", __LINE__);
1037 }
1038 else
1039 {
1040 if(u_strcmp(uchar2, uchar3)==0)
1041 log_verbose("Equality test o.k.\n");
1042 else
1043 log_err("Equality test failed\n");
1044 }
1045
1046 fclose(ucs_file_in);
1047 ucnv_close(myConverter);
1048 if (uchar1 != 0) free(uchar1);
1049 if (uchar2 != 0) free(uchar2);
1050 if (uchar3 != 0) free(uchar3);
1051 }
1052
1053 free((void*)mytarget);
1054 free((void*)output_cp_buffer);
1055 free((void*)ucs_file_buffer);
1056 free((void*)my_ucs_file_buffer);
1057 #endif
1058 }
1059
1060 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * Returns a from-Unicode callback that differs from the one passed in:
 * the SUBSTITUTE callback when MIA is the STOP callback, and the STOP
 * callback for anything else.
 */
static UConverterFromUCallback otherUnicodeAction(UConverterFromUCallback MIA)
{
    const UConverterFromUCallback stopAction =
        (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_STOP;

    if (MIA == stopAction) {
        return (UConverterFromUCallback)UCNV_FROM_U_CALLBACK_SUBSTITUTE;
    }
    return stopAction;
}
1065
/*
 * Returns a to-Unicode callback that differs from the one passed in:
 * the SUBSTITUTE callback when MIA is the STOP callback, and the STOP
 * callback for anything else.
 */
static UConverterToUCallback otherCharAction(UConverterToUCallback MIA)
{
    const UConverterToUCallback stopAction =
        (UConverterToUCallback)UCNV_TO_U_CALLBACK_STOP;

    if (MIA == stopAction) {
        return (UConverterToUCallback)UCNV_TO_U_CALLBACK_SUBSTITUTE;
    }
    return stopAction;
}
1070 #endif
1071
TestFlushCache(void)1072 static void TestFlushCache(void) {
1073 #if !UCONFIG_NO_LEGACY_CONVERSION
1074 UErrorCode err = U_ZERO_ERROR;
1075 UConverter* someConverters[5];
1076 int flushCount = 0;
1077
1078 /* flush the converter cache to get a consistent state before the flushing is tested */
1079 ucnv_flushCache();
1080
1081 /*Testing ucnv_open()*/
1082 /* Note: These converters have been chosen because they do NOT
1083 encode the Latin characters (U+0041, ...), and therefore are
1084 highly unlikely to be chosen as system default codepages */
1085
1086 someConverters[0] = ucnv_open("ibm-1047", &err);
1087 if (U_FAILURE(err)) {
1088 log_data_err("FAILURE! %s\n", myErrorName(err));
1089 }
1090
1091 someConverters[1] = ucnv_open("ibm-1047", &err);
1092 if (U_FAILURE(err)) {
1093 log_data_err("FAILURE! %s\n", myErrorName(err));
1094 }
1095
1096 someConverters[2] = ucnv_open("ibm-1047", &err);
1097 if (U_FAILURE(err)) {
1098 log_data_err("FAILURE! %s\n", myErrorName(err));
1099 }
1100
1101 someConverters[3] = ucnv_open("gb18030", &err);
1102 if (U_FAILURE(err)) {
1103 log_data_err("FAILURE! %s\n", myErrorName(err));
1104 }
1105
1106 someConverters[4] = ucnv_open("ibm-954", &err);
1107 if (U_FAILURE(err)) {
1108 log_data_err("FAILURE! %s\n", myErrorName(err));
1109 }
1110
1111
1112 /* Testing ucnv_flushCache() */
1113 log_verbose("\n---Testing ucnv_flushCache...\n");
1114 if ((flushCount=ucnv_flushCache())==0)
1115 log_verbose("Flush cache ok\n");
1116 else
1117 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1118
1119 /*testing ucnv_close() and ucnv_flushCache() */
1120 ucnv_close(someConverters[0]);
1121 ucnv_close(someConverters[1]);
1122
1123 if ((flushCount=ucnv_flushCache())==0)
1124 log_verbose("Flush cache ok\n");
1125 else
1126 log_data_err("Flush Cache failed [line %d], expect 0 got %d \n", __LINE__, flushCount);
1127
1128 ucnv_close(someConverters[2]);
1129 ucnv_close(someConverters[3]);
1130
1131 if ((flushCount=ucnv_flushCache())==2)
1132 log_verbose("Flush cache ok\n"); /*because first, second and third are same */
1133 else
1134 log_data_err("Flush Cache failed line %d, got %d expected 2 or there is an error in ucnv_close()\n",
1135 __LINE__,
1136 flushCount);
1137
1138 ucnv_close(someConverters[4]);
1139 if ( (flushCount=ucnv_flushCache())==1)
1140 log_verbose("Flush cache ok\n");
1141 else
1142 log_data_err("Flush Cache failed line %d, expected 1 got %d \n", __LINE__, flushCount);
1143 #endif
1144 }
1145
1146 /**
1147 * Test the converter alias API, specifically the fuzzy matching of
1148 * alias names and the alias table integrity. Make sure each
1149 * converter has at least one alias (itself), and that its listed
1150 * aliases map back to itself. Check some hard-coded UTF-8 and
1151 * ISO_2022 aliases to make sure they work.
1152 */
TestAlias()1153 static void TestAlias() {
1154 int32_t i, ncnv;
1155 UErrorCode status = U_ZERO_ERROR;
1156
1157 /* Predetermined aliases that we expect to map back to ISO_2022
1158 * and UTF-8. UPDATE THIS DATA AS NECESSARY. */
1159 const char* ISO_2022_NAMES[] =
1160 {"ISO_2022,locale=ja,version=2", "ISO-2022-JP-2", "csISO2022JP2",
1161 "Iso-2022jP2", "isO-2022_Jp_2", "iSo--2022,locale=ja,version=2"};
1162 int32_t ISO_2022_NAMES_LENGTH = UPRV_LENGTHOF(ISO_2022_NAMES);
1163 const char *UTF8_NAMES[] =
1164 { "UTF-8", "utf-8", "utf8", "ibm-1208",
1165 "utf_8", "ibm1208", "cp1208" };
1166 int32_t UTF8_NAMES_LENGTH = UPRV_LENGTHOF(UTF8_NAMES);
1167
1168 struct {
1169 const char *name;
1170 const char *alias;
1171 } CONVERTERS_NAMES[] = {
1172 { "UTF-32BE", "UTF32_BigEndian" },
1173 { "UTF-32LE", "UTF32_LittleEndian" },
1174 { "UTF-32", "ISO-10646-UCS-4" },
1175 { "UTF32_PlatformEndian", "UTF32_PlatformEndian" },
1176 { "UTF-32", "ucs-4" }
1177 };
1178 int32_t CONVERTERS_NAMES_LENGTH = UPRV_LENGTHOF(CONVERTERS_NAMES);
1179
1180 /* When there are bugs in gencnval or in ucnv_io, converters can
1181 appear to have no aliases. */
1182 ncnv = ucnv_countAvailable();
1183 log_verbose("%d converters\n", ncnv);
1184 for (i=0; i<ncnv; ++i) {
1185 const char *name = ucnv_getAvailableName(i);
1186 const char *alias0;
1187 uint16_t na = ucnv_countAliases(name, &status);
1188 uint16_t j;
1189 UConverter *cnv;
1190
1191 if (na == 0) {
1192 log_err("FAIL: Converter \"%s\" (i=%d)"
1193 " has no aliases; expect at least one\n",
1194 name, i);
1195 continue;
1196 }
1197 cnv = ucnv_open(name, &status);
1198 if (U_FAILURE(status)) {
1199 log_data_err("FAIL: Converter \"%s\" (i=%d)"
1200 " can't be opened.\n",
1201 name, i);
1202 }
1203 else {
1204 if (strcmp(ucnv_getName(cnv, &status), name) != 0
1205 && (strstr(name, "PlatformEndian") == 0 && strstr(name, "OppositeEndian") == 0)) {
1206 log_err("FAIL: Converter \"%s\" returned \"%s\" for getName. "
1207 "They should be the same\n",
1208 name, ucnv_getName(cnv, &status));
1209 }
1210 }
1211 ucnv_close(cnv);
1212
1213 status = U_ZERO_ERROR;
1214 alias0 = ucnv_getAlias(name, 0, &status);
1215 for (j=1; j<na; ++j) {
1216 const char *alias;
1217 /* Make sure each alias maps back to the the same list of
1218 aliases. Assume that if alias 0 is the same, the whole
1219 list is the same (this should always be true). */
1220 const char *mapBack;
1221
1222 status = U_ZERO_ERROR;
1223 alias = ucnv_getAlias(name, j, &status);
1224 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1225 log_err("FAIL: Converter \"%s\"is ambiguous\n", name);
1226 }
1227
1228 if (alias == NULL) {
1229 log_err("FAIL: Converter \"%s\" -> "
1230 "alias[%d]=NULL\n",
1231 name, j);
1232 continue;
1233 }
1234
1235 mapBack = ucnv_getAlias(alias, 0, &status);
1236
1237 if (mapBack == NULL) {
1238 log_err("FAIL: Converter \"%s\" -> "
1239 "alias[%d]=\"%s\" -> "
1240 "alias[0]=NULL, exp. \"%s\"\n",
1241 name, j, alias, alias0);
1242 continue;
1243 }
1244
1245 if (0 != strcmp(alias0, mapBack)) {
1246 int32_t idx;
1247 UBool foundAlias = false;
1248 if (status == U_AMBIGUOUS_ALIAS_WARNING) {
1249 /* Make sure that we only get this mismapping when there is
1250 an ambiguous alias, and the other converter has this alias too. */
1251 for (idx = 0; idx < ucnv_countAliases(mapBack, &status); idx++) {
1252 if (strcmp(ucnv_getAlias(mapBack, (uint16_t)idx, &status), alias) == 0) {
1253 foundAlias = true;
1254 break;
1255 }
1256 }
1257 }
1258 /* else not ambiguous, and this is a real problem. foundAlias = false */
1259
1260 if (!foundAlias) {
1261 log_err("FAIL: Converter \"%s\" -> "
1262 "alias[%d]=\"%s\" -> "
1263 "alias[0]=\"%s\", exp. \"%s\"\n",
1264 name, j, alias, mapBack, alias0);
1265 }
1266 }
1267 }
1268 }
1269
1270
1271 /* Check a list of predetermined aliases that we expect to map
1272 * back to ISO_2022 and UTF-8. */
1273 for (i=1; i<ISO_2022_NAMES_LENGTH; ++i) {
1274 const char* mapBack = ucnv_getAlias(ISO_2022_NAMES[i], 0, &status);
1275 if(!mapBack) {
1276 log_data_err("Couldn't get alias for %s. You probably have no data\n", ISO_2022_NAMES[i]);
1277 continue;
1278 }
1279 if (0 != strcmp(mapBack, ISO_2022_NAMES[0])) {
1280 log_err("FAIL: \"%s\" -> \"%s\", expect \"ISO_2022,locale=ja,version=2\"\n",
1281 ISO_2022_NAMES[i], mapBack);
1282 }
1283 }
1284
1285
1286 for (i=1; i<UTF8_NAMES_LENGTH; ++i) {
1287 const char* mapBack = ucnv_getAlias(UTF8_NAMES[i], 0, &status);
1288 if(!mapBack) {
1289 log_data_err("Couldn't get alias for %s. You probably have no data\n", UTF8_NAMES[i]);
1290 continue;
1291 }
1292 if (mapBack && 0 != strcmp(mapBack, UTF8_NAMES[0])) {
1293 log_err("FAIL: \"%s\" -> \"%s\", expect UTF-8\n",
1294 UTF8_NAMES[i], mapBack);
1295 }
1296 }
1297
1298 /*
1299 * Check a list of predetermined aliases that we expect to map
1300 * back to predermined converter names.
1301 */
1302
1303 for (i = 0; i < CONVERTERS_NAMES_LENGTH; ++i) {
1304 const char* mapBack = ucnv_getAlias(CONVERTERS_NAMES[i].alias, 0, &status);
1305 if(!mapBack) {
1306 log_data_err("Couldn't get alias for %s. You probably have no data\n", CONVERTERS_NAMES[i].name);
1307 continue;
1308 }
1309 if (0 != strcmp(mapBack, CONVERTERS_NAMES[i].name)) {
1310 log_err("FAIL: \"%s\" -> \"%s\", expect %s\n",
1311 CONVERTERS_NAMES[i].alias, mapBack, CONVERTERS_NAMES[i].name);
1312 }
1313 }
1314
1315 }
1316
TestDuplicateAlias(void)1317 static void TestDuplicateAlias(void) {
1318 const char *alias;
1319 UErrorCode status = U_ZERO_ERROR;
1320
1321 status = U_ZERO_ERROR;
1322 alias = ucnv_getStandardName("Shift_JIS", "IBM", &status);
1323 if (alias == NULL || strcmp(alias, "ibm-943") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1324 log_data_err("FAIL: Didn't get ibm-943 for Shift_JIS {IBM}. Got %s\n", alias);
1325 }
1326 status = U_ZERO_ERROR;
1327 alias = ucnv_getStandardName("ibm-943", "IANA", &status);
1328 if (alias == NULL || strcmp(alias, "Shift_JIS") != 0 || status != U_AMBIGUOUS_ALIAS_WARNING) {
1329 log_data_err("FAIL: Didn't get Shift_JIS for ibm-943 {IANA}. Got %s\n", alias);
1330 }
1331 status = U_ZERO_ERROR;
1332 alias = ucnv_getStandardName("ibm-943_P130-2000", "IANA", &status);
1333 if (alias != NULL || status == U_AMBIGUOUS_ALIAS_WARNING) {
1334 log_data_err("FAIL: Didn't get NULL for ibm-943 {IANA}. Got %s\n", alias);
1335 }
1336 }
1337
1338
1339 /* Test safe clone callback */
1340
/*
 * Hands out serial numbers for TSCCContext objects: 1 on the first call,
 * then one more on every later call.
 */
static uint32_t TSCC_nextSerial()
{
    static uint32_t upcoming = 1;
    const uint32_t issued = upcoming;

    upcoming = issued + 1;
    return issued;
}
1347
/*
 * Context object handed to the TSCC_fromU / TSCC_toU test callbacks.
 * It records enough state for the safe-clone callback test to tell the
 * individual contexts apart and to see whether a close was delivered.
 */
typedef struct
{
    uint32_t      magic;      /* 0xC0FFEE to identify that the object is OK; the callbacks bail out if it differs */
    uint32_t      serial;     /* minted from TSCC_nextSerial, above; only used to label the object in log output */
    UBool         wasClosed;  /* close happened on the object (set by the callbacks when reason is UCNV_CLOSE) */
} TSCCContext;
1354
/*
 * Creates a fresh heap-allocated context for a cloned converter.
 *
 * The new context gets its own serial number, the 0xC0FFEE magic value and
 * wasClosed cleared; nothing is copied from ctx, which is only used for the
 * log message.
 *
 * Returns the new context, or NULL if the allocation failed (callers test
 * for NULL).  The caller owns the result and releases it with free().
 */
static TSCCContext *TSCC_clone(TSCCContext *ctx)
{
    TSCCContext *newCtx = (TSCCContext *)malloc(sizeof(TSCCContext));

    if(newCtx == NULL) {
        /* Out of memory: report it to the caller instead of dereferencing NULL. */
        return NULL;
    }

    newCtx->serial = TSCC_nextSerial();
    newCtx->wasClosed = 0;
    newCtx->magic = 0xC0FFEE;

    log_verbose("TSCC_clone: %p:%d -> new context %p:%d\n", ctx, ctx->serial, newCtx, newCtx->serial);

    return newCtx;
}
1367
1368 #if !UCONFIG_NO_LEGACY_CONVERSION
/*
 * From-Unicode test callback for the safe-clone callback test.
 *
 * context   - the TSCCContext registered with the converter
 * fromUArgs - callback arguments; only fromUArgs->converter is used
 * reason    - UCNV_CLONE: give the converter a freshly allocated context
 *             UCNV_CLOSE: mark the context as closed
 *             other reasons are only logged
 * err       - set to a failure code when cloning or re-registering fails
 *
 * codeUnits, length and codePoint are ignored.
 */
static void TSCC_fromU(const void *context,
                        UConverterFromUnicodeArgs *fromUArgs,
                        const UChar* codeUnits,
                        int32_t length,
                        UChar32 codePoint,
                        UConverterCallbackReason reason,
                        UErrorCode * err)
{
    // suppress compiler warnings about unused variables
    (void)codeUnits;
    (void)length;
    (void)codePoint;

    TSCCContext *ctx = (TSCCContext*)context;
    UConverterFromUCallback junkFrom;

    log_verbose("TSCC_fromU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, fromUArgs->converter);

    /* Refuse to touch an object that does not carry the expected marker. */
    if(ctx->magic != 0xC0FFEE) {
        log_err("TSCC_fromU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
        return;
    }

    if(reason == UCNV_CLONE) {
        UErrorCode subErr = U_ZERO_ERROR;
        TSCCContext *newCtx;
        const void *junkCtx = NULL;   /* current context; fetched only because the getter requires it */

        /* "recreate" it */
        log_verbose("TSCC_fromU: cloning..\n");
        newCtx = TSCC_clone(ctx);

        if(newCtx == NULL) {
            log_err("TSCC_fromU: internal clone failed on %p\n", ctx);
            /* Do not install a NULL context: later callbacks dereference it. */
            *err = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        /* now, SET it: keep the current callback function, swap in the new context */
        ucnv_getFromUCallBack(fromUArgs->converter, &junkFrom, &junkCtx);
        ucnv_setFromUCallBack(fromUArgs->converter, junkFrom, newCtx, NULL, NULL, &subErr);

        if(U_FAILURE(subErr)) {
            *err = subErr;
        }
    }

    if(reason == UCNV_CLOSE) {
        log_verbose("TSCC_fromU: Context %p:%d closing\n", ctx, ctx->serial);
        ctx->wasClosed = true;
    }
}
1420
/*
 * To-Unicode test callback for the safe-clone callback test.
 *
 * context - the TSCCContext registered with the converter
 * toUArgs - callback arguments; only toUArgs->converter is used
 * reason  - UCNV_CLONE: give the converter a freshly allocated context
 *           UCNV_CLOSE: mark the context as closed
 *           other reasons are only logged
 * err     - set to a failure code when cloning or re-registering fails
 *
 * codeUnits and length are ignored.
 */
static void TSCC_toU(const void *context,
                      UConverterToUnicodeArgs *toUArgs,
                      const char* codeUnits,
                      int32_t length,
                      UConverterCallbackReason reason,
                      UErrorCode * err)
{
    // suppress compiler warnings about unused variables
    (void)codeUnits;
    (void)length;

    TSCCContext *ctx = (TSCCContext*)context;
    UConverterToUCallback junkTo;

    log_verbose("TSCC_toU: Context %p:%d called, reason %d on cnv %p\n", ctx, ctx->serial, reason, toUArgs->converter);

    /* Refuse to touch an object that does not carry the expected marker. */
    if(ctx->magic != 0xC0FFEE) {
        log_err("TSCC_toU: Context %p:%d magic is 0x%x should be 0xC0FFEE.\n", ctx,ctx->serial, ctx->magic);
        return;
    }

    if(reason == UCNV_CLONE) {
        UErrorCode subErr = U_ZERO_ERROR;
        TSCCContext *newCtx;
        const void *junkCtx = NULL;   /* current context; fetched only because the getter requires it */

        /* "recreate" it */
        log_verbose("TSCC_toU: cloning..\n");
        newCtx = TSCC_clone(ctx);

        if(newCtx == NULL) {
            log_err("TSCC_toU: internal clone failed on %p\n", ctx);
            /* Do not install a NULL context: later callbacks dereference it. */
            *err = U_MEMORY_ALLOCATION_ERROR;
            return;
        }

        /* now, SET it: keep the current callback function, swap in the new context */
        ucnv_getToUCallBack(toUArgs->converter, &junkTo, &junkCtx);
        ucnv_setToUCallBack(toUArgs->converter, junkTo, newCtx, NULL, NULL, &subErr);

        if(U_FAILURE(subErr)) {
            *err = subErr;
        }
    }

    if(reason == UCNV_CLOSE) {
        log_verbose("TSCC_toU: Context %p:%d closing\n", ctx, ctx->serial);
        ctx->wasClosed = true;
    }
}
1470
/*
 * Prepares a caller-provided context: clears the closed flag, assigns the
 * next serial number and stamps the 0xC0FFEE marker.
 */
static void TSCC_init(TSCCContext *q)
{
    q->wasClosed = 0;
    q->serial    = TSCC_nextSerial();
    q->magic     = 0xC0FFEE;
}
1477
/*
 * Logs the state of a context under the given label.  A NULL context is
 * only reported verbosely; a context whose marker is not 0xC0FFEE is
 * additionally reported as an error.
 */
static void TSCC_print_log(TSCCContext *q, const char *name)
{
    const char *stateLabel;

    if(q == NULL) {
        log_verbose("TSCContext: %s is NULL!!\n", name);
        return;
    }

    if(q->magic != 0xC0FFEE) {
        log_err("TSCCContext: %p:%d's magic is %x, supposed to be 0xC0FFEE\n",
                q,q->serial, q->magic);
    }

    if(q->wasClosed) {
        stateLabel = "CLOSED";
    } else {
        stateLabel = "open";
    }
    log_verbose("TSCCContext %p:%d=%s - magic %x, %s\n",
                q, q->serial, name, q->magic, stateLabel);
}
1491
TestConvertSafeCloneCallback()1492 static void TestConvertSafeCloneCallback()
1493 {
1494 UErrorCode err = U_ZERO_ERROR;
1495 TSCCContext from1, to1;
1496 TSCCContext *from2, *from3, *to2, *to3;
1497 TSCCContext **pfrom2 = &from2, **pfrom3 = &from3, **pto2 = &to2, **pto3 = &to3;
1498 char hunk[8192];
1499 int32_t hunkSize = 8192;
1500 UConverterFromUCallback junkFrom;
1501 UConverterToUCallback junkTo;
1502 UConverter *conv1, *conv2 = NULL;
1503
1504 conv1 = ucnv_open("iso-8859-3", &err);
1505
1506 if(U_FAILURE(err)) {
1507 log_data_err("Err opening iso-8859-3, %s\n", u_errorName(err));
1508 return;
1509 }
1510
1511 log_verbose("Opened conv1=%p\n", conv1);
1512
1513 TSCC_init(&from1);
1514 TSCC_init(&to1);
1515
1516 TSCC_print_log(&from1, "from1");
1517 TSCC_print_log(&to1, "to1");
1518
1519 ucnv_setFromUCallBack(conv1, TSCC_fromU, &from1, NULL, NULL, &err);
1520 log_verbose("Set from1 on conv1\n");
1521 TSCC_print_log(&from1, "from1");
1522
1523 ucnv_setToUCallBack(conv1, TSCC_toU, &to1, NULL, NULL, &err);
1524 log_verbose("Set to1 on conv1\n");
1525 TSCC_print_log(&to1, "to1");
1526
1527 conv2 = ucnv_safeClone(conv1, hunk, &hunkSize, &err);
1528 if(U_FAILURE(err)) {
1529 log_err("safeClone failed: %s\n", u_errorName(err));
1530 return;
1531 }
1532 log_verbose("Cloned to conv2=%p.\n", conv2);
1533
1534 /********** from *********************/
1535 ucnv_getFromUCallBack(conv2, &junkFrom, (const void**)pfrom2);
1536 ucnv_getFromUCallBack(conv1, &junkFrom, (const void**)pfrom3);
1537
1538 TSCC_print_log(from2, "from2");
1539 TSCC_print_log(from3, "from3(==from1)");
1540
1541 if(from2 == NULL) {
1542 log_err("FAIL! from2 is null \n");
1543 return;
1544 }
1545
1546 if(from3 == NULL) {
1547 log_err("FAIL! from3 is null \n");
1548 return;
1549 }
1550
1551 if(from3 != (&from1) ) {
1552 log_err("FAIL! conv1's FROM context changed!\n");
1553 }
1554
1555 if(from2 == (&from1) ) {
1556 log_err("FAIL! conv1's FROM context is the same as conv2's!\n");
1557 }
1558
1559 if(from1.wasClosed) {
1560 log_err("FAIL! from1 is closed \n");
1561 }
1562
1563 if(from2->wasClosed) {
1564 log_err("FAIL! from2 was closed\n");
1565 }
1566
1567 /********** to *********************/
1568 ucnv_getToUCallBack(conv2, &junkTo, (const void**)pto2);
1569 ucnv_getToUCallBack(conv1, &junkTo, (const void**)pto3);
1570
1571 TSCC_print_log(to2, "to2");
1572 TSCC_print_log(to3, "to3(==to1)");
1573
1574 if(to2 == NULL) {
1575 log_err("FAIL! to2 is null \n");
1576 return;
1577 }
1578
1579 if(to3 == NULL) {
1580 log_err("FAIL! to3 is null \n");
1581 return;
1582 }
1583
1584 if(to3 != (&to1) ) {
1585 log_err("FAIL! conv1's TO context changed!\n");
1586 }
1587
1588 if(to2 == (&to1) ) {
1589 log_err("FAIL! conv1's TO context is the same as conv2's!\n");
1590 }
1591
1592 if(to1.wasClosed) {
1593 log_err("FAIL! to1 is closed \n");
1594 }
1595
1596 if(to2->wasClosed) {
1597 log_err("FAIL! to2 was closed\n");
1598 }
1599
1600 /*************************************/
1601
1602 ucnv_close(conv1);
1603 log_verbose("ucnv_closed (conv1)\n");
1604 TSCC_print_log(&from1, "from1");
1605 TSCC_print_log(from2, "from2");
1606 TSCC_print_log(&to1, "to1");
1607 TSCC_print_log(to2, "to2");
1608
1609 if(from1.wasClosed == false) {
1610 log_err("FAIL! from1 is NOT closed \n");
1611 }
1612
1613 if(from2->wasClosed) {
1614 log_err("FAIL! from2 was closed\n");
1615 }
1616
1617 if(to1.wasClosed == false) {
1618 log_err("FAIL! to1 is NOT closed \n");
1619 }
1620
1621 if(to2->wasClosed) {
1622 log_err("FAIL! to2 was closed\n");
1623 }
1624
1625 ucnv_close(conv2);
1626 log_verbose("ucnv_closed (conv2)\n");
1627
1628 TSCC_print_log(&from1, "from1");
1629 TSCC_print_log(from2, "from2");
1630
1631 if(from1.wasClosed == false) {
1632 log_err("FAIL! from1 is NOT closed \n");
1633 }
1634
1635 if(from2->wasClosed == false) {
1636 log_err("FAIL! from2 was NOT closed\n");
1637 }
1638
1639 TSCC_print_log(&to1, "to1");
1640 TSCC_print_log(to2, "to2");
1641
1642 if(to1.wasClosed == false) {
1643 log_err("FAIL! to1 is NOT closed \n");
1644 }
1645
1646 if(to2->wasClosed == false) {
1647 log_err("FAIL! to2 was NOT closed\n");
1648 }
1649
1650 if(to2 != (&to1)) {
1651 free(to2); /* to1 is stack based */
1652 }
1653 if(from2 != (&from1)) {
1654 free(from2); /* from1 is stack based */
1655 }
1656 }
1657 #endif
1658
/*
 * Returns true if any of the first `length` bytes at p differs from b,
 * and false otherwise (including when length is zero or negative).
 */
static UBool
containsAnyOtherByte(uint8_t *p, int32_t length, uint8_t b) {
    int32_t offset;

    for(offset = 0; offset < length; ++offset) {
        if(p[offset] != b) {
            return true;
        }
    }
    return false;
}
1670
TestConvertSafeClone()1671 static void TestConvertSafeClone()
1672 {
1673 /* one 'regular' & all the 'private stateful' converters */
1674 static const char *const names[] = {
1675 #if !UCONFIG_NO_LEGACY_CONVERSION
1676 "ibm-1047",
1677 "ISO_2022,locale=zh,version=1",
1678 #endif
1679 "SCSU",
1680 #if !UCONFIG_NO_LEGACY_CONVERSION
1681 "HZ",
1682 "lmbcs",
1683 "ISCII,version=0",
1684 "ISO_2022,locale=kr,version=1",
1685 "ISO_2022,locale=jp,version=2",
1686 #endif
1687 "BOCU-1",
1688 "UTF-7",
1689 #if !UCONFIG_NO_LEGACY_CONVERSION
1690 "IMAP-mailbox-name",
1691 "ibm-1047-s390"
1692 #else
1693 "IMAP=mailbox-name"
1694 #endif
1695 };
1696
1697 /* store the actual sizes of each converter */
1698 int32_t actualSizes[UPRV_LENGTHOF(names)];
1699
1700 static const int32_t bufferSizes[] = {
1701 U_CNV_SAFECLONE_BUFFERSIZE,
1702 (int32_t)(3*sizeof(UConverter))/2, /* 1.5*sizeof(UConverter) */
1703 (int32_t)sizeof(UConverter)/2 /* 0.5*sizeof(UConverter) */
1704 };
1705
1706 char charBuffer[21]; /* Leave at an odd number for alignment testing */
1707 uint8_t buffer[3] [U_CNV_SAFECLONE_BUFFERSIZE];
1708 int32_t bufferSize, maxBufferSize;
1709 const char *maxName;
1710 UConverter * cnv, *cnv2;
1711 UErrorCode err;
1712
1713 char *pCharBuffer;
1714 const char *pConstCharBuffer;
1715 const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer);
1716 UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
1717 UChar uniCharBuffer[20];
1718 char charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
1719 const char *pCharSource = charSourceBuffer;
1720 const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
1721 UChar *pUCharTarget = uniCharBuffer;
1722 UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer);
1723 const UChar * pUniBuffer;
1724 const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer);
1725 int32_t idx, j;
1726
1727 err = U_ZERO_ERROR;
1728 cnv = ucnv_open(names[0], &err);
1729 if(U_SUCCESS(err)) {
1730 /* Check the various error & informational states: */
1731
1732 /* Null status - just returns NULL */
1733 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1734 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, NULL))
1735 {
1736 log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
1737 }
1738 /* error status - should return 0 & keep error the same */
1739 err = U_MEMORY_ALLOCATION_ERROR;
1740 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || err != U_MEMORY_ALLOCATION_ERROR)
1741 {
1742 log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
1743 }
1744 err = U_ZERO_ERROR;
1745
1746 /* Null buffer size pointer is ok */
1747 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], NULL, &err)) || U_FAILURE(err))
1748 {
1749 log_err("FAIL: Cloned converter failed to deal correctly with null bufferSize pointer\n");
1750 }
1751 ucnv_close(cnv2);
1752 err = U_ZERO_ERROR;
1753
1754 /* buffer size pointer is 0 - fill in pbufferSize with a size */
1755 bufferSize = 0;
1756 if (NULL != ucnv_safeClone(cnv, buffer[0], &bufferSize, &err) || U_FAILURE(err) || bufferSize <= 0)
1757 {
1758 log_err("FAIL: Cloned converter failed a sizing request ('preflighting')\n");
1759 }
1760 /* Verify our define is large enough */
1761 if (U_CNV_SAFECLONE_BUFFERSIZE < bufferSize)
1762 {
1763 log_err("FAIL: Pre-calculated buffer size is too small\n");
1764 }
1765 /* Verify we can use this run-time calculated size */
1766 if (NULL == (cnv2 = ucnv_safeClone(cnv, buffer[0], &bufferSize, &err)) || U_FAILURE(err))
1767 {
1768 log_err("FAIL: Converter can't be cloned with run-time size\n");
1769 }
1770 if (cnv2) {
1771 ucnv_close(cnv2);
1772 }
1773
1774 /* size one byte too small - should allocate & let us know */
1775 --bufferSize;
1776 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1777 {
1778 log_err("FAIL: Cloned converter failed to deal correctly with too-small buffer size\n");
1779 }
1780 if (cnv2) {
1781 ucnv_close(cnv2);
1782 }
1783
1784 err = U_ZERO_ERROR;
1785 bufferSize = U_CNV_SAFECLONE_BUFFERSIZE;
1786
1787 /* Null buffer pointer - return converter & set error to U_SAFECLONE_ALLOCATED_ERROR */
1788 if (NULL == (cnv2 = ucnv_safeClone(cnv, NULL, &bufferSize, &err)) || err != U_SAFECLONE_ALLOCATED_WARNING)
1789 {
1790 log_err("FAIL: Cloned converter failed to deal correctly with null buffer pointer\n");
1791 }
1792 if (cnv2) {
1793 ucnv_close(cnv2);
1794 }
1795
1796 err = U_ZERO_ERROR;
1797
1798 /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
1799 if (NULL != ucnv_safeClone(NULL, buffer[0], &bufferSize, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
1800 {
1801 log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
1802 }
1803
1804 ucnv_close(cnv);
1805 }
1806
1807 maxBufferSize = 0;
1808 maxName = "";
1809
1810 /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
1811
1812 for(j = 0; j < UPRV_LENGTHOF(bufferSizes); ++j) {
1813 for (idx = 0; idx < UPRV_LENGTHOF(names); idx++)
1814 {
1815 err = U_ZERO_ERROR;
1816 cnv = ucnv_open(names[idx], &err);
1817 if(U_FAILURE(err)) {
1818 log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
1819 continue;
1820 }
1821
1822 if(j == 0) {
1823 /* preflight to get maxBufferSize */
1824 actualSizes[idx] = 0;
1825 ucnv_safeClone(cnv, NULL, &actualSizes[idx], &err);
1826 if(actualSizes[idx] > maxBufferSize) {
1827 maxBufferSize = actualSizes[idx];
1828 maxName = names[idx];
1829 }
1830 }
1831
1832 memset(buffer, 0xaa, sizeof(buffer));
1833
1834 bufferSize = bufferSizes[j];
1835 cnv2 = ucnv_safeClone(cnv, buffer[1], &bufferSize, &err);
1836
1837 /* close the original immediately to make sure that the clone works by itself */
1838 ucnv_close(cnv);
1839
1840 if( actualSizes[idx] <= (bufferSizes[j] - (int32_t)ALIGNOF(UConverter)) &&
1841 err == U_SAFECLONE_ALLOCATED_WARNING
1842 ) {
1843 log_err("ucnv_safeClone(%s) did a heap clone although the buffer was large enough\n", names[idx]);
1844 }
1845
1846 /* check if the clone function overwrote any bytes that it is not supposed to touch */
1847 if(bufferSize <= bufferSizes[j]) {
1848 /* used the stack buffer */
1849 if( containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer[0]), 0xaa) ||
1850 containsAnyOtherByte(buffer[1]+bufferSize, (int32_t)(sizeof(buffer)-(sizeof(buffer[0])+bufferSize)), 0xaa)
1851 ) {
1852 log_err("cloning %s in a stack buffer overwrote bytes outside the bufferSize %d (requested %d)\n",
1853 names[idx], bufferSize, bufferSizes[j]);
1854 }
1855 } else {
1856 /* heap-allocated the clone */
1857 if(containsAnyOtherByte(buffer[0], (int32_t)sizeof(buffer), 0xaa)) {
1858 log_err("cloning %s used the heap (bufferSize %d, requested %d) but overwrote stack buffer bytes\n",
1859 names[idx], bufferSize, bufferSizes[j]);
1860 }
1861 }
1862
1863 pCharBuffer = charBuffer;
1864 pUniBuffer = uniBuffer;
1865
1866 ucnv_fromUnicode(cnv2,
1867 &pCharBuffer,
1868 charBufferLimit,
1869 &pUniBuffer,
1870 uniBufferLimit,
1871 NULL,
1872 true,
1873 &err);
1874 if(U_FAILURE(err)){
1875 log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
1876 }
1877 ucnv_toUnicode(cnv2,
1878 &pUCharTarget,
1879 pUCharTargetLimit,
1880 &pCharSource,
1881 pCharSourceLimit,
1882 NULL,
1883 true,
1884 &err
1885 );
1886
1887 if(U_FAILURE(err)){
1888 log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
1889 }
1890
1891 pConstCharBuffer = charBuffer;
1892 if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
1893 {
1894 log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
1895 }
1896 ucnv_close(cnv2);
1897 }
1898 }
1899
1900 log_verbose("ucnv_safeClone(): sizeof(UConverter)=%lu max preflighted clone size=%d (%s) U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1901 sizeof(UConverter), maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1902 if(maxBufferSize > U_CNV_SAFECLONE_BUFFERSIZE) {
1903 log_err("ucnv_safeClone(): max preflighted clone size=%d (%s) is larger than U_CNV_SAFECLONE_BUFFERSIZE=%d\n",
1904 maxBufferSize, maxName, (int)U_CNV_SAFECLONE_BUFFERSIZE);
1905 }
1906 }
1907
1908
/*
 * Exercises ucnv_clone().
 *
 * Part 1 opens names[0] and checks how ucnv_clone() reacts to a NULL status
 * pointer, to an incoming failure status, to valid arguments, and to a NULL
 * converter.
 *
 * Part 2 opens every converter in names[], clones it, closes the original,
 * and then pushes a little data through the clone with ucnv_fromUnicode(),
 * ucnv_toUnicode() and ucnv_getNextUChar().
 */
static void TestConvertClone()
{
    /* one 'regular' & all the 'private stateful' converters */
    static const char *const names[] = {
#if !UCONFIG_NO_LEGACY_CONVERSION
        "ibm-1047",
        "ISO_2022,locale=zh,version=1",
#endif
        "SCSU",
#if !UCONFIG_NO_LEGACY_CONVERSION
        "HZ",
        "lmbcs",
        "ISCII,version=0",
        "ISO_2022,locale=kr,version=1",
        "ISO_2022,locale=jp,version=2",
#endif
        "BOCU-1",
        "UTF-7",
#if !UCONFIG_NO_LEGACY_CONVERSION
        "IMAP-mailbox-name",
        "ibm-1047-s390"
#else
        /* NOTE(review): spelled with '=' here but with '-' in the branch above - confirm this is intended */
        "IMAP=mailbox-name"
#endif
    };

    char charBuffer[21];    /* Leave at an odd number for alignment testing */
    UConverter * cnv, *cnv2;
    UErrorCode err;

    char *pCharBuffer;
    const char *pConstCharBuffer;
    const char *charBufferLimit = charBuffer + UPRV_LENGTHOF(charBuffer);
    UChar uniBuffer[] = {0x0058, 0x0059, 0x005A}; /* "XYZ" */
    UChar uniCharBuffer[20];
    char charSourceBuffer[] = { 0x1b, 0x24, 0x42 };
    /*
     * pCharSource and pUCharTarget are initialized only here; the loop in
     * part 2 never resets them, so each ucnv_toUnicode() call continues from
     * wherever the previous iteration left these two pointers.
     */
    const char *pCharSource = charSourceBuffer;
    const char *pCharSourceLimit = charSourceBuffer + sizeof(charSourceBuffer);
    UChar *pUCharTarget = uniCharBuffer;
    UChar *pUCharTargetLimit = uniCharBuffer + UPRV_LENGTHOF(uniCharBuffer);
    const UChar * pUniBuffer;
    const UChar *uniBufferLimit = uniBuffer + UPRV_LENGTHOF(uniBuffer);
    int32_t idx;

    /* Part 1: argument and status handling, using the first converter only */
    err = U_ZERO_ERROR;
    cnv = ucnv_open(names[0], &err);
    if(U_SUCCESS(err)) {
        /* Check the various error & informational states: */

        /* Null status - just returns NULL */
        if (NULL != ucnv_clone(cnv, NULL))
        {
            log_err("FAIL: Cloned converter failed to deal correctly with null status\n");
        }
        /* error status - should return 0 & keep error the same */
        err = U_MEMORY_ALLOCATION_ERROR;
        if (NULL != ucnv_clone(cnv, &err) || err != U_MEMORY_ALLOCATION_ERROR)
        {
            log_err("FAIL: Cloned converter failed to deal correctly with incoming error status\n");
        }
        err = U_ZERO_ERROR;

        /* Valid converter and clean status - the clone must succeed */
        if (NULL == (cnv2 = ucnv_clone(cnv, &err)) || U_FAILURE(err))
        {
            log_err("FAIL: Failed to clone.\n");
        }
        /* closed unconditionally, i.e. also when the clone above came back NULL */
        ucnv_close(cnv2);
        err = U_ZERO_ERROR;

        /* Null converter - return NULL & set U_ILLEGAL_ARGUMENT_ERROR */
        if (NULL != ucnv_clone(NULL, &err) || err != U_ILLEGAL_ARGUMENT_ERROR)
        {
            log_err("FAIL: Cloned converter failed to deal correctly with null converter pointer\n");
        }

        ucnv_close(cnv);
    }

    /* Part 2: */
    /* Do these cloned converters work at all - shuffle UChars to chars & back again..*/
    for (idx = 0; idx < UPRV_LENGTHOF(names); idx++)
    {
        err = U_ZERO_ERROR;
        cnv = ucnv_open(names[idx], &err);
        if(U_FAILURE(err)) {
            /* reported as a data error; the remaining converters are still tried */
            log_data_err("ucnv_open(\"%s\") failed - %s\n", names[idx], u_errorName(err));
            continue;
        }

        /* the result is not checked here; err is inspected after the conversions below */
        cnv2 = ucnv_clone(cnv, &err);

        /* close the original immediately to make sure that the clone works by itself */
        ucnv_close(cnv);

        pCharBuffer = charBuffer;
        pUniBuffer = uniBuffer;

        /* "XYZ" -> charBuffer; pCharBuffer ends up just past the bytes written */
        ucnv_fromUnicode(cnv2,
                        &pCharBuffer,
                        charBufferLimit,
                        &pUniBuffer,
                        uniBufferLimit,
                        NULL,
                        true,
                        &err);
        if(U_FAILURE(err)){
            log_err("FAIL: cloned converter failed to do fromU conversion. Error: %s\n",u_errorName(err));
        }
        /* source/target positions are the ones carried over from earlier iterations (see declarations) */
        ucnv_toUnicode(cnv2,
                       &pUCharTarget,
                       pUCharTargetLimit,
                       &pCharSource,
                       pCharSourceLimit,
                       NULL,
                       true,
                       &err
                       );

        if(U_FAILURE(err)){
            log_err("FAIL: cloned converter failed to do toU conversion. Error: %s\n",u_errorName(err));
        }

        /* read back the first character of what ucnv_fromUnicode() wrote; it must equal uniBuffer[0] */
        pConstCharBuffer = charBuffer;
        if (uniBuffer [0] != ucnv_getNextUChar(cnv2, &pConstCharBuffer, pCharBuffer, &err))
        {
            log_err("FAIL: Cloned converter failed to do conversion. Error: %s\n",u_errorName(err));
        }
        ucnv_close(cnv2);
    }
}
2039
TestCCSID()2040 static void TestCCSID() {
2041 #if !UCONFIG_NO_LEGACY_CONVERSION
2042 UConverter *cnv;
2043 UErrorCode errorCode;
2044 int32_t ccsids[]={ 37, 850, 943, 949, 950, 1047, 1252, 1392, 33722 };
2045 int32_t i, ccsid;
2046
2047 for(i=0; i<UPRV_LENGTHOF(ccsids); ++i) {
2048 ccsid=ccsids[i];
2049
2050 errorCode=U_ZERO_ERROR;
2051 cnv=ucnv_openCCSID(ccsid, UCNV_IBM, &errorCode);
2052 if(U_FAILURE(errorCode)) {
2053 log_data_err("error: ucnv_openCCSID(%ld) failed (%s)\n", ccsid, u_errorName(errorCode));
2054 continue;
2055 }
2056
2057 if(ccsid!=ucnv_getCCSID(cnv, &errorCode)) {
2058 log_err("error: ucnv_getCCSID(ucnv_openCCSID(%ld))=%ld\n", ccsid, ucnv_getCCSID(cnv, &errorCode));
2059 }
2060
2061 /* skip gb18030(ccsid 1392) */
2062 if(ccsid != 1392 && UCNV_IBM!=ucnv_getPlatform(cnv, &errorCode)) {
2063 log_err("error: ucnv_getPlatform(ucnv_openCCSID(%ld))=%ld!=UCNV_IBM\n", ccsid, ucnv_getPlatform(cnv, &errorCode));
2064 }
2065
2066 ucnv_close(cnv);
2067 }
2068 #endif
2069 }
2070
2071 /* jitterbug 932: ucnv_convert() bugs --------------------------------------- */
2072
2073 /* CHUNK_SIZE defined in common\ucnv.c: */
2074 #define CHUNK_SIZE 1024
2075
2076 static void bug1(void);
2077 static void bug2(void);
2078 static void bug3(void);
2079
/*
 * Driver for the jitterbug 932 regression checks: runs the three
 * ucnv_convert() bug reproductions one after another.
 */
static void
TestJ932(void)
{
    /* Unicode intermediate buffer straddle bug */
    bug1();

    /* pre-flighting size incorrect caused by simple overflow */
    bug2();

    /* pre-flighting size incorrect caused by expansion overflow */
    bug3();
}
2087
2088 /*
2089 * jitterbug 932: test chunking boundary conditions in
2090
2091 int32_t ucnv_convert(const char *toConverterName,
2092 const char *fromConverterName,
2093 char *target,
2094 int32_t targetSize,
2095 const char *source,
2096 int32_t sourceSize,
2097 UErrorCode * err)
2098
2099 * See discussions on the icu mailing list in
2100 * 2001-April with the subject "converter 'flush' question".
2101 *
2102 * Bug report and test code provided by Edward J. Batutis.
2103 */
bug1()2104 static void bug1()
2105 {
2106 #if !UCONFIG_NO_LEGACY_CONVERSION
2107 char char_in[CHUNK_SIZE+32];
2108 char char_out[CHUNK_SIZE*2];
2109
2110 /* GB 18030 equivalent of U+10000 is 90308130 */
2111 static const char test_seq[]={ (char)0x90u, 0x30, (char)0x81u, 0x30 };
2112
2113 UErrorCode err = U_ZERO_ERROR;
2114 int32_t i, test_seq_len = sizeof(test_seq);
2115
2116 /*
2117 * causes straddle bug in Unicode intermediate buffer by sliding the test sequence forward
2118 * until the straddle bug appears. I didn't want to hard-code everything so this test could
2119 * be expanded - however this is the only type of straddle bug I can think of at the moment -
2120 * a high surrogate in the last position of the Unicode intermediate buffer. Apparently no
2121 * other Unicode sequences cause a bug since combining sequences are not supported by the
2122 * converters.
2123 */
2124
2125 for (i = test_seq_len; i >= 0; i--) {
2126 /* put character sequence into input buffer */
2127 memset(char_in, 0x61, sizeof(char_in)); /* GB 18030 'a' */
2128 memcpy(char_in + (CHUNK_SIZE - i), test_seq, test_seq_len);
2129
2130 /* do the conversion */
2131 ucnv_convert("us-ascii", /* out */
2132 "gb18030", /* in */
2133 char_out,
2134 sizeof(char_out),
2135 char_in,
2136 sizeof(char_in),
2137 &err);
2138
2139 /* bug1: */
2140 if (err == U_TRUNCATED_CHAR_FOUND) {
2141 /* this happens when surrogate pair straddles the intermediate buffer in
2142 * T_UConverter_fromCodepageToCodepage */
2143 log_err("error j932 bug 1: expected success, got U_TRUNCATED_CHAR_FOUND\n");
2144 }
2145 }
2146 #endif
2147 }
2148
2149 /* bug2: pre-flighting loop bug: simple overflow causes bug */
bug2()2150 static void bug2()
2151 {
2152 /* US-ASCII "1234567890" */
2153 static const char source[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39 };
2154 #if !UCONFIG_ONLY_HTML_CONVERSION
2155 static const char sourceUTF8[]={ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, (char)0xef, (char)0x80, (char)0x80 };
2156 static const char sourceUTF32[]={ 0x00, 0x00, 0x00, 0x30,
2157 0x00, 0x00, 0x00, 0x31,
2158 0x00, 0x00, 0x00, 0x32,
2159 0x00, 0x00, 0x00, 0x33,
2160 0x00, 0x00, 0x00, 0x34,
2161 0x00, 0x00, 0x00, 0x35,
2162 0x00, 0x00, 0x00, 0x36,
2163 0x00, 0x00, 0x00, 0x37,
2164 0x00, 0x00, 0x00, 0x38,
2165 0x00, 0x00, (char)0xf0, 0x00};
2166 #endif
2167
2168 static char target[5];
2169
2170 UErrorCode err = U_ZERO_ERROR;
2171 int32_t size;
2172
2173 /* do the conversion */
2174 size = ucnv_convert("iso-8859-1", /* out */
2175 "us-ascii", /* in */
2176 target,
2177 sizeof(target),
2178 source,
2179 sizeof(source),
2180 &err);
2181
2182 if ( size != 10 ) {
2183 /* bug2: size is 5, should be 10 */
2184 log_data_err("error j932 bug 2 us-ascii->iso-8859-1: got preflighting size %d instead of 10\n", size);
2185 }
2186
2187 #if !UCONFIG_ONLY_HTML_CONVERSION
2188 err = U_ZERO_ERROR;
2189 /* do the conversion */
2190 size = ucnv_convert("UTF-32BE", /* out */
2191 "UTF-8", /* in */
2192 target,
2193 sizeof(target),
2194 sourceUTF8,
2195 sizeof(sourceUTF8),
2196 &err);
2197
2198 if ( size != 32 ) {
2199 /* bug2: size is 5, should be 32 */
2200 log_err("error j932 bug 2 UTF-8->UTF-32BE: got preflighting size %d instead of 32\n", size);
2201 }
2202
2203 err = U_ZERO_ERROR;
2204 /* do the conversion */
2205 size = ucnv_convert("UTF-8", /* out */
2206 "UTF-32BE", /* in */
2207 target,
2208 sizeof(target),
2209 sourceUTF32,
2210 sizeof(sourceUTF32),
2211 &err);
2212
2213 if ( size != 12 ) {
2214 /* bug2: size is 5, should be 12 */
2215 log_err("error j932 bug 2 UTF-32BE->UTF-8: got preflighting size %d instead of 12\n", size);
2216 }
2217 #endif
2218 }
2219
2220 /*
2221 * bug3: when the characters expand going from source to target codepage
2222 * you get bug3 in addition to bug2
2223 */
bug3()2224 static void bug3()
2225 {
2226 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
2227 char char_in[CHUNK_SIZE*4];
2228 char target[5];
2229 UErrorCode err = U_ZERO_ERROR;
2230 int32_t size;
2231
2232 /*
2233 * first get the buggy size from bug2 then
2234 * compare it to buggy size with an expansion
2235 */
2236 memset(char_in, 0x61, sizeof(char_in)); /* US-ASCII 'a' */
2237
2238 /* do the conversion */
2239 size = ucnv_convert("lmbcs", /* out */
2240 "us-ascii", /* in */
2241 target,
2242 sizeof(target),
2243 char_in,
2244 sizeof(char_in),
2245 &err);
2246
2247 if ( size != sizeof(char_in) ) {
2248 /*
2249 * bug2: size is 0x2805 (CHUNK_SIZE*2+5 - maybe 5 is the size of the overflow buffer
2250 * in the converter?), should be CHUNK_SIZE*4
2251 *
2252 * Markus 2001-05-18: 5 is the size of our target[] here, ucnv_convert() did not reset targetSize...
2253 */
2254 log_data_err("error j932 bug 2/3a: expected preflighting size 0x%04x, got 0x%04x\n", sizeof(char_in), size);
2255 }
2256
2257 /*
2258 * now do the conversion with expansion
2259 * ascii 0x08 expands to 0x0F 0x28 in lmbcs
2260 */
2261 memset(char_in, 8, sizeof(char_in));
2262 err = U_ZERO_ERROR;
2263
2264 /* do the conversion */
2265 size = ucnv_convert("lmbcs", /* out */
2266 "us-ascii", /* in */
2267 target,
2268 sizeof(target),
2269 char_in,
2270 sizeof(char_in),
2271 &err);
2272
2273 /* expect 2X expansion */
2274 if ( size != sizeof(char_in) * 2 ) {
2275 /*
2276 * bug3:
2277 * bug2 would lead us to expect 0x2805, but it isn't that either, it is 0x3c05:
2278 */
2279 log_data_err("error j932 bug 3b: expected 0x%04x, got 0x%04x\n", sizeof(char_in) * 2, size);
2280 }
2281 #endif
2282 }
2283
/*
 * Converts src[0..srcLength) from srcCnv to targetCnv with repeated
 * ucnv_convertEx() calls, offering the function at most chunkSize units of
 * source, pivot and additional target space per call, and then compares the
 * final status and the bytes written with the expectation.
 *
 * srcCnv, targetCnv    converters; both are reset before the first call
 * src, srcLength       input bytes
 * expectTarget,
 * expectTargetLength   expected output bytes
 * chunkSize            units offered per call; values above CHUNK_SIZE are
 *                      clamped to CHUNK_SIZE
 * testName             label used in failure messages
 * expectCode           expected final status; when it is U_ZERO_ERROR,
 *                      U_STRING_NOT_TERMINATED_WARNING is accepted as well
 *
 * Mismatches are reported through log_err(); nothing is returned.
 */
static void
convertExStreaming(UConverter *srcCnv, UConverter *targetCnv,
                   const char *src, int32_t srcLength,
                   const char *expectTarget, int32_t expectTargetLength,
                   int32_t chunkSize,
                   const char *testName,
                   UErrorCode expectCode) {
    UChar pivotBuffer[CHUNK_SIZE];
    UChar *pivotSource, *pivotTarget;
    const UChar *pivotLimit;

    char targetBuffer[CHUNK_SIZE];
    char *target;
    const char *srcLimit, *finalSrcLimit, *targetLimit;

    int32_t targetLength;

    UBool flush;

    UErrorCode errorCode;

    /* setup */
    if(chunkSize>CHUNK_SIZE) {
        chunkSize=CHUNK_SIZE;
    }

    /* only the first chunkSize units of the pivot buffer are made available */
    pivotSource=pivotTarget=pivotBuffer;
    pivotLimit=pivotBuffer+chunkSize;

    /* the target window starts at chunkSize bytes and is extended on overflow below */
    finalSrcLimit=src+srcLength;
    target=targetBuffer;
    targetLimit=targetBuffer+chunkSize;

    ucnv_resetToUnicode(srcCnv);
    ucnv_resetFromUnicode(targetCnv);

    errorCode=U_ZERO_ERROR;
    flush=false;

    /* convert, streaming-style (both converters and pivot keep state) */
    for(;;) {
        /* for testing, give ucnv_convertEx() at most <chunkSize> input/pivot/output units at a time */
        if(src+chunkSize<=finalSrcLimit) {
            srcLimit=src+chunkSize;
        } else {
            srcLimit=finalSrcLimit;
        }
        /* reset is always false here: the converters were reset once, before the loop */
        ucnv_convertEx(targetCnv, srcCnv,
                       &target, targetLimit,
                       &src, srcLimit,
                       pivotBuffer, &pivotSource, &pivotTarget, pivotLimit,
                       false, flush, &errorCode);
        /* computed after every call so that the checks after the loop see the final length */
        targetLength=(int32_t)(target-targetBuffer);
        if(target>targetLimit) {
            log_err("ucnv_convertEx(%s) chunk[%d] target %p exceeds targetLimit %p\n",
                    testName, chunkSize, target, targetLimit);
            break; /* TODO: major problem! */
        }
        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
            /* continue converting another chunk */
            errorCode=U_ZERO_ERROR;
            /* grow the target window by chunkSize, but never past the end of targetBuffer */
            if(targetLength+chunkSize<=(int32_t)sizeof(targetBuffer)) {
                targetLimit=target+chunkSize;
            } else {
                targetLimit=targetBuffer+(int32_t)sizeof(targetBuffer);
            }
        } else if(U_FAILURE(errorCode)) {
            /* failure */
            break;
        } else if(flush) {
            /* all done */
            break;
        } else if(src==finalSrcLimit && pivotSource==pivotTarget) {
            /* all consumed, now flush without input (separate from conversion for testing) */
            flush=true;
        }
    }

    /* compare status first, then length, then content; only the first mismatch is reported */
    if(!(errorCode==expectCode || (expectCode==U_ZERO_ERROR && errorCode==U_STRING_NOT_TERMINATED_WARNING))) {
        log_err("ucnv_convertEx(%s) chunk[%d] results in %s instead of %s\n",
                testName, chunkSize, u_errorName(errorCode), u_errorName(expectCode));
    } else if(targetLength!=expectTargetLength) {
        log_err("ucnv_convertEx(%s) chunk[%d] writes %d bytes instead of %d\n",
                testName, chunkSize, targetLength, expectTargetLength);
    } else if(memcmp(targetBuffer, expectTarget, targetLength)!=0) {
        log_err("ucnv_convertEx(%s) chunk[%d] writes different bytes than expected\n",
                testName, chunkSize);
    }
}
2373
2374 static void
convertExMultiStreaming(UConverter * srcCnv,UConverter * targetCnv,const char * src,int32_t srcLength,const char * expectTarget,int32_t expectTargetLength,const char * testName,UErrorCode expectCode)2375 convertExMultiStreaming(UConverter *srcCnv, UConverter *targetCnv,
2376 const char *src, int32_t srcLength,
2377 const char *expectTarget, int32_t expectTargetLength,
2378 const char *testName,
2379 UErrorCode expectCode) {
2380 convertExStreaming(srcCnv, targetCnv,
2381 src, srcLength,
2382 expectTarget, expectTargetLength,
2383 1, testName, expectCode);
2384 convertExStreaming(srcCnv, targetCnv,
2385 src, srcLength,
2386 expectTarget, expectTargetLength,
2387 3, testName, expectCode);
2388 convertExStreaming(srcCnv, targetCnv,
2389 src, srcLength,
2390 expectTarget, expectTargetLength,
2391 7, testName, expectCode);
2392 }
2393
TestConvertEx()2394 static void TestConvertEx() {
2395 #if !UCONFIG_NO_LEGACY_CONVERSION
2396 static const uint8_t
2397 utf8[]={
2398 /* 4e00 30a1 ff61 0410 */
2399 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2400 },
2401 shiftJIS[]={
2402 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2403 },
2404 errorTarget[]={
2405 /*
2406 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2407 * SUB, SUB, 0x40, SUB, SUB, 0x40
2408 */
2409 0xfc, 0xfc, 0xfc, 0xfc, 0x40, 0xfc, 0xfc, 0xfc, 0xfc, 0x40
2410 };
2411
2412 char srcBuffer[100], targetBuffer[100];
2413
2414 const char *src;
2415 char *target;
2416
2417 UChar pivotBuffer[100];
2418 UChar *pivotSource, *pivotTarget;
2419
2420 UConverter *cnv1, *cnv2;
2421 UErrorCode errorCode;
2422
2423 errorCode=U_ZERO_ERROR;
2424 cnv1=ucnv_open("UTF-8", &errorCode);
2425 if(U_FAILURE(errorCode)) {
2426 log_err("unable to open a UTF-8 converter - %s\n", u_errorName(errorCode));
2427 return;
2428 }
2429
2430 cnv2=ucnv_open("Shift-JIS", &errorCode);
2431 if(U_FAILURE(errorCode)) {
2432 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2433 ucnv_close(cnv1);
2434 return;
2435 }
2436
2437 /* test ucnv_convertEx() with streaming conversion style */
2438 convertExMultiStreaming(cnv1, cnv2,
2439 (const char *)utf8, sizeof(utf8), (const char *)shiftJIS, sizeof(shiftJIS),
2440 "UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2441
2442 convertExMultiStreaming(cnv2, cnv1,
2443 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8),
2444 "Shift-JIS -> UTF-8", U_ZERO_ERROR);
2445
2446 /* U_ZERO_ERROR because by default the SUB callbacks are set */
2447 convertExMultiStreaming(cnv1, cnv2,
2448 (const char *)shiftJIS, sizeof(shiftJIS), (const char *)errorTarget, sizeof(errorTarget),
2449 "shiftJIS[] UTF-8 -> Shift-JIS", U_ZERO_ERROR);
2450
2451 /* test some simple conversions */
2452
2453 /* NUL-terminated source and target */
2454 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2455 memcpy(srcBuffer, utf8, sizeof(utf8));
2456 srcBuffer[sizeof(utf8)]=0;
2457 src=srcBuffer;
2458 target=targetBuffer;
2459 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2460 NULL, NULL, NULL, NULL, true, true, &errorCode);
2461 if( errorCode!=U_ZERO_ERROR ||
2462 target-targetBuffer!=sizeof(shiftJIS) ||
2463 *target!=0 ||
2464 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2465 ) {
2466 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s - writes %d bytes, expect %d\n",
2467 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS));
2468 }
2469
2470 /* NUL-terminated source and U_STRING_NOT_TERMINATED_WARNING */
2471 errorCode=U_AMBIGUOUS_ALIAS_WARNING;
2472 memset(targetBuffer, 0xff, sizeof(targetBuffer));
2473 src=srcBuffer;
2474 target=targetBuffer;
2475 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(shiftJIS), &src, NULL,
2476 NULL, NULL, NULL, NULL, true, true, &errorCode);
2477 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2478 target-targetBuffer!=sizeof(shiftJIS) ||
2479 *target!=(char)0xff ||
2480 memcmp(targetBuffer, shiftJIS, sizeof(shiftJIS))!=0
2481 ) {
2482 log_err("ucnv_convertEx(simple UTF-8 -> Shift_JIS) fails: %s, expect U_STRING_NOT_TERMINATED_WARNING - writes %d bytes, expect %d\n",
2483 u_errorName(errorCode), target-targetBuffer, sizeof(shiftJIS));
2484 }
2485
2486 /* bad arguments */
2487 errorCode=U_MESSAGE_PARSE_ERROR;
2488 src=srcBuffer;
2489 target=targetBuffer;
2490 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2491 NULL, NULL, NULL, NULL, true, true, &errorCode);
2492 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
2493 log_err("ucnv_convertEx(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
2494 }
2495
2496 /* pivotLimit==pivotStart */
2497 errorCode=U_ZERO_ERROR;
2498 pivotSource=pivotTarget=pivotBuffer;
2499 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2500 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer, true, true, &errorCode);
2501 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2502 log_err("ucnv_convertEx(pivotLimit==pivotStart) sets %s\n", u_errorName(errorCode));
2503 }
2504
2505 /* *pivotSource==NULL */
2506 errorCode=U_ZERO_ERROR;
2507 pivotSource=NULL;
2508 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2509 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, true, true, &errorCode);
2510 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2511 log_err("ucnv_convertEx(*pivotSource==NULL) sets %s\n", u_errorName(errorCode));
2512 }
2513
2514 /* *source==NULL */
2515 errorCode=U_ZERO_ERROR;
2516 src=NULL;
2517 pivotSource=pivotBuffer;
2518 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2519 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+1, true, true, &errorCode);
2520 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2521 log_err("ucnv_convertEx(*source==NULL) sets %s\n", u_errorName(errorCode));
2522 }
2523
2524 /* streaming conversion without a pivot buffer */
2525 errorCode=U_ZERO_ERROR;
2526 src=srcBuffer;
2527 pivotSource=pivotBuffer;
2528 ucnv_convertEx(cnv2, cnv1, &target, targetBuffer+sizeof(targetBuffer), &src, NULL,
2529 NULL, &pivotSource, &pivotTarget, pivotBuffer+1, true, false, &errorCode);
2530 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
2531 log_err("ucnv_convertEx(pivotStart==NULL) sets %s\n", u_errorName(errorCode));
2532 }
2533
2534 ucnv_close(cnv1);
2535 ucnv_close(cnv2);
2536 #endif
2537 }
2538
/* Test illegal UTF-8 input: Data and functions for TestConvertExFromUTF8(). */
/*
 * Each entry is one ill-formed UTF-8 byte sequence, stored as a C string.
 * testFromTruncatedUTF8() and testFromBadUTF8() take the length of an entry
 * with strlen(), so an entry cannot contain a 0x00 byte.
 */
static const char *const badUTF8[]={
    /* trail byte */
    "\x80",

    /* truncated multi-byte sequences */
    /* lead byte only */
    "\xd0",
    "\xe0",
    "\xe1",
    "\xed",
    "\xee",
    "\xf0",
    "\xf1",
    "\xf4",
    "\xf8",
    "\xfc",

    /* lead byte plus one more byte */
    "\xe0\x80",
    "\xe0\xa0",
    "\xe1\x80",
    "\xed\x80",
    "\xed\xa0",
    "\xee\x80",
    "\xf0\x80",
    "\xf0\x90",
    "\xf1\x80",
    "\xf4\x80",
    "\xf4\x90",
    "\xf8\x80",
    "\xfc\x80",

    /* lead byte plus two more bytes */
    "\xf0\x80\x80",
    "\xf0\x90\x80",
    "\xf1\x80\x80",
    "\xf4\x80\x80",
    "\xf4\x90\x80",
    "\xf8\x80\x80",
    "\xfc\x80\x80",

    /* lead byte plus three more bytes */
    "\xf8\x80\x80\x80",
    "\xfc\x80\x80\x80",

    /* lead byte plus four more bytes */
    "\xfc\x80\x80\x80\x80",

    /* complete sequences but non-shortest forms or out of range etc. */
    "\xc0\x80",
    "\xe0\x80\x80",
    "\xed\xa0\x80",
    "\xf0\x80\x80\x80",
    "\xf4\x90\x80\x80",
    "\xf8\x80\x80\x80\x80",
    "\xfc\x80\x80\x80\x80\x80",
    "\xfe",
    "\xff"
};
2594
2595 #define ARG_CHAR_ARR_SIZE 8
2596
2597 /* get some character that can be converted and convert it */
getTestChar(UConverter * cnv,const char * converterName,char charUTF8[4],int32_t * pCharUTF8Length,char char0[ARG_CHAR_ARR_SIZE],int32_t * pChar0Length,char char1[ARG_CHAR_ARR_SIZE],int32_t * pChar1Length)2598 static UBool getTestChar(UConverter *cnv, const char *converterName,
2599 char charUTF8[4], int32_t *pCharUTF8Length,
2600 char char0[ARG_CHAR_ARR_SIZE], int32_t *pChar0Length,
2601 char char1[ARG_CHAR_ARR_SIZE], int32_t *pChar1Length) {
2602 UChar utf16[U16_MAX_LENGTH];
2603 int32_t utf16Length;
2604
2605 const UChar *utf16Source;
2606 char *target;
2607
2608 USet *set;
2609 UChar32 c;
2610 UErrorCode errorCode;
2611
2612 errorCode=U_ZERO_ERROR;
2613 set=uset_open(1, 0);
2614 ucnv_getUnicodeSet(cnv, set, UCNV_ROUNDTRIP_SET, &errorCode);
2615 c=uset_charAt(set, uset_size(set)/2);
2616 uset_close(set);
2617
2618 utf16Length=0;
2619 U16_APPEND_UNSAFE(utf16, utf16Length, c);
2620 *pCharUTF8Length=0;
2621 U8_APPEND_UNSAFE(charUTF8, *pCharUTF8Length, c);
2622
2623 utf16Source=utf16;
2624 target=char0;
2625 ucnv_fromUnicode(cnv,
2626 &target, char0+ARG_CHAR_ARR_SIZE,
2627 &utf16Source, utf16+utf16Length,
2628 NULL, false, &errorCode);
2629 *pChar0Length=(int32_t)(target-char0);
2630
2631 utf16Source=utf16;
2632 target=char1;
2633 ucnv_fromUnicode(cnv,
2634 &target, char1+ARG_CHAR_ARR_SIZE,
2635 &utf16Source, utf16+utf16Length,
2636 NULL, false, &errorCode);
2637 *pChar1Length=(int32_t)(target-char1);
2638
2639 if(U_FAILURE(errorCode)) {
2640 log_err("unable to get test character for %s - %s\n", converterName, u_errorName(errorCode));
2641 return false;
2642 }
2643 return true;
2644 }
2645
isOneTruncatedUTF8(const char * s,int32_t length)2646 static UBool isOneTruncatedUTF8(const char *s, int32_t length) {
2647 if(length==0) {
2648 return false;
2649 } else if(length==1) {
2650 return U8_IS_LEAD(s[0]);
2651 } else {
2652 int32_t count=U8_COUNT_TRAIL_BYTES(s[0]);
2653 if(length<=count) {
2654 // 2 or more bytes, but fewer than the lead byte indicates.
2655 int32_t oneLength=0;
2656 U8_FWD_1(s, oneLength, length);
2657 // Truncated if we reach the end of the string.
2658 // Not true if the lead byte and first trail byte do not start a valid sequence,
2659 // e.g., E0 80 -> oneLength=1.
2660 return oneLength==length;
2661 }
2662 return false;
2663 }
2664 }
2665
/*
 * For every badUTF8[] entry that isOneTruncatedUTF8() accepts, converts
 * "<test character><truncated sequence>" from UTF-8 to cnv in one
 * ucnv_convertEx() call with reset and flush set, with the UTF-8 converter's
 * toUnicode callback set to UCNV_TO_U_CALLBACK_STOP. The conversion has to
 * end with U_TRUNCATED_CHAR_FOUND and an unmoved pivotSource, and
 * ucnv_getInvalidChars() has to return exactly the truncated bytes.
 *
 * char0/char0Length and char1/char1Length are accepted but not used.
 */
static void testFromTruncatedUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
                                  char charUTF8[4], int32_t charUTF8Length,
                                  char char0[8], int32_t char0Length,
                                  char char1[8], int32_t char1Length) {
    // suppress compiler warnings about unused variables
    (void)char0;
    (void)char0Length;
    (void)char1;
    (void)char1Length;

    char input[16];
    char converted[16];
    char rejected[8];
    UChar pivot[8];

    UErrorCode status;
    int32_t n;

    /* test truncated sequences */
    status=U_ZERO_ERROR;
    ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &status);

    /* the test character is the common prefix of every input string */
    memcpy(input, charUTF8, charUTF8Length);

    for(n=0; n<UPRV_LENGTHOF(badUTF8); ++n) {
        const char *bad=badUTF8[n];
        int32_t badLength=(int32_t)strlen(bad);

        /* only the truncated sequences are of interest here */
        if(isOneTruncatedUTF8(bad, badLength)) {
            const char *in=input;
            const char *inLimit;
            char *out=converted;
            UChar *pivotSource=pivot;
            UChar *pivotTarget=pivot;

            /* append the truncated sequence after the test character */
            memcpy(input+charUTF8Length, bad, badLength);
            inLimit=input+charUTF8Length+badLength;

            /* convert and check the invalidChars */
            status=U_ZERO_ERROR;
            ucnv_convertEx(cnv, utf8Cnv,
                           &out, converted+sizeof(converted),
                           &in, inLimit,
                           pivot, &pivotSource, &pivotTarget, pivot+UPRV_LENGTHOF(pivot),
                           true, true, /* reset & flush */
                           &status);

            if(status==U_TRUNCATED_CHAR_FOUND && pivotSource==pivot) {
                int8_t rejectedLength=(int8_t)sizeof(rejected);

                status=U_ZERO_ERROR;
                ucnv_getInvalidChars(utf8Cnv, rejected, &rejectedLength, &status);
                if(rejectedLength!=badLength || 0!=memcmp(rejected, bad, badLength)) {
                    log_err("wrong invalidChars from %s badUTF8[%ld]\n", converterName, (long)n);
                }
            } else {
                log_err("unexpected error %s from %s badUTF8[%ld]\n", u_errorName(status), converterName, (long)n);
            }
        }
    }
}
2737
/*
 * Builds one long UTF-8 input in which every badUTF8[] entry sits between two
 * copies of the test character, sets the UTF-8 converter's toUnicode callback
 * to UCNV_TO_U_CALLBACK_SKIP, and checks with convertExMultiStreaming() that
 * converting to cnv yields only the test characters: char0 once, then char1
 * once per bad sequence.
 *
 * utf8Cnv, cnv             source (UTF-8) and target converters
 * converterName            target converter name, used to label the test
 * charUTF8/charUTF8Length  the test character in UTF-8
 * char0/char0Length        expected output for its first occurrence
 * char1/char1Length        expected output for every later occurrence
 */
static void testFromBadUTF8(UConverter *utf8Cnv, UConverter *cnv, const char *converterName,
                            char charUTF8[4], int32_t charUTF8Length,
                            char char0[8], int32_t char0Length,
                            char char1[8], int32_t char1Length) {
    char utf8[600], expect[600];
    int32_t utf8Length, expectLength;

    char testName[32];

    UErrorCode errorCode;
    int32_t i;

    errorCode=U_ZERO_ERROR;
    ucnv_setToUCallBack(utf8Cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, NULL, NULL, &errorCode);

    /*
     * assemble an input string with the test character between each
     * bad sequence,
     * and an expected string with repeated test character output
     */
    memcpy(utf8, charUTF8, charUTF8Length);
    utf8Length=charUTF8Length;

    memcpy(expect, char0, char0Length);
    expectLength=char0Length;

    for(i=0; i<UPRV_LENGTHOF(badUTF8); ++i) {
        int32_t length = (int32_t)strlen(badUTF8[i]);
        memcpy(utf8+utf8Length, badUTF8[i], length);
        utf8Length+=length;

        memcpy(utf8+utf8Length, charUTF8, charUTF8Length);
        utf8Length+=charUTF8Length;

        memcpy(expect+expectLength, char1, char1Length);
        expectLength+=char1Length;
    }

    /* expect that each bad UTF-8 sequence is detected and skipped */
    /*
     * Bounded formatting: the label is only used in messages, so a converter
     * name too long for testName is truncated rather than written past the
     * end of the array.
     */
    snprintf(testName, sizeof(testName), "from bad UTF-8 to %s", converterName);

    convertExMultiStreaming(utf8Cnv, cnv,
                            utf8, utf8Length,
                            expect, expectLength,
                            testName,
                            U_ZERO_ERROR);
}
2786
2787 /* Test illegal UTF-8 input. */
TestConvertExFromUTF8()2788 static void TestConvertExFromUTF8() {
2789 static const char *const converterNames[]={
2790 #if !UCONFIG_NO_LEGACY_CONVERSION
2791 "windows-1252",
2792 "shift-jis",
2793 #endif
2794 "us-ascii",
2795 "iso-8859-1",
2796 "utf-8"
2797 };
2798
2799 UConverter *utf8Cnv, *cnv;
2800 UErrorCode errorCode;
2801 int32_t i;
2802
2803 /* fromUnicode versions of some character, from initial state and later */
2804 char charUTF8[4], char0[8], char1[8];
2805 int32_t charUTF8Length, char0Length, char1Length;
2806
2807 errorCode=U_ZERO_ERROR;
2808 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2809 if(U_FAILURE(errorCode)) {
2810 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2811 return;
2812 }
2813
2814 for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) {
2815 errorCode=U_ZERO_ERROR;
2816 cnv=ucnv_open(converterNames[i], &errorCode);
2817 if(U_FAILURE(errorCode)) {
2818 log_data_err("unable to open %s converter - %s\n", converterNames[i], u_errorName(errorCode));
2819 continue;
2820 }
2821 if(!getTestChar(cnv, converterNames[i], charUTF8, &charUTF8Length, char0, &char0Length, char1, &char1Length)) {
2822 continue;
2823 }
2824 testFromTruncatedUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2825 testFromBadUTF8(utf8Cnv, cnv, converterNames[i], charUTF8, charUTF8Length, char0, char0Length, char1, char1Length);
2826 ucnv_close(cnv);
2827 }
2828 ucnv_close(utf8Cnv);
2829 }
2830
TestConvertExFromUTF8_C5F0()2831 static void TestConvertExFromUTF8_C5F0() {
2832 static const char *const converterNames[]={
2833 #if !UCONFIG_NO_LEGACY_CONVERSION
2834 "windows-1251",
2835 "shift-jis",
2836 #endif
2837 "us-ascii",
2838 "iso-8859-1",
2839 "utf-8"
2840 };
2841
2842 UConverter *utf8Cnv, *cnv;
2843 UErrorCode errorCode;
2844 int32_t i;
2845
2846 static const char bad_utf8[2]={ (char)0xC5, (char)0xF0 };
2847 /* Expect "��" (2x U+FFFD as decimal NCRs) */
2848 static const char twoNCRs[16]={
2849 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B,
2850 0x26, 0x23, 0x36, 0x35, 0x35, 0x33, 0x33, 0x3B
2851 };
2852 static const char twoFFFD[6]={
2853 (char)0xef, (char)0xbf, (char)0xbd,
2854 (char)0xef, (char)0xbf, (char)0xbd
2855 };
2856 const char *expected;
2857 int32_t expectedLength;
2858 char dest[20]; /* longer than longest expectedLength */
2859
2860 const char *src;
2861 char *target;
2862
2863 UChar pivotBuffer[128];
2864 UChar *pivotSource, *pivotTarget;
2865
2866 errorCode=U_ZERO_ERROR;
2867 utf8Cnv=ucnv_open("UTF-8", &errorCode);
2868 if(U_FAILURE(errorCode)) {
2869 log_data_err("unable to open UTF-8 converter - %s\n", u_errorName(errorCode));
2870 return;
2871 }
2872
2873 for(i=0; i<UPRV_LENGTHOF(converterNames); ++i) {
2874 errorCode=U_ZERO_ERROR;
2875 cnv=ucnv_open(converterNames[i], &errorCode);
2876 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_XML_DEC,
2877 NULL, NULL, &errorCode);
2878 if(U_FAILURE(errorCode)) {
2879 log_data_err("unable to open %s converter - %s\n",
2880 converterNames[i], u_errorName(errorCode));
2881 continue;
2882 }
2883 src=bad_utf8;
2884 target=dest;
2885 uprv_memset(dest, 9, sizeof(dest));
2886 if(i==UPRV_LENGTHOF(converterNames)-1) {
2887 /* conversion to UTF-8 yields two U+FFFD directly */
2888 expected=twoFFFD;
2889 expectedLength=6;
2890 } else {
2891 /* conversion to a non-Unicode charset yields two NCRs */
2892 expected=twoNCRs;
2893 expectedLength=16;
2894 }
2895 pivotBuffer[0]=0;
2896 pivotBuffer[1]=1;
2897 pivotBuffer[2]=2;
2898 pivotSource=pivotTarget=pivotBuffer;
2899 ucnv_convertEx(
2900 cnv, utf8Cnv,
2901 &target, dest+expectedLength,
2902 &src, bad_utf8+sizeof(bad_utf8),
2903 pivotBuffer, &pivotSource, &pivotTarget, pivotBuffer+UPRV_LENGTHOF(pivotBuffer),
2904 true, true, &errorCode);
2905 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING || src!=bad_utf8+2 ||
2906 target!=dest+expectedLength || 0!=uprv_memcmp(dest, expected, expectedLength) ||
2907 dest[expectedLength]!=9
2908 ) {
2909 log_err("ucnv_convertEx(UTF-8 C5 F0 -> %s/decimal NCRs) failed\n", converterNames[i]);
2910 }
2911 ucnv_close(cnv);
2912 }
2913 ucnv_close(utf8Cnv);
2914 }
2915
2916 static void
TestConvertAlgorithmic()2917 TestConvertAlgorithmic() {
2918 #if !UCONFIG_NO_LEGACY_CONVERSION
2919 static const uint8_t
2920 utf8[]={
2921 /* 4e00 30a1 ff61 0410 */
2922 0xe4, 0xb8, 0x80, 0xe3, 0x82, 0xa1, 0xef, 0xbd, 0xa1, 0xd0, 0x90
2923 },
2924 shiftJIS[]={
2925 0x88, 0xea, 0x83, 0x40, 0xa1, 0x84, 0x40
2926 },
2927 /*errorTarget[]={*/
2928 /*
2929 * expected output when converting shiftJIS[] from UTF-8 to Shift-JIS:
2930 * SUB, SUB, 0x40, SUB, SUB, 0x40
2931 */
2932 /* 0x81, 0xa1, 0x81, 0xa1, 0x40, 0x81, 0xa1, 0x81, 0xa1, 0x40*/
2933 /*},*/
2934 utf16[]={
2935 0xfe, 0xff /* BOM only, no text */
2936 };
2937 #if !UCONFIG_ONLY_HTML_CONVERSION
2938 static const uint8_t utf32[]={
2939 0xff, 0xfe, 0, 0 /* BOM only, no text */
2940 };
2941 #endif
2942
2943 char target[100], utf8NUL[100], shiftJISNUL[100];
2944
2945 UConverter *cnv;
2946 UErrorCode errorCode;
2947
2948 int32_t length;
2949
2950 errorCode=U_ZERO_ERROR;
2951 cnv=ucnv_open("Shift-JIS", &errorCode);
2952 if(U_FAILURE(errorCode)) {
2953 log_data_err("unable to open a Shift-JIS converter - %s\n", u_errorName(errorCode));
2954 ucnv_close(cnv);
2955 return;
2956 }
2957
2958 memcpy(utf8NUL, utf8, sizeof(utf8));
2959 utf8NUL[sizeof(utf8)]=0;
2960 memcpy(shiftJISNUL, shiftJIS, sizeof(shiftJIS));
2961 shiftJISNUL[sizeof(shiftJIS)]=0;
2962
2963 /*
2964 * The to/from algorithmic convenience functions share a common implementation,
2965 * so we need not test all permutations of them.
2966 */
2967
2968 /* length in, not terminated out */
2969 errorCode=U_ZERO_ERROR;
2970 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF8, target, sizeof(shiftJIS), (const char *)utf8, sizeof(utf8), &errorCode);
2971 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
2972 length!=sizeof(shiftJIS) ||
2973 memcmp(target, shiftJIS, length)!=0
2974 ) {
2975 log_err("ucnv_fromAlgorithmic(UTF-8 -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect %d\n",
2976 u_errorName(errorCode), length, sizeof(shiftJIS));
2977 }
2978
2979 /* terminated in and out */
2980 memset(target, 0x55, sizeof(target));
2981 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2982 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, -1, &errorCode);
2983 if( errorCode!=U_ZERO_ERROR ||
2984 length!=sizeof(utf8) ||
2985 memcmp(target, utf8, length)!=0
2986 ) {
2987 log_err("ucnv_toAlgorithmic(Shift-JIS -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect %d\n",
2988 u_errorName(errorCode), length, sizeof(shiftJIS));
2989 }
2990
2991 /* empty string, some target buffer */
2992 errorCode=U_STRING_NOT_TERMINATED_WARNING;
2993 length=ucnv_toAlgorithmic(UCNV_UTF8, cnv, target, sizeof(target), shiftJISNUL, 0, &errorCode);
2994 if( errorCode!=U_ZERO_ERROR ||
2995 length!=0
2996 ) {
2997 log_err("ucnv_toAlgorithmic(empty string -> UTF-8) fails (%s expect U_ZERO_ERROR), returns %d expect 0\n",
2998 u_errorName(errorCode), length);
2999 }
3000
3001 /* pseudo-empty string, no target buffer */
3002 errorCode=U_ZERO_ERROR;
3003 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
3004 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
3005 length!=0
3006 ) {
3007 log_err("ucnv_fromAlgorithmic(UTF-16 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
3008 u_errorName(errorCode), length);
3009 }
3010
3011 #if !UCONFIG_ONLY_HTML_CONVERSION
3012 errorCode=U_ZERO_ERROR;
3013 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF32, target, 0, (const char *)utf32, 4, &errorCode);
3014 if( errorCode!=U_STRING_NOT_TERMINATED_WARNING ||
3015 length!=0
3016 ) {
3017 log_err("ucnv_fromAlgorithmic(UTF-32 only BOM -> Shift-JIS) fails (%s expect U_STRING_NOT_TERMINATED_WARNING), returns %d expect 0\n",
3018 u_errorName(errorCode), length);
3019 }
3020 #endif
3021
3022 /* bad arguments */
3023 errorCode=U_MESSAGE_PARSE_ERROR;
3024 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, (const char *)utf16, 2, &errorCode);
3025 if(errorCode!=U_MESSAGE_PARSE_ERROR) {
3026 log_err("ucnv_fromAlgorithmic(U_MESSAGE_PARSE_ERROR) sets %s\n", u_errorName(errorCode));
3027 }
3028
3029 /* source==NULL */
3030 errorCode=U_ZERO_ERROR;
3031 length=ucnv_fromAlgorithmic(cnv, UCNV_UTF16, target, 0, NULL, 2, &errorCode);
3032 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
3033 log_err("ucnv_fromAlgorithmic(source==NULL) sets %s\n", u_errorName(errorCode));
3034 }
3035
3036 /* illegal alg. type */
3037 errorCode=U_ZERO_ERROR;
3038 length=ucnv_fromAlgorithmic(cnv, (UConverterType)99, target, 0, (const char *)utf16, 2, &errorCode);
3039 if(errorCode!=U_ILLEGAL_ARGUMENT_ERROR) {
3040 log_err("ucnv_fromAlgorithmic(illegal alg. type) sets %s\n", u_errorName(errorCode));
3041 }
3042 ucnv_close(cnv);
3043 #endif
3044 }
3045
3046 #if !UCONFIG_NO_FILE_IO && !UCONFIG_NO_LEGACY_CONVERSION
TestLMBCSMaxChar(void)3047 static void TestLMBCSMaxChar(void) {
3048 static const struct {
3049 int8_t maxSize;
3050 const char *name;
3051 } converter[] = {
3052 /* some non-LMBCS converters - perfect test setup here */
3053 { 1, "US-ASCII"},
3054 { 1, "ISO-8859-1"},
3055
3056 { 2, "UTF-16"},
3057 { 2, "UTF-16BE"},
3058 { 3, "UTF-8"},
3059 { 3, "CESU-8"},
3060 { 3, "SCSU"},
3061 { 4, "UTF-32"},
3062 { 4, "UTF-7"},
3063 { 4, "IMAP-mailbox-name"},
3064 { 4, "BOCU-1"},
3065
3066 { 1, "windows-1256"},
3067 { 2, "Shift-JIS"},
3068 { 2, "ibm-16684"},
3069 { 3, "ibm-930"},
3070 { 3, "ibm-1390"},
3071 { 4, "*test3"},
3072 { 16,"*test4"},
3073
3074 { 4, "ISCII"},
3075 { 4, "HZ"},
3076
3077 { 3, "ISO-2022"},
3078 { 8, "ISO-2022-KR"},
3079 { 6, "ISO-2022-JP"},
3080 { 8, "ISO-2022-CN"},
3081
3082 /* LMBCS */
3083 { 3, "LMBCS-1"},
3084 { 3, "LMBCS-2"},
3085 { 3, "LMBCS-3"},
3086 { 3, "LMBCS-4"},
3087 { 3, "LMBCS-5"},
3088 { 3, "LMBCS-6"},
3089 { 3, "LMBCS-8"},
3090 { 3, "LMBCS-11"},
3091 { 3, "LMBCS-16"},
3092 { 3, "LMBCS-17"},
3093 { 3, "LMBCS-18"},
3094 { 3, "LMBCS-19"}
3095 };
3096 int32_t idx;
3097
3098 for (idx = 0; idx < UPRV_LENGTHOF(converter); idx++) {
3099 UErrorCode status = U_ZERO_ERROR;
3100 UConverter *cnv = cnv_open(converter[idx].name, &status);
3101 if (U_FAILURE(status)) {
3102 continue;
3103 }
3104 if (converter[idx].maxSize != ucnv_getMaxCharSize(cnv)) {
3105 log_err("error: ucnv_getMaxCharSize(%s) expected %d, got %d\n",
3106 converter[idx].name, converter[idx].maxSize, ucnv_getMaxCharSize(cnv));
3107 }
3108 ucnv_close(cnv);
3109 }
3110
3111 /* mostly test that the macro compiles */
3112 if(UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10) {
3113 log_err("error UCNV_GET_MAX_BYTES_FOR_STRING(1, 2)<10\n");
3114 }
3115 }
3116 #endif
3117
TestJ1968(void)3118 static void TestJ1968(void) {
3119 UErrorCode err = U_ZERO_ERROR;
3120 UConverter *cnv;
3121 char myConvName[] = "My really really really really really really really really really really really"
3122 " really really really really really really really really really really really"
3123 " really really really really really really really really long converter name";
3124 UChar myConvNameU[sizeof(myConvName)];
3125
3126 u_charsToUChars(myConvName, myConvNameU, sizeof(myConvName));
3127
3128 err = U_ZERO_ERROR;
3129 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH+1] = 0;
3130 cnv = ucnv_openU(myConvNameU, &err);
3131 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3132 log_err("1U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3133 }
3134
3135 err = U_ZERO_ERROR;
3136 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
3137 cnv = ucnv_openU(myConvNameU, &err);
3138 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3139 log_err("2U) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3140 }
3141
3142 err = U_ZERO_ERROR;
3143 myConvNameU[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
3144 cnv = ucnv_openU(myConvNameU, &err);
3145 if (cnv || err != U_FILE_ACCESS_ERROR) {
3146 log_err("3U) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3147 }
3148
3149
3150
3151
3152 err = U_ZERO_ERROR;
3153 cnv = ucnv_open(myConvName, &err);
3154 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3155 log_err("1) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3156 }
3157
3158 err = U_ZERO_ERROR;
3159 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = ',';
3160 cnv = ucnv_open(myConvName, &err);
3161 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3162 log_err("2) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3163 }
3164
3165 err = U_ZERO_ERROR;
3166 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
3167 cnv = ucnv_open(myConvName, &err);
3168 if (cnv || err != U_FILE_ACCESS_ERROR) {
3169 log_err("3) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3170 }
3171
3172 err = U_ZERO_ERROR;
3173 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ',';
3174 memcpy(myConvName + UCNV_MAX_CONVERTER_NAME_LENGTH, "locale=", 7);
3175 cnv = ucnv_open(myConvName, &err);
3176 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3177 log_err("4) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3178 }
3179
3180 /* The comma isn't really a part of the converter name. */
3181 err = U_ZERO_ERROR;
3182 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH] = 0;
3183 cnv = ucnv_open(myConvName, &err);
3184 if (cnv || err != U_FILE_ACCESS_ERROR) {
3185 log_err("5) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3186 }
3187
3188 err = U_ZERO_ERROR;
3189 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = ' ';
3190 cnv = ucnv_open(myConvName, &err);
3191 if (cnv || err != U_ILLEGAL_ARGUMENT_ERROR) {
3192 log_err("6) Didn't get U_ILLEGAL_ARGUMENT_ERROR as expected %s\n", u_errorName(err));
3193 }
3194
3195 err = U_ZERO_ERROR;
3196 myConvName[UCNV_MAX_CONVERTER_NAME_LENGTH-1] = 0;
3197 cnv = ucnv_open(myConvName, &err);
3198 if (cnv || err != U_FILE_ACCESS_ERROR) {
3199 log_err("7) Didn't get U_FILE_ACCESS_ERROR as expected %s\n", u_errorName(err));
3200 }
3201
3202 }
3203
3204 #if !UCONFIG_NO_LEGACY_CONVERSION
3205 static void
testSwap(const char * name,UBool swap)3206 testSwap(const char *name, UBool swap) {
3207 /*
3208 * Test Unicode text.
3209 * Contains characters that are the highest for some of the
3210 * tested conversions, to make sure that the ucnvmbcs.c code that modifies the
3211 * tables copies the entire tables.
3212 */
3213 static const UChar text[]={
3214 0x61, 0xd, 0x62, 0xa, 0x4e00, 0x3000, 0xfffd, 0xa, 0x20, 0x85, 0xff5e, 0x7a
3215 };
3216
3217 UChar uNormal[32], uSwapped[32];
3218 char normal[32], swapped[32];
3219 const UChar *pcu;
3220 UChar *pu;
3221 char *pc;
3222 int32_t i, normalLength, swappedLength;
3223 UChar u;
3224 char c;
3225
3226 const char *swappedName;
3227 UConverter *cnv, *swapCnv;
3228 UErrorCode errorCode;
3229
3230 /* if the swap flag is false, then the test encoding is not EBCDIC and must not swap */
3231
3232 /* open both the normal and the LF/NL-swapping converters */
3233 strcpy(swapped, name);
3234 strcat(swapped, UCNV_SWAP_LFNL_OPTION_STRING);
3235
3236 errorCode=U_ZERO_ERROR;
3237 swapCnv=ucnv_open(swapped, &errorCode);
3238 cnv=ucnv_open(name, &errorCode);
3239 if(U_FAILURE(errorCode)) {
3240 log_data_err("TestEBCDICSwapLFNL error: unable to open %s or %s (%s)\n", name, swapped, u_errorName(errorCode));
3241 goto cleanup;
3242 }
3243
3244 /* the name must contain the swap option if and only if we expect the converter to swap */
3245 swappedName=ucnv_getName(swapCnv, &errorCode);
3246 if(U_FAILURE(errorCode)) {
3247 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl) failed (%s)\n", name, u_errorName(errorCode));
3248 goto cleanup;
3249 }
3250
3251 pc=strstr(swappedName, UCNV_SWAP_LFNL_OPTION_STRING);
3252 if(swap != (pc!=NULL)) {
3253 log_err("TestEBCDICSwapLFNL error: ucnv_getName(%s,swaplfnl)=%s should (%d) contain 'swaplfnl'\n", name, swappedName, swap);
3254 goto cleanup;
3255 }
3256
3257 /* convert to EBCDIC */
3258 pcu=text;
3259 pc=normal;
3260 ucnv_fromUnicode(cnv, &pc, normal+UPRV_LENGTHOF(normal), &pcu, text+UPRV_LENGTHOF(text), NULL, true, &errorCode);
3261 normalLength=(int32_t)(pc-normal);
3262
3263 pcu=text;
3264 pc=swapped;
3265 ucnv_fromUnicode(swapCnv, &pc, swapped+UPRV_LENGTHOF(swapped), &pcu, text+UPRV_LENGTHOF(text), NULL, true, &errorCode);
3266 swappedLength=(int32_t)(pc-swapped);
3267
3268 if(U_FAILURE(errorCode)) {
3269 log_err("TestEBCDICSwapLFNL error converting to %s - (%s)\n", name, u_errorName(errorCode));
3270 goto cleanup;
3271 }
3272
3273 /* compare EBCDIC output */
3274 if(normalLength!=swappedLength) {
3275 log_err("TestEBCDICSwapLFNL error converting to %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3276 goto cleanup;
3277 }
3278 for(i=0; i<normalLength; ++i) {
3279 /* swap EBCDIC LF/NL for comparison */
3280 c=normal[i];
3281 if(swap) {
3282 if(c==0x15) {
3283 c=0x25;
3284 } else if(c==0x25) {
3285 c=0x15;
3286 }
3287 }
3288
3289 if(c!=swapped[i]) {
3290 log_err("TestEBCDICSwapLFNL error converting to %s - did not swap properly, output[%d]=0x%02x\n", name, i, (uint8_t)swapped[i]);
3291 goto cleanup;
3292 }
3293 }
3294
3295 /* convert back to Unicode (may not roundtrip) */
3296 pc=normal;
3297 pu=uNormal;
3298 ucnv_toUnicode(cnv, &pu, uNormal+UPRV_LENGTHOF(uNormal), (const char **)&pc, normal+normalLength, NULL, true, &errorCode);
3299 normalLength=(int32_t)(pu-uNormal);
3300
3301 pc=normal;
3302 pu=uSwapped;
3303 ucnv_toUnicode(swapCnv, &pu, uSwapped+UPRV_LENGTHOF(uSwapped), (const char **)&pc, normal+swappedLength, NULL, true, &errorCode);
3304 swappedLength=(int32_t)(pu-uSwapped);
3305
3306 if(U_FAILURE(errorCode)) {
3307 log_err("TestEBCDICSwapLFNL error converting from %s - (%s)\n", name, u_errorName(errorCode));
3308 goto cleanup;
3309 }
3310
3311 /* compare EBCDIC output */
3312 if(normalLength!=swappedLength) {
3313 log_err("TestEBCDICSwapLFNL error converting from %s - output lengths %d vs. %d\n", name, normalLength, swappedLength);
3314 goto cleanup;
3315 }
3316 for(i=0; i<normalLength; ++i) {
3317 /* swap EBCDIC LF/NL for comparison */
3318 u=uNormal[i];
3319 if(swap) {
3320 if(u==0xa) {
3321 u=0x85;
3322 } else if(u==0x85) {
3323 u=0xa;
3324 }
3325 }
3326
3327 if(u!=uSwapped[i]) {
3328 log_err("TestEBCDICSwapLFNL error converting from %s - did not swap properly, output[%d]=U+%04x\n", name, i, uSwapped[i]);
3329 goto cleanup;
3330 }
3331 }
3332
3333 /* clean up */
3334 cleanup:
3335 ucnv_close(cnv);
3336 ucnv_close(swapCnv);
3337 }
3338
3339 static void
TestEBCDICSwapLFNL()3340 TestEBCDICSwapLFNL() {
3341 static const struct {
3342 const char *name;
3343 UBool swap;
3344 } tests[]={
3345 { "ibm-37", true },
3346 { "ibm-1047", true },
3347 { "ibm-1140", true },
3348 { "ibm-930", true },
3349 { "iso-8859-3", false }
3350 };
3351
3352 int i;
3353
3354 for(i=0; i<UPRV_LENGTHOF(tests); ++i) {
3355 testSwap(tests[i].name, tests[i].swap);
3356 }
3357 }
3358 #else
static void
TestEBCDICSwapLFNL() {
    /* Stub for builds with UCONFIG_NO_LEGACY_CONVERSION: nothing to test. */
}
3363 #endif
3364
TestFromUCountPending()3365 static void TestFromUCountPending(){
3366 #if !UCONFIG_NO_LEGACY_CONVERSION
3367 UErrorCode status = U_ZERO_ERROR;
3368 /* const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd}; */
3369 static const struct {
3370 UChar input[6];
3371 int32_t len;
3372 int32_t exp;
3373 }fromUnicodeTests[] = {
3374 /*m:n conversion*/
3375 {{0xdbc4},1,1},
3376 {{ 0xdbc4, 0xde34, 0xd84d},3,1},
3377 {{ 0xdbc4, 0xde34, 0xd900},3,3},
3378 };
3379 int i;
3380 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3381 if(U_FAILURE(status)){
3382 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3383 return;
3384 }
3385 for(i=0; i<UPRV_LENGTHOF(fromUnicodeTests); ++i) {
3386 char tgt[10];
3387 char* target = tgt;
3388 char* targetLimit = target + 10;
3389 const UChar* source = fromUnicodeTests[i].input;
3390 const UChar* sourceLimit = source + fromUnicodeTests[i].len;
3391 int32_t len = 0;
3392 ucnv_reset(cnv);
3393 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3394 len = ucnv_fromUCountPending(cnv, &status);
3395 if(U_FAILURE(status)){
3396 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3397 status = U_ZERO_ERROR;
3398 continue;
3399 }
3400 if(len != fromUnicodeTests[i].exp){
3401 log_err("Did not get the expected output for ucnv_fromUInputConsumed.\n");
3402 }
3403 }
3404 status = U_ZERO_ERROR;
3405 {
3406 /*
3407 * The converter has to read the tail before it knows that
3408 * only head alone matches.
3409 * At the end, the output for head will overflow the target,
3410 * middle will be pending, and tail will not have been consumed.
3411 */
3412 /*
3413 \U00101234 -> x (<U101234> \x07 |0)
3414 \U00101234\U00050005 -> y (<U101234>+<U50005> \x07+\x00+\x01\x02\x0e+\x05 |0)
3415 \U00101234\U00050005\U00060006 -> z (<U101234>+<U50005>+<U60006> \x07+\x00+\x01\x02\x0f+\x09 |0)
3416 \U00060007 -> unassigned
3417 */
3418 static const UChar head[] = {0xDBC4,0xDE34,0xD900,0xDC05,0x0000};/* \U00101234\U00050005 */
3419 static const UChar middle[] = {0xD940,0x0000}; /* first half of \U00060006 or \U00060007 */
3420 static const UChar tail[] = {0xDC07,0x0000};/* second half of \U00060007 */
3421 char tgt[10];
3422 char* target = tgt;
3423 char* targetLimit = target + 2; /* expect overflow from converting \U00101234\U00050005 */
3424 const UChar* source = head;
3425 const UChar* sourceLimit = source + u_strlen(head);
3426 int32_t len = 0;
3427 ucnv_reset(cnv);
3428 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3429 len = ucnv_fromUCountPending(cnv, &status);
3430 if(U_FAILURE(status)){
3431 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3432 status = U_ZERO_ERROR;
3433 }
3434 if(len!=4){
3435 log_err("ucnv_fromUInputHeld did not return correct length for head\n");
3436 }
3437 source = middle;
3438 sourceLimit = source + u_strlen(middle);
3439 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3440 len = ucnv_fromUCountPending(cnv, &status);
3441 if(U_FAILURE(status)){
3442 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3443 status = U_ZERO_ERROR;
3444 }
3445 if(len!=5){
3446 log_err("ucnv_fromUInputHeld did not return correct length for middle\n");
3447 }
3448 source = tail;
3449 sourceLimit = source + u_strlen(tail);
3450 ucnv_fromUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3451 if(status != U_BUFFER_OVERFLOW_ERROR){
3452 log_err("ucnv_fromUnicode call did not succeed. Error: %s\n", u_errorName(status));
3453 }
3454 status = U_ZERO_ERROR;
3455 len = ucnv_fromUCountPending(cnv, &status);
3456 /* middle[1] is pending, tail has not been consumed */
3457 if(U_FAILURE(status)){
3458 log_err("ucnv_fromUInputHeld call did not succeed. Error: %s\n", u_errorName(status));
3459 }
3460 if(len!=1){
3461 log_err("ucnv_fromUInputHeld did not return correct length for tail\n");
3462 }
3463 }
3464 ucnv_close(cnv);
3465 #endif
3466 }
3467
3468 static void
TestToUCountPending()3469 TestToUCountPending(){
3470 #if !UCONFIG_NO_LEGACY_CONVERSION
3471 UErrorCode status = U_ZERO_ERROR;
3472 static const struct {
3473 char input[6];
3474 int32_t len;
3475 int32_t exp;
3476 }toUnicodeTests[] = {
3477 /*m:n conversion*/
3478 {{0x05, 0x01, 0x02},3,3},
3479 {{0x01, 0x02},2,2},
3480 {{0x07, 0x00, 0x01, 0x02},4,4},
3481 };
3482
3483 int i;
3484 UConverterToUCallback *oldToUAction= NULL;
3485 UConverter* cnv = ucnv_openPackage(loadTestData(&status), "test3", &status);
3486 if(U_FAILURE(status)){
3487 log_data_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3488 return;
3489 }
3490 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3491 for(i=0; i<UPRV_LENGTHOF(toUnicodeTests); ++i) {
3492 UChar tgt[20];
3493 UChar* target = tgt;
3494 UChar* targetLimit = target + 20;
3495 const char* source = toUnicodeTests[i].input;
3496 const char* sourceLimit = source + toUnicodeTests[i].len;
3497 int32_t len = 0;
3498 ucnv_reset(cnv);
3499 ucnv_toUnicode(cnv, &target, targetLimit, &source, sourceLimit, NULL, false, &status);
3500 len = ucnv_toUCountPending(cnv,&status);
3501 if(U_FAILURE(status)){
3502 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3503 status = U_ZERO_ERROR;
3504 continue;
3505 }
3506 if(len != toUnicodeTests[i].exp){
3507 log_err("Did not get the expected output for ucnv_toUInputConsumed.\n");
3508 }
3509 }
3510 status = U_ZERO_ERROR;
3511 ucnv_close(cnv);
3512
3513 {
3514 /*
3515 * The converter has to read the tail before it knows that
3516 * only head alone matches.
3517 * At the end, the output for head will overflow the target,
3518 * mid will be pending, and tail will not have been consumed.
3519 */
3520 char head[] = { 0x01, 0x02, 0x03, 0x0a , 0x00};
3521 char mid[] = { 0x01, 0x02, 0x03, 0x0b, 0x00 };
3522 char tail[] = { 0x01, 0x02, 0x03, 0x0d, 0x00 };
3523 /*
3524 0x01, 0x02, 0x03, 0x0a -> x (<U23456> \x01\x02\x03\x0a |0)
3525 0x01, 0x02, 0x03, 0x0b -> y (<U000b> \x01\x02\x03\x0b |0)
3526 0x01, 0x02, 0x03, 0x0d -> z (<U34567> \x01\x02\x03\x0d |3)
3527 0x01, 0x02, 0x03, 0x0a + 0x01, 0x02, 0x03, 0x0b + 0x01 + many more -> z (see test4 "many bytes, and bytes per UChar")
3528 */
3529 UChar tgt[10];
3530 UChar* target = tgt;
3531 UChar* targetLimit = target + 1; /* expect overflow from converting */
3532 const char* source = head;
3533 const char* sourceLimit = source + strlen(head);
3534 int32_t len = 0;
3535 cnv = ucnv_openPackage(loadTestData(&status), "test4", &status);
3536 if(U_FAILURE(status)){
3537 log_err("Could not create converter for test3. Error: %s\n", u_errorName(status));
3538 return;
3539 }
3540 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, oldToUAction, NULL, &status);
3541 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3542 len = ucnv_toUCountPending(cnv,&status);
3543 if(U_FAILURE(status)){
3544 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3545 }
3546 if(len != 4){
3547 log_err("Did not get the expected len for head.\n");
3548 }
3549 source=mid;
3550 sourceLimit = source+strlen(mid);
3551 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3552 len = ucnv_toUCountPending(cnv,&status);
3553 if(U_FAILURE(status)){
3554 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3555 }
3556 if(len != 8){
3557 log_err("Did not get the expected len for mid.\n");
3558 }
3559
3560 source=tail;
3561 sourceLimit = source+strlen(tail);
3562 targetLimit = target;
3563 ucnv_toUnicode(cnv,&target, targetLimit, &source, sourceLimit, NULL, false, &status);
3564 if(status != U_BUFFER_OVERFLOW_ERROR){
3565 log_err("ucnv_toUnicode call did not succeed. Error: %s\n", u_errorName(status));
3566 }
3567 status = U_ZERO_ERROR;
3568 len = ucnv_toUCountPending(cnv,&status);
3569 /* mid[4] is pending, tail has not been consumed */
3570 if(U_FAILURE(status)){
3571 log_err("ucnv_toUCountPending call did not succeed. Error: %s\n", u_errorName(status));
3572 }
3573 if(len != 4){
3574 log_err("Did not get the expected len for tail.\n");
3575 }
3576 ucnv_close(cnv);
3577 }
3578 #endif
3579 }
3580
TestOneDefaultNameChange(const char * name,const char * expected)3581 static void TestOneDefaultNameChange(const char *name, const char *expected) {
3582 UErrorCode status = U_ZERO_ERROR;
3583 UConverter *cnv;
3584 ucnv_setDefaultName(name);
3585 if(strcmp(ucnv_getDefaultName(), expected)==0)
3586 log_verbose("setDefaultName of %s works.\n", name);
3587 else
3588 log_err("setDefaultName of %s failed\n", name);
3589 cnv=ucnv_open(NULL, &status);
3590 if (U_FAILURE(status) || cnv == NULL) {
3591 log_err("opening the default converter of %s failed\n", name);
3592 return;
3593 }
3594 if(strcmp(ucnv_getName(cnv, &status), expected)==0)
3595 log_verbose("ucnv_getName of %s works.\n", name);
3596 else
3597 log_err("ucnv_getName of %s failed\n", name);
3598 ucnv_close(cnv);
3599 }
3600
TestDefaultName(void)3601 static void TestDefaultName(void) {
3602 /*Testing ucnv_getDefaultName() and ucnv_setDefaultNAme()*/
3603 static char defaultName[UCNV_MAX_CONVERTER_NAME_LENGTH + 1];
3604 strcpy(defaultName, ucnv_getDefaultName());
3605
3606 log_verbose("getDefaultName returned %s\n", defaultName);
3607
3608 /*change the default name by setting it */
3609 TestOneDefaultNameChange("UTF-8", "UTF-8");
3610 #if U_CHARSET_IS_UTF8
3611 TestOneDefaultNameChange("ISCII,version=1", "UTF-8");
3612 TestOneDefaultNameChange("ISCII,version=2", "UTF-8");
3613 TestOneDefaultNameChange("ISO-8859-1", "UTF-8");
3614 #else
3615 # if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
3616 TestOneDefaultNameChange("ISCII,version=1", "ISCII,version=1");
3617 TestOneDefaultNameChange("ISCII,version=2", "ISCII,version=2");
3618 # endif
3619 TestOneDefaultNameChange("ISO-8859-1", "ISO-8859-1");
3620 #endif
3621
3622 /*set the default name back*/
3623 ucnv_setDefaultName(defaultName);
3624 }
3625
3626 /* Test that ucnv_compareNames() matches names according to spec. ----------- */
3627
/* Return -1, 0 or 1 according to whether n is negative, zero or positive. */
static int
sign(int n) {
    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (n>0)-(n<0);
}
3638
3639 static void
compareNames(const char ** names)3640 compareNames(const char **names) {
3641 const char *relation, *name1, *name2;
3642 int rel, result;
3643
3644 relation=*names++;
3645 if(*relation=='=') {
3646 rel = 0;
3647 } else if(*relation=='<') {
3648 rel = -1;
3649 } else {
3650 rel = 1;
3651 }
3652
3653 name1=*names++;
3654 if(name1==NULL) {
3655 return;
3656 }
3657 while((name2=*names++)!=NULL) {
3658 result=ucnv_compareNames(name1, name2);
3659 if(sign(result)!=rel) {
3660 log_err("ucnv_compareNames(\"%s\", \"%s\")=%d, sign!=%d\n", name1, name2, result, rel);
3661 }
3662 name1=name2;
3663 }
3664 }
3665
/*
 * Feed four name lists to compareNames(): two lists whose names must all
 * compare equal and two whose names must compare in ascending order.
 */
static void
TestCompareNames() {
    static const char *utf8Equal[]={ "=", "UTF-8", "utf_8", "u*T@f08", "Utf 8", NULL };
    static const char *ibmEqual[]={ "=", "ibm-37", "IBM037", "i-B-m  00037", "ibm-0037", "IBM00037", NULL };
    static const char *macAscending[]={ "<", "macos-0_1-10.2", "macos-1-10.0.2", "macos-1-10.2", NULL };
    static const char *utf080Ascending[]={ "<", "UTF-0008", "utf$080", "u*T@f0800", "Utf 0000000009", NULL };

    /* same order as the individual calls this loop replaces */
    static const char **const lists[]={ utf8Equal, ibmEqual, macAscending, utf080Ascending };
    size_t which;

    for(which=0; which<sizeof(lists)/sizeof(lists[0]); ++which) {
        compareNames(lists[which]);
    }
}
3678
3679 static void
TestSubstString()3680 TestSubstString() {
3681 static const UChar surrogate[1]={ 0xd900 };
3682 char buffer[16];
3683
3684 static const UChar sub[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3685 static const char subChars[5]={ 0x61, 0x62, 0x63, 0x64, 0x65 };
3686 UConverter *cnv;
3687 UErrorCode errorCode;
3688 int32_t length;
3689 int8_t len8;
3690
3691 /* UTF-16/32: test that the BOM is output before the sub character */
3692 errorCode=U_ZERO_ERROR;
3693 cnv=ucnv_open("UTF-16", &errorCode);
3694 if(U_FAILURE(errorCode)) {
3695 log_data_err("ucnv_open(UTF-16) failed - %s\n", u_errorName(errorCode));
3696 return;
3697 }
3698 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3699 ucnv_close(cnv);
3700 if(U_FAILURE(errorCode) ||
3701 length!=4 ||
3702 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3703 ) {
3704 log_err("ucnv_fromUChars(UTF-16, U+D900) did not write a BOM\n");
3705 }
3706
3707 errorCode=U_ZERO_ERROR;
3708 cnv=ucnv_open("UTF-32", &errorCode);
3709 if(U_FAILURE(errorCode)) {
3710 log_data_err("ucnv_open(UTF-32) failed - %s\n", u_errorName(errorCode));
3711 return;
3712 }
3713 length=ucnv_fromUChars(cnv, buffer, (int32_t)sizeof(buffer), surrogate, 1, &errorCode);
3714 ucnv_close(cnv);
3715 if(U_FAILURE(errorCode) ||
3716 length!=8 ||
3717 NULL == ucnv_detectUnicodeSignature(buffer, length, NULL, &errorCode)
3718 ) {
3719 log_err("ucnv_fromUChars(UTF-32, U+D900) did not write a BOM\n");
3720 }
3721
3722 /* Simple API test of ucnv_setSubstString() + ucnv_getSubstChars(). */
3723 errorCode=U_ZERO_ERROR;
3724 cnv=ucnv_open("ISO-8859-1", &errorCode);
3725 if(U_FAILURE(errorCode)) {
3726 log_data_err("ucnv_open(ISO-8859-1) failed - %s\n", u_errorName(errorCode));
3727 return;
3728 }
3729 ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode);
3730 if(U_FAILURE(errorCode)) {
3731 log_err("ucnv_setSubstString(ISO-8859-1, sub[5]) failed - %s\n", u_errorName(errorCode));
3732 } else {
3733 len8 = sizeof(buffer);
3734 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3735 /* Stateless converter, we expect the string converted to charset bytes. */
3736 if(U_FAILURE(errorCode) || len8!=sizeof(subChars) || 0!=uprv_memcmp(buffer, subChars, len8)) {
3737 log_err("ucnv_getSubstChars(ucnv_setSubstString(ISO-8859-1, sub[5])) failed - %s\n", u_errorName(errorCode));
3738 }
3739 }
3740 ucnv_close(cnv);
3741
3742 #if !UCONFIG_NO_LEGACY_CONVERSION
3743 errorCode=U_ZERO_ERROR;
3744 cnv=ucnv_open("HZ", &errorCode);
3745 if(U_FAILURE(errorCode)) {
3746 log_data_err("ucnv_open(HZ) failed - %s\n", u_errorName(errorCode));
3747 return;
3748 }
3749 ucnv_setSubstString(cnv, sub, UPRV_LENGTHOF(sub), &errorCode);
3750 if(U_FAILURE(errorCode)) {
3751 log_err("ucnv_setSubstString(HZ, sub[5]) failed - %s\n", u_errorName(errorCode));
3752 } else {
3753 len8 = sizeof(buffer);
3754 ucnv_getSubstChars(cnv, buffer, &len8, &errorCode);
3755 /* Stateful converter, we expect that the Unicode string was set and that we get an empty char * string now. */
3756 if(U_FAILURE(errorCode) || len8!=0) {
3757 log_err("ucnv_getSubstChars(ucnv_setSubstString(HZ, sub[5])) failed - %s\n", u_errorName(errorCode));
3758 }
3759 }
3760 ucnv_close(cnv);
3761 #endif
3762 /*
3763 * Further testing of ucnv_setSubstString() is done via intltest convert.
3764 * We do not test edge cases of illegal arguments and similar because the
3765 * function implementation uses all of its parameters in calls to other
3766 * functions with UErrorCode parameters.
3767 */
3768 }
3769
3770 static void
InvalidArguments()3771 InvalidArguments() {
3772 UConverter *cnv;
3773 UErrorCode errorCode;
3774 char charBuffer[2] = {1, 1};
3775 char ucharAsCharBuffer[2] = {2, 2};
3776 char *charsPtr = charBuffer;
3777 UChar *ucharsPtr = (UChar *)ucharAsCharBuffer;
3778 UChar *ucharsBadPtr = (UChar *)(ucharAsCharBuffer + 1);
3779
3780 errorCode=U_ZERO_ERROR;
3781 cnv=ucnv_open("UTF-8", &errorCode);
3782 if(U_FAILURE(errorCode)) {
3783 log_err("ucnv_open() failed - %s\n", u_errorName(errorCode));
3784 return;
3785 }
3786
3787 errorCode=U_ZERO_ERROR;
3788 /* This one should fail because an incomplete UChar is being passed in */
3789 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsPtr, ucharsBadPtr, NULL, true, &errorCode);
3790 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3791 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3792 }
3793
3794 errorCode=U_ZERO_ERROR;
3795 /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3796 ucnv_fromUnicode(cnv, &charsPtr, charsPtr, (const UChar **)&ucharsBadPtr, ucharsPtr, NULL, true, &errorCode);
3797 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3798 log_err("ucnv_fromUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3799 }
3800
3801 errorCode=U_ZERO_ERROR;
3802 /* This one should fail because an incomplete UChar is being passed in */
3803 ucnv_toUnicode(cnv, &ucharsPtr, ucharsBadPtr, (const char **)&charsPtr, charsPtr, NULL, true, &errorCode);
3804 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3805 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for incomplete UChar * buffer - %s\n", u_errorName(errorCode));
3806 }
3807
3808 errorCode=U_ZERO_ERROR;
3809 /* This one should fail because ucharsBadPtr is > than ucharsPtr */
3810 ucnv_toUnicode(cnv, &ucharsBadPtr, ucharsPtr, (const char **)&charsPtr, charsPtr, NULL, true, &errorCode);
3811 if(errorCode != U_ILLEGAL_ARGUMENT_ERROR) {
3812 log_err("ucnv_toUnicode() failed to return U_ILLEGAL_ARGUMENT_ERROR for bad limit pointer - %s\n", u_errorName(errorCode));
3813 }
3814
3815 if (charBuffer[0] != 1 || charBuffer[1] != 1
3816 || ucharAsCharBuffer[0] != 2 || ucharAsCharBuffer[1] != 2)
3817 {
3818 log_err("Data was incorrectly written to buffers\n");
3819 }
3820
3821 ucnv_close(cnv);
3822 }
3823
TestGetName()3824 static void TestGetName() {
3825 static const char *const names[] = {
3826 "Unicode", "UTF-16",
3827 "UnicodeBigUnmarked", "UTF-16BE",
3828 "UnicodeBig", "UTF-16BE,version=1",
3829 "UnicodeLittleUnmarked", "UTF-16LE",
3830 "UnicodeLittle", "UTF-16LE,version=1",
3831 "x-UTF-16LE-BOM", "UTF-16LE,version=1"
3832 };
3833 int32_t i;
3834 for(i = 0; i < UPRV_LENGTHOF(names); i += 2) {
3835 UErrorCode errorCode = U_ZERO_ERROR;
3836 UConverter *cnv = ucnv_open(names[i], &errorCode);
3837 if(U_SUCCESS(errorCode)) {
3838 const char *name = ucnv_getName(cnv, &errorCode);
3839 if(U_FAILURE(errorCode) || 0 != strcmp(name, names[i+1])) {
3840 log_err("ucnv_getName(%s) = %s != %s -- %s\n",
3841 names[i], name, names[i+1], u_errorName(errorCode));
3842 }
3843 ucnv_close(cnv);
3844 }
3845 }
3846 }
3847
TestUTFBOM()3848 static void TestUTFBOM() {
3849 static const UChar a16[] = { 0x61 };
3850 static const char *const names[] = {
3851 "UTF-16",
3852 "UTF-16,version=1",
3853 "UTF-16BE",
3854 "UnicodeBig",
3855 "UTF-16LE",
3856 "UnicodeLittle"
3857 };
3858 static const uint8_t expected[][5] = {
3859 #if U_IS_BIG_ENDIAN
3860 { 4, 0xfe, 0xff, 0, 0x61 },
3861 { 4, 0xfe, 0xff, 0, 0x61 },
3862 #else
3863 { 4, 0xff, 0xfe, 0x61, 0 },
3864 { 4, 0xff, 0xfe, 0x61, 0 },
3865 #endif
3866
3867 { 2, 0, 0x61 },
3868 { 4, 0xfe, 0xff, 0, 0x61 },
3869
3870 { 2, 0x61, 0 },
3871 { 4, 0xff, 0xfe, 0x61, 0 }
3872 };
3873
3874 char bytes[10];
3875 int32_t i;
3876
3877 for(i = 0; i < UPRV_LENGTHOF(names); ++i) {
3878 UErrorCode errorCode = U_ZERO_ERROR;
3879 UConverter *cnv = ucnv_open(names[i], &errorCode);
3880 int32_t length = 0;
3881 const uint8_t *exp = expected[i];
3882 if (U_FAILURE(errorCode)) {
3883 log_err_status(errorCode, "Unable to open converter: %s got error code: %s\n", names[i], u_errorName(errorCode));
3884 continue;
3885 }
3886 length = ucnv_fromUChars(cnv, bytes, (int32_t)sizeof(bytes), a16, 1, &errorCode);
3887
3888 if(U_FAILURE(errorCode) || length != exp[0] || 0 != memcmp(bytes, exp+1, length)) {
3889 log_err("unexpected %s BOM writing behavior -- %s\n",
3890 names[i], u_errorName(errorCode));
3891 }
3892 ucnv_close(cnv);
3893 }
3894 }
3895