1 // Copyright (C) 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ***************************************************************************/
8 /*****************************************************************************
9 *
*   File NCNVFBTS
11 *
12 * Modification History:
13 * Name Date Description
*   Madhu Katragadda    06/23/2000     Tests for Converter FallBack API and Functionality
15 ******************************************************************************
16 */
17 #include <stdio.h>
18 #include "unicode/uloc.h"
19 #include "unicode/ucnv.h"
20 #include "unicode/ucnv_err.h"
21 #include "cintltst.h"
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
24 #include "ncnvfbts.h"
25 #include "cmemory.h"
26 #include "cstring.h"
27
28 #if !UCONFIG_NO_LEGACY_CONVERSION
/* Size, in elements, of the local output and offset buffers used by the conversion helpers. */
#define NEW_MAX_BUFFER 999


/*
 * Smaller of two values.
 * Every use of an argument is parenthesized so that operands containing
 * low-precedence operators (|, ?:, ...) are compared and returned as a whole.
 * Each argument is still evaluated twice: do not pass expressions with side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))

/* Maximum number of input units handed to the converter per call. */
static int32_t  gInBufferSize = 0;
/* Maximum number of output units made available to the converter per call. */
static int32_t  gOutBufferSize = 0;
/* Description of the conversion currently being tested; appended to log messages. */
static char     gNuConvTestName[1024];
37
my_ucnv_open(const char * cnv,UErrorCode * err)38 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
39 {
40 if(cnv && cnv[0] == '@') {
41 return ucnv_openPackage("testdata", cnv+1, err);
42 } else {
43 return ucnv_open(cnv, err);
44 }
45 }
46
47
/*
 * Writes len bytes from a to the verbose log as "{0xNN 0xNN ... }",
 * followed by a newline.
 */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; idx++) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
56
/*
 * Writes len UChars from a to the verbose log as "{U+0xNNNN 0xNNNN ... }",
 * followed by a newline.
 */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; idx++) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
65
/*
 * Writes len bytes from a to stderr as "{0xNN 0xNN ... }", followed by a newline.
 */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; idx++) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
74
/*
 * Writes len UChars from a to stderr as "{U+0xNNNN 0xNNNN ... }", followed by a newline.
 */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; idx++) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
83
TestConverterFallBack(void)84 static void TestConverterFallBack(void)
85 {
86 TestConvertFallBackWithBufferSizes(10,10);
87 TestConvertFallBackWithBufferSizes(2,3);
88 TestConvertFallBackWithBufferSizes(3,2);
89 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,1);
90 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,2);
91 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,3);
92 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,4);
93 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,5);
94 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,6);
95 TestConvertFallBackWithBufferSizes(1,NEW_MAX_BUFFER);
96 TestConvertFallBackWithBufferSizes(2,NEW_MAX_BUFFER);
97 TestConvertFallBackWithBufferSizes(3,NEW_MAX_BUFFER);
98 TestConvertFallBackWithBufferSizes(4,NEW_MAX_BUFFER);
99 TestConvertFallBackWithBufferSizes(5,NEW_MAX_BUFFER);
100 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
101
102 }
103
104
105 void addTestConverterFallBack(TestNode** root);
106
addTestConverterFallBack(TestNode ** root)107 void addTestConverterFallBack(TestNode** root)
108 {
109 #if !UCONFIG_NO_FILE_IO
110 addTest(root, &TestConverterFallBack, "tsconv/ncnvfbts/TestConverterFallBack");
111 #endif
112
113 }
114
115
116 /* Note that this test already makes use of statics, so it's not really
117 multithread safe.
118 This convenience function lets us make the error messages actually useful.
119 */
120
setNuConvTestName(const char * codepage,const char * direction)121 static void setNuConvTestName(const char *codepage, const char *direction)
122 {
123 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
124 codepage,
125 direction,
126 (int)gInBufferSize,
127 (int)gOutBufferSize);
128 }
129
130
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UBool fallback,const int32_t * expectOffsets)131 static UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
132 const char *codepage, UBool fallback, const int32_t *expectOffsets)
133 {
134
135
136 UErrorCode status = U_ZERO_ERROR;
137 UConverter *conv = 0;
138 char junkout[NEW_MAX_BUFFER]; /* FIX */
139 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
140 const UChar *src;
141 char *end;
142 char *targ;
143 int32_t *offs;
144 int i;
145 int32_t realBufferSize;
146 char *realBufferEnd;
147 const UChar *realSourceEnd;
148 const UChar *sourceLimit;
149 UBool checkOffsets = TRUE;
150 UBool doFlush;
151 UBool action=FALSE;
152 char *p;
153
154
155 for(i=0;i<NEW_MAX_BUFFER;i++)
156 junkout[i] = (char)0xF0;
157 for(i=0;i<NEW_MAX_BUFFER;i++)
158 junokout[i] = 0xFF;
159 setNuConvTestName(codepage, "FROM");
160
161 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
162 gOutBufferSize);
163
164 conv = my_ucnv_open(codepage, &status);
165 if(U_FAILURE(status))
166 {
167 log_data_err("Couldn't open converter %s\n",codepage);
168 return TRUE;
169 }
170
171 log_verbose("Converter opened..\n");
172 /*----setting the callback routine----*/
173 ucnv_setFallback (conv, fallback);
174 action = ucnv_usesFallback(conv);
175 if(action != fallback){
176 log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status));
177 }
178 /*------------------------*/
179 src = source;
180 targ = junkout;
181 offs = junokout;
182
183 realBufferSize = UPRV_LENGTHOF(junkout);
184 realBufferEnd = junkout + realBufferSize;
185 realSourceEnd = source + sourceLen;
186
187 if ( gOutBufferSize != realBufferSize )
188 checkOffsets = FALSE;
189
190 if( gInBufferSize != NEW_MAX_BUFFER )
191 checkOffsets = FALSE;
192
193 do
194 {
195 end = nct_min(targ + gOutBufferSize, realBufferEnd);
196 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
197
198 doFlush = (UBool)(sourceLimit == realSourceEnd);
199
200 if(targ == realBufferEnd)
201 {
202 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
203 return FALSE;
204 }
205 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
206
207
208 status = U_ZERO_ERROR;
209
210 ucnv_fromUnicode (conv,
211 (char **)&targ,
212 (const char *)end,
213 &src,
214 sourceLimit,
215 checkOffsets ? offs : NULL,
216 doFlush, /* flush if we're at the end of the input data */
217 &status);
218
219 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (sourceLimit < realSourceEnd) );
220
221 if(U_FAILURE(status))
222 {
223 log_err("Problem doing toUnicode, errcode %d %s\n", myErrorName(status), gNuConvTestName);
224 return FALSE;
225 }
226
227 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
228 sourceLen, targ-junkout);
229 if(getTestOption(VERBOSITY_OPTION))
230 {
231 char junk[9999];
232 char offset_str[9999];
233
234 junk[0] = 0;
235 offset_str[0] = 0;
236 for(p = junkout;p<targ;p++)
237 {
238 sprintf(junk + uprv_strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
239 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
240 }
241
242 log_verbose(junk);
243 printSeq((const unsigned char*)expect, expectLen);
244 if ( checkOffsets )
245 {
246 log_verbose("\nOffsets:");
247 log_verbose(offset_str);
248 }
249 log_verbose("\n");
250 }
251 ucnv_close(conv);
252
253
254 if(expectLen != targ-junkout)
255 {
256 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
257 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
258 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
259 printSeqErr((const unsigned char*)expect, expectLen);
260 return FALSE;
261 }
262
263 if (checkOffsets && (expectOffsets != 0) )
264 {
265 log_verbose("\ncomparing %d offsets..\n", targ-junkout);
266 if(uprv_memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
267 log_err("\ndid not get the expected offsets while %s \n", gNuConvTestName);
268 log_err("Got : ");
269 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
270 for(p=junkout;p<targ;p++)
271 log_err("%d, ", junokout[p-junkout]);
272 log_err("\nExpected: ");
273 for(i=0; i<(targ-junkout); i++)
274 log_err("%d,", expectOffsets[i]);
275 }
276 }
277
278 log_verbose("\n\ncomparing..\n");
279 if(!memcmp(junkout, expect, expectLen))
280 {
281 log_verbose("Matches!\n");
282 return TRUE;
283 }
284 else
285 {
286 log_err("String does not match. %s\n", gNuConvTestName);
287 log_verbose("String does not match. %s\n", gNuConvTestName);
288 printSeqErr((const unsigned char*)junkout, expectLen);
289 printSeqErr((const unsigned char*)expect, expectLen);
290 return FALSE;
291 }
292 }
293
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UBool fallback,const int32_t * expectOffsets)294 static UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
295 const char *codepage, UBool fallback, const int32_t *expectOffsets)
296 {
297 UErrorCode status = U_ZERO_ERROR;
298 UConverter *conv = 0;
299 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
300 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
301 const char *src;
302 const char *realSourceEnd;
303 const char *srcLimit;
304 UChar *targ;
305 UChar *end;
306 int32_t *offs;
307 int i;
308 UBool checkOffsets = TRUE;
309 char junk[9999];
310 char offset_str[9999];
311 UChar *p;
312 UBool action;
313
314 int32_t realBufferSize;
315 UChar *realBufferEnd;
316
317
318 for(i=0;i<NEW_MAX_BUFFER;i++)
319 junkout[i] = 0xFFFE;
320
321 for(i=0;i<NEW_MAX_BUFFER;i++)
322 junokout[i] = -1;
323
324 setNuConvTestName(codepage, "TO");
325
326 log_verbose("\n========= %s\n", gNuConvTestName);
327
328 conv = my_ucnv_open(codepage, &status);
329 if(U_FAILURE(status))
330 {
331 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
332 return TRUE; /* because it has been logged */
333 }
334
335 log_verbose("Converter opened..\n");
336
337 src = (const char *)source;
338 targ = junkout;
339 offs = junokout;
340
341 realBufferSize = UPRV_LENGTHOF(junkout);
342 realBufferEnd = junkout + realBufferSize;
343 realSourceEnd = src + sourcelen;
344 /*----setting the fallback routine----*/
345 ucnv_setFallback (conv, fallback);
346 action = ucnv_usesFallback(conv);
347 if(action != fallback){
348 log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status));
349 }
350 /*-------------------------------------*/
351 if ( gOutBufferSize != realBufferSize )
352 checkOffsets = FALSE;
353
354 if( gInBufferSize != NEW_MAX_BUFFER )
355 checkOffsets = FALSE;
356
357 do
358 {
359 end = nct_min( targ + gOutBufferSize, realBufferEnd);
360 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
361
362 if(targ == realBufferEnd)
363 {
364 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
365 return FALSE;
366 }
367 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
368
369
370
371 status = U_ZERO_ERROR;
372
373 ucnv_toUnicode (conv,
374 &targ,
375 end,
376 (const char **)&src,
377 (const char *)srcLimit,
378 checkOffsets ? offs : NULL,
379 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
380 &status);
381 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (srcLimit < realSourceEnd) ); /* while we just need another buffer */
382
383
384 if(U_FAILURE(status))
385 {
386 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
387 return FALSE;
388 }
389
390 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
391 sourcelen, targ-junkout);
392 if(getTestOption(VERBOSITY_OPTION))
393 {
394
395 junk[0] = 0;
396 offset_str[0] = 0;
397
398 for(p = junkout;p<targ;p++)
399 {
400 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
401 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
402 }
403
404 log_verbose(junk);
405 printUSeq(expect, expectlen);
406 if ( checkOffsets )
407 {
408 log_verbose("\nOffsets:");
409 log_verbose(offset_str);
410 }
411 log_verbose("\n");
412 }
413 ucnv_close(conv);
414
415 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
416
417 if (checkOffsets && (expectOffsets != 0))
418 {
419 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
420 {
421 log_err("\n\ndid not get the expected offsets while %s \n", gNuConvTestName);
422 log_err("\nGot : ");
423 for(p=junkout;p<targ;p++)
424 log_err("%d, ", junokout[p-junkout]);
425 log_err("\nExpected: ");
426 for(i=0; i<(targ-junkout); i++)
427 log_err("%d,", expectOffsets[i]);
428 log_err("");
429 for(i=0; i<(targ-junkout); i++)
430 log_err("0x%04X,", junkout[i]);
431 log_err("");
432 for(i=0; i<(src-(const char *)source); i++)
433 log_err("0x%04X,", (unsigned char)source[i]);
434 }
435 }
436
437 if(!memcmp(junkout, expect, expectlen*2))
438 {
439 log_verbose("Matches!\n");
440 return TRUE;
441 }
442 else
443 {
444 log_err("String does not match. %s\n", gNuConvTestName);
445 log_verbose("String does not match. %s\n", gNuConvTestName);
446 printUSeqErr(junkout, expectlen);
447 printf("\n");
448 printUSeqErr(expect, expectlen);
449 return FALSE;
450 }
451 }
452
453
454
/*
 * Runs every fallback conversion data set below with the given chunk sizes.
 *
 * outsize is stored in gOutBufferSize and insize in gInBufferSize; the
 * testConvertFromUnicode()/testConvertToUnicode() helpers read those globals.
 * A failed comparison is reported with log_err() and the remaining data sets
 * are still run.
 *
 * Naming used for the tables: "expected..." is the codepage byte sequence,
 * "retrieved..."/"expectedUnicode..." is the UTF-16 text expected when those
 * bytes are converted back, "to...Offs" are offsets for the from-Unicode
 * direction and "from...offs" are offsets for the to-Unicode direction.
 */
static void TestConvertFallBackWithBufferSizes(int32_t outsize, int32_t insize )
{

    static const UChar    SBCSText[] =
     { 0x0021, 0xFF01, 0x0022, 0xFF02, 0x0023, 0xFF03, 0x003A, 0xFF1A, 0x003B, 0xFF1B, 0x003C, 0xFF1C };
     /* 21, ?, 22, ?, 23, ?, 3a, ?, 3b, ?, 3c, ? SBCS*/
    static const uint8_t expectedNative[] =
     {  0x21, 0x21, 0x22, 0x22, 0x23, 0x23, 0x3a, 0x3a, 0x3b, 0x3b, 0x3c, 0x3c};
    static const UChar retrievedSBCSText[]=
       { 0x0021, 0x0021, 0x0022, 0x0022, 0x0023, 0x0023, 0x003A, 0x003A, 0x003B, 0x003B, 0x003C, 0x003C };
    static const int32_t  toNativeOffs    [] =
     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b};
    static const int32_t fromNativeoffs []  =
    {  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};


    /* 1363 isn't DBCS, but it has the DBCS section */
    static const UChar    DBCSText[] =
     { 0x00a1, 0x00ad, 0x2010, 0x00b7, 0x30fb};
    static const uint8_t expectedIBM1363_DBCS[] =
     {  0xa2, 0xae, 0xa1 ,0xa9, 0xa1, 0xa9,0xa1 ,0xa4, 0xa1, 0xa4};
    static const UChar retrievedDBCSText[]=
        { 0x00a1, 0x2010, 0x2010, 0x30fb, 0x30fb };
    static const int32_t  toIBM1363Offs_DBCS[] =
        {  0x00, 0x00, 0x01,0x01, 0x02, 0x02,  0x03, 0x03, 0x04, 0x04};
    static const int32_t fromIBM1363offs_DBCS[]  =
    {  0, 2, 4, 6, 8};


    static const UChar    MBCSText[] =
     { 0x0001, 0x263a, 0x2013, 0x2014, 0x263b, 0x0002};
    static const  uint8_t expectedIBM950[] =
     {  0x01, 0x01, 0xa1, 0x56, 0xa1, 0x56, 0x02, 0x02};
    static const UChar retrievedMBCSText[]=
       { 0x0001, 0x0001, 0x2014, 0x2014, 0x0002, 0x0002};
    static const int32_t  toIBM950Offs    [] =
     {  0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x05};
    static const int32_t fromIBM950offs []  =
    {  0, 1, 2, 4, 6, 7};

    static const UChar    MBCSText1363[] =
     { 0x0005,
       0xffe8,
       0x0007,
       0x2022,
       0x005c,
       0x00b7,
       0x3016,
       0x30fb,
       0x9a36};
    static const uint8_t expectedIBM1363[] =
     {  0x05,
        0x05,
        0x07,
        0x07,
        0x7f,
        0xa1, 0xa4,
        0xa1, 0xe0,
        0xa1, 0xa4,
        0xf5, 0xe2};
    static const UChar retrievedMBCSText1363[]=
       { 0x0005, 0x0005, 0x0007, 0x0007, 0x001a,  0x30fb, 0x25a1, 0x30fb, 0x9a36};
    static const int32_t  toIBM1363Offs    [] =
     {  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08};
    static const int32_t fromIBM1363offs []  =
    {  0, 1, 2, 3, 4, 5, 7, 9, 11};



    /* Single-byte codepages that are all run against the SBCS data above. */
    static const char* nativeCodePage[]={
        /*NLCS Mapping*/
        "ibm-437",
        "ibm-850",
        "ibm-878",
        "ibm-923",
        "ibm-1051",
        "ibm-1089",
        "ibm-1250",
        "ibm-1251",
        "ibm-1253",
        "ibm-1254",
        "ibm-1255",
        "ibm-1256",
        "ibm-1257",
        "ibm-1258",
        "ibm-1276"
    };

    int32_t i=0;
    /* Publish the chunk sizes for the conversion helpers. */
    gInBufferSize = insize;
    gOutBufferSize = outsize;

    /* SBCS: both directions, once per codepage in nativeCodePage. */
    for(i=0; i<UPRV_LENGTHOF(nativeCodePage); i++){
        log_verbose("Testing %s\n", nativeCodePage[i]);
        if(!testConvertFromUnicode(SBCSText, UPRV_LENGTHOF(SBCSText),
            expectedNative, sizeof(expectedNative), nativeCodePage[i], TRUE, toNativeOffs ))
            log_err("u-> %s(SBCS) with FallBack did not match.\n", nativeCodePage[i]);

        if(!testConvertToUnicode(expectedNative, sizeof(expectedNative),
            retrievedSBCSText, UPRV_LENGTHOF(retrievedSBCSText), nativeCodePage[i], TRUE, fromNativeoffs ))
            log_err("%s->u(SBCS) with Fallback did not match.\n", nativeCodePage[i]);
    }

    /*DBCS*/
    if(!testConvertFromUnicode(DBCSText, UPRV_LENGTHOF(DBCSText),
        expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS), "ibm-1363", TRUE, toIBM1363Offs_DBCS ))
       log_err("u-> ibm-1363(DBCS portion) with FallBack did not match.\n");

    if(!testConvertToUnicode(expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS),
        retrievedDBCSText, UPRV_LENGTHOF(retrievedDBCSText),"ibm-1363", TRUE, fromIBM1363offs_DBCS ))
        log_err("ibm-1363->u(DBCS portion) with Fallback did not match.\n");


    /*MBCS*/
    if(!testConvertFromUnicode(MBCSText, UPRV_LENGTHOF(MBCSText),
        expectedIBM950, sizeof(expectedIBM950), "ibm-950", TRUE, toIBM950Offs ))
       log_err("u-> ibm-950(MBCS) with FallBack did not match.\n");

    if(!testConvertToUnicode(expectedIBM950, sizeof(expectedIBM950),
        retrievedMBCSText, UPRV_LENGTHOF(retrievedMBCSText),"ibm-950", TRUE, fromIBM950offs ))
        log_err("ibm-950->u(MBCS) with Fallback did not match.\n");

    /* NOTE(review): this spot used to carry the remark "commented until data
       table is available"; the tests that follow are active code. */
    log_verbose("toUnicode fallback with fallback data for MBCS\n");
    {
        const uint8_t IBM950input[] = {
            0xf4, 0x87, 0xa4, 0x4a, 0xf4, 0x88, 0xa4, 0x4b,
                0xf9, 0x92, 0xdc, 0xb0, };
        UChar expectedUnicodeText[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9};
        int32_t fromIBM950inputOffs []  =   {  0, 2, 4, 6, 8, 10};
        /* for testing reverse fallback behavior */
        /* NOTE(review): holds the same values as expectedUnicodeText above */
        UChar expectedFallbackFalse[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9};

        if(!testConvertToUnicode(IBM950input, sizeof(IBM950input),
                expectedUnicodeText, UPRV_LENGTHOF(expectedUnicodeText),"ibm-950", TRUE, fromIBM950inputOffs ))
            log_err("ibm-950->u(MBCS) with Fallback did not match.\n");
        /* same input with fallback switched off; the message text is the same as for the call above */
        if(!testConvertToUnicode(IBM950input, sizeof(IBM950input),
                expectedFallbackFalse, UPRV_LENGTHOF(expectedFallbackFalse),"ibm-950", FALSE, fromIBM950inputOffs ))
            log_err("ibm-950->u(MBCS) with Fallback  did not match.\n");

    }
    log_verbose("toUnicode fallback with fallback data for euc-tw\n");
    {
        const uint8_t euc_tw_input[] = {
            0xA7, 0xCC, 0x8E, 0xA2, 0xA1, 0xAB,
            0xA8, 0xC7, 0xC8, 0xDE,
            0xA8, 0xCD, 0x8E, 0xA2, 0xA2, 0xEA,};
        UChar expectedUnicodeText[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278};
        int32_t from_euc_tw_offs []  =   {  0, 2, 6, 8, 10, 12};
        /* for testing reverse fallback behavior */
        /* NOTE(review): holds the same values as expectedUnicodeText above */
        UChar expectedFallbackFalse[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278};

        if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input),
                expectedUnicodeText, UPRV_LENGTHOF(expectedUnicodeText),"euc-tw", TRUE, from_euc_tw_offs ))
            log_err("from euc-tw->u with Fallback did not match.\n");

        if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input),
                expectedFallbackFalse, UPRV_LENGTHOF(expectedFallbackFalse),"euc-tw", FALSE, from_euc_tw_offs ))
            log_err("from euc-tw->u with Fallback false did not match.\n");


    }
    log_verbose("fromUnicode to euc-tw with fallback data euc-tw\n");
    {
        UChar inputText[]= { 0x0001, 0x008e, 0x203e, 0x2223, 0xff5c, 0x5296,
                             0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278, 0xEDEC};
        const uint8_t expected_euc_tw[] = {
            0x01, 0x1a, 0xa2, 0xa3,
            0xa2, 0xde, 0xa2, 0xde,
            0x8e, 0xa2, 0xe5, 0xb9,
            0x8e, 0xa2, 0xa1, 0xab, 0x8e, 0xa2, 0xa1, 0xab,
            0xc8, 0xde, 0xc8, 0xde,
            0x8e, 0xa2, 0xa2, 0xea, 0x8e, 0xa2, 0xa2, 0xea,
            0x8e, 0xac, 0xc6, 0xf7};
        int32_t to_euc_tw_offs []  =   {  0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6,
            6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12};

        if(!testConvertFromUnicode(inputText, UPRV_LENGTHOF(inputText),
                expected_euc_tw, sizeof(expected_euc_tw), "euc-tw", TRUE, to_euc_tw_offs ))
            log_err("u-> euc-tw with FallBack did not match.\n");

    }

    /*MBCS 1363*/
    if(!testConvertFromUnicode(MBCSText1363, UPRV_LENGTHOF(MBCSText1363),
        expectedIBM1363, sizeof(expectedIBM1363), "ibm-1363", TRUE, toIBM1363Offs ))
       log_err("u-> ibm-1363(MBCS) with FallBack did not match.\n");

    if(!testConvertToUnicode(expectedIBM1363, sizeof(expectedIBM1363),
        retrievedMBCSText1363, UPRV_LENGTHOF(retrievedMBCSText1363),"ibm-1363", TRUE, fromIBM1363offs ))
        log_err("ibm-1363->u(MBCS) with Fallback did not match.\n");


      /*some more test to increase the code coverage in MBCS.  Create an test converter from test1.ucm
      which is test file for MBCS conversion with single-byte codepage data.*/
    {

        /* MBCS with single byte codepage data test1.ucm*/
        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x0003};
        const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0x08, 0xff,};
        int32_t  totest1Offs[]        = { 0, 1, 2, 3, 5, 7};

        const uint8_t test1input[]    = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
        /* NOTE(review): 8 expected UChars but only 7 offsets. The trailing
           0xfffe equals the value testConvertToUnicode() pre-fills its output
           buffer with, so presumably it matches that fill rather than
           converter output; confirm before changing either. */
        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd, 0xfffe};
        int32_t fromtest1Offs[]       = { 0, 1, 2, 3, 3, 4,5};

        /*from Unicode*/
        if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
                expectedtest1, sizeof(expectedtest1), "@test1", TRUE, totest1Offs ))
            log_err("u-> test1(MBCS conversion with single-byte) did not match.\n");

        /*to Unicode*/
        if(!testConvertToUnicode(test1input, sizeof(test1input),
               expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test1", TRUE, fromtest1Offs ))
            log_err("test1(MBCS conversion with single-byte) -> u  did not match.\n");

    }

    /*some more test to increase the code coverage in MBCS.  Create an test converter from test3.ucm
      which is test file for MBCS conversion with three-byte codepage data.*/
    {

        /* MBCS with three byte codepage data test3.ucm*/
        const UChar unicodeInput[]    = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x000b, 0xd84d, 0xdc56, 0x000e, 0x0003, };
        const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x07, 0xff, 0x01, 0x02, 0x0b,  0x01, 0x02, 0x0a,  0xff, 0xff,};
        int32_t  totest3Offs[]        = { 0, 1, 2, 3, 5, 7, 7, 7, 8, 8, 8, 10,  11};

        const uint8_t test3input[]    = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b,  0x07,  0x01, 0x02, 0x0a,
                          0x01, 0x02, 0x0e, 0x01, 0x02, 0x0d, 0x03, 0x01, 0x02, 0x0f,};
        const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56,
                          0x000e, 0xd891, 0xdd67, 0xfffd, 0xfffd };
        int32_t fromtest3Offs[]       = { 0, 1, 2, 3, 6, 6, 7, 7, 10, 13, 13, 16, 17};

        /*from Unicode*/
        if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
                expectedtest3, sizeof(expectedtest3), "@test3", TRUE, totest3Offs ))
            log_err("u-> test3(MBCS conversion with three-byte) did not match.\n");

        /*to Unicode*/
        if(!testConvertToUnicode(test3input, sizeof(test3input),
               expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", TRUE, fromtest3Offs ))
            log_err("test3(MBCS conversion with three-byte) -> u  did not match.\n");

    }

    /*some more test to increase the code coverage in MBCS.  Create an test converter from test4.ucm
      which is test file for MBCS conversion with four-byte codepage data.*/
    {

        /* MBCS with four byte codepage data test4.ucm*/
        const UChar unicodeInput[]    =
                { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd,
                  0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x000f};
        const uint8_t expectedtest4[] =
                { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b,  0x07, 0xff,
                  0x01, 0x02, 0x03, 0x0a,  0xff, 0xff, 0xff};
        int32_t  totest4Offs[]        =
                { 0, 1, 2, 3, 3, 3, 3, 4, 6, 8, 8, 8, 8, 10, 11, 13};

        const uint8_t test4input[]    =
                { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b,  0x07,  0x08,
                0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0e, 0x01, 0x02, 0x03, 0x0d, 0x03, 0x01, 0x02, 0x03, 0x0c,};
        const UChar expectedUnicode[] =
                { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd,
                  0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x1a, 0xfffd};
        int32_t fromtest4Offs[]       =
                { 0, 1, 2, 3, 7, 7, 8, 8, 9, 9, 13, 17, 17, 21, 22,};

        /*from Unicode*/
        if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
                expectedtest4, sizeof(expectedtest4), "@test4", TRUE, totest4Offs ))
            log_err("u-> test4(MBCS conversion with four-byte) did not match.\n");

        /*to Unicode*/
        if(!testConvertToUnicode(test4input, sizeof(test4input),
               expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", TRUE, fromtest4Offs ))
            log_err("test4(MBCS conversion with four-byte) -> u  did not match.\n");

    }
    /* Test for jitterbug 509 EBCDIC_STATEFUL Converters*/
    {
        const UChar unicodeInput[]    = {0x00AF,         0x2013,     0x2223,    0x004C,    0x5F5D,         0xFF5E };
        const uint8_t expectedtest1[] = {0x0E,0x42,0xA1, 0x44,0x4A,  0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1,0x0f };
        int32_t  totest1Offs[]        = {0,   0,   0,    1,   1,     2,   2,    3,   3,    4,   4,   4,    5,   5,   5 };
        const uint8_t test1input[]    = {0x0E,0x42,0xA1, 0x44,0x4A,  0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1 };
        const UChar expectedUnicode[] = {0x203e,         0x2014,     0xff5c,    0x004c,    0x5f5e,         0x223c };
        int32_t fromtest1Offs[]       = {1,              3,          5,         8,         10,             12 };
        /*from Unicode*/
        if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
                expectedtest1, sizeof(expectedtest1), "ibm-1371", TRUE, totest1Offs ))
            log_err("u-> ibm-1371(MBCS conversion with single-byte) did not match.,\n");
        /*to Unicode*/
        if(!testConvertToUnicode(test1input, sizeof(test1input),
               expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "ibm-1371", TRUE, fromtest1Offs ))
            log_err("ibm-1371(MBCS conversion with single-byte) -> u  did not match.,\n");
    }

}
753 #endif
754