1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ***************************************************************************/
8 /*****************************************************************************
9 *
*     File NCNVFBTS
11 *
12 * Modification History:
13 * Name Date Description
14 * Madhu Katragadda 06/23/2000 Tests for Converter FallBack API and Functionality
15 ******************************************************************************
16 */
17 #include <stdbool.h>
18 #include <stdio.h>
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "cintltst.h"
23 #include "unicode/utypes.h"
24 #include "unicode/ustring.h"
25 #include "ncnvfbts.h"
26 #include "cmemory.h"
27 #include "cstring.h"
28
29 #if !UCONFIG_NO_LEGACY_CONVERSION
/* Capacity, in elements, of the scratch output/offset buffers used by the
   conversion helpers in this file. */
#define NEW_MAX_BUFFER 999


/* Minimum of two values. Both arguments and the comparison are fully
   parenthesized so that arguments built from lower-precedence operators expand
   as intended. Each argument may still be evaluated twice, so do not pass
   expressions with side effects. */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))

/* Per-call chunk sizes used by the conversion helpers in this file. */
static int32_t  gInBufferSize = 0;
static int32_t  gOutBufferSize = 0;
/* Human-readable description of the conversion under test, used in log messages. */
static char     gNuConvTestName[1024];
38
my_ucnv_open(const char * cnv,UErrorCode * err)39 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
40 {
41 if(cnv && cnv[0] == '@') {
42 return ucnv_openPackage("testdata", cnv+1, err);
43 } else {
44 return ucnv_open(cnv, err);
45 }
46 }
47
48
/* Writes the first len bytes of a to the verbose log as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; idx++) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
57
/* Writes the first len UChars of a to the verbose log as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; idx++) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
66
/* Writes the first len bytes of a to stderr as "{0xNN 0xNN ... }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; idx++) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
75
/* Writes the first len UChars of a to stderr as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; idx++) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
84
TestConverterFallBack(void)85 static void TestConverterFallBack(void)
86 {
87 TestConvertFallBackWithBufferSizes(10,10);
88 TestConvertFallBackWithBufferSizes(2,3);
89 TestConvertFallBackWithBufferSizes(3,2);
90 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,1);
91 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,2);
92 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,3);
93 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,4);
94 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,5);
95 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,6);
96 TestConvertFallBackWithBufferSizes(1,NEW_MAX_BUFFER);
97 TestConvertFallBackWithBufferSizes(2,NEW_MAX_BUFFER);
98 TestConvertFallBackWithBufferSizes(3,NEW_MAX_BUFFER);
99 TestConvertFallBackWithBufferSizes(4,NEW_MAX_BUFFER);
100 TestConvertFallBackWithBufferSizes(5,NEW_MAX_BUFFER);
101 TestConvertFallBackWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
102
103 }
104
105
106 void addTestConverterFallBack(TestNode** root);
107
addTestConverterFallBack(TestNode ** root)108 void addTestConverterFallBack(TestNode** root)
109 {
110 #if !UCONFIG_NO_FILE_IO
111 addTest(root, &TestConverterFallBack, "tsconv/ncnvfbts/TestConverterFallBack");
112 #endif
113
114 }
115
116
117 /* Note that this test already makes use of statics, so it's not really
118 multithread safe.
119 This convenience function lets us make the error messages actually useful.
120 */
121
setNuConvTestName(const char * codepage,const char * direction)122 static void setNuConvTestName(const char *codepage, const char *direction)
123 {
124 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
125 codepage,
126 direction,
127 (int)gInBufferSize,
128 (int)gOutBufferSize);
129 }
130
131
testConvertFromUnicode(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,UBool fallback,const int32_t * expectOffsets)132 static UBool testConvertFromUnicode(const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
133 const char *codepage, UBool fallback, const int32_t *expectOffsets)
134 {
135
136
137 UErrorCode status = U_ZERO_ERROR;
138 UConverter *conv = 0;
139 char junkout[NEW_MAX_BUFFER]; /* FIX */
140 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
141 const UChar *src;
142 char *end;
143 char *targ;
144 int32_t *offs;
145 int i;
146 int32_t realBufferSize;
147 char *realBufferEnd;
148 const UChar *realSourceEnd;
149 const UChar *sourceLimit;
150 UBool checkOffsets = true;
151 UBool doFlush;
152 UBool action=false;
153 char *p;
154
155
156 for(i=0;i<NEW_MAX_BUFFER;i++)
157 junkout[i] = (char)0xF0;
158 for(i=0;i<NEW_MAX_BUFFER;i++)
159 junokout[i] = 0xFF;
160 setNuConvTestName(codepage, "FROM");
161
162 log_verbose("\nTesting========= %s FROM \n inputbuffer= %d outputbuffer= %d\n", codepage, gInBufferSize,
163 gOutBufferSize);
164
165 conv = my_ucnv_open(codepage, &status);
166 if(U_FAILURE(status))
167 {
168 log_data_err("Couldn't open converter %s\n",codepage);
169 return true;
170 }
171
172 log_verbose("Converter opened..\n");
173 /*----setting the callback routine----*/
174 ucnv_setFallback (conv, fallback);
175 action = ucnv_usesFallback(conv);
176 if(action != fallback){
177 log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status));
178 }
179 /*------------------------*/
180 src = source;
181 targ = junkout;
182 offs = junokout;
183
184 realBufferSize = UPRV_LENGTHOF(junkout);
185 realBufferEnd = junkout + realBufferSize;
186 realSourceEnd = source + sourceLen;
187
188 if ( gOutBufferSize != realBufferSize )
189 checkOffsets = false;
190
191 if( gInBufferSize != NEW_MAX_BUFFER )
192 checkOffsets = false;
193
194 do
195 {
196 end = nct_min(targ + gOutBufferSize, realBufferEnd);
197 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
198
199 doFlush = (UBool)(sourceLimit == realSourceEnd);
200
201 if(targ == realBufferEnd)
202 {
203 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
204 return false;
205 }
206 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
207
208
209 status = U_ZERO_ERROR;
210
211 ucnv_fromUnicode (conv,
212 (char **)&targ,
213 (const char *)end,
214 &src,
215 sourceLimit,
216 checkOffsets ? offs : NULL,
217 doFlush, /* flush if we're at the end of the input data */
218 &status);
219
220 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (sourceLimit < realSourceEnd) );
221
222 if(U_FAILURE(status))
223 {
224 log_err("Problem doing toUnicode, errcode %d %s\n", myErrorName(status), gNuConvTestName);
225 return false;
226 }
227
228 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
229 sourceLen, targ-junkout);
230 if(getTestOption(VERBOSITY_OPTION))
231 {
232 char junk[9999];
233 char offset_str[9999];
234
235 junk[0] = 0;
236 offset_str[0] = 0;
237 for(p = junkout;p<targ;p++)
238 {
239 sprintf(junk + uprv_strlen(junk), "0x%02x, ", (0xFF) & (unsigned int)*p);
240 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (0xFF) & (unsigned int)junokout[p-junkout]);
241 }
242
243 log_verbose(junk);
244 printSeq((const unsigned char*)expect, expectLen);
245 if ( checkOffsets )
246 {
247 log_verbose("\nOffsets:");
248 log_verbose(offset_str);
249 }
250 log_verbose("\n");
251 }
252 ucnv_close(conv);
253
254
255 if(expectLen != targ-junkout)
256 {
257 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
258 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
259 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
260 printSeqErr((const unsigned char*)expect, expectLen);
261 return false;
262 }
263
264 if (checkOffsets && (expectOffsets != 0) )
265 {
266 log_verbose("\ncomparing %d offsets..\n", targ-junkout);
267 if(uprv_memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
268 log_err("\ndid not get the expected offsets while %s \n", gNuConvTestName);
269 log_err("Got : ");
270 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
271 for(p=junkout;p<targ;p++)
272 log_err("%d, ", junokout[p-junkout]);
273 log_err("\nExpected: ");
274 for(i=0; i<(targ-junkout); i++)
275 log_err("%d,", expectOffsets[i]);
276 }
277 }
278
279 log_verbose("\n\ncomparing..\n");
280 if(!memcmp(junkout, expect, expectLen))
281 {
282 log_verbose("Matches!\n");
283 return true;
284 }
285 else
286 {
287 log_err("String does not match. %s\n", gNuConvTestName);
288 log_verbose("String does not match. %s\n", gNuConvTestName);
289 printSeqErr((const unsigned char*)junkout, expectLen);
290 printSeqErr((const unsigned char*)expect, expectLen);
291 return false;
292 }
293 }
294
testConvertToUnicode(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,UBool fallback,const int32_t * expectOffsets)295 static UBool testConvertToUnicode( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
296 const char *codepage, UBool fallback, const int32_t *expectOffsets)
297 {
298 UErrorCode status = U_ZERO_ERROR;
299 UConverter *conv = 0;
300 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
301 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
302 const char *src;
303 const char *realSourceEnd;
304 const char *srcLimit;
305 UChar *targ;
306 UChar *end;
307 int32_t *offs;
308 int i;
309 UBool checkOffsets = true;
310 char junk[9999];
311 char offset_str[9999];
312 UChar *p;
313 UBool action;
314
315 int32_t realBufferSize;
316 UChar *realBufferEnd;
317
318
319 for(i=0;i<NEW_MAX_BUFFER;i++)
320 junkout[i] = 0xFFFE;
321
322 for(i=0;i<NEW_MAX_BUFFER;i++)
323 junokout[i] = -1;
324
325 setNuConvTestName(codepage, "TO");
326
327 log_verbose("\n========= %s\n", gNuConvTestName);
328
329 conv = my_ucnv_open(codepage, &status);
330 if(U_FAILURE(status))
331 {
332 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
333 return true; /* because it has been logged */
334 }
335
336 log_verbose("Converter opened..\n");
337
338 src = (const char *)source;
339 targ = junkout;
340 offs = junokout;
341
342 realBufferSize = UPRV_LENGTHOF(junkout);
343 realBufferEnd = junkout + realBufferSize;
344 realSourceEnd = src + sourcelen;
345 /*----setting the fallback routine----*/
346 ucnv_setFallback (conv, fallback);
347 action = ucnv_usesFallback(conv);
348 if(action != fallback){
349 log_err("FAIL: Error is setting fallback. Errocode=%s\n", myErrorName(status));
350 }
351 /*-------------------------------------*/
352 if ( gOutBufferSize != realBufferSize )
353 checkOffsets = false;
354
355 if( gInBufferSize != NEW_MAX_BUFFER )
356 checkOffsets = false;
357
358 do
359 {
360 end = nct_min( targ + gOutBufferSize, realBufferEnd);
361 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
362
363 if(targ == realBufferEnd)
364 {
365 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjey=%08lx %s",targ,gNuConvTestName);
366 return false;
367 }
368 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
369
370
371
372 status = U_ZERO_ERROR;
373
374 ucnv_toUnicode (conv,
375 &targ,
376 end,
377 (const char **)&src,
378 (const char *)srcLimit,
379 checkOffsets ? offs : NULL,
380 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
381 &status);
382 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (srcLimit < realSourceEnd) ); /* while we just need another buffer */
383
384
385 if(U_FAILURE(status))
386 {
387 log_err("Problem doing toUnicode, errcode %s %s\n", myErrorName(status), gNuConvTestName);
388 return false;
389 }
390
391 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
392 sourcelen, targ-junkout);
393 if(getTestOption(VERBOSITY_OPTION))
394 {
395
396 junk[0] = 0;
397 offset_str[0] = 0;
398
399 for(p = junkout;p<targ;p++)
400 {
401 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*p);
402 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[p-junkout]);
403 }
404
405 log_verbose(junk);
406 printUSeq(expect, expectlen);
407 if ( checkOffsets )
408 {
409 log_verbose("\nOffsets:");
410 log_verbose(offset_str);
411 }
412 log_verbose("\n");
413 }
414 ucnv_close(conv);
415
416 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
417
418 if (checkOffsets && (expectOffsets != 0))
419 {
420 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t)))
421 {
422 log_err("\n\ndid not get the expected offsets while %s \n", gNuConvTestName);
423 log_err("\nGot : ");
424 for(p=junkout;p<targ;p++)
425 log_err("%d, ", junokout[p-junkout]);
426 log_err("\nExpected: ");
427 for(i=0; i<(targ-junkout); i++)
428 log_err("%d,", expectOffsets[i]);
429 log_err("");
430 for(i=0; i<(targ-junkout); i++)
431 log_err("0x%04X,", junkout[i]);
432 log_err("");
433 for(i=0; i<(src-(const char *)source); i++)
434 log_err("0x%04X,", (unsigned char)source[i]);
435 }
436 }
437
438 if(!memcmp(junkout, expect, expectlen*2))
439 {
440 log_verbose("Matches!\n");
441 return true;
442 }
443 else
444 {
445 log_err("String does not match. %s\n", gNuConvTestName);
446 log_verbose("String does not match. %s\n", gNuConvTestName);
447 printUSeqErr(junkout, expectlen);
448 printf("\n");
449 printUSeqErr(expect, expectlen);
450 return false;
451 }
452 }
453
454
455
TestConvertFallBackWithBufferSizes(int32_t outsize,int32_t insize)456 static void TestConvertFallBackWithBufferSizes(int32_t outsize, int32_t insize )
457 {
458
459 static const UChar SBCSText[] =
460 { 0x0021, 0xFF01, 0x0022, 0xFF02, 0x0023, 0xFF03, 0x003A, 0xFF1A, 0x003B, 0xFF1B, 0x003C, 0xFF1C };
461 /* 21, ?, 22, ?, 23, ?, 3a, ?, 3b, ?, 3c, ? SBCS*/
462 static const uint8_t expectedNative[] =
463 { 0x21, 0x21, 0x22, 0x22, 0x23, 0x23, 0x3a, 0x3a, 0x3b, 0x3b, 0x3c, 0x3c};
464 static const UChar retrievedSBCSText[]=
465 { 0x0021, 0x0021, 0x0022, 0x0022, 0x0023, 0x0023, 0x003A, 0x003A, 0x003B, 0x003B, 0x003C, 0x003C };
466 static const int32_t toNativeOffs [] =
467 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b};
468 static const int32_t fromNativeoffs [] =
469 { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11};
470
471
472 /* 1363 isn't DBCS, but it has the DBCS section */
473 static const UChar DBCSText[] =
474 { 0x00a1, 0x00ad, 0x2010, 0x00b7, 0x30fb};
475 static const uint8_t expectedIBM1363_DBCS[] =
476 { 0xa2, 0xae, 0xa1 ,0xa9, 0xa1, 0xa9,0xa1 ,0xa4, 0xa1, 0xa4};
477 static const UChar retrievedDBCSText[]=
478 { 0x00a1, 0x2010, 0x2010, 0x30fb, 0x30fb };
479 static const int32_t toIBM1363Offs_DBCS[] =
480 { 0x00, 0x00, 0x01,0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04};
481 static const int32_t fromIBM1363offs_DBCS[] =
482 { 0, 2, 4, 6, 8};
483
484
485 static const UChar MBCSText[] =
486 { 0x0001, 0x263a, 0x2013, 0x2014, 0x263b, 0x0002};
487 static const uint8_t expectedIBM950[] =
488 { 0x01, 0x01, 0xa1, 0x56, 0xa1, 0x56, 0x02, 0x02};
489 static const UChar retrievedMBCSText[]=
490 { 0x0001, 0x0001, 0x2014, 0x2014, 0x0002, 0x0002};
491 static const int32_t toIBM950Offs [] =
492 { 0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x05};
493 static const int32_t fromIBM950offs [] =
494 { 0, 1, 2, 4, 6, 7};
495
496 static const UChar MBCSText1363[] =
497 { 0x0005,
498 0xffe8,
499 0x0007,
500 0x2022,
501 0x005c,
502 0x00b7,
503 0x3016,
504 0x30fb,
505 0x9a36};
506 static const uint8_t expectedIBM1363[] =
507 { 0x05,
508 0x05,
509 0x07,
510 0x07,
511 0x7f,
512 0xa1, 0xa4,
513 0xa1, 0xe0,
514 0xa1, 0xa4,
515 0xf5, 0xe2};
516 static const UChar retrievedMBCSText1363[]=
517 { 0x0005, 0x0005, 0x0007, 0x0007, 0x001a, 0x30fb, 0x25a1, 0x30fb, 0x9a36};
518 static const int32_t toIBM1363Offs [] =
519 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08};
520 static const int32_t fromIBM1363offs [] =
521 { 0, 1, 2, 3, 4, 5, 7, 9, 11};
522
523
524
525 static const char* nativeCodePage[]={
526 /*NLCS Mapping*/
527 "ibm-437",
528 "ibm-850",
529 "ibm-878",
530 "ibm-923",
531 "ibm-1051",
532 "ibm-1089",
533 "ibm-1250",
534 "ibm-1251",
535 "ibm-1253",
536 "ibm-1254",
537 "ibm-1255",
538 "ibm-1256",
539 "ibm-1257",
540 "ibm-1258",
541 "ibm-1276"
542 };
543
544 int32_t i=0;
545 gInBufferSize = insize;
546 gOutBufferSize = outsize;
547
548 for(i=0; i<UPRV_LENGTHOF(nativeCodePage); i++){
549 log_verbose("Testing %s\n", nativeCodePage[i]);
550 if(!testConvertFromUnicode(SBCSText, UPRV_LENGTHOF(SBCSText),
551 expectedNative, sizeof(expectedNative), nativeCodePage[i], true, toNativeOffs ))
552 log_err("u-> %s(SBCS) with FallBack did not match.\n", nativeCodePage[i]);
553
554 if(!testConvertToUnicode(expectedNative, sizeof(expectedNative),
555 retrievedSBCSText, UPRV_LENGTHOF(retrievedSBCSText), nativeCodePage[i], true, fromNativeoffs ))
556 log_err("%s->u(SBCS) with Fallback did not match.\n", nativeCodePage[i]);
557 }
558
559 /*DBCS*/
560 if(!testConvertFromUnicode(DBCSText, UPRV_LENGTHOF(DBCSText),
561 expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS), "ibm-1363", true, toIBM1363Offs_DBCS ))
562 log_err("u-> ibm-1363(DBCS portion) with FallBack did not match.\n");
563
564 if(!testConvertToUnicode(expectedIBM1363_DBCS, sizeof(expectedIBM1363_DBCS),
565 retrievedDBCSText, UPRV_LENGTHOF(retrievedDBCSText),"ibm-1363", true, fromIBM1363offs_DBCS ))
566 log_err("ibm-1363->u(DBCS portion) with Fallback did not match.\n");
567
568
569 /*MBCS*/
570 if(!testConvertFromUnicode(MBCSText, UPRV_LENGTHOF(MBCSText),
571 expectedIBM950, sizeof(expectedIBM950), "ibm-950", true, toIBM950Offs ))
572 log_err("u-> ibm-950(MBCS) with FallBack did not match.\n");
573
574 if(!testConvertToUnicode(expectedIBM950, sizeof(expectedIBM950),
575 retrievedMBCSText, UPRV_LENGTHOF(retrievedMBCSText),"ibm-950", true, fromIBM950offs ))
576 log_err("ibm-950->u(MBCS) with Fallback did not match.\n");
577
578 /*commented until data table is available*/
579 log_verbose("toUnicode fallback with fallback data for MBCS\n");
580 {
581 const uint8_t IBM950input[] = {
582 0xf4, 0x87, 0xa4, 0x4a, 0xf4, 0x88, 0xa4, 0x4b,
583 0xf9, 0x92, 0xdc, 0xb0, };
584 UChar expectedUnicodeText[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9};
585 int32_t fromIBM950inputOffs [] = { 0, 2, 4, 6, 8, 10};
586 /* for testing reverse fallback behavior */
587 UChar expectedFallbackFalse[]= { 0x5165, 0x5165, 0x516b, 0x516b, 0x9ef9, 0x9ef9};
588
589 if(!testConvertToUnicode(IBM950input, sizeof(IBM950input),
590 expectedUnicodeText, UPRV_LENGTHOF(expectedUnicodeText),"ibm-950", true, fromIBM950inputOffs ))
591 log_err("ibm-950->u(MBCS) with Fallback did not match.\n");
592 if(!testConvertToUnicode(IBM950input, sizeof(IBM950input),
593 expectedFallbackFalse, UPRV_LENGTHOF(expectedFallbackFalse),"ibm-950", false, fromIBM950inputOffs ))
594 log_err("ibm-950->u(MBCS) with Fallback did not match.\n");
595
596 }
597 log_verbose("toUnicode fallback with fallback data for euc-tw\n");
598 {
599 const uint8_t euc_tw_input[] = {
600 0xA7, 0xCC, 0x8E, 0xA2, 0xA1, 0xAB,
601 0xA8, 0xC7, 0xC8, 0xDE,
602 0xA8, 0xCD, 0x8E, 0xA2, 0xA2, 0xEA,};
603 UChar expectedUnicodeText[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278};
604 int32_t from_euc_tw_offs [] = { 0, 2, 6, 8, 10, 12};
605 /* for testing reverse fallback behavior */
606 UChar expectedFallbackFalse[]= { 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278};
607
608 if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input),
609 expectedUnicodeText, UPRV_LENGTHOF(expectedUnicodeText),"euc-tw", true, from_euc_tw_offs ))
610 log_err("from euc-tw->u with Fallback did not match.\n");
611
612 if(!testConvertToUnicode(euc_tw_input, sizeof(euc_tw_input),
613 expectedFallbackFalse, UPRV_LENGTHOF(expectedFallbackFalse),"euc-tw", false, from_euc_tw_offs ))
614 log_err("from euc-tw->u with Fallback false did not match.\n");
615
616
617 }
618 log_verbose("fromUnicode to euc-tw with fallback data euc-tw\n");
619 {
620 UChar inputText[]= { 0x0001, 0x008e, 0x203e, 0x2223, 0xff5c, 0x5296,
621 0x5C6E, 0x5C6E, 0x81FC, 0x81FC, 0x8278, 0x8278, 0xEDEC};
622 const uint8_t expected_euc_tw[] = {
623 0x01, 0x1a, 0xa2, 0xa3,
624 0xa2, 0xde, 0xa2, 0xde,
625 0x8e, 0xa2, 0xe5, 0xb9,
626 0x8e, 0xa2, 0xa1, 0xab, 0x8e, 0xa2, 0xa1, 0xab,
627 0xc8, 0xde, 0xc8, 0xde,
628 0x8e, 0xa2, 0xa2, 0xea, 0x8e, 0xa2, 0xa2, 0xea,
629 0x8e, 0xac, 0xc6, 0xf7};
630 int32_t to_euc_tw_offs [] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5, 6, 6,
631 6, 6, 7, 7, 7, 7, 8, 8, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12};
632
633 if(!testConvertFromUnicode(inputText, UPRV_LENGTHOF(inputText),
634 expected_euc_tw, sizeof(expected_euc_tw), "euc-tw", true, to_euc_tw_offs ))
635 log_err("u-> euc-tw with FallBack did not match.\n");
636
637 }
638
639 /*MBCS 1363*/
640 if(!testConvertFromUnicode(MBCSText1363, UPRV_LENGTHOF(MBCSText1363),
641 expectedIBM1363, sizeof(expectedIBM1363), "ibm-1363", true, toIBM1363Offs ))
642 log_err("u-> ibm-1363(MBCS) with FallBack did not match.\n");
643
644 if(!testConvertToUnicode(expectedIBM1363, sizeof(expectedIBM1363),
645 retrievedMBCSText1363, UPRV_LENGTHOF(retrievedMBCSText1363),"ibm-1363", true, fromIBM1363offs ))
646 log_err("ibm-1363->u(MBCS) with Fallback did not match.\n");
647
648
649 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
650 which is test file for MBCS conversion with single-byte codepage data.*/
651 {
652
653 /* MBCS with single byte codepage data test1.ucm*/
654 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x0003};
655 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0x08, 0xff,};
656 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, 7};
657
658 const uint8_t test1input[] = { 0x00, 0x05, 0x06, 0x07, 0x08, 0x09};
659 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xfffd, 0xfffd, 0xfffe};
660 int32_t fromtest1Offs[] = { 0, 1, 2, 3, 3, 4,5};
661
662 /*from Unicode*/
663 if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
664 expectedtest1, sizeof(expectedtest1), "@test1", true, totest1Offs ))
665 log_err("u-> test1(MBCS conversion with single-byte) did not match.\n");
666
667 /*to Unicode*/
668 if(!testConvertToUnicode(test1input, sizeof(test1input),
669 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test1", true, fromtest1Offs ))
670 log_err("test1(MBCS conversion with single-byte) -> u did not match.\n");
671
672 }
673
674 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
675 which is test file for MBCS conversion with three-byte codepage data.*/
676 {
677
678 /* MBCS with three byte codepage data test3.ucm*/
679 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0xdbba, 0xdfcd, 0x000b, 0xd84d, 0xdc56, 0x000e, 0x0003, };
680 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x07, 0xff, 0x01, 0x02, 0x0b, 0x01, 0x02, 0x0a, 0xff, 0xff,};
681 int32_t totest3Offs[] = { 0, 1, 2, 3, 5, 7, 7, 7, 8, 8, 8, 10, 11};
682
683 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a,
684 0x01, 0x02, 0x0e, 0x01, 0x02, 0x0d, 0x03, 0x01, 0x02, 0x0f,};
685 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56,
686 0x000e, 0xd891, 0xdd67, 0xfffd, 0xfffd };
687 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10, 13, 13, 16, 17};
688
689 /*from Unicode*/
690 if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
691 expectedtest3, sizeof(expectedtest3), "@test3", true, totest3Offs ))
692 log_err("u-> test3(MBCS conversion with three-byte) did not match.\n");
693
694 /*to Unicode*/
695 if(!testConvertToUnicode(test3input, sizeof(test3input),
696 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", true, fromtest3Offs ))
697 log_err("test3(MBCS conversion with three-byte) -> u did not match.\n");
698
699 }
700
701 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
702 which is test file for MBCS conversion with four-byte codepage data.*/
703 {
704
705 /* MBCS with three byte codepage data test4.ucm*/
706 const UChar unicodeInput[] =
707 { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd,
708 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x000f};
709 const uint8_t expectedtest4[] =
710 { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0xff,
711 0x01, 0x02, 0x03, 0x0a, 0xff, 0xff, 0xff};
712 int32_t totest4Offs[] =
713 { 0, 1, 2, 3, 3, 3, 3, 4, 6, 8, 8, 8, 8, 10, 11, 13};
714
715 const uint8_t test4input[] =
716 { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x08,
717 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0e, 0x01, 0x02, 0x03, 0x0d, 0x03, 0x01, 0x02, 0x03, 0x0c,};
718 const UChar expectedUnicode[] =
719 { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xdbba, 0xdfcd,
720 0xd84d, 0xdc56, 0x000e, 0xd891, 0xdd67, 0x1a, 0xfffd};
721 int32_t fromtest4Offs[] =
722 { 0, 1, 2, 3, 7, 7, 8, 8, 9, 9, 13, 17, 17, 21, 22,};
723
724 /*from Unicode*/
725 if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
726 expectedtest4, sizeof(expectedtest4), "@test4", true, totest4Offs ))
727 log_err("u-> test4(MBCS conversion with four-byte) did not match.\n");
728
729 /*to Unicode*/
730 if(!testConvertToUnicode(test4input, sizeof(test4input),
731 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", true, fromtest4Offs ))
732 log_err("test4(MBCS conversion with four-byte) -> u did not match.\n");
733
734 }
735 /* Test for jitterbug 509 EBCDIC_STATEFUL Converters*/
736 {
737 const UChar unicodeInput[] = {0x00AF, 0x2013, 0x2223, 0x004C, 0x5F5D, 0xFF5E };
738 const uint8_t expectedtest1[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1,0x0f };
739 int32_t totest1Offs[] = {0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5 };
740 const uint8_t test1input[] = {0x0E,0x42,0xA1, 0x44,0x4A, 0x42,0x4F, 0x0F,0xD3, 0x0E,0x65,0x60, 0x43,0xA1 };
741 const UChar expectedUnicode[] = {0x203e, 0x2014, 0xff5c, 0x004c, 0x5f5e, 0x223c };
742 int32_t fromtest1Offs[] = {1, 3, 5, 8, 10, 12 };
743 /*from Unicode*/
744 if(!testConvertFromUnicode(unicodeInput, UPRV_LENGTHOF(unicodeInput),
745 expectedtest1, sizeof(expectedtest1), "ibm-1371", true, totest1Offs ))
746 log_err("u-> ibm-1371(MBCS conversion with single-byte) did not match.,\n");
747 /*to Unicode*/
748 if(!testConvertToUnicode(test1input, sizeof(test1input),
749 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "ibm-1371", true, fromtest1Offs ))
750 log_err("ibm-1371(MBCS conversion with single-byte) -> u did not match.,\n");
751 }
752
753 }
754 #endif
755