1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
9 *
10 * File nucnvtst.c
11 *
12 * Modification History:
13 * Name Description
14 * Steven R. Loomis 7/8/1999 Adding input buffer test
15 ********************************************************************************
16 */
17 #include <stdio.h>
18 #include "cstring.h"
19 #include "unicode/uloc.h"
20 #include "unicode/ucnv.h"
21 #include "unicode/ucnv_err.h"
22 #include "unicode/ucnv_cb.h"
23 #include "cintltst.h"
24 #include "unicode/utypes.h"
25 #include "unicode/ustring.h"
26 #include "unicode/ucol.h"
27 #include "unicode/utf16.h"
28 #include "cmemory.h"
29 #include "nucnvtst.h"
30
31 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
32 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
33 #if !UCONFIG_NO_COLLATION
34 static void TestJitterbug981(void);
35 #endif
36 #if !UCONFIG_NO_LEGACY_CONVERSION
37 static void TestJitterbug1293(void);
38 #endif
39 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
40 static void TestConverterTypesAndStarters(void);
41 static void TestAmbiguous(void);
42 static void TestSignatureDetection(void);
43 static void TestUTF7(void);
44 static void TestIMAP(void);
45 static void TestUTF8(void);
46 static void TestCESU8(void);
47 static void TestUTF16(void);
48 static void TestUTF16BE(void);
49 static void TestUTF16LE(void);
50 static void TestUTF32(void);
51 static void TestUTF32BE(void);
52 static void TestUTF32LE(void);
53 static void TestLATIN1(void);
54
55 #if !UCONFIG_NO_LEGACY_CONVERSION
56 static void TestSBCS(void);
57 static void TestDBCS(void);
58 static void TestMBCS(void);
59 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
60 static void TestICCRunout(void);
61 #endif
62
63 #ifdef U_ENABLE_GENERIC_ISO_2022
64 static void TestISO_2022(void);
65 #endif
66
67 static void TestISO_2022_JP(void);
68 static void TestISO_2022_JP_1(void);
69 static void TestISO_2022_JP_2(void);
70 static void TestISO_2022_KR(void);
71 static void TestISO_2022_KR_1(void);
72 static void TestISO_2022_CN(void);
73 #if 0
74 /*
75 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
76 */
77 static void TestISO_2022_CN_EXT(void);
78 #endif
79 static void TestJIS(void);
80 static void TestHZ(void);
81 #endif
82
83 static void TestSCSU(void);
84
85 #if !UCONFIG_NO_LEGACY_CONVERSION
86 static void TestEBCDIC_STATEFUL(void);
87 static void TestGB18030(void);
88 static void TestLMBCS(void);
89 static void TestJitterbug255(void);
90 static void TestEBCDICUS4XML(void);
91 #if 0
92 /*
93 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
94 */
95 static void TestJitterbug915(void);
96 #endif
97 static void TestISCII(void);
98
99 static void TestCoverageMBCS(void);
100 static void TestJitterbug2346(void);
101 static void TestJitterbug2411(void);
102 static void TestJB5275(void);
103 static void TestJB5275_1(void);
104 static void TestJitterbug6175(void);
105
106 static void TestIsFixedWidth(void);
107 #endif
108
109 static void TestInBufSizes(void);
110
111 static void TestRoundTrippingAllUTF(void);
112 static void TestConv(const uint16_t in[],
113 int len,
114 const char* conv,
115 const char* lang,
116 char byteArr[],
117 int byteArrLen);
118
119 /* open a converter, using test data if it begins with '@' */
120 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
121
122
/* Capacity, in elements, of the scratch buffers used by the conversion helpers. */
#define NEW_MAX_BUFFER 999

/* Chunk sizes fed to the converters; both start at the full scratch-buffer capacity. */
static int32_t  gInBufferSize = NEW_MAX_BUFFER;
static int32_t  gOutBufferSize = NEW_MAX_BUFFER;
/* Label of the conversion currently under test, used in log messages. */
static char     gNuConvTestName[1024];

/*
 * Smaller of x and y.
 * Every argument use is parenthesized so that operands containing operators of
 * lower precedence than '<' (for example a & 1) are compared as a whole.
 * Note: the selected argument is evaluated twice, so avoid side effects.
 */
#define nct_min(x,y)  (((x)<(y)) ? (x) : (y))
130
my_ucnv_open(const char * cnv,UErrorCode * err)131 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
132 {
133 if(cnv && cnv[0] == '@') {
134 return ucnv_openPackage(loadTestData(err), cnv+1, err);
135 } else {
136 return ucnv_open(cnv, err);
137 }
138 }
139
/* Logs (verbose level) the first len bytes of a as "{0xNN 0xNN ... }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
148
/* Logs (verbose level) the first len code units of a as "{U+0xNNNN 0xNNNN ... }". */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
156
/* Writes the first len bytes of a to stderr as "{0xNN 0xNN ... }" plus a newline. */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
165
/* Writes the first len code units of a to stderr as "{U+0xNNNN 0xNNNN ... }" plus a newline. */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
174
175 static void
TestNextUChar(UConverter * cnv,const char * source,const char * limit,const int32_t results[],const char * message)176 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
177 {
178 const char* s0;
179 const char* s=(char*)source;
180 const int32_t *r=results;
181 UErrorCode errorCode=U_ZERO_ERROR;
182 UChar32 c;
183
184 while(s<limit) {
185 s0=s;
186 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
187 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
188 break; /* no more significant input */
189 } else if(U_FAILURE(errorCode)) {
190 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
191 break;
192 } else if(
193 /* test the expected number of input bytes only if >=0 */
194 (*r>=0 && (int32_t)(s-s0)!=*r) ||
195 c!=*(r+1)
196 ) {
197 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
198 message, c, (s-s0), *(r+1), *r);
199 break;
200 }
201 r+=2;
202 }
203 }
204
205 static void
TestNextUCharError(UConverter * cnv,const char * source,const char * limit,UErrorCode expected,const char * message)206 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
207 {
208 const char* s=(char*)source;
209 UErrorCode errorCode=U_ZERO_ERROR;
210 uint32_t c;
211 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
212 if(errorCode != expected){
213 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
214 }
215 if(c != 0xFFFD && c != 0xffff){
216 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
217 }
218
219 }
220
TestInBufSizes(void)221 static void TestInBufSizes(void)
222 {
223 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
224 #if 1
225 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
230 TestNewConvertWithBufferSizes(1,1);
231 TestNewConvertWithBufferSizes(2,3);
232 TestNewConvertWithBufferSizes(3,2);
233 #endif
234 }
235
TestOutBufSizes(void)236 static void TestOutBufSizes(void)
237 {
238 #if 1
239 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
240 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
241 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
242 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
243 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
244 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
245
246 #endif
247 }
248
249
/*
 * Registers the converter tests of this file with the test tree at root.
 * Each test is added under the path "tsconv/nucnvtst/<function name>"; which
 * tests are registered depends on the UCONFIG_* build switches below.
 */
void addTestNewConvert(TestNode** root)
{
/* Adds FN under "tsconv/nucnvtst/" followed by the function's own name. */
#define NUCNV_ADD_TEST(FN) addTest(root, &FN, "tsconv/nucnvtst/" #FN)

#if !UCONFIG_NO_FILE_IO
    NUCNV_ADD_TEST(TestInBufSizes);
    NUCNV_ADD_TEST(TestOutBufSizes);
#endif
    NUCNV_ADD_TEST(TestConverterTypesAndStarters);
    NUCNV_ADD_TEST(TestAmbiguous);
    NUCNV_ADD_TEST(TestSignatureDetection);
    NUCNV_ADD_TEST(TestUTF7);
    NUCNV_ADD_TEST(TestIMAP);
    NUCNV_ADD_TEST(TestUTF8);

    /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
    NUCNV_ADD_TEST(TestCESU8);
    NUCNV_ADD_TEST(TestUTF16);
    NUCNV_ADD_TEST(TestUTF16BE);
    NUCNV_ADD_TEST(TestUTF16LE);
    NUCNV_ADD_TEST(TestUTF32);
    NUCNV_ADD_TEST(TestUTF32BE);
    NUCNV_ADD_TEST(TestUTF32LE);

#if !UCONFIG_NO_LEGACY_CONVERSION
    NUCNV_ADD_TEST(TestLMBCS);
#endif

    NUCNV_ADD_TEST(TestLATIN1);

#if !UCONFIG_NO_LEGACY_CONVERSION
    NUCNV_ADD_TEST(TestSBCS);
#if !UCONFIG_NO_FILE_IO
    NUCNV_ADD_TEST(TestDBCS);
    NUCNV_ADD_TEST(TestICCRunout);
#endif
    NUCNV_ADD_TEST(TestMBCS);

#ifdef U_ENABLE_GENERIC_ISO_2022
    NUCNV_ADD_TEST(TestISO_2022);
#endif

    NUCNV_ADD_TEST(TestISO_2022_JP);
    NUCNV_ADD_TEST(TestJIS);
    NUCNV_ADD_TEST(TestISO_2022_JP_1);
    // android-changed (no have ISO_2022_JP_2) -- addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
    NUCNV_ADD_TEST(TestISO_2022_KR);
    NUCNV_ADD_TEST(TestISO_2022_KR_1);
    // android-changed (no ISO-2022-CN) -- addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
    /*
     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
    addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
    addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
     */
    NUCNV_ADD_TEST(TestHZ);
#endif

    NUCNV_ADD_TEST(TestSCSU);

#if !UCONFIG_NO_LEGACY_CONVERSION
    NUCNV_ADD_TEST(TestEBCDIC_STATEFUL);
    NUCNV_ADD_TEST(TestGB18030);
    NUCNV_ADD_TEST(TestJitterbug255);
    NUCNV_ADD_TEST(TestEBCDICUS4XML);
    NUCNV_ADD_TEST(TestISCII);
    NUCNV_ADD_TEST(TestJB5275);
    NUCNV_ADD_TEST(TestJB5275_1);
#if !UCONFIG_NO_COLLATION
    NUCNV_ADD_TEST(TestJitterbug981);
#endif

    NUCNV_ADD_TEST(TestJitterbug1293);
#endif


#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
    NUCNV_ADD_TEST(TestCoverageMBCS);
#endif

    NUCNV_ADD_TEST(TestRoundTrippingAllUTF);

#if !UCONFIG_NO_LEGACY_CONVERSION
    NUCNV_ADD_TEST(TestJitterbug2346);
    NUCNV_ADD_TEST(TestJitterbug2411);
    // android-removed (no full ISO2022 CJK tables) -- addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
    NUCNV_ADD_TEST(TestIsFixedWidth);
#endif

#undef NUCNV_ADD_TEST
}
336
337
338 /* Note that this test already makes use of statics, so it's not really
339 multithread safe.
340 This convenience function lets us make the error messages actually useful.
341 */
342
setNuConvTestName(const char * codepage,const char * direction)343 static void setNuConvTestName(const char *codepage, const char *direction)
344 {
345 sprintf(gNuConvTestName, "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
346 codepage,
347 direction,
348 (int)gInBufferSize,
349 (int)gOutBufferSize);
350 }
351
/* Result codes returned by testConvertFromU() and testConvertToU(). */
typedef enum
{
    /* 0: test was OK */
    TC_OK = 0,
    /* 1: Match failed - err was printed */
    TC_MISMATCH,
    /* 2: Test failed, don't print an err because it was already printed. */
    TC_FAIL
} ETestConvertResult;
358
359 /* Note: This function uses global variables and it will not do offset
360 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)361 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
362 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
363 {
364 UErrorCode status = U_ZERO_ERROR;
365 UConverter *conv = 0;
366 char junkout[NEW_MAX_BUFFER]; /* FIX */
367 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
368 char *p;
369 const UChar *src;
370 char *end;
371 char *targ;
372 int32_t *offs;
373 int i;
374 int32_t realBufferSize;
375 char *realBufferEnd;
376 const UChar *realSourceEnd;
377 const UChar *sourceLimit;
378 UBool checkOffsets = TRUE;
379 UBool doFlush;
380
381 for(i=0;i<NEW_MAX_BUFFER;i++)
382 junkout[i] = (char)0xF0;
383 for(i=0;i<NEW_MAX_BUFFER;i++)
384 junokout[i] = 0xFF;
385
386 setNuConvTestName(codepage, "FROM");
387
388 log_verbose("\n========= %s\n", gNuConvTestName);
389
390 conv = my_ucnv_open(codepage, &status);
391
392 if(U_FAILURE(status))
393 {
394 log_data_err("Couldn't open converter %s\n",codepage);
395 return TC_FAIL;
396 }
397 if(useFallback){
398 ucnv_setFallback(conv,useFallback);
399 }
400
401 log_verbose("Converter opened..\n");
402
403 src = source;
404 targ = junkout;
405 offs = junokout;
406
407 realBufferSize = UPRV_LENGTHOF(junkout);
408 realBufferEnd = junkout + realBufferSize;
409 realSourceEnd = source + sourceLen;
410
411 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
412 checkOffsets = FALSE;
413
414 do
415 {
416 end = nct_min(targ + gOutBufferSize, realBufferEnd);
417 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
418
419 doFlush = (UBool)(sourceLimit == realSourceEnd);
420
421 if(targ == realBufferEnd) {
422 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
423 return TC_FAIL;
424 }
425 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"TRUE":"FALSE");
426
427
428 status = U_ZERO_ERROR;
429
430 ucnv_fromUnicode (conv,
431 &targ,
432 end,
433 &src,
434 sourceLimit,
435 checkOffsets ? offs : NULL,
436 doFlush, /* flush if we're at the end of the input data */
437 &status);
438 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
439
440 if(U_FAILURE(status)) {
441 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
442 return TC_FAIL;
443 }
444
445 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
446 sourceLen, targ-junkout);
447
448 if(getTestOption(VERBOSITY_OPTION))
449 {
450 char junk[9999];
451 char offset_str[9999];
452 char *ptr;
453
454 junk[0] = 0;
455 offset_str[0] = 0;
456 for(ptr = junkout;ptr<targ;ptr++) {
457 sprintf(junk + strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
458 sprintf(offset_str + strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
459 }
460
461 log_verbose(junk);
462 printSeq((const uint8_t *)expect, expectLen);
463 if ( checkOffsets ) {
464 log_verbose("\nOffsets:");
465 log_verbose(offset_str);
466 }
467 log_verbose("\n");
468 }
469 ucnv_close(conv);
470
471 if(expectLen != targ-junkout) {
472 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
473 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474 fprintf(stderr, "Got:\n");
475 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
476 fprintf(stderr, "Expected:\n");
477 printSeqErr((const unsigned char*)expect, expectLen);
478 return TC_MISMATCH;
479 }
480
481 if (checkOffsets && (expectOffsets != 0) ) {
482 log_verbose("comparing %d offsets..\n", targ-junkout);
483 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
484 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
485 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
486 log_err("\n");
487 log_err("Got : ");
488 for(p=junkout;p<targ;p++) {
489 log_err("%d,", junokout[p-junkout]);
490 }
491 log_err("\n");
492 log_err("Expected: ");
493 for(i=0; i<(targ-junkout); i++) {
494 log_err("%d,", expectOffsets[i]);
495 }
496 log_err("\n");
497 }
498 }
499
500 log_verbose("comparing..\n");
501 if(!memcmp(junkout, expect, expectLen)) {
502 log_verbose("Matches!\n");
503 return TC_OK;
504 } else {
505 log_err("String does not match u->%s\n", gNuConvTestName);
506 printUSeqErr(source, sourceLen);
507 fprintf(stderr, "Got:\n");
508 printSeqErr((const unsigned char *)junkout, expectLen);
509 fprintf(stderr, "Expected:\n");
510 printSeqErr((const unsigned char *)expect, expectLen);
511
512 return TC_MISMATCH;
513 }
514 }
515
516 /* Note: This function uses global variables and it will not do offset
517 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertToU(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)518 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
519 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
520 {
521 UErrorCode status = U_ZERO_ERROR;
522 UConverter *conv = 0;
523 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
524 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
525 const char *src;
526 const char *realSourceEnd;
527 const char *srcLimit;
528 UChar *p;
529 UChar *targ;
530 UChar *end;
531 int32_t *offs;
532 int i;
533 UBool checkOffsets = TRUE;
534
535 int32_t realBufferSize;
536 UChar *realBufferEnd;
537
538
539 for(i=0;i<NEW_MAX_BUFFER;i++)
540 junkout[i] = 0xFFFE;
541
542 for(i=0;i<NEW_MAX_BUFFER;i++)
543 junokout[i] = -1;
544
545 setNuConvTestName(codepage, "TO");
546
547 log_verbose("\n========= %s\n", gNuConvTestName);
548
549 conv = my_ucnv_open(codepage, &status);
550
551 if(U_FAILURE(status))
552 {
553 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
554 return TC_FAIL;
555 }
556 if(useFallback){
557 ucnv_setFallback(conv,useFallback);
558 }
559 log_verbose("Converter opened..\n");
560
561 src = (const char *)source;
562 targ = junkout;
563 offs = junokout;
564
565 realBufferSize = UPRV_LENGTHOF(junkout);
566 realBufferEnd = junkout + realBufferSize;
567 realSourceEnd = src + sourcelen;
568
569 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
570 checkOffsets = FALSE;
571
572 do
573 {
574 end = nct_min( targ + gOutBufferSize, realBufferEnd);
575 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
576
577 if(targ == realBufferEnd)
578 {
579 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
580 return TC_FAIL;
581 }
582 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
583
584 /* oldTarg = targ; */
585
586 status = U_ZERO_ERROR;
587
588 ucnv_toUnicode (conv,
589 &targ,
590 end,
591 &src,
592 srcLimit,
593 checkOffsets ? offs : NULL,
594 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of hte source data */
595 &status);
596
597 /* offs += (targ-oldTarg); */
598
599 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
600
601 if(U_FAILURE(status))
602 {
603 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
604 return TC_FAIL;
605 }
606
607 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
608 sourcelen, targ-junkout);
609 if(getTestOption(VERBOSITY_OPTION))
610 {
611 char junk[9999];
612 char offset_str[9999];
613 UChar *ptr;
614
615 junk[0] = 0;
616 offset_str[0] = 0;
617
618 for(ptr = junkout;ptr<targ;ptr++)
619 {
620 sprintf(junk + strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
621 sprintf(offset_str + strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
622 }
623
624 log_verbose(junk);
625 printUSeq(expect, expectlen);
626 if ( checkOffsets )
627 {
628 log_verbose("\nOffsets:");
629 log_verbose(offset_str);
630 }
631 log_verbose("\n");
632 }
633 ucnv_close(conv);
634
635 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
636
637 if (checkOffsets && (expectOffsets != 0))
638 {
639 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
640 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
641 log_err("Got: ");
642 for(p=junkout;p<targ;p++) {
643 log_err("%d,", junokout[p-junkout]);
644 }
645 log_err("\n");
646 log_err("Expected: ");
647 for(i=0; i<(targ-junkout); i++) {
648 log_err("%d,", expectOffsets[i]);
649 }
650 log_err("\n");
651 log_err("output: ");
652 for(i=0; i<(targ-junkout); i++) {
653 log_err("%X,", junkout[i]);
654 }
655 log_err("\n");
656 log_err("input: ");
657 for(i=0; i<(src-(const char *)source); i++) {
658 log_err("%X,", (unsigned char)source[i]);
659 }
660 log_err("\n");
661 }
662 }
663
664 if(!memcmp(junkout, expect, expectlen*2))
665 {
666 log_verbose("Matches!\n");
667 return TC_OK;
668 }
669 else
670 {
671 log_err("String does not match. %s\n", gNuConvTestName);
672 log_verbose("String does not match. %s\n", gNuConvTestName);
673 printf("\nGot:");
674 printUSeqErr(junkout, expectlen);
675 printf("\nExpected:");
676 printUSeqErr(expect, expectlen);
677 return TC_MISMATCH;
678 }
679 }
680
681
TestNewConvertWithBufferSizes(int32_t outsize,int32_t insize)682 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
683 {
684 /** test chars #1 */
685 /* 1 2 3 1Han 2Han 3Han . */
686 static const UChar sampleText[] =
687 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
688 static const UChar sampleTextRoundTripUnmappable[] =
689 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
690
691
692 static const uint8_t expectedUTF8[] =
693 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
694 static const int32_t toUTF8Offs[] =
695 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
696 static const int32_t fmUTF8Offs[] =
697 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
698
699 #ifdef U_ENABLE_GENERIC_ISO_2022
700 /* Same as UTF8, but with ^[%B preceeding */
701 static const const uint8_t expectedISO2022[] =
702 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
703 static const int32_t toISO2022Offs[] =
704 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
705 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
706 static const int32_t fmISO2022Offs[] =
707 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
708 #endif
709
710 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
711 static const uint8_t expectedIBM930[] =
712 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
713 static const int32_t toIBM930Offs[] =
714 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
715 static const int32_t fmIBM930Offs[] =
716 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
717
718 /* 1 2 3 0 h1 h2 h3 . MBCS*/
719 static const uint8_t expectedIBM943[] =
720 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
721 static const int32_t toIBM943Offs [] =
722 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
723 static const int32_t fmIBM943Offs[] =
724 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
725
726 /* 1 2 3 0 h1 h2 h3 . DBCS*/
727 static const uint8_t expectedIBM9027[] =
728 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
729 static const int32_t toIBM9027Offs [] =
730 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
731
732 /* 1 2 3 0 <?> <?> <?> . SBCS*/
733 static const uint8_t expectedIBM920[] =
734 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
735 static const int32_t toIBM920Offs [] =
736 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
737
738 /* 1 2 3 0 <?> <?> <?> . SBCS*/
739 static const uint8_t expectedISO88593[] =
740 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
741 static const int32_t toISO88593Offs[] =
742 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
743
744 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
745 static const uint8_t expectedLATIN1[] =
746 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
747 static const int32_t toLATIN1Offs[] =
748 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
749
750
751 /* etc */
752 static const uint8_t expectedUTF16BE[] =
753 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
754 static const int32_t toUTF16BEOffs[]=
755 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
756 static const int32_t fmUTF16BEOffs[] =
757 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
758
759 static const uint8_t expectedUTF16LE[] =
760 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
761 static const int32_t toUTF16LEOffs[]=
762 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
763 static const int32_t fmUTF16LEOffs[] =
764 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
765
766 static const uint8_t expectedUTF32BE[] =
767 { 0x00, 0x00, 0x00, 0x31,
768 0x00, 0x00, 0x00, 0x32,
769 0x00, 0x00, 0x00, 0x33,
770 0x00, 0x00, 0x00, 0x00,
771 0x00, 0x00, 0x4e, 0x00,
772 0x00, 0x00, 0x4e, 0x8c,
773 0x00, 0x00, 0x4e, 0x09,
774 0x00, 0x00, 0x00, 0x2e,
775 0x00, 0x02, 0x00, 0x21 };
776 static const int32_t toUTF32BEOffs[]=
777 { 0x00, 0x00, 0x00, 0x00,
778 0x01, 0x01, 0x01, 0x01,
779 0x02, 0x02, 0x02, 0x02,
780 0x03, 0x03, 0x03, 0x03,
781 0x04, 0x04, 0x04, 0x04,
782 0x05, 0x05, 0x05, 0x05,
783 0x06, 0x06, 0x06, 0x06,
784 0x07, 0x07, 0x07, 0x07,
785 0x08, 0x08, 0x08, 0x08,
786 0x08, 0x08, 0x08, 0x08 };
787 static const int32_t fmUTF32BEOffs[] =
788 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
789
790 static const uint8_t expectedUTF32LE[] =
791 { 0x31, 0x00, 0x00, 0x00,
792 0x32, 0x00, 0x00, 0x00,
793 0x33, 0x00, 0x00, 0x00,
794 0x00, 0x00, 0x00, 0x00,
795 0x00, 0x4e, 0x00, 0x00,
796 0x8c, 0x4e, 0x00, 0x00,
797 0x09, 0x4e, 0x00, 0x00,
798 0x2e, 0x00, 0x00, 0x00,
799 0x21, 0x00, 0x02, 0x00 };
800 static const int32_t toUTF32LEOffs[]=
801 { 0x00, 0x00, 0x00, 0x00,
802 0x01, 0x01, 0x01, 0x01,
803 0x02, 0x02, 0x02, 0x02,
804 0x03, 0x03, 0x03, 0x03,
805 0x04, 0x04, 0x04, 0x04,
806 0x05, 0x05, 0x05, 0x05,
807 0x06, 0x06, 0x06, 0x06,
808 0x07, 0x07, 0x07, 0x07,
809 0x08, 0x08, 0x08, 0x08,
810 0x08, 0x08, 0x08, 0x08 };
811 static const int32_t fmUTF32LEOffs[] =
812 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
813
814
815
816
817 /** Test chars #2 **/
818
819 /* Sahha [health], slashed h's */
820 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
821 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
822
823 /* LMBCS */
824 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
825 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
826 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
827 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
828 /*********************************** START OF CODE finally *************/
829
830 gInBufferSize = insize;
831 gOutBufferSize = outsize;
832
833 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
834
835
836 /*UTF-8*/
837 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
838 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,FALSE );
839
840 log_verbose("Test surrogate behaviour for UTF8\n");
841 {
842 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
843 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
844 0xf0, 0x90, 0x90, 0x81,
845 0xef, 0xbf, 0xbd
846 };
847 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
848 testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
849 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,FALSE );
850
851
852 }
853
854 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
855 /*ISO-2022*/
856 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
857 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,FALSE );
858 #endif
859
860 /*UTF16 LE*/
861 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
862 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,FALSE );
863 /*UTF16 BE*/
864 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
865 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,FALSE );
866 /*UTF32 LE*/
867 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
868 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,FALSE );
869 /*UTF32 BE*/
870 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
871 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,FALSE );
872
873 /*LATIN_1*/
874 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
875 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,FALSE );
876
877 #if !UCONFIG_NO_LEGACY_CONVERSION
878 /*EBCDIC_STATEFUL*/
879 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
880 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,FALSE );
881
882 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
883 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
884
885 /*MBCS*/
886
887 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
888 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,FALSE );
889 /*DBCS*/
890 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
891 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,FALSE );
892 /*SBCS*/
893 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
894 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,FALSE );
895 /*SBCS*/
896 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
897 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,FALSE );
898 #endif
899
900
901 /****/
902
903 /*UTF-8*/
904 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
905 sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,FALSE);
906 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
907 /*ISO-2022*/
908 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
909 sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,FALSE);
910 #endif
911
912 /*UTF16 LE*/
913 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
914 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
915 /*UTF16 BE*/
916 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
917 sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,FALSE);
918 /*UTF32 LE*/
919 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
920 sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,FALSE);
921 /*UTF32 BE*/
922 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
923 sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,FALSE);
924
925 #if !UCONFIG_NO_LEGACY_CONVERSION
926 /*EBCDIC_STATEFUL*/
927 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
928 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,FALSE);
929 /*MBCS*/
930 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
931 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,FALSE);
932 #endif
933
934 /* Try it again to make sure it still works */
935 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
936 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,FALSE);
937
938 #if !UCONFIG_NO_LEGACY_CONVERSION
939 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
940 malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,FALSE);
941
942 testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
943 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,FALSE );
944
945 /*LMBCS*/
946 testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
947 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,FALSE );
948 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
949 LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,FALSE);
950 #endif
951
952 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
953 {
954 /* encode directly set D and set O */
955 static const uint8_t utf7[] = {
956 /*
957 Hi Mom -+Jjo--!
958 A+ImIDkQ.
959 +-
960 +ZeVnLIqe-
961 */
962 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
963 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
964 0x2b, 0x2d,
965 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
966 };
967 static const UChar unicode[] = {
968 /*
969 Hi Mom -<WHITE SMILING FACE>-!
970 A<NOT IDENTICAL TO><ALPHA>.
971 +
972 [Japanese word "nihongo"]
973 */
974 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
975 0x41, 0x2262, 0x0391, 0x2e,
976 0x2b,
977 0x65e5, 0x672c, 0x8a9e
978 };
979 static const int32_t toUnicodeOffsets[] = {
980 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
981 15, 17, 19, 23,
982 24,
983 27, 29, 32
984 };
985 static const int32_t fromUnicodeOffsets[] = {
986 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
987 11, 12, 12, 12, 13, 13, 13, 13, 14,
988 15, 15,
989 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
990 };
991
992 /* same but escaping set O (the exclamation mark) */
993 static const uint8_t utf7Restricted[] = {
994 /*
995 Hi Mom -+Jjo--+ACE-
996 A+ImIDkQ.
997 +-
998 +ZeVnLIqe-
999 */
1000 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1001 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1002 0x2b, 0x2d,
1003 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1004 };
1005 static const int32_t toUnicodeOffsetsR[] = {
1006 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1007 19, 21, 23, 27,
1008 28,
1009 31, 33, 36
1010 };
1011 static const int32_t fromUnicodeOffsetsR[] = {
1012 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1013 11, 12, 12, 12, 13, 13, 13, 13, 14,
1014 15, 15,
1015 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1016 };
1017
1018 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,FALSE);
1019
1020 testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,FALSE);
1021
1022 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,FALSE);
1023
1024 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,FALSE);
1025 }
1026
1027 /*
1028 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1029 * modified according to RFC 2060,
1030 * and supplemented with the one example in RFC 2060 itself.
1031 */
1032 {
1033 static const uint8_t imap[] = {
1034 /* Hi Mom -&Jjo--!
1035 A&ImIDkQ-.
1036 &-
1037 &ZeVnLIqe-
1038 \
1039 ~peter
1040 /mail
1041 /&ZeVnLIqe-
1042 /&U,BTFw-
1043 */
1044 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1045 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1046 0x26, 0x2d,
1047 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1048 0x5c,
1049 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1050 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1051 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1052 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1053 };
1054 static const UChar unicode[] = {
1055 /* Hi Mom -<WHITE SMILING FACE>-!
1056 A<NOT IDENTICAL TO><ALPHA>.
1057 &
1058 [Japanese word "nihongo"]
1059 \
1060 ~peter
1061 /mail
1062 /<65e5, 672c, 8a9e>
1063 /<53f0, 5317>
1064 */
1065 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1066 0x41, 0x2262, 0x0391, 0x2e,
1067 0x26,
1068 0x65e5, 0x672c, 0x8a9e,
1069 0x5c,
1070 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1071 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1072 0x2f, 0x65e5, 0x672c, 0x8a9e,
1073 0x2f, 0x53f0, 0x5317
1074 };
1075 static const int32_t toUnicodeOffsets[] = {
1076 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1077 15, 17, 19, 24,
1078 25,
1079 28, 30, 33,
1080 37,
1081 38, 39, 40, 41, 42, 43,
1082 44, 45, 46, 47, 48,
1083 49, 51, 53, 56,
1084 60, 62, 64
1085 };
1086 static const int32_t fromUnicodeOffsets[] = {
1087 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1088 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1089 15, 15,
1090 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1091 19,
1092 20, 21, 22, 23, 24, 25,
1093 26, 27, 28, 29, 30,
1094 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1095 35, 36, 36, 36, 37, 37, 37, 37, 37
1096 };
1097
1098 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,FALSE);
1099
1100 testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,FALSE);
1101 }
1102
1103 /* Test UTF-8 bad data handling*/
1104 {
1105 static const uint8_t utf8[]={
1106 0x61,
1107 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1108 0x00,
1109 0x62,
1110 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1112 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1113 0xdf, 0xbf, /* 7ff */
1114 0xbf, /* truncated tail */
1115 0xf4, 0x90, 0x80, 0x80, /* 110000 */
1116 0x02
1117 };
1118
1119 static const uint16_t utf8Expected[]={
1120 0x0061,
1121 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1122 0x0000,
1123 0x0062,
1124 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1125 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126 0xdbff, 0xdfff,
1127 0x07ff,
1128 0xfffd,
1129 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1130 0x0002
1131 };
1132
1133 static const int32_t utf8Offsets[]={
1134 0,
1135 1, 2, 3, 4,
1136 5,
1137 6,
1138 7, 8, 9, 10, 11,
1139 12, 13, 14, 15, 16,
1140 17, 17,
1141 21,
1142 23,
1143 24, 25, 26, 27,
1144 28
1145 };
1146 testConvertToU(utf8, sizeof(utf8),
1147 utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,FALSE);
1148
1149 }
1150
1151 /* Test UTF-32BE bad data handling*/
1152 {
1153 static const uint8_t utf32[]={
1154 0x00, 0x00, 0x00, 0x61,
1155 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1156 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1157 0x00, 0x00, 0x00, 0x62,
1158 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1159 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1160 0x00, 0x00, 0x01, 0x62,
1161 0x00, 0x00, 0x02, 0x62
1162 };
1163 static const uint16_t utf32Expected[]={
1164 0x0061,
1165 0xfffd, /* 0x110000 out of range */
1166 0xDBFF, /* 0x10FFFF in range */
1167 0xDFFF,
1168 0x0062,
1169 0xfffd, /* 0xffffffff out of range */
1170 0xfffd, /* 0x7fffffff out of range */
1171 0x0162,
1172 0x0262
1173 };
1174 static const int32_t utf32Offsets[]={
1175 0, 4, 8, 8, 12, 16, 20, 24, 28
1176 };
1177 static const uint8_t utf32ExpectedBack[]={
1178 0x00, 0x00, 0x00, 0x61,
1179 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1180 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1181 0x00, 0x00, 0x00, 0x62,
1182 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1183 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1184 0x00, 0x00, 0x01, 0x62,
1185 0x00, 0x00, 0x02, 0x62
1186 };
1187 static const int32_t utf32OffsetsBack[]={
1188 0,0,0,0,
1189 1,1,1,1,
1190 2,2,2,2,
1191 4,4,4,4,
1192 5,5,5,5,
1193 6,6,6,6,
1194 7,7,7,7,
1195 8,8,8,8
1196 };
1197
1198 testConvertToU(utf32, sizeof(utf32),
1199 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,FALSE);
1200 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1201 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, FALSE);
1202 }
1203
1204 /* Test UTF-32LE bad data handling*/
1205 {
1206 static const uint8_t utf32[]={
1207 0x61, 0x00, 0x00, 0x00,
1208 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1209 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1210 0x62, 0x00, 0x00, 0x00,
1211 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1212 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1213 0x62, 0x01, 0x00, 0x00,
1214 0x62, 0x02, 0x00, 0x00,
1215 };
1216
1217 static const uint16_t utf32Expected[]={
1218 0x0061,
1219 0xfffd, /* 0x110000 out of range */
1220 0xDBFF, /* 0x10FFFF in range */
1221 0xDFFF,
1222 0x0062,
1223 0xfffd, /* 0xffffffff out of range */
1224 0xfffd, /* 0x7fffffff out of range */
1225 0x0162,
1226 0x0262
1227 };
1228 static const int32_t utf32Offsets[]={
1229 0, 4, 8, 8, 12, 16, 20, 24, 28
1230 };
1231 static const uint8_t utf32ExpectedBack[]={
1232 0x61, 0x00, 0x00, 0x00,
1233 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1234 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1235 0x62, 0x00, 0x00, 0x00,
1236 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1237 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1238 0x62, 0x01, 0x00, 0x00,
1239 0x62, 0x02, 0x00, 0x00
1240 };
1241 static const int32_t utf32OffsetsBack[]={
1242 0,0,0,0,
1243 1,1,1,1,
1244 2,2,2,2,
1245 4,4,4,4,
1246 5,5,5,5,
1247 6,6,6,6,
1248 7,7,7,7,
1249 8,8,8,8
1250 };
1251 testConvertToU(utf32, sizeof(utf32),
1252 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,FALSE );
1253 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1254 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, FALSE);
1255 }
1256 }
1257
TestCoverageMBCS()1258 static void TestCoverageMBCS(){
1259 #if 0
1260 UErrorCode status = U_ZERO_ERROR;
1261 const char *directory = loadTestData(&status);
1262 char* tdpath = NULL;
1263 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1264 int len = strlen(directory);
1265 char* index=NULL;
1266
1267 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1268 uprv_strcpy(saveDirectory,u_getDataDirectory());
1269 log_verbose("Retrieved data directory %s \n",saveDirectory);
1270 uprv_strcpy(tdpath,directory);
1271 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1272
1273 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1274 *(index+1)=0;
1275 }
1276 u_setDataDirectory(tdpath);
1277 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1278 #endif
1279
1280 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1281 which is test file for MBCS conversion with single-byte codepage data.*/
1282 {
1283
1284 /* MBCS with single byte codepage data test1.ucm*/
1285 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1286 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1287 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1288
1289 /*from Unicode*/
1290 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1291 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,FALSE );
1292 }
1293
1294 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1295 which is test file for MBCS conversion with three-byte codepage data.*/
1296 {
1297
1298 /* MBCS with three byte codepage data test3.ucm*/
1299 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1300 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1301 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1302
1303 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1304 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1305 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1306
1307 /*from Unicode*/
1308 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1309 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,FALSE );
1310
1311 /*to Unicode*/
1312 testConvertToU(test3input, sizeof(test3input),
1313 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,FALSE);
1314
1315 }
1316
1317 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1318 which is test file for MBCS conversion with four-byte codepage data.*/
1319 {
1320
1321 /* MBCS with three byte codepage data test4.ucm*/
1322 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1323 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1324 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1325
1326 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1327 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1328 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1329
1330 /*from Unicode*/
1331 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1332 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,FALSE );
1333
1334 /*to Unicode*/
1335 testConvertToU(test4input, sizeof(test4input),
1336 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,FALSE );
1337
1338 }
1339 #if 0
1340 free(tdpath);
1341 /* restore the original data directory */
1342 log_verbose("Setting the data directory to %s \n", saveDirectory);
1343 u_setDataDirectory(saveDirectory);
1344 free(saveDirectory);
1345 #endif
1346
1347 }
1348
/*
 * Opens the converter named convName with my_ucnv_open() and checks that
 * ucnv_getType() reports convType for it.
 *
 * convName: converter name handed to my_ucnv_open().
 * convType: the UConverterType the converter is expected to report.
 *
 * If the converter cannot be opened, a data error is logged and the type
 * check is skipped. On a type mismatch the value actually returned by
 * ucnv_getType() is logged.
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UErrorCode err = U_ZERO_ERROR;
    UConverter *myConverter = my_ucnv_open(convName, &err);
    UConverterType actualType;

    if (U_FAILURE(err)) {
        log_data_err("Failed to create an %s converter\n", convName);
        return;
    }

    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        /* The message says "Got", so print what the converter returned,
           not the value the caller expected. */
        log_err("ucnv_getType Failed for %s. Got enum value 0x%X\n",
            convName, (unsigned int)actualType);
    }
    else {
        log_verbose("ucnv_getType %s ok\n", convName);
    }
    ucnv_close(myConverter);
}
1371
TestConverterTypesAndStarters()1372 static void TestConverterTypesAndStarters()
1373 {
1374 #if !UCONFIG_NO_LEGACY_CONVERSION
1375 UConverter* myConverter;
1376 UErrorCode err = U_ZERO_ERROR;
1377 UBool mystarters[256];
1378
1379 /* const UBool expectedKSCstarters[256] = {
1380 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1381 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1382 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1383 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1384 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1385 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1386 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1387 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1388 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1389 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1390 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1391 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1392 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1393 FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
1394 FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1395 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1396 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1397 TRUE, TRUE, TRUE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE,
1398 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1399 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1400 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1401 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1402 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1403 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1404 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
1405 TRUE, TRUE, TRUE, TRUE, TRUE, TRUE};*/
1406
1407
1408 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1409
1410 myConverter = ucnv_open("ksc", &err);
1411 if (U_FAILURE(err)) {
1412 log_data_err("Failed to create an ibm-ksc converter\n");
1413 return;
1414 }
1415 else
1416 {
1417 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1418 log_err("ucnv_getType Failed for ibm-949\n");
1419 else
1420 log_verbose("ucnv_getType ibm-949 ok\n");
1421
1422 if(myConverter!=NULL)
1423 ucnv_getStarters(myConverter, mystarters, &err);
1424
1425 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1426 log_err("Failed ucnv_getStarters for ksc\n");
1427 else
1428 log_verbose("ucnv_getStarters ok\n");*/
1429
1430 }
1431 ucnv_close(myConverter);
1432
1433 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1434 TestConverterType("ibm-878", UCNV_SBCS);
1435 #endif
1436
1437 TestConverterType("iso-8859-1", UCNV_LATIN_1);
1438
1439 TestConverterType("ibm-1208", UCNV_UTF8);
1440
1441 TestConverterType("utf-8", UCNV_UTF8);
1442 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1443 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1444 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1445 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1446
1447 #if !UCONFIG_NO_LEGACY_CONVERSION
1448
1449 #if defined(U_ENABLE_GENERIC_ISO_2022)
1450 TestConverterType("iso-2022", UCNV_ISO_2022);
1451 #endif
1452
1453 TestConverterType("hz", UCNV_HZ);
1454 #endif
1455
1456 TestConverterType("scsu", UCNV_SCSU);
1457
1458 #if !UCONFIG_NO_LEGACY_CONVERSION
1459 TestConverterType("x-iscii-de", UCNV_ISCII);
1460 #endif
1461
1462 TestConverterType("ascii", UCNV_US_ASCII);
1463 TestConverterType("utf-7", UCNV_UTF7);
1464 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1465 TestConverterType("bocu-1", UCNV_BOCU1);
1466 }
1467
1468 static void
TestAmbiguousConverter(UConverter * cnv)1469 TestAmbiguousConverter(UConverter *cnv) {
1470 static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1471 UChar outUnicode[20]={ 0, 0, 0, 0 };
1472
1473 const char *s;
1474 UChar *u;
1475 UErrorCode errorCode;
1476 UBool isAmbiguous;
1477
1478 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1479 errorCode=U_ZERO_ERROR;
1480 s=inBytes;
1481 u=outUnicode;
1482 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, TRUE, &errorCode);
1483 if(U_FAILURE(errorCode)) {
1484 /* we do not care about general failures in this test; the input may just not be mappable */
1485 return;
1486 }
1487
1488 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1489 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1490 /* There are some encodings that are partially ASCII based,
1491 like the ISO-7 and GSM series of codepages, which we ignore. */
1492 return;
1493 }
1494
1495 isAmbiguous=ucnv_isAmbiguous(cnv);
1496
1497 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1498 if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1499 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1500 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1501 return;
1502 }
1503
1504 if(outUnicode[2]!=0x5c) {
1505 /* needs fixup, fix it */
1506 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1507 if(outUnicode[2]!=0x5c) {
1508 /* the fix failed */
1509 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1510 return;
1511 }
1512 }
1513 }
1514
TestAmbiguous()1515 static void TestAmbiguous()
1516 {
1517 UErrorCode status = U_ZERO_ERROR;
1518 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1519 static const char target[] = {
1520 /* "\\usr\\local\\share\\data\\icutest.txt" */
1521 0x5c, 0x75, 0x73, 0x72,
1522 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1523 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1524 0x5c, 0x64, 0x61, 0x74, 0x61,
1525 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1526 0
1527 };
1528 UChar asciiResult[200], sjisResult[200];
1529 int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1530 const char *name;
1531
1532 /* enumerate all converters */
1533 status=U_ZERO_ERROR;
1534 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1535 cnv=ucnv_open(name, &status);
1536 if(U_SUCCESS(status)) {
1537 /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. */
1538 const char* cnvName = ucnv_getName(cnv, &status);
1539 if (strlen(cnvName) < 8 ||
1540 strncmp(cnvName, "ISO_2022_CN", 8) != 0) {
1541 TestAmbiguousConverter(cnv);
1542 }
1543 /* END android-changed */
1544 ucnv_close(cnv);
1545 } else {
1546 log_err("error: unable to open available converter \"%s\"\n", name);
1547 status=U_ZERO_ERROR;
1548 }
1549 }
1550
1551 #if !UCONFIG_NO_LEGACY_CONVERSION
1552 sjis_cnv = ucnv_open("ibm-943", &status);
1553 if (U_FAILURE(status))
1554 {
1555 log_data_err("Failed to create a SJIS converter\n");
1556 return;
1557 }
1558 ascii_cnv = ucnv_open("LATIN-1", &status);
1559 if (U_FAILURE(status))
1560 {
1561 log_data_err("Failed to create a LATIN-1 converter\n");
1562 ucnv_close(sjis_cnv);
1563 return;
1564 }
1565 /* convert target from SJIS to Unicode */
1566 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1567 if (U_FAILURE(status))
1568 {
1569 log_err("Failed to convert the SJIS string.\n");
1570 ucnv_close(sjis_cnv);
1571 ucnv_close(ascii_cnv);
1572 return;
1573 }
1574 /* convert target from Latin-1 to Unicode */
1575 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1576 if (U_FAILURE(status))
1577 {
1578 log_err("Failed to convert the Latin-1 string.\n");
1579 ucnv_close(sjis_cnv);
1580 ucnv_close(ascii_cnv);
1581 return;
1582 }
1583 if (!ucnv_isAmbiguous(sjis_cnv))
1584 {
1585 log_err("SJIS converter should contain ambiguous character mappings.\n");
1586 ucnv_close(sjis_cnv);
1587 ucnv_close(ascii_cnv);
1588 return;
1589 }
1590 if (u_strcmp(sjisResult, asciiResult) == 0)
1591 {
1592 log_err("File separators for SJIS don't need to be fixed.\n");
1593 }
1594 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1595 if (u_strcmp(sjisResult, asciiResult) != 0)
1596 {
1597 log_err("Fixing file separator for SJIS failed.\n");
1598 }
1599 ucnv_close(sjis_cnv);
1600 ucnv_close(ascii_cnv);
1601 #endif
1602 }
1603
1604 static void
TestSignatureDetection()1605 TestSignatureDetection(){
1606 /* with null terminated strings */
1607 {
1608 static const char* data[] = {
1609 "\xFE\xFF\x00\x00", /* UTF-16BE */
1610 "\xFF\xFE\x00\x00", /* UTF-16LE */
1611 "\xEF\xBB\xBF\x00", /* UTF-8 */
1612 "\x0E\xFE\xFF\x00", /* SCSU */
1613
1614 "\xFE\xFF", /* UTF-16BE */
1615 "\xFF\xFE", /* UTF-16LE */
1616 "\xEF\xBB\xBF", /* UTF-8 */
1617 "\x0E\xFE\xFF", /* SCSU */
1618
1619 "\xFE\xFF\x41\x42", /* UTF-16BE */
1620 "\xFF\xFE\x41\x41", /* UTF-16LE */
1621 "\xEF\xBB\xBF\x41", /* UTF-8 */
1622 "\x0E\xFE\xFF\x41", /* SCSU */
1623
1624 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1625 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1626 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1627 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1628 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1629
1630 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1631 };
1632 static const char* expected[] = {
1633 "UTF-16BE",
1634 "UTF-16LE",
1635 "UTF-8",
1636 "SCSU",
1637
1638 "UTF-16BE",
1639 "UTF-16LE",
1640 "UTF-8",
1641 "SCSU",
1642
1643 "UTF-16BE",
1644 "UTF-16LE",
1645 "UTF-8",
1646 "SCSU",
1647
1648 "UTF-7",
1649 "UTF-7",
1650 "UTF-7",
1651 "UTF-7",
1652 "UTF-7",
1653 "UTF-EBCDIC"
1654 };
1655 static const int32_t expectedLength[] ={
1656 2,
1657 2,
1658 3,
1659 3,
1660
1661 2,
1662 2,
1663 3,
1664 3,
1665
1666 2,
1667 2,
1668 3,
1669 3,
1670
1671 5,
1672 4,
1673 4,
1674 4,
1675 4,
1676 4
1677 };
1678 int i=0;
1679 UErrorCode err;
1680 int32_t signatureLength = -1;
1681 const char* source = NULL;
1682 const char* enc = NULL;
1683 for( ; i<UPRV_LENGTHOF(data); i++){
1684 err = U_ZERO_ERROR;
1685 source = data[i];
1686 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1687 if(U_FAILURE(err)){
1688 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1689 continue;
1690 }
1691 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1692 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1693 continue;
1694 }
1695 if(signatureLength != expectedLength[i]){
1696 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1697 }
1698 }
1699 }
1700 {
1701 static const char* data[] = {
1702 "\xFE\xFF\x00", /* UTF-16BE */
1703 "\xFF\xFE\x00", /* UTF-16LE */
1704 "\xEF\xBB\xBF\x00", /* UTF-8 */
1705 "\x0E\xFE\xFF\x00", /* SCSU */
1706 "\x00\x00\xFE\xFF", /* UTF-32BE */
1707 "\xFF\xFE\x00\x00", /* UTF-32LE */
1708 "\xFE\xFF", /* UTF-16BE */
1709 "\xFF\xFE", /* UTF-16LE */
1710 "\xEF\xBB\xBF", /* UTF-8 */
1711 "\x0E\xFE\xFF", /* SCSU */
1712 "\x00\x00\xFE\xFF", /* UTF-32BE */
1713 "\xFF\xFE\x00\x00", /* UTF-32LE */
1714 "\xFE\xFF\x41\x42", /* UTF-16BE */
1715 "\xFF\xFE\x41\x41", /* UTF-16LE */
1716 "\xEF\xBB\xBF\x41", /* UTF-8 */
1717 "\x0E\xFE\xFF\x41", /* SCSU */
1718 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1719 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1720 "\xFB\xEE\x28", /* BOCU-1 */
1721 "\xFF\x41\x42" /* NULL */
1722 };
1723 static const int len[] = {
1724 3,
1725 3,
1726 4,
1727 4,
1728 4,
1729 4,
1730 2,
1731 2,
1732 3,
1733 3,
1734 4,
1735 4,
1736 4,
1737 4,
1738 4,
1739 4,
1740 5,
1741 5,
1742 3,
1743 3
1744 };
1745
1746 static const char* expected[] = {
1747 "UTF-16BE",
1748 "UTF-16LE",
1749 "UTF-8",
1750 "SCSU",
1751 "UTF-32BE",
1752 "UTF-32LE",
1753 "UTF-16BE",
1754 "UTF-16LE",
1755 "UTF-8",
1756 "SCSU",
1757 "UTF-32BE",
1758 "UTF-32LE",
1759 "UTF-16BE",
1760 "UTF-16LE",
1761 "UTF-8",
1762 "SCSU",
1763 "UTF-32BE",
1764 "UTF-32LE",
1765 "BOCU-1",
1766 NULL
1767 };
1768 static const int32_t expectedLength[] ={
1769 2,
1770 2,
1771 3,
1772 3,
1773 4,
1774 4,
1775 2,
1776 2,
1777 3,
1778 3,
1779 4,
1780 4,
1781 2,
1782 2,
1783 3,
1784 3,
1785 4,
1786 4,
1787 3,
1788 0
1789 };
1790 int i=0;
1791 UErrorCode err;
1792 int32_t signatureLength = -1;
1793 int32_t sourceLength=-1;
1794 const char* source = NULL;
1795 const char* enc = NULL;
1796 for( ; i<UPRV_LENGTHOF(data); i++){
1797 err = U_ZERO_ERROR;
1798 source = data[i];
1799 sourceLength = len[i];
1800 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1801 if(U_FAILURE(err)){
1802 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1803 continue;
1804 }
1805 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1806 if(expected[i] !=NULL){
1807 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1808 continue;
1809 }
1810 }
1811 if(signatureLength != expectedLength[i]){
1812 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1813 }
1814 }
1815 }
1816 }
1817
TestUTF7()1818 static void TestUTF7() {
1819 /* test input */
1820 static const uint8_t in[]={
1821 /* H - +Jjo- - ! +- +2AHcAQ */
1822 0x48,
1823 0x2d,
1824 0x2b, 0x4a, 0x6a, 0x6f,
1825 0x2d, 0x2d,
1826 0x21,
1827 0x2b, 0x2d,
1828 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1829 };
1830
1831 /* expected test results */
1832 static const int32_t results[]={
1833 /* number of bytes read, code point */
1834 1, 0x48,
1835 1, 0x2d,
1836 4, 0x263a, /* <WHITE SMILING FACE> */
1837 2, 0x2d,
1838 1, 0x21,
1839 2, 0x2b,
1840 7, 0x10401
1841 };
1842
1843 const char *cnvName;
1844 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1845 UErrorCode errorCode=U_ZERO_ERROR;
1846 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1847 if(U_FAILURE(errorCode)) {
1848 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1849 return;
1850 }
1851 TestNextUChar(cnv, source, limit, results, "UTF-7");
1852 /* Test the condition when source >= sourceLimit */
1853 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1854 cnvName = ucnv_getName(cnv, &errorCode);
1855 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1856 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1857 }
1858 ucnv_close(cnv);
1859 }
1860
TestIMAP()1861 static void TestIMAP() {
1862 /* test input */
1863 static const uint8_t in[]={
1864 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1865 0x48,
1866 0x2d,
1867 0x26, 0x4a, 0x6a, 0x6f,
1868 0x2d, 0x2d,
1869 0x21,
1870 0x26, 0x2d,
1871 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1872 };
1873
1874 /* expected test results */
1875 static const int32_t results[]={
1876 /* number of bytes read, code point */
1877 1, 0x48,
1878 1, 0x2d,
1879 4, 0x263a, /* <WHITE SMILING FACE> */
1880 2, 0x2d,
1881 1, 0x21,
1882 2, 0x26,
1883 7, 0x10401
1884 };
1885
1886 const char *cnvName;
1887 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1888 UErrorCode errorCode=U_ZERO_ERROR;
1889 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1890 if(U_FAILURE(errorCode)) {
1891 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1892 return;
1893 }
1894 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1895 /* Test the condition when source >= sourceLimit */
1896 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1897 cnvName = ucnv_getName(cnv, &errorCode);
1898 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1899 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1900 }
1901 ucnv_close(cnv);
1902 }
1903
TestUTF8()1904 static void TestUTF8() {
1905 /* test input */
1906 static const uint8_t in[]={
1907 0x61,
1908 0xc2, 0x80,
1909 0xe0, 0xa0, 0x80,
1910 0xf0, 0x90, 0x80, 0x80,
1911 0xf4, 0x84, 0x8c, 0xa1,
1912 0xf0, 0x90, 0x90, 0x81
1913 };
1914
1915 /* expected test results */
1916 static const int32_t results[]={
1917 /* number of bytes read, code point */
1918 1, 0x61,
1919 2, 0x80,
1920 3, 0x800,
1921 4, 0x10000,
1922 4, 0x104321,
1923 4, 0x10401
1924 };
1925
1926 /* error test input */
1927 static const uint8_t in2[]={
1928 0x61,
1929 0xc0, 0x80, /* illegal non-shortest form */
1930 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1931 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1932 0xc0, 0xc0, /* illegal trail byte */
1933 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1934 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1935 0xfe, /* illegal byte altogether */
1936 0x62
1937 };
1938
1939 /* expected error test results */
1940 static const int32_t results2[]={
1941 /* number of bytes read, code point */
1942 1, 0x61,
1943 22, 0x62
1944 };
1945
1946 UConverterToUCallback cb;
1947 const void *p;
1948
1949 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1950 UErrorCode errorCode=U_ZERO_ERROR;
1951 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1952 if(U_FAILURE(errorCode)) {
1953 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1954 return;
1955 }
1956 TestNextUChar(cnv, source, limit, results, "UTF-8");
1957 /* Test the condition when source >= sourceLimit */
1958 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1959
1960 /* test error behavior with a skip callback */
1961 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1962 source=(const char *)in2;
1963 limit=(const char *)(in2+sizeof(in2));
1964 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1965
1966 ucnv_close(cnv);
1967 }
1968
TestCESU8()1969 static void TestCESU8() {
1970 /* test input */
1971 static const uint8_t in[]={
1972 0x61,
1973 0xc2, 0x80,
1974 0xe0, 0xa0, 0x80,
1975 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1976 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1977 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1978 0xef, 0xbf, 0xbc
1979 };
1980
1981 /* expected test results */
1982 static const int32_t results[]={
1983 /* number of bytes read, code point */
1984 1, 0x61,
1985 2, 0x80,
1986 3, 0x800,
1987 6, 0x10000,
1988 3, 0xdc01,
1989 -1,0xd802, /* may read 3 or 6 bytes */
1990 -1,0x10ffff,/* may read 0 or 3 bytes */
1991 3, 0xfffc
1992 };
1993
1994 /* error test input */
1995 static const uint8_t in2[]={
1996 0x61,
1997 0xc0, 0x80, /* illegal non-shortest form */
1998 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1999 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
2000 0xc0, 0xc0, /* illegal trail byte */
2001 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
2002 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
2003 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
2004 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
2005 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
2006 0xfe, /* illegal byte altogether */
2007 0x62
2008 };
2009
2010 /* expected error test results */
2011 static const int32_t results2[]={
2012 /* number of bytes read, code point */
2013 1, 0x61,
2014 34, 0x62
2015 };
2016
2017 UConverterToUCallback cb;
2018 const void *p;
2019
2020 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2021 UErrorCode errorCode=U_ZERO_ERROR;
2022 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2023 if(U_FAILURE(errorCode)) {
2024 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2025 return;
2026 }
2027 TestNextUChar(cnv, source, limit, results, "CESU-8");
2028 /* Test the condition when source >= sourceLimit */
2029 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2030
2031 /* test error behavior with a skip callback */
2032 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2033 source=(const char *)in2;
2034 limit=(const char *)(in2+sizeof(in2));
2035 TestNextUChar(cnv, source, limit, results2, "CESU-8");
2036
2037 ucnv_close(cnv);
2038 }
2039
TestUTF16()2040 static void TestUTF16() {
2041 /* test input */
2042 static const uint8_t in1[]={
2043 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2044 };
2045 static const uint8_t in2[]={
2046 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2047 };
2048 static const uint8_t in3[]={
2049 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2050 };
2051
2052 /* expected test results */
2053 static const int32_t results1[]={
2054 /* number of bytes read, code point */
2055 4, 0x4e00,
2056 2, 0xfeff
2057 };
2058 static const int32_t results2[]={
2059 /* number of bytes read, code point */
2060 4, 0x004e,
2061 2, 0xfffe
2062 };
2063 static const int32_t results3[]={
2064 /* number of bytes read, code point */
2065 2, 0xfefe,
2066 2, 0x4e00,
2067 2, 0xfeff,
2068 4, 0x20001
2069 };
2070
2071 const char *source, *limit;
2072
2073 UErrorCode errorCode=U_ZERO_ERROR;
2074 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2075 if(U_FAILURE(errorCode)) {
2076 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2077 return;
2078 }
2079
2080 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2081 TestNextUChar(cnv, source, limit, results1, "UTF-16");
2082
2083 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2084 ucnv_resetToUnicode(cnv);
2085 TestNextUChar(cnv, source, limit, results2, "UTF-16");
2086
2087 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2088 ucnv_resetToUnicode(cnv);
2089 TestNextUChar(cnv, source, limit, results3, "UTF-16");
2090
2091 /* Test the condition when source >= sourceLimit */
2092 ucnv_resetToUnicode(cnv);
2093 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2094
2095 ucnv_close(cnv);
2096 }
2097
TestUTF16BE()2098 static void TestUTF16BE() {
2099 /* test input */
2100 static const uint8_t in[]={
2101 0x00, 0x61,
2102 0x00, 0xc0,
2103 0x00, 0x31,
2104 0x00, 0xf4,
2105 0xce, 0xfe,
2106 0xd8, 0x01, 0xdc, 0x01
2107 };
2108
2109 /* expected test results */
2110 static const int32_t results[]={
2111 /* number of bytes read, code point */
2112 2, 0x61,
2113 2, 0xc0,
2114 2, 0x31,
2115 2, 0xf4,
2116 2, 0xcefe,
2117 4, 0x10401
2118 };
2119
2120 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2121 UErrorCode errorCode=U_ZERO_ERROR;
2122 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2123 if(U_FAILURE(errorCode)) {
2124 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2125 return;
2126 }
2127 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2128 /* Test the condition when source >= sourceLimit */
2129 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2130 /*Test for the condition where there is an invalid character*/
2131 {
2132 static const uint8_t source2[]={0x61};
2133 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2134 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2135 }
2136 #if 0
2137 /*
2138 * Test disabled because currently the UTF-16BE/LE converters are supposed
2139 * to not set errors for unpaired surrogates.
2140 * This may change with
2141 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2142 */
2143
2144 /*Test for the condition where there is a surrogate pair*/
2145 {
2146 const uint8_t source2[]={0xd8, 0x01};
2147 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2148 }
2149 #endif
2150 ucnv_close(cnv);
2151 }
2152
2153 static void
TestUTF16LE()2154 TestUTF16LE() {
2155 /* test input */
2156 static const uint8_t in[]={
2157 0x61, 0x00,
2158 0x31, 0x00,
2159 0x4e, 0x2e,
2160 0x4e, 0x00,
2161 0x01, 0xd8, 0x01, 0xdc
2162 };
2163
2164 /* expected test results */
2165 static const int32_t results[]={
2166 /* number of bytes read, code point */
2167 2, 0x61,
2168 2, 0x31,
2169 2, 0x2e4e,
2170 2, 0x4e,
2171 4, 0x10401
2172 };
2173
2174 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2175 UErrorCode errorCode=U_ZERO_ERROR;
2176 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2177 if(U_FAILURE(errorCode)) {
2178 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2179 return;
2180 }
2181 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2182 /* Test the condition when source >= sourceLimit */
2183 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2184 /*Test for the condition where there is an invalid character*/
2185 {
2186 static const uint8_t source2[]={0x61};
2187 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2188 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2189 }
2190 #if 0
2191 /*
2192 * Test disabled because currently the UTF-16BE/LE converters are supposed
2193 * to not set errors for unpaired surrogates.
2194 * This may change with
2195 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2196 */
2197
2198 /*Test for the condition where there is a surrogate character*/
2199 {
2200 static const uint8_t source2[]={0x01, 0xd8};
2201 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2202 }
2203 #endif
2204
2205 ucnv_close(cnv);
2206 }
2207
TestUTF32()2208 static void TestUTF32() {
2209 /* test input */
2210 static const uint8_t in1[]={
2211 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2212 };
2213 static const uint8_t in2[]={
2214 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2215 };
2216 static const uint8_t in3[]={
2217 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2218 };
2219
2220 /* expected test results */
2221 static const int32_t results1[]={
2222 /* number of bytes read, code point */
2223 8, 0x100f00,
2224 4, 0xfeff
2225 };
2226 static const int32_t results2[]={
2227 /* number of bytes read, code point */
2228 8, 0x0f1000,
2229 4, 0xfffe
2230 };
2231 static const int32_t results3[]={
2232 /* number of bytes read, code point */
2233 4, 0xfefe,
2234 4, 0x100f00,
2235 4, 0xfffd, /* unmatched surrogate */
2236 4, 0xfffd /* unmatched surrogate */
2237 };
2238
2239 const char *source, *limit;
2240
2241 UErrorCode errorCode=U_ZERO_ERROR;
2242 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2243 if(U_FAILURE(errorCode)) {
2244 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2245 return;
2246 }
2247
2248 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2249 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2250
2251 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2252 ucnv_resetToUnicode(cnv);
2253 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2254
2255 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2256 ucnv_resetToUnicode(cnv);
2257 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2258
2259 /* Test the condition when source >= sourceLimit */
2260 ucnv_resetToUnicode(cnv);
2261 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2262
2263 ucnv_close(cnv);
2264 }
2265
2266 static void
TestUTF32BE()2267 TestUTF32BE() {
2268 /* test input */
2269 static const uint8_t in[]={
2270 0x00, 0x00, 0x00, 0x61,
2271 0x00, 0x00, 0x30, 0x61,
2272 0x00, 0x00, 0xdc, 0x00,
2273 0x00, 0x00, 0xd8, 0x00,
2274 0x00, 0x00, 0xdf, 0xff,
2275 0x00, 0x00, 0xff, 0xfe,
2276 0x00, 0x10, 0xab, 0xcd,
2277 0x00, 0x10, 0xff, 0xff
2278 };
2279
2280 /* expected test results */
2281 static const int32_t results[]={
2282 /* number of bytes read, code point */
2283 4, 0x61,
2284 4, 0x3061,
2285 4, 0xfffd,
2286 4, 0xfffd,
2287 4, 0xfffd,
2288 4, 0xfffe,
2289 4, 0x10abcd,
2290 4, 0x10ffff
2291 };
2292
2293 /* error test input */
2294 static const uint8_t in2[]={
2295 0x00, 0x00, 0x00, 0x61,
2296 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2297 0x00, 0x00, 0x00, 0x62,
2298 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2299 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2300 0x00, 0x00, 0x01, 0x62,
2301 0x00, 0x00, 0x02, 0x62
2302 };
2303
2304 /* expected error test results */
2305 static const int32_t results2[]={
2306 /* number of bytes read, code point */
2307 4, 0x61,
2308 8, 0x62,
2309 12, 0x162,
2310 4, 0x262
2311 };
2312
2313 UConverterToUCallback cb;
2314 const void *p;
2315
2316 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2317 UErrorCode errorCode=U_ZERO_ERROR;
2318 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2319 if(U_FAILURE(errorCode)) {
2320 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2321 return;
2322 }
2323 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2324
2325 /* Test the condition when source >= sourceLimit */
2326 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2327
2328 /* test error behavior with a skip callback */
2329 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2330 source=(const char *)in2;
2331 limit=(const char *)(in2+sizeof(in2));
2332 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2333
2334 ucnv_close(cnv);
2335 }
2336
2337 static void
TestUTF32LE()2338 TestUTF32LE() {
2339 /* test input */
2340 static const uint8_t in[]={
2341 0x61, 0x00, 0x00, 0x00,
2342 0x61, 0x30, 0x00, 0x00,
2343 0x00, 0xdc, 0x00, 0x00,
2344 0x00, 0xd8, 0x00, 0x00,
2345 0xff, 0xdf, 0x00, 0x00,
2346 0xfe, 0xff, 0x00, 0x00,
2347 0xcd, 0xab, 0x10, 0x00,
2348 0xff, 0xff, 0x10, 0x00
2349 };
2350
2351 /* expected test results */
2352 static const int32_t results[]={
2353 /* number of bytes read, code point */
2354 4, 0x61,
2355 4, 0x3061,
2356 4, 0xfffd,
2357 4, 0xfffd,
2358 4, 0xfffd,
2359 4, 0xfffe,
2360 4, 0x10abcd,
2361 4, 0x10ffff
2362 };
2363
2364 /* error test input */
2365 static const uint8_t in2[]={
2366 0x61, 0x00, 0x00, 0x00,
2367 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2368 0x62, 0x00, 0x00, 0x00,
2369 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2370 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2371 0x62, 0x01, 0x00, 0x00,
2372 0x62, 0x02, 0x00, 0x00,
2373 };
2374
2375 /* expected error test results */
2376 static const int32_t results2[]={
2377 /* number of bytes read, code point */
2378 4, 0x61,
2379 8, 0x62,
2380 12, 0x162,
2381 4, 0x262,
2382 };
2383
2384 UConverterToUCallback cb;
2385 const void *p;
2386
2387 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2388 UErrorCode errorCode=U_ZERO_ERROR;
2389 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2390 if(U_FAILURE(errorCode)) {
2391 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2392 return;
2393 }
2394 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2395
2396 /* Test the condition when source >= sourceLimit */
2397 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2398
2399 /* test error behavior with a skip callback */
2400 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2401 source=(const char *)in2;
2402 limit=(const char *)(in2+sizeof(in2));
2403 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2404
2405 ucnv_close(cnv);
2406 }
2407
2408 static void
TestLATIN1()2409 TestLATIN1() {
2410 /* test input */
2411 static const uint8_t in[]={
2412 0x61,
2413 0x31,
2414 0x32,
2415 0xc0,
2416 0xf0,
2417 0xf4,
2418 };
2419
2420 /* expected test results */
2421 static const int32_t results[]={
2422 /* number of bytes read, code point */
2423 1, 0x61,
2424 1, 0x31,
2425 1, 0x32,
2426 1, 0xc0,
2427 1, 0xf0,
2428 1, 0xf4,
2429 };
2430 static const uint16_t in1[] = {
2431 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2432 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2433 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2434 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2435 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2436 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2437 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2438 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2439 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2440 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2441 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2442 0xcb, 0x82
2443 };
2444 static const uint8_t out1[] = {
2445 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2446 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2447 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2448 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2449 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2450 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2451 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2452 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2453 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2454 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2455 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2456 0xcb, 0x82
2457 };
2458 static const uint16_t in2[]={
2459 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2460 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2461 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2462 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2463 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2464 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2465 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2466 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2467 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2468 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2469 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2470 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2471 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2472 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2473 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2474 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2475 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2476 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2477 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2478 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2479 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2480 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2481 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2482 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2483 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2484 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2485 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2486 0x37, 0x20, 0x2A, 0x2F,
2487 };
2488 static const unsigned char out2[]={
2489 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2490 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2491 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2492 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2493 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2494 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2495 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2496 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2497 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2498 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2499 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2500 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2501 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2502 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2503 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2504 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2505 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2506 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2507 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2508 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2509 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2510 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2511 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2512 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2513 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2514 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2515 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2516 0x37, 0x20, 0x2A, 0x2F,
2517 };
2518 const char *source=(const char *)in;
2519 const char *limit=(const char *)in+sizeof(in);
2520
2521 UErrorCode errorCode=U_ZERO_ERROR;
2522 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2523 if(U_FAILURE(errorCode)) {
2524 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2525 return;
2526 }
2527 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2528 /* Test the condition when source >= sourceLimit */
2529 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2530 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2531 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2532
2533 ucnv_close(cnv);
2534 }
2535
2536 static void
TestSBCS()2537 TestSBCS() {
2538 /* test input */
2539 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2540 /* expected test results */
2541 static const int32_t results[]={
2542 /* number of bytes read, code point */
2543 1, 0x61,
2544 1, 0xbf,
2545 1, 0xc4,
2546 1, 0x2021,
2547 1, 0xf8ff,
2548 1, 0x00d9
2549 };
2550
2551 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2552 UErrorCode errorCode=U_ZERO_ERROR;
2553 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2554 if(U_FAILURE(errorCode)) {
2555 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2556 return;
2557 }
2558 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2559 /* Test the condition when source >= sourceLimit */
2560 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2561 /*Test for Illegal character */ /*
2562 {
2563 static const uint8_t input1[]={ 0xA1 };
2564 const char* illegalsource=(const char*)input1;
2565 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal characte");
2566 }
2567 */
2568 ucnv_close(cnv);
2569 }
2570
2571 static void
TestDBCS()2572 TestDBCS() {
2573 /* test input */
2574 static const uint8_t in[]={
2575 0x44, 0x6a,
2576 0xc4, 0x9c,
2577 0x7a, 0x74,
2578 0x46, 0xab,
2579 0x42, 0x5b,
2580
2581 };
2582
2583 /* expected test results */
2584 static const int32_t results[]={
2585 /* number of bytes read, code point */
2586 2, 0x00a7,
2587 2, 0xe1d2,
2588 2, 0x6962,
2589 2, 0xf842,
2590 2, 0xffe5,
2591 };
2592
2593 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2594 UErrorCode errorCode=U_ZERO_ERROR;
2595
2596 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2597 if(U_FAILURE(errorCode)) {
2598 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2599 return;
2600 }
2601 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2602 /* Test the condition when source >= sourceLimit */
2603 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2604 /*Test for the condition where there is an invalid character*/
2605 {
2606 static const uint8_t source2[]={0x1a, 0x1b};
2607 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2608 }
2609 /*Test for the condition where we have a truncated char*/
2610 {
2611 static const uint8_t source1[]={0xc4};
2612 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2613 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2614 }
2615 ucnv_close(cnv);
2616 }
2617
2618 static void
TestMBCS()2619 TestMBCS() {
2620 /* test input */
2621 static const uint8_t in[]={
2622 0x01,
2623 0xa6, 0xa3,
2624 0x00,
2625 0xa6, 0xa1,
2626 0x08,
2627 0xc2, 0x76,
2628 0xc2, 0x78,
2629
2630 };
2631
2632 /* expected test results */
2633 static const int32_t results[]={
2634 /* number of bytes read, code point */
2635 1, 0x0001,
2636 2, 0x250c,
2637 1, 0x0000,
2638 2, 0x2500,
2639 1, 0x0008,
2640 2, 0xd60c,
2641 2, 0xd60e,
2642 };
2643
2644 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2645 UErrorCode errorCode=U_ZERO_ERROR;
2646
2647 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2648 if(U_FAILURE(errorCode)) {
2649 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2650 return;
2651 }
2652 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2653 /* Test the condition when source >= sourceLimit */
2654 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2655 /*Test for the condition where there is an invalid character*/
2656 {
2657 static const uint8_t source2[]={0xa1, 0x80};
2658 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2659 }
2660 /*Test for the condition where we have a truncated char*/
2661 {
2662 static const uint8_t source1[]={0xc4};
2663 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2664 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2665 }
2666 ucnv_close(cnv);
2667
2668 }
2669
2670 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2671 static void
TestICCRunout()2672 TestICCRunout() {
2673 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2674
2675 const char *cnvName = "ibm-1363";
2676 UErrorCode status = U_ZERO_ERROR;
2677 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2678 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2679 const char *source = sourceData;
2680 const char *sourceLim = sourceData+sizeof(sourceData);
2681 UChar c1, c2, c3;
2682 UConverter *cnv=ucnv_open(cnvName, &status);
2683 if(U_FAILURE(status)) {
2684 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2685 return;
2686 }
2687
2688 #if 0
2689 {
2690 UChar targetBuf[256];
2691 UChar *target = targetBuf;
2692 UChar *targetLim = target+256;
2693 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, TRUE, &status);
2694
2695 log_info("After convert: target@%d, source@%d, status%s\n",
2696 target-targetBuf, source-sourceData, u_errorName(status));
2697
2698 if(U_FAILURE(status)) {
2699 log_err("Failed to convert: %s\n", u_errorName(status));
2700 } else {
2701
2702 }
2703 }
2704 #endif
2705
2706 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2707 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2708
2709 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2710 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2711
2712 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2713 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2714
2715 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2716 log_verbose("OK\n");
2717 } else {
2718 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2719 }
2720
2721 ucnv_close(cnv);
2722
2723 }
2724 #endif
2725
#ifdef U_ENABLE_GENERIC_ISO_2022

/*
 * Steps the generic "ISO_2022" converter through an escape sequence followed
 * by UTF-8 encoded characters, then checks four error cases: a limit that
 * lies before the source pointer, an empty range, a single truncated lead
 * byte, and an illegal byte pair (the last two with the STOP callback).
 */
static void
TestISO_2022() {
    /* test input: ESC % B, then sequences of 1, 1, 1, 2, 3 and 4 bytes */
    static const uint8_t in[]={
        0x1b, 0x25, 0x42,
        0x31,
        0x32,
        0x61,
        0xc2, 0x80,
        0xe0, 0xa0, 0x80,
        0xf0, 0x90, 0x80, 0x80
    };

    /* expected test results */
    static const int32_t results[]={
        /* number of bytes read, code point */
        4, 0x0031,  /* 4 bytes including the escape sequence */
        1, 0x0032,
        1, 0x61,
        2, 0x80,
        3, 0x800,
        4, 0x10000
    };

    const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv;

    cnv=ucnv_open("ISO_2022", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        return;
    }
    TestNextUChar(cnv, source, limit, results, "ISO_2022");

    /*
     * Test the condition when sourceLimit < source.
     * The inverted range is built as (source+1, source), so both pointers
     * stay inside in[]; computing source-1 would form a pointer before the
     * start of the array, which is undefined behaviour in C.
     */
    TestNextUCharError(cnv, source+1, source, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
    /* Test the condition when sourceLimit == source (empty input) */
    TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
    /* Test for the condition where we have a truncated char */
    {
        static const uint8_t source1[]={0xc4};
        ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
        TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
    }
    /* Test for the condition where there is an invalid character (STOP callback still installed) */
    {
        static const uint8_t source2[]={0xa1, 0x01};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
    }
    ucnv_close(cnv);
}

#endif
2783
2784 static void
TestSmallTargetBuffer(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2785 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2786 const UChar* uSource;
2787 const UChar* uSourceLimit;
2788 const char* cSource;
2789 const char* cSourceLimit;
2790 UChar *uTargetLimit =NULL;
2791 UChar *uTarget;
2792 char *cTarget;
2793 const char *cTargetLimit;
2794 char *cBuf;
2795 UChar *uBuf; /*,*test;*/
2796 int32_t uBufSize = 120;
2797 int len=0;
2798 int i=2;
2799 UErrorCode errorCode=U_ZERO_ERROR;
2800 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2801 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2802 ucnv_reset(cnv);
2803 for(;--i>0; ){
2804 uSource = (UChar*) source;
2805 uSourceLimit=(const UChar*)sourceLimit;
2806 cTarget = cBuf;
2807 uTarget = uBuf;
2808 cSource = cBuf;
2809 cTargetLimit = cBuf;
2810 uTargetLimit = uBuf;
2811
2812 do{
2813
2814 cTargetLimit = cTargetLimit+ i;
2815 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
2816 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2817 errorCode=U_ZERO_ERROR;
2818 continue;
2819 }
2820
2821 if(U_FAILURE(errorCode)){
2822 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2823 return;
2824 }
2825
2826 }while (uSource<uSourceLimit);
2827
2828 cSourceLimit =cTarget;
2829 do{
2830 uTargetLimit=uTargetLimit+i;
2831 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
2832 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2833 errorCode=U_ZERO_ERROR;
2834 continue;
2835 }
2836 if(U_FAILURE(errorCode)){
2837 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2838 return;
2839 }
2840 }while(cSource<cSourceLimit);
2841
2842 uSource = source;
2843 /*test =uBuf;*/
2844 for(len=0;len<(int)(source - sourceLimit);len++){
2845 if(uBuf[len]!=uSource[len]){
2846 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2847 }
2848 }
2849 }
2850 free(uBuf);
2851 free(cBuf);
2852 }
2853 /* Test for Jitterbug 778 */
TestToAndFromUChars(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2854 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2855 const UChar* uSource;
2856 const UChar* uSourceLimit;
2857 const char* cSource;
2858 UChar *uTargetLimit =NULL;
2859 UChar *uTarget;
2860 char *cTarget;
2861 const char *cTargetLimit;
2862 char *cBuf;
2863 UChar *uBuf,*test;
2864 int32_t uBufSize = 120;
2865 int numCharsInTarget=0;
2866 UErrorCode errorCode=U_ZERO_ERROR;
2867 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2868 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2869 uSource = source;
2870 uSourceLimit=sourceLimit;
2871 cTarget = cBuf;
2872 cTargetLimit = cBuf +uBufSize*5;
2873 uTarget = uBuf;
2874 uTargetLimit = uBuf+ uBufSize*5;
2875 ucnv_reset(cnv);
2876 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2877 if(U_FAILURE(errorCode)){
2878 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2879 return;
2880 }
2881 cSource = cBuf;
2882 test =uBuf;
2883 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2884 if(U_FAILURE(errorCode)){
2885 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2886 return;
2887 }
2888 uSource = source;
2889 while(uSource<uSourceLimit){
2890 if(*test!=*uSource){
2891
2892 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2893 }
2894 uSource++;
2895 test++;
2896 }
2897 free(uBuf);
2898 free(cBuf);
2899 }
2900
/*
 * Round-trips [source, sourceLimit) through `cnv` while feeding the
 * converter its input one unit at a time.
 *
 * Encoding: the UChar source limit is moved forward by one code unit before
 * every ucnv_fromUnicode() call until all of the text has been consumed.
 * Decoding: the byte source limit is moved forward by one byte before every
 * ucnv_toUnicode() call.  The target buffers are large, and both directions
 * are run with flush=FALSE.
 *
 * Finally the decoded text is compared with the original.  Only the UChars
 * that were actually written are compared: because nothing is flushed, the
 * converter may still be holding back trailing output, and the rest of uBuf
 * is uninitialized.
 *
 * NOTE(review): the comparison used to be dead code (its bound was
 * `source - sourceLimit`, which is never positive).  It is live now; a
 * mismatch reported here is a real streaming round-trip difference.
 *
 * @param source       first UTF-16 code unit of the test text
 * @param sourceLimit  one past the last code unit of the test text
 * @param cnv          open converter; it is reset before use
 */
static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf;
    int32_t uBufSize = 120;
    int len=0;
    int expected;
    int produced;
    int i=2;
    UErrorCode errorCode=U_ZERO_ERROR;

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    if(uBuf==NULL || cBuf==NULL){
        log_err("TestSmallSourceBuffer: memory allocation failed\n");
        goto cleanup;
    }

    ucnv_reset(cnv);
    /* i starts at 2 and is pre-decremented, so the body runs exactly once */
    for(;--i>0;){
        uSource = (const UChar*) source;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        uTargetLimit = uBuf+uBufSize*5;
        cTargetLimit = cBuf+uBufSize*10;
        uSourceLimit=uSource;

        /* Unicode -> bytes, exposing one more code unit per call */
        do{

            if (uSourceLimit < sourceLimit) {
                uSourceLimit = uSourceLimit+1;
            }
            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,FALSE, &errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
                continue;
            }

            if(U_FAILURE(errorCode)){
                log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }

        }while (uSource<sourceLimit);

        /* bytes -> Unicode, exposing one more byte per call */
        cSourceLimit =cBuf;
        do{
            if (cSourceLimit < cTarget) {
                cSourceLimit = cSourceLimit+1;
            }
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,FALSE,&errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
                continue;
            }
            if(U_FAILURE(errorCode)){
                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while(cSource<cTarget);

        /* compare what was decoded with the original text */
        uSource = (const UChar*) source;
        expected = (int)(sourceLimit - uSource);
        produced = (int)(uTarget - uBuf);
        if(produced<expected){
            expected = produced;
        }
        for(len=0;len<expected;len++){
            if(uBuf[len]!=uSource[len]){
                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
            }
        }
    }

cleanup:
    free(uBuf);
    free(cBuf);
}
2975 static void
TestGetNextUChar2022(UConverter * cnv,const char * source,const char * limit,const uint16_t results[],const char * message)2976 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2977 const uint16_t results[], const char* message){
2978 /* const char* s0; */
2979 const char* s=(char*)source;
2980 const uint16_t *r=results;
2981 UErrorCode errorCode=U_ZERO_ERROR;
2982 uint32_t c,exC;
2983 ucnv_reset(cnv);
2984 while(s<limit) {
2985 /* s0=s; */
2986 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2987 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2988 break; /* no more significant input */
2989 } else if(U_FAILURE(errorCode)) {
2990 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2991 break;
2992 } else {
2993 if(U16_IS_LEAD(*r)){
2994 int i =0, len = 2;
2995 U16_NEXT(r, i, len, exC);
2996 r++;
2997 }else{
2998 exC = *r;
2999 }
3000 if(c!=(uint32_t)(exC))
3001 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
3002 }
3003 r++;
3004 }
3005 }
3006
TestJitterbug930(const char * enc)3007 static int TestJitterbug930(const char* enc){
3008 UErrorCode err = U_ZERO_ERROR;
3009 UConverter*converter;
3010 char out[80];
3011 char*target = out;
3012 UChar in[4];
3013 const UChar*source = in;
3014 int32_t off[80];
3015 int32_t* offsets = off;
3016 int numOffWritten=0;
3017 UBool flush = 0;
3018 converter = my_ucnv_open(enc, &err);
3019
3020 in[0] = 0x41; /* 0x4E00;*/
3021 in[1] = 0x4E01;
3022 in[2] = 0x4E02;
3023 in[3] = 0x4E03;
3024
3025 memset(off, '*', sizeof(off));
3026
3027 ucnv_fromUnicode (converter,
3028 &target,
3029 target+2,
3030 &source,
3031 source+3,
3032 offsets,
3033 flush,
3034 &err);
3035
3036 /* writes three bytes into the output buffer: 41 1B 24
3037 * but offsets contains 0 1 1
3038 */
3039 while(*offsets< off[10]){
3040 numOffWritten++;
3041 offsets++;
3042 }
3043 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3044 if(numOffWritten!= (int)(target-out)){
3045 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3046 }
3047
3048 err = U_ZERO_ERROR;
3049
3050 memset(off,'*' , sizeof(off));
3051
3052 flush = 1;
3053 offsets=off;
3054 ucnv_fromUnicode (converter,
3055 &target,
3056 target+4,
3057 &source,
3058 source,
3059 offsets,
3060 flush,
3061 &err);
3062 numOffWritten=0;
3063 while(*offsets< off[10]){
3064 numOffWritten++;
3065 if(*offsets!= -1){
3066 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3067 }
3068 offsets++;
3069 }
3070
3071 /* writes 42 43 7A into output buffer,
3072 * offsets contains -1 -1 -1
3073 */
3074 ucnv_close(converter);
3075 return 0;
3076 }
3077
3078 static void
TestHZ()3079 TestHZ() {
3080 /* test input */
3081 static const uint16_t in[]={
3082 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3083 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3084 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3085 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3086 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3087 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3088 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3089 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3090 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3091 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3092 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3093 0x005A, 0x005B, 0x005C, 0x000A
3094 };
3095 const UChar* uSource;
3096 const UChar* uSourceLimit;
3097 const char* cSource;
3098 const char* cSourceLimit;
3099 UChar *uTargetLimit =NULL;
3100 UChar *uTarget;
3101 char *cTarget;
3102 const char *cTargetLimit;
3103 char *cBuf = NULL;
3104 UChar *uBuf = NULL;
3105 UChar *test;
3106 int32_t uBufSize = 120;
3107 UErrorCode errorCode=U_ZERO_ERROR;
3108 UConverter *cnv = NULL;
3109 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3110 int32_t* myOff= offsets;
3111 cnv=ucnv_open("HZ", &errorCode);
3112 if(U_FAILURE(errorCode)) {
3113 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3114 goto cleanup;
3115 }
3116
3117 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3118 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3119 uSource = (const UChar*)in;
3120 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3121 cTarget = cBuf;
3122 cTargetLimit = cBuf +uBufSize*5;
3123 uTarget = uBuf;
3124 uTargetLimit = uBuf+ uBufSize*5;
3125 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3126 if(U_FAILURE(errorCode)){
3127 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3128 goto cleanup;
3129 }
3130 cSource = cBuf;
3131 cSourceLimit =cTarget;
3132 test =uBuf;
3133 myOff=offsets;
3134 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3135 if(U_FAILURE(errorCode)){
3136 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3137 goto cleanup;
3138 }
3139 uSource = (const UChar*)in;
3140 while(uSource<uSourceLimit){
3141 if(*test!=*uSource){
3142
3143 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3144 }
3145 uSource++;
3146 test++;
3147 }
3148 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3149 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3150 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3151 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3152 TestJitterbug930("csISO2022JP");
3153
3154 cleanup:
3155 ucnv_close(cnv);
3156 free(offsets);
3157 free(uBuf);
3158 free(cBuf);
3159 }
3160
3161 static void
TestISCII()3162 TestISCII(){
3163 /* test input */
3164 static const uint16_t in[]={
3165 /* test full range of Devanagari */
3166 0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
3167 0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
3168 0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
3169 0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
3170 0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
3171 0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
3172 0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
3173 0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
3174 0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
3175 0x096D,0x096E,0x096F,
3176 /* test Soft halant*/
3177 0x0915,0x094d, 0x200D,
3178 /* test explicit halant */
3179 0x0915,0x094d, 0x200c,
3180 /* test double danda */
3181 0x965,
3182 /* test ASCII */
3183 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3184 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3185 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3186 /* tests from Lotus */
3187 0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
3188 0x0930,0x094D,0x200D,
3189 0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
3190 0x0915,0x0921,0x002B,0x095F,
3191 /* tamil range */
3192 0x0B86, 0xB87, 0xB88,
3193 /* telugu range */
3194 0x0C05, 0x0C02, 0x0C03,0x0c31,
3195 /* kannada range */
3196 0x0C85, 0xC82, 0x0C83,
3197 /* test Abbr sign and Anudatta */
3198 0x0970, 0x952,
3199 /* 0x0958,
3200 0x0959,
3201 0x095A,
3202 0x095B,
3203 0x095C,
3204 0x095D,
3205 0x095E,
3206 0x095F,*/
3207 0x0960 /* Vocallic RRI 0xAB, 0xE9*/,
3208 0x0944 /* Vowel Sign Vocallic RRI 0xDF, 0xE9 */,
3209 0x090C ,
3210 0x0962,
3211 0x0961 /* Vocallic LL 0xa6, 0xE9 */,
3212 0x0963 /* Vowel Sign Vocallic LL 0xdb, 0xE9, */,
3213 0x0950 /* OM Symbol 0xa1, 0xE9,*/,
3214 0x093D /* Avagraha 0xEA, 0xE9*/,
3215 0x0958,
3216 0x0959,
3217 0x095A,
3218 0x095B,
3219 0x095C,
3220 0x095D,
3221 0x095E,
3222 0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
3223 };
3224 static const unsigned char byteArr[]={
3225
3226 0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
3227 0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
3228 0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
3229 0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
3230 0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
3231 0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
3232 0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
3233 0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
3234 0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
3235 0xf8,0xf9,0xfa,
3236 /* test soft halant */
3237 0xb3, 0xE8, 0xE9,
3238 /* test explicit halant */
3239 0xb3, 0xE8, 0xE8,
3240 /* test double danda */
3241 0xea, 0xea,
3242 /* test ASCII */
3243 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
3244 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
3245 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
3246 /* test ATR code */
3247
3248 /* tests from Lotus */
3249 0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
3250 0xEF,0x42,0xCF,0xE8,0xD9,
3251 0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
3252 0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
3253 /* tamil range */
3254 0xEF, 0x44, 0xa5, 0xa6, 0xa7,
3255 /* telugu range */
3256 0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
3257 /* kannada range */
3258 0xEF, 0x48,0xa4, 0xa2, 0xa3,
3259 /* anudatta and abbreviation sign */
3260 0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,
3261
3262
3263 0xAA, 0xE9,/* RI + NUKTA 0x0960*/
3264
3265 0xDF, 0xE9,/* Vowel sign RI + NUKTA 0x0944*/
3266
3267 0xa6, 0xE9,/* Vowel I + NUKTA 0x090C*/
3268
3269 0xdb, 0xE9,/* Vowel sign I + Nukta 0x0962*/
3270
3271 0xa7, 0xE9,/* Vowel II + NUKTA 0x0961*/
3272
3273 0xdc, 0xE9,/* Vowel sign II + Nukta 0x0963*/
3274
3275 0xa1, 0xE9,/* chandrabindu + Nukta 0x0950*/
3276
3277 0xEA, 0xE9, /* Danda + Nukta 0x093D*/
3278
3279 0xB3, 0xE9, /* Ka + NUKTA */
3280
3281 0xB4, 0xE9, /* Kha + NUKTA */
3282
3283 0xB5, 0xE9, /* Ga + NUKTA */
3284
3285 0xBA, 0xE9,
3286
3287 0xBF, 0xE9,
3288
3289 0xC0, 0xE9,
3290
3291 0xC9, 0xE9,
3292 /* INV halant RA */
3293 0xD9, 0xE8, 0xCF,
3294 0x00, 0x00A0,
3295 /* just consume unhandled codepoints */
3296 0xEF, 0x30,
3297
3298 };
3299 testConvertToU(byteArr,(sizeof(byteArr)),in,UPRV_LENGTHOF(in),"x-iscii-de",NULL,TRUE);
3300 TestConv(in,(sizeof(in)/2),"ISCII,version=0","hindi", (char *)byteArr,sizeof(byteArr));
3301
3302 }
3303
3304 static void
TestISO_2022_JP()3305 TestISO_2022_JP() {
3306 /* test input */
3307 static const uint16_t in[]={
3308 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3309 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3310 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3311 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3312 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3313 0x201D, 0x3014, 0x000D, 0x000A,
3314 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3315 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3316 };
3317 const UChar* uSource;
3318 const UChar* uSourceLimit;
3319 const char* cSource;
3320 const char* cSourceLimit;
3321 UChar *uTargetLimit =NULL;
3322 UChar *uTarget;
3323 char *cTarget;
3324 const char *cTargetLimit;
3325 char *cBuf = NULL;
3326 UChar *uBuf = NULL;
3327 UChar *test;
3328 int32_t uBufSize = 120;
3329 UErrorCode errorCode=U_ZERO_ERROR;
3330 UConverter *cnv = NULL;
3331 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3332 int32_t* myOff= offsets;
3333 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3334 if(U_FAILURE(errorCode)) {
3335 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3336 goto cleanup;
3337 }
3338
3339 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3340 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3341 uSource = (const UChar*)in;
3342 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3343 cTarget = cBuf;
3344 cTargetLimit = cBuf +uBufSize*5;
3345 uTarget = uBuf;
3346 uTargetLimit = uBuf+ uBufSize*5;
3347 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3348 if(U_FAILURE(errorCode)){
3349 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3350 goto cleanup;
3351 }
3352 cSource = cBuf;
3353 cSourceLimit =cTarget;
3354 test =uBuf;
3355 myOff=offsets;
3356 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3357 if(U_FAILURE(errorCode)){
3358 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3359 goto cleanup;
3360 }
3361
3362 uSource = (const UChar*)in;
3363 while(uSource<uSourceLimit){
3364 if(*test!=*uSource){
3365
3366 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3367 }
3368 uSource++;
3369 test++;
3370 }
3371
3372 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3373 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3374 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3375 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3376 TestJitterbug930("csISO2022JP");
3377
3378 cleanup:
3379 ucnv_close(cnv);
3380 free(uBuf);
3381 free(cBuf);
3382 free(offsets);
3383 }
3384
TestConv(const uint16_t in[],int len,const char * conv,const char * lang,char byteArr[],int byteArrLen)3385 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3386 const UChar* uSource;
3387 const UChar* uSourceLimit;
3388 const char* cSource;
3389 const char* cSourceLimit;
3390 UChar *uTargetLimit =NULL;
3391 UChar *uTarget;
3392 char *cTarget;
3393 const char *cTargetLimit;
3394 char *cBuf;
3395 UChar *uBuf,*test;
3396 int32_t uBufSize = 120*10;
3397 UErrorCode errorCode=U_ZERO_ERROR;
3398 UConverter *cnv;
3399 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3400 int32_t* myOff= offsets;
3401 cnv=my_ucnv_open(conv, &errorCode);
3402 if(U_FAILURE(errorCode)) {
3403 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3404 return;
3405 }
3406
3407 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3408 cBuf =(char*)malloc(uBufSize * sizeof(char));
3409 uSource = (const UChar*)in;
3410 uSourceLimit=uSource+len;
3411 cTarget = cBuf;
3412 cTargetLimit = cBuf +uBufSize;
3413 uTarget = uBuf;
3414 uTargetLimit = uBuf+ uBufSize;
3415 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3416 if(U_FAILURE(errorCode)){
3417 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3418 return;
3419 }
3420 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3421 cSource = cBuf;
3422 cSourceLimit =cTarget;
3423 test =uBuf;
3424 myOff=offsets;
3425 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3426 if(U_FAILURE(errorCode)){
3427 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3428 return;
3429 }
3430
3431 uSource = (const UChar*)in;
3432 while(uSource<uSourceLimit){
3433 if(*test!=*uSource){
3434 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3435 }
3436 uSource++;
3437 test++;
3438 }
3439 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3440 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3441 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3442 if(byteArr && byteArrLen!=0){
3443 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3444 TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3445 {
3446 cSource = byteArr;
3447 cSourceLimit = cSource+byteArrLen;
3448 test=uBuf;
3449 myOff = offsets;
3450 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3451 if(U_FAILURE(errorCode)){
3452 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3453 return;
3454 }
3455
3456 uSource = (const UChar*)in;
3457 while(uSource<uSourceLimit){
3458 if(*test!=*uSource){
3459 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3460 }
3461 uSource++;
3462 test++;
3463 }
3464 }
3465 }
3466
3467 ucnv_close(cnv);
3468 free(uBuf);
3469 free(cBuf);
3470 free(offsets);
3471 }
3472 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)3473 _charAt(int32_t offset, void *context) {
3474 return ((char*)context)[offset];
3475 }
3476
3477 static int32_t
unescape(UChar * dst,int32_t dstLen,const char * src,int32_t srcLen,UErrorCode * status)3478 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3479 int32_t srcIndex=0;
3480 int32_t dstIndex=0;
3481 if(U_FAILURE(*status)){
3482 return 0;
3483 }
3484 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3485 *status = U_ILLEGAL_ARGUMENT_ERROR;
3486 return 0;
3487 }
3488 if(srcLen==-1){
3489 srcLen = (int32_t)uprv_strlen(src);
3490 }
3491
3492 for (; srcIndex<srcLen; ) {
3493 UChar32 c = src[srcIndex++];
3494 if (c == 0x005C /*'\\'*/) {
3495 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3496 if (c == (UChar32)0xFFFFFFFF) {
3497 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3498 break; /* invalid escape sequence */
3499 }
3500 }
3501 if(dstIndex < dstLen){
3502 if(c>0xFFFF){
3503 dst[dstIndex++] = U16_LEAD(c);
3504 if(dstIndex<dstLen){
3505 dst[dstIndex]=U16_TRAIL(c);
3506 }else{
3507 *status=U_BUFFER_OVERFLOW_ERROR;
3508 }
3509 }else{
3510 dst[dstIndex]=(UChar)c;
3511 }
3512
3513 }else{
3514 *status = U_BUFFER_OVERFLOW_ERROR;
3515 }
3516 dstIndex++; /* for preflighting */
3517 }
3518 return dstIndex;
3519 }
3520
3521 static void
TestFullRoundtrip(const char * cp)3522 TestFullRoundtrip(const char* cp){
3523 UChar usource[10] ={0};
3524 UChar nsrc[10] = {0};
3525 uint32_t i=1;
3526 int len=0, ulen;
3527 nsrc[0]=0x0061;
3528 /* Test codepoint 0 */
3529 TestConv(usource,1,cp,"",NULL,0);
3530 TestConv(usource,2,cp,"",NULL,0);
3531 nsrc[2]=0x5555;
3532 TestConv(nsrc,3,cp,"",NULL,0);
3533
3534 for(;i<=0x10FFFF;i++){
3535 if(i==0xD800){
3536 i=0xDFFF;
3537 continue;
3538 }
3539 if(i<=0xFFFF){
3540 usource[0] =(UChar) i;
3541 len=1;
3542 }else{
3543 usource[0]=U16_LEAD(i);
3544 usource[1]=U16_TRAIL(i);
3545 len=2;
3546 }
3547 ulen=len;
3548 if(i==0x80) {
3549 usource[2]=0;
3550 }
3551 /* Test only single code points */
3552 TestConv(usource,ulen,cp,"",NULL,0);
3553 /* Test codepoint repeated twice */
3554 usource[ulen]=usource[0];
3555 usource[ulen+1]=usource[1];
3556 ulen+=len;
3557 TestConv(usource,ulen,cp,"",NULL,0);
3558 /* Test codepoint repeated 3 times */
3559 usource[ulen]=usource[0];
3560 usource[ulen+1]=usource[1];
3561 ulen+=len;
3562 TestConv(usource,ulen,cp,"",NULL,0);
3563 /* Test codepoint in between 2 codepoints */
3564 nsrc[1]=usource[0];
3565 nsrc[2]=usource[1];
3566 nsrc[len+1]=0x5555;
3567 TestConv(nsrc,len+2,cp,"",NULL,0);
3568 uprv_memset(usource,0,sizeof(UChar)*10);
3569 }
3570 }
3571
3572 static void
TestRoundTrippingAllUTF(void)3573 TestRoundTrippingAllUTF(void){
3574 if(!getTestOption(QUICK_OPTION)){
3575 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3576 TestFullRoundtrip("BOCU-1");
3577 log_verbose("Running exhaustive round trip test for SCSU\n");
3578 TestFullRoundtrip("SCSU");
3579 log_verbose("Running exhaustive round trip test for UTF-8\n");
3580 TestFullRoundtrip("UTF-8");
3581 log_verbose("Running exhaustive round trip test for CESU-8\n");
3582 TestFullRoundtrip("CESU-8");
3583 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3584 TestFullRoundtrip("UTF-16BE");
3585 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3586 TestFullRoundtrip("UTF-16LE");
3587 log_verbose("Running exhaustive round trip test for UTF-16\n");
3588 TestFullRoundtrip("UTF-16");
3589 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3590 TestFullRoundtrip("UTF-32BE");
3591 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3592 TestFullRoundtrip("UTF-32LE");
3593 log_verbose("Running exhaustive round trip test for UTF-32\n");
3594 TestFullRoundtrip("UTF-32");
3595 log_verbose("Running exhaustive round trip test for UTF-7\n");
3596 TestFullRoundtrip("UTF-7");
3597 log_verbose("Running exhaustive round trip test for UTF-7\n");
3598 TestFullRoundtrip("UTF-7,version=1");
3599 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3600 TestFullRoundtrip("IMAP-mailbox-name");
3601 /*
3602 *
3603 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3604 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3605 * The old mappings remain as fallbacks.
3606 * This test may be reintroduced at a later time.
3607 *
3608 * 110118 - mow
3609 */
3610 /*
3611 log_verbose("Running exhaustive round trip test for GB18030\n");
3612 TestFullRoundtrip("GB18030");
3613 */
3614 }
3615 }
3616
3617 static void
TestSCSU()3618 TestSCSU() {
3619
3620 static const uint16_t germanUTF16[]={
3621 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3622 };
3623
3624 static const uint8_t germanSCSU[]={
3625 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3626 };
3627
3628 static const uint16_t russianUTF16[]={
3629 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3630 };
3631
3632 static const uint8_t russianSCSU[]={
3633 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3634 };
3635
3636 static const uint16_t japaneseUTF16[]={
3637 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3638 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3639 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3640 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3641 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3642 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3643 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3644 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3645 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3646 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3647 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3648 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3649 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3650 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3651 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3652 };
3653
3654 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3655 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3656 static const uint8_t japaneseSCSU[]={
3657 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3658 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3659 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3660 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3661 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3662 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3663 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3664 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3665 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3666 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3667 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3668 0xcb, 0x82
3669 };
3670
3671 static const uint16_t allFeaturesUTF16[]={
3672 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3673 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3674 0x01df, 0xf000, 0xdbff, 0xdfff
3675 };
3676
3677 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3678 * result here (34B vs. 35B)
3679 */
3680 static const uint8_t allFeaturesSCSU[]={
3681 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3682 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3683 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3684 0xdf, 0x14, 0x80, 0x15, 0xff
3685 };
3686 static const uint16_t monkeyIn[]={
3687 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3688 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3689 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3690 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3691 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3692 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3693 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3694 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3695 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3696 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3697 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3698 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3699 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3700 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3701 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3702 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3703 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3704 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3705 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3706 /* test non-BMP code points */
3707 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3708 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3709 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3710 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3711 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3712 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3713 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3714 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3715 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3716 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3717 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3718
3719
3720 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3721 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3722 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3723 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3724 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3725 };
3726 static const char *fTestCases [] = {
3727 "\\ud800\\udc00", /* smallest surrogate*/
3728 "\\ud8ff\\udcff",
3729 "\\udBff\\udFff", /* largest surrogate pair*/
3730 "\\ud834\\udc00",
3731 "\\U0010FFFF",
3732 "Hello \\u9292 \\u9192 World!",
3733 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3734 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3735
3736 "\\u0648\\u06c8", /* catch missing reset*/
3737 "\\u0648\\u06c8",
3738
3739 "\\u4444\\uE001", /* lowest quotable*/
3740 "\\u4444\\uf2FF", /* highest quotable*/
3741 "\\u4444\\uf188\\u4444",
3742 "\\u4444\\uf188\\uf288",
3743 "\\u4444\\uf188abc\\u0429\\uf288",
3744 "\\u9292\\u2222",
3745 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3746 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3747 "Hello World!123456",
3748 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3749
3750 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3751 "abc\\u4411d", /* uses SQU*/
3752 "abc\\u4411\\u4412d",/* uses SCU*/
3753 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3754 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3755 "\\u9292\\u2222",
3756 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3757 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3758 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3759
3760 "", /* empty input*/
3761 "\\u0000", /* smallest BMP character*/
3762 "\\uFFFF", /* largest BMP character*/
3763
3764 /* regression tests*/
3765 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3766 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3767 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3768 "\\u0041\\u00df\\u0401\\u015f",
3769 "\\u9066\\u2123abc",
3770 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3771 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3772 };
3773 int i=0;
3774 for(;i<UPRV_LENGTHOF(fTestCases);i++){
3775 const char* cSrc = fTestCases[i];
3776 UErrorCode status = U_ZERO_ERROR;
3777 int32_t cSrcLen,srcLen;
3778 UChar* src;
3779 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3780 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3781 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3782 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3783 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3784 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3785 free(src);
3786 }
3787 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3788 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3789 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3790 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3791 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3792 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3793 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3794 }
3795
3796 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug2346()3797 static void TestJitterbug2346(){
3798 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3799 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3800 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3801
3802 UChar uTarget[500]={'\0'};
3803 UChar* utarget=uTarget;
3804 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3805
3806 char cTarget[500]={'\0'};
3807 char* ctarget=cTarget;
3808 char* ctargetLimit=cTarget+sizeof(cTarget);
3809 const char* csource=source;
3810 UChar* temp = expected;
3811 UErrorCode err=U_ZERO_ERROR;
3812
3813 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3814 if(U_FAILURE(err)) {
3815 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3816 return;
3817 }
3818 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,TRUE,&err);
3819 if(U_FAILURE(err)) {
3820 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3821 return;
3822 }
3823 utargetLimit=utarget;
3824 utarget = uTarget;
3825 while(utarget<utargetLimit){
3826 if(*temp!=*utarget){
3827
3828 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3829 }
3830 utarget++;
3831 temp++;
3832 }
3833 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
3834 if(U_FAILURE(err)) {
3835 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3836 return;
3837 }
3838 ctargetLimit=ctarget;
3839 ctarget =cTarget;
3840 ucnv_close(conv);
3841
3842
3843 }
3844
3845 static void
TestISO_2022_JP_1()3846 TestISO_2022_JP_1() {
3847 /* test input */
3848 static const uint16_t in[]={
3849 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3850 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3851 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3852 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3853 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3854 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3855 0x201D, 0x000D, 0x000A,
3856 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3857 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3858 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3859 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3860 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3861 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3862 };
3863 const UChar* uSource;
3864 const UChar* uSourceLimit;
3865 const char* cSource;
3866 const char* cSourceLimit;
3867 UChar *uTargetLimit =NULL;
3868 UChar *uTarget;
3869 char *cTarget;
3870 const char *cTargetLimit;
3871 char *cBuf;
3872 UChar *uBuf,*test;
3873 int32_t uBufSize = 120;
3874 UErrorCode errorCode=U_ZERO_ERROR;
3875 UConverter *cnv;
3876
3877 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3878 if(U_FAILURE(errorCode)) {
3879 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3880 return;
3881 }
3882
3883 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3884 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3885 uSource = (const UChar*)in;
3886 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3887 cTarget = cBuf;
3888 cTargetLimit = cBuf +uBufSize*5;
3889 uTarget = uBuf;
3890 uTargetLimit = uBuf+ uBufSize*5;
3891 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,TRUE, &errorCode);
3892 if(U_FAILURE(errorCode)){
3893 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3894 return;
3895 }
3896 cSource = cBuf;
3897 cSourceLimit =cTarget;
3898 test =uBuf;
3899 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,TRUE,&errorCode);
3900 if(U_FAILURE(errorCode)){
3901 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3902 return;
3903 }
3904 uSource = (const UChar*)in;
3905 while(uSource<uSourceLimit){
3906 if(*test!=*uSource){
3907
3908 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3909 }
3910 uSource++;
3911 test++;
3912 }
3913 /*ucnv_close(cnv);
3914 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3915 /*Test for the condition where there is an invalid character*/
3916 ucnv_reset(cnv);
3917 {
3918 static const uint8_t source2[]={0x0e,0x24,0x053};
3919 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3920 }
3921 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3922 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3923 ucnv_close(cnv);
3924 free(uBuf);
3925 free(cBuf);
3926 }
3927
3928 static void
TestISO_2022_JP_2()3929 TestISO_2022_JP_2() {
3930 /* test input */
3931 static const uint16_t in[]={
3932 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3933 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3934 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3935 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3936 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3937 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3938 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3939 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3940 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3941 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3942 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3943 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3944 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3945 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3946 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3947 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3948 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3949 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3950 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3951 };
3952 const UChar* uSource;
3953 const UChar* uSourceLimit;
3954 const char* cSource;
3955 const char* cSourceLimit;
3956 UChar *uTargetLimit =NULL;
3957 UChar *uTarget;
3958 char *cTarget;
3959 const char *cTargetLimit;
3960 char *cBuf = NULL;
3961 UChar *uBuf = NULL;
3962 UChar *test;
3963 int32_t uBufSize = 120;
3964 UErrorCode errorCode=U_ZERO_ERROR;
3965 UConverter *cnv = NULL;
3966 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3967 int32_t* myOff= offsets;
3968 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3969 if(U_FAILURE(errorCode)) {
3970 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3971 goto cleanup;
3972 }
3973
3974 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3975 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3976 uSource = (const UChar*)in;
3977 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3978 cTarget = cBuf;
3979 cTargetLimit = cBuf +uBufSize*5;
3980 uTarget = uBuf;
3981 uTargetLimit = uBuf+ uBufSize*5;
3982 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
3983 if(U_FAILURE(errorCode)){
3984 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3985 goto cleanup;
3986 }
3987 cSource = cBuf;
3988 cSourceLimit =cTarget;
3989 test =uBuf;
3990 myOff=offsets;
3991 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
3992 if(U_FAILURE(errorCode)){
3993 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3994 goto cleanup;
3995 }
3996 uSource = (const UChar*)in;
3997 while(uSource<uSourceLimit){
3998 if(*test!=*uSource){
3999
4000 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4001 }
4002 uSource++;
4003 test++;
4004 }
4005 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4006 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4007 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4008 /*Test for the condition where there is an invalid character*/
4009 ucnv_reset(cnv);
4010 {
4011 static const uint8_t source2[]={0x0e,0x24,0x053};
4012 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4013 }
4014
4015 cleanup:
4016 ucnv_close(cnv);
4017 free(uBuf);
4018 free(cBuf);
4019 free(offsets);
4020 }
4021
4022 static void
TestISO_2022_KR()4023 TestISO_2022_KR() {
4024 /* test input */
4025 static const uint16_t in[]={
4026 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4027 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4028 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4029 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4030 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4031 ,0x53E3,0x53E4,0x000A,0x000D};
4032 const UChar* uSource;
4033 const UChar* uSourceLimit;
4034 const char* cSource;
4035 const char* cSourceLimit;
4036 UChar *uTargetLimit =NULL;
4037 UChar *uTarget;
4038 char *cTarget;
4039 const char *cTargetLimit;
4040 char *cBuf = NULL;
4041 UChar *uBuf = NULL;
4042 UChar *test;
4043 int32_t uBufSize = 120;
4044 UErrorCode errorCode=U_ZERO_ERROR;
4045 UConverter *cnv = NULL;
4046 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4047 int32_t* myOff= offsets;
4048 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4049 if(U_FAILURE(errorCode)) {
4050 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4051 goto cleanup;
4052 }
4053
4054 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4055 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4056 uSource = (const UChar*)in;
4057 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4058 cTarget = cBuf;
4059 cTargetLimit = cBuf +uBufSize*5;
4060 uTarget = uBuf;
4061 uTargetLimit = uBuf+ uBufSize*5;
4062 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4063 if(U_FAILURE(errorCode)){
4064 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4065 goto cleanup;
4066 }
4067 cSource = cBuf;
4068 cSourceLimit =cTarget;
4069 test =uBuf;
4070 myOff=offsets;
4071 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4072 if(U_FAILURE(errorCode)){
4073 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4074 goto cleanup;
4075 }
4076 uSource = (const UChar*)in;
4077 while(uSource<uSourceLimit){
4078 if(*test!=*uSource){
4079 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4080 }
4081 uSource++;
4082 test++;
4083 }
4084 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4085 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4086 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4087 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4088 TestJitterbug930("csISO2022KR");
4089 /*Test for the condition where there is an invalid character*/
4090 ucnv_reset(cnv);
4091 {
4092 static const uint8_t source2[]={0x1b,0x24,0x053};
4093 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4094 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4095 }
4096
4097 cleanup:
4098 ucnv_close(cnv);
4099 free(uBuf);
4100 free(cBuf);
4101 free(offsets);
4102 }
4103
4104 static void
TestISO_2022_KR_1()4105 TestISO_2022_KR_1() {
4106 /* test input */
4107 static const uint16_t in[]={
4108 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4109 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4110 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4111 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4112 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4113 ,0x53E3,0x53E4,0x000A,0x000D};
4114 const UChar* uSource;
4115 const UChar* uSourceLimit;
4116 const char* cSource;
4117 const char* cSourceLimit;
4118 UChar *uTargetLimit =NULL;
4119 UChar *uTarget;
4120 char *cTarget;
4121 const char *cTargetLimit;
4122 char *cBuf = NULL;
4123 UChar *uBuf = NULL;
4124 UChar *test;
4125 int32_t uBufSize = 120;
4126 UErrorCode errorCode=U_ZERO_ERROR;
4127 UConverter *cnv = NULL;
4128 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4129 int32_t* myOff= offsets;
4130 cnv=ucnv_open("ibm-25546", &errorCode);
4131 if(U_FAILURE(errorCode)) {
4132 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4133 goto cleanup;
4134 }
4135
4136 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4137 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4138 uSource = (const UChar*)in;
4139 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4140 cTarget = cBuf;
4141 cTargetLimit = cBuf +uBufSize*5;
4142 uTarget = uBuf;
4143 uTargetLimit = uBuf+ uBufSize*5;
4144 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4145 if(U_FAILURE(errorCode)){
4146 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4147 goto cleanup;
4148 }
4149 cSource = cBuf;
4150 cSourceLimit =cTarget;
4151 test =uBuf;
4152 myOff=offsets;
4153 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4154 if(U_FAILURE(errorCode)){
4155 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4156 goto cleanup;
4157 }
4158 uSource = (const UChar*)in;
4159 while(uSource<uSourceLimit){
4160 if(*test!=*uSource){
4161 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4162 }
4163 uSource++;
4164 test++;
4165 }
4166 ucnv_reset(cnv);
4167 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4168 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4169 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4170 ucnv_reset(cnv);
4171 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4172 /*Test for the condition where there is an invalid character*/
4173 ucnv_reset(cnv);
4174 {
4175 static const uint8_t source2[]={0x1b,0x24,0x053};
4176 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4177 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4178 }
4179
4180 cleanup:
4181 ucnv_close(cnv);
4182 free(uBuf);
4183 free(cBuf);
4184 free(offsets);
4185 }
4186
TestJitterbug2411()4187 static void TestJitterbug2411(){
4188 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4189 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4190 UConverter* kr=NULL, *kr1=NULL;
4191 UErrorCode errorCode = U_ZERO_ERROR;
4192 UChar tgt[100]={'\0'};
4193 UChar* target = tgt;
4194 UChar* targetLimit = target+100;
4195 kr=ucnv_open("iso-2022-kr", &errorCode);
4196 if(U_FAILURE(errorCode)) {
4197 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4198 return;
4199 }
4200 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4201 if(U_FAILURE(errorCode)) {
4202 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4203 return;
4204 }
4205 kr1 = ucnv_open("ibm-25546", &errorCode);
4206 if(U_FAILURE(errorCode)) {
4207 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4208 return;
4209 }
4210 target = tgt;
4211 targetLimit = target+100;
4212 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,TRUE,&errorCode);
4213
4214 if(U_FAILURE(errorCode)) {
4215 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4216 return;
4217 }
4218
4219 ucnv_close(kr);
4220 ucnv_close(kr1);
4221
4222 }
4223
4224 static void
TestJIS()4225 TestJIS(){
4226 /* From Unicode moved to testdata/conversion.txt */
4227 /*To Unicode*/
4228 {
4229 static const uint8_t sampleTextJIS[] = {
4230 0x1b,0x28,0x48,0x41,0x42, /*jis-Roman*/
4231 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4232 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4233 };
4234 static const uint16_t expectedISO2022JIS[] = {
4235 0x0041, 0x0042,
4236 0xFF81, 0xFF82,
4237 0x3000
4238 };
4239 static const int32_t toISO2022JISOffs[]={
4240 3,4,
4241 8,9,
4242 16
4243 };
4244
4245 static const uint8_t sampleTextJIS7[] = {
4246 0x1b,0x28,0x48,0x41,0x42, /*JIS7-Roman*/
4247 0x1b,0x28,0x49,0x41,0x42, /*Katakana Set*/
4248 0x1b,0x24,0x42,0x21,0x21,
4249 0x0e,0x41,0x42,0x0f, /*Test Katakana set with SI and SO */
4250 0x21,0x22,
4251 0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21 /*recognize and ignore <esc>&@*/
4252 };
4253 static const uint16_t expectedISO2022JIS7[] = {
4254 0x0041, 0x0042,
4255 0xFF81, 0xFF82,
4256 0x3000,
4257 0xFF81, 0xFF82,
4258 0x3001,
4259 0x3000
4260 };
4261 static const int32_t toISO2022JIS7Offs[]={
4262 3,4,
4263 8,9,
4264 13,16,
4265 17,
4266 19,27
4267 };
4268 static const uint8_t sampleTextJIS8[] = {
4269 0x1b,0x28,0x48,0x41,0x42, /*JIS8-Roman*/
4270 0xa1,0xc8,0xd9,/*Katakana Set*/
4271 0x1b,0x28,0x42,
4272 0x41,0x42,
4273 0xb1,0xc3, /*Katakana Set*/
4274 0x1b,0x24,0x42,0x21,0x21
4275 };
4276 static const uint16_t expectedISO2022JIS8[] = {
4277 0x0041, 0x0042,
4278 0xff61, 0xff88, 0xff99,
4279 0x0041, 0x0042,
4280 0xff71, 0xff83,
4281 0x3000
4282 };
4283 static const int32_t toISO2022JIS8Offs[]={
4284 3, 4, 5, 6,
4285 7, 11, 12, 13,
4286 14, 18,
4287 };
4288
4289 testConvertToU(sampleTextJIS,sizeof(sampleTextJIS),expectedISO2022JIS,
4290 UPRV_LENGTHOF(expectedISO2022JIS),"JIS", toISO2022JISOffs,TRUE);
4291 testConvertToU(sampleTextJIS7,sizeof(sampleTextJIS7),expectedISO2022JIS7,
4292 UPRV_LENGTHOF(expectedISO2022JIS7),"JIS7", toISO2022JIS7Offs,TRUE);
4293 testConvertToU(sampleTextJIS8,sizeof(sampleTextJIS8),expectedISO2022JIS8,
4294 UPRV_LENGTHOF(expectedISO2022JIS8),"JIS8", toISO2022JIS8Offs,TRUE);
4295 }
4296
4297 }
4298
4299
4300 #if 0
/* Disabled: ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7. */
4302
4303 static void TestJitterbug915(){
4304 /* tests for roundtripping of the below sequence
4305 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4306 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4307 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4308 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4309 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4310 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4311 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4312 */
4313 static const char cSource[]={
4314 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4315 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4316 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4317 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4318 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4319 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4320 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4321 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4322 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4323 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4324 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4325 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4326 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4327 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4328 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4329 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4330 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4331 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4332 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4333 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4334 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4335 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4336 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4337 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4338 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4339 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4340 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4341 0x37, 0x20, 0x2A, 0x2F
4342 };
4343 UChar uTarget[500]={'\0'};
4344 UChar* utarget=uTarget;
4345 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4346
4347 char cTarget[500]={'\0'};
4348 char* ctarget=cTarget;
4349 char* ctargetLimit=cTarget+sizeof(cTarget);
4350 const char* csource=cSource;
4351 const char* tempSrc = cSource;
4352 UErrorCode err=U_ZERO_ERROR;
4353
4354 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4355 if(U_FAILURE(err)) {
4356 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4357 return;
4358 }
4359 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,TRUE,&err);
4360 if(U_FAILURE(err)) {
4361 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4362 return;
4363 }
4364 utargetLimit=utarget;
4365 utarget = uTarget;
4366 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,TRUE,&err);
4367 if(U_FAILURE(err)) {
4368 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4369 return;
4370 }
4371 ctargetLimit=ctarget;
4372 ctarget =cTarget;
4373 while(ctarget<ctargetLimit){
4374 if(*ctarget != *tempSrc){
4375 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4376 }
4377 ++ctarget;
4378 ++tempSrc;
4379 }
4380
4381 ucnv_close(conv);
4382 }
4383
4384 static void
4385 TestISO_2022_CN_EXT() {
4386 /* test input */
4387 static const uint16_t in[]={
4388 /* test Non-BMP code points */
4389 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
4390 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
4391 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
4392 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
4393 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
4394 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
4395 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
4396 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
4397 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
4398 0xD869, 0xDED5,
4399
4400 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
4401 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
4402 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
4403 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4404 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4405 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4406 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4407 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4408 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4409 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4410 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4411 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4412 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4413 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
4414 0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
4415 0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
4416 0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
4417 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,
4418
4419 0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A
4420
4421 };
4422
4423 const UChar* uSource;
4424 const UChar* uSourceLimit;
4425 const char* cSource;
4426 const char* cSourceLimit;
4427 UChar *uTargetLimit =NULL;
4428 UChar *uTarget;
4429 char *cTarget;
4430 const char *cTargetLimit;
4431 char *cBuf = NULL;
4432 UChar *uBuf = NULL;
4433 UChar *test;
4434 int32_t uBufSize = 180;
4435 UErrorCode errorCode=U_ZERO_ERROR;
4436 UConverter *cnv = NULL;
4437 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4438 int32_t* myOff= offsets;
4439 cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
4440 if(U_FAILURE(errorCode)) {
4441 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4442 goto cleanup;
4443 }
4444
4445 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4446 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4447 uSource = (const UChar*)in;
4448 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4449 cTarget = cBuf;
4450 cTargetLimit = cBuf +uBufSize*5;
4451 uTarget = uBuf;
4452 uTargetLimit = uBuf+ uBufSize*5;
4453 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4454 if(U_FAILURE(errorCode)){
4455 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4456 goto cleanup;
4457 }
4458 cSource = cBuf;
4459 cSourceLimit =cTarget;
4460 test =uBuf;
4461 myOff=offsets;
4462 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4463 if(U_FAILURE(errorCode)){
4464 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4465 goto cleanup;
4466 }
4467 uSource = (const UChar*)in;
4468 while(uSource<uSourceLimit){
4469 if(*test!=*uSource){
4470 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4471 }
4472 else{
4473 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4474 }
4475 uSource++;
4476 test++;
4477 }
4478 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4479 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4480 /*Test for the condition where there is an invalid character*/
4481 ucnv_reset(cnv);
4482 {
4483 static const uint8_t source2[]={0x0e,0x24,0x053};
4484 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
4485 }
4486
4487 cleanup:
4488 ucnv_close(cnv);
4489 free(uBuf);
4490 free(cBuf);
4491 free(offsets);
4492 }
4493 #endif
4494
4495 static void
TestISO_2022_CN()4496 TestISO_2022_CN() {
4497 /* test input */
4498 static const uint16_t in[]={
4499 /* jitterbug 951 */
4500 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4501 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4502 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4503 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4504 0x0020, 0x0045, 0x004e, 0x0044,
4505 /**/
4506 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4507 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4508 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4509 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4510 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4511 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4512 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4513 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4514 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4515 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4516 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4517 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4518 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4519 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4520 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4521 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4522 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4523
4524 };
4525 const UChar* uSource;
4526 const UChar* uSourceLimit;
4527 const char* cSource;
4528 const char* cSourceLimit;
4529 UChar *uTargetLimit =NULL;
4530 UChar *uTarget;
4531 char *cTarget;
4532 const char *cTargetLimit;
4533 char *cBuf = NULL;
4534 UChar *uBuf = NULL;
4535 UChar *test;
4536 int32_t uBufSize = 180;
4537 UErrorCode errorCode=U_ZERO_ERROR;
4538 UConverter *cnv = NULL;
4539 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4540 int32_t* myOff= offsets;
4541 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4542 if(U_FAILURE(errorCode)) {
4543 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4544 goto cleanup;
4545 }
4546
4547 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4548 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4549 uSource = (const UChar*)in;
4550 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4551 cTarget = cBuf;
4552 cTargetLimit = cBuf +uBufSize*5;
4553 uTarget = uBuf;
4554 uTargetLimit = uBuf+ uBufSize*5;
4555 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,TRUE, &errorCode);
4556 if(U_FAILURE(errorCode)){
4557 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4558 goto cleanup;
4559 }
4560 cSource = cBuf;
4561 cSourceLimit =cTarget;
4562 test =uBuf;
4563 myOff=offsets;
4564 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,TRUE,&errorCode);
4565 if(U_FAILURE(errorCode)){
4566 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4567 goto cleanup;
4568 }
4569 uSource = (const UChar*)in;
4570 while(uSource<uSourceLimit){
4571 if(*test!=*uSource){
4572 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4573 }
4574 else{
4575 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4576 }
4577 uSource++;
4578 test++;
4579 }
4580 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4581 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4582 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4583 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4584 TestJitterbug930("csISO2022CN");
4585 /*Test for the condition where there is an invalid character*/
4586 ucnv_reset(cnv);
4587 {
4588 static const uint8_t source2[]={0x0e,0x24,0x053};
4589 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4590 }
4591
4592 cleanup:
4593 ucnv_close(cnv);
4594 free(uBuf);
4595 free(cBuf);
4596 free(offsets);
4597 }
4598
/*
 * Empty-segment tests for ISO-2022-JP/KR/CN and HZ: the toUnicode callback
 * must be invoked with UConverterCallbackReason == UCNV_IRREGULAR.
 * One table entry describes one converter/input pair.
 */
typedef struct EmptySegmentTest {
    const char *converterName;   /* name passed to ucnv_open(); NULL terminates the table */
    const char *inputText;       /* raw bytes fed to the converter */
    int         inputTextLength; /* number of bytes in inputText */
} EmptySegmentTest;
4605
4606 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
UCNV_TO_U_CALLBACK_EMPTYSEGMENT(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)4607 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4608 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4609 // suppress compiler warnings about unused variables
4610 (void)context;
4611 (void)codeUnits;
4612 (void)length;
4613 if (reason > UCNV_IRREGULAR) {
4614 return;
4615 }
4616 if (reason != UCNV_IRREGULAR) {
4617 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4618 }
4619 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4620 *err = U_ZERO_ERROR;
4621 ucnv_cbToUWriteSub(toArgs,0,err);
4622 }
4623
4624 enum { kEmptySegmentToUCharsMax = 64 };
TestJitterbug6175(void)4625 static void TestJitterbug6175(void) {
4626 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4627 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4628 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4629 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4630 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4631 static const EmptySegmentTest emptySegmentTests[] = {
4632 /* converterName inputText inputTextLength */
4633 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4634 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4635 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4636 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4637 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
4638 /* terminator: */
4639 { NULL, NULL, 0, }
4640 };
4641 const EmptySegmentTest * testPtr;
4642 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4643 UErrorCode err = U_ZERO_ERROR;
4644 UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4645 if (U_FAILURE(err)) {
4646 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4647 return;
4648 }
4649 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4650 if (U_FAILURE(err)) {
4651 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4652 ucnv_close(cnv);
4653 return;
4654 }
4655 {
4656 UChar toUChars[kEmptySegmentToUCharsMax];
4657 UChar * toUCharsPtr = toUChars;
4658 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4659 const char * inCharsPtr = testPtr->inputText;
4660 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4661 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, TRUE, &err);
4662 }
4663 ucnv_close(cnv);
4664 }
4665 }
4666
4667 static void
TestEBCDIC_STATEFUL()4668 TestEBCDIC_STATEFUL() {
4669 /* test input */
4670 static const uint8_t in[]={
4671 0x61,
4672 0x1a,
4673 0x0f, 0x4b,
4674 0x42,
4675 0x40,
4676 0x36,
4677 };
4678
4679 /* expected test results */
4680 static const int32_t results[]={
4681 /* number of bytes read, code point */
4682 1, 0x002f,
4683 1, 0x0092,
4684 2, 0x002e,
4685 1, 0xff62,
4686 1, 0x0020,
4687 1, 0x0096,
4688
4689 };
4690 static const uint8_t in2[]={
4691 0x0f,
4692 0xa1,
4693 0x01
4694 };
4695
4696 /* expected test results */
4697 static const int32_t results2[]={
4698 /* number of bytes read, code point */
4699 2, 0x203E,
4700 1, 0x0001,
4701 };
4702
4703 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4704 UErrorCode errorCode=U_ZERO_ERROR;
4705 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4706 if(U_FAILURE(errorCode)) {
4707 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4708 return;
4709 }
4710 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4711 ucnv_reset(cnv);
4712 /* Test the condition when source >= sourceLimit */
4713 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4714 ucnv_reset(cnv);
4715 /*Test for the condition where source > sourcelimit after consuming the shift chracter */
4716 {
4717 static const uint8_t source1[]={0x0f};
4718 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4719 }
4720 /*Test for the condition where there is an invalid character*/
4721 ucnv_reset(cnv);
4722 {
4723 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4724 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4725 }
4726 ucnv_reset(cnv);
4727 source=(const char*)in2;
4728 limit=(const char*)in2+sizeof(in2);
4729 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4730 ucnv_close(cnv);
4731
4732 }
4733
4734 static void
TestGB18030()4735 TestGB18030() {
4736 /* test input */
4737 static const uint8_t in[]={
4738 0x24,
4739 0x7f,
4740 0x81, 0x30, 0x81, 0x30,
4741 0xa8, 0xbf,
4742 0xa2, 0xe3,
4743 0xd2, 0xbb,
4744 0x82, 0x35, 0x8f, 0x33,
4745 0x84, 0x31, 0xa4, 0x39,
4746 0x90, 0x30, 0x81, 0x30,
4747 0xe3, 0x32, 0x9a, 0x35
4748 #if 0
4749 /*
4750 * Feature removed markus 2000-oct-26
4751 * Only some codepages must match surrogate pairs into supplementary code points -
4752 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4753 * GB 18030 provides direct encodings for supplementary code points, therefore
4754 * it must not combine two single-encoded surrogates into one code point.
4755 */
4756 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4757 #endif
4758 };
4759
4760 /* expected test results */
4761 static const int32_t results[]={
4762 /* number of bytes read, code point */
4763 1, 0x24,
4764 1, 0x7f,
4765 4, 0x80,
4766 2, 0x1f9,
4767 2, 0x20ac,
4768 2, 0x4e00,
4769 4, 0x9fa6,
4770 4, 0xffff,
4771 4, 0x10000,
4772 4, 0x10ffff
4773 #if 0
4774 /* Feature removed. See comment above. */
4775 8, 0x10000
4776 #endif
4777 };
4778
4779 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4780 UErrorCode errorCode=U_ZERO_ERROR;
4781 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4782 if(U_FAILURE(errorCode)) {
4783 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4784 return;
4785 }
4786 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4787 ucnv_close(cnv);
4788 }
4789
4790 static void
TestLMBCS()4791 TestLMBCS() {
4792 /* LMBCS-1 string */
4793 static const uint8_t pszLMBCS[]={
4794 0x61,
4795 0x01, 0x29,
4796 0x81,
4797 0xA0,
4798 0x0F, 0x27,
4799 0x0F, 0x91,
4800 0x14, 0x0a, 0x74,
4801 0x14, 0xF6, 0x02,
4802 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4803 0x10, 0x88, 0xA0,
4804 };
4805
4806 /* Unicode UChar32 equivalents */
4807 static const UChar32 pszUnicode32[]={
4808 /* code point */
4809 0x00000061,
4810 0x00002013,
4811 0x000000FC,
4812 0x000000E1,
4813 0x00000007,
4814 0x00000091,
4815 0x00000a74,
4816 0x00000200,
4817 0x00023456, /* code point for surrogate pair */
4818 0x00005516
4819 };
4820
4821 /* Unicode UChar equivalents */
4822 static const UChar pszUnicode[]={
4823 /* code point */
4824 0x0061,
4825 0x2013,
4826 0x00FC,
4827 0x00E1,
4828 0x0007,
4829 0x0091,
4830 0x0a74,
4831 0x0200,
4832 0xD84D, /* low surrogate */
4833 0xDC56, /* high surrogate */
4834 0x5516
4835 };
4836
4837 /* expected test results */
4838 static const int offsets32[]={
4839 /* number of bytes read, code point */
4840 0,
4841 1,
4842 3,
4843 4,
4844 5,
4845 7,
4846 9,
4847 12,
4848 15,
4849 21,
4850 24
4851 };
4852
4853 /* expected test results */
4854 static const int offsets[]={
4855 /* number of bytes read, code point */
4856 0,
4857 1,
4858 3,
4859 4,
4860 5,
4861 7,
4862 9,
4863 12,
4864 15,
4865 18,
4866 21,
4867 24
4868 };
4869
4870
4871 UConverter *cnv;
4872
4873 #define NAME_LMBCS_1 "LMBCS-1"
4874 #define NAME_LMBCS_2 "LMBCS-2"
4875
4876
4877 /* Some basic open/close/property tests on some LMBCS converters */
4878 {
4879
4880 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4881 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4882 char get_subchars [1];
4883 const char * get_name;
4884 UConverter *cnv1;
4885 UConverter *cnv2;
4886
4887 int8_t len = sizeof(get_subchars);
4888
4889 UErrorCode errorCode=U_ZERO_ERROR;
4890
4891 /* Open */
4892 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4893 if(U_FAILURE(errorCode)) {
4894 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4895 return;
4896 }
4897 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4898 if(U_FAILURE(errorCode)) {
4899 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4900 return;
4901 }
4902
4903 /* Name */
4904 get_name = ucnv_getName (cnv1, &errorCode);
4905 if (strcmp(NAME_LMBCS_1,get_name)){
4906 log_err("Unexpected converter name: %s\n", get_name);
4907 }
4908 get_name = ucnv_getName (cnv2, &errorCode);
4909 if (strcmp(NAME_LMBCS_2,get_name)){
4910 log_err("Unexpected converter name: %s\n", get_name);
4911 }
4912
4913 /* substitution chars */
4914 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4915 if(U_FAILURE(errorCode)) {
4916 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4917 }
4918 if (len!=1){
4919 log_err("Unexpected length of sub chars\n");
4920 }
4921 if (get_subchars[0] != expected_subchars[0]){
4922 log_err("Unexpected value of sub chars\n");
4923 }
4924 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4925 if(U_FAILURE(errorCode)) {
4926 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4927 }
4928 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4929 if(U_FAILURE(errorCode)) {
4930 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4931 }
4932 if (len!=1){
4933 log_err("Unexpected length of sub chars\n");
4934 }
4935 if (get_subchars[0] != new_subchars[0]){
4936 log_err("Unexpected value of sub chars\n");
4937 }
4938 ucnv_close(cnv1);
4939 ucnv_close(cnv2);
4940
4941 }
4942
4943 /* LMBCS to Unicode - offsets */
4944 {
4945 UErrorCode errorCode=U_ZERO_ERROR;
4946
4947 const char * pSource = (const char *)pszLMBCS;
4948 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4949
4950 UChar Out [sizeof(pszUnicode) + 1];
4951 UChar * pOut = Out;
4952 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4953
4954 int32_t off [sizeof(offsets)];
4955
4956 /* last 'offset' in expected results is just the final size.
4957 (Makes other tests easier). Compensate here: */
4958
4959 off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4960
4961
4962
4963 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4964 if(U_FAILURE(errorCode)) {
4965 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4966 return;
4967 }
4968
4969
4970
4971 ucnv_toUnicode (cnv,
4972 &pOut,
4973 OutLimit,
4974 &pSource,
4975 sourceLimit,
4976 off,
4977 TRUE,
4978 &errorCode);
4979
4980
4981 if (memcmp(off,offsets,sizeof(offsets)))
4982 {
4983 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4984 }
4985 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4986 {
4987 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4988 }
4989 ucnv_close(cnv);
4990 }
4991 {
4992 /* LMBCS to Unicode - getNextUChar */
4993 const char * sourceStart;
4994 const char *source=(const char *)pszLMBCS;
4995 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4996 const UChar32 *results= pszUnicode32;
4997 const int *off = offsets32;
4998
4999 UErrorCode errorCode=U_ZERO_ERROR;
5000 UChar32 uniChar;
5001
5002 cnv=ucnv_open("LMBCS-1", &errorCode);
5003 if(U_FAILURE(errorCode)) {
5004 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5005 return;
5006 }
5007 else
5008 {
5009
5010 while(source<limit) {
5011 sourceStart=source;
5012 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
5013 if(U_FAILURE(errorCode)) {
5014 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
5015 break;
5016 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
5017 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
5018 uniChar, (source-sourceStart), *results, *off);
5019 break;
5020 }
5021 results++;
5022 off++;
5023 }
5024 }
5025 ucnv_close(cnv);
5026 }
5027 { /* test locale & optimization group operations: Unicode to LMBCS */
5028
5029 UErrorCode errorCode=U_ZERO_ERROR;
5030 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5031 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5032 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5033 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5034 const UChar * pUniOut = uniString;
5035 UChar * pUniIn = uniString;
5036 uint8_t lmbcsString [4];
5037 const char * pLMBCSOut = (const char *)lmbcsString;
5038 char * pLMBCSIn = (char *)lmbcsString;
5039
5040 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5041 ucnv_fromUnicode (cnv16he,
5042 &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5043 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5044 NULL, 1, &errorCode);
5045
5046 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5047 {
5048 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5049 }
5050
5051 pLMBCSIn= (char *)lmbcsString;
5052 pUniOut = uniString;
5053 ucnv_fromUnicode (cnv01us,
5054 &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5055 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5056 NULL, 1, &errorCode);
5057
5058 if (lmbcsString[0] != 0x9F)
5059 {
5060 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5061 }
5062
5063 /* single byte char from mbcs char set */
5064 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5065 pLMBCSOut = (const char *)lmbcsString;
5066 pUniIn = uniString;
5067 ucnv_toUnicode (cnv16jp,
5068 &pUniIn, pUniIn + 1,
5069 &pLMBCSOut, (pLMBCSOut + 1),
5070 NULL, 1, &errorCode);
5071 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5072 {
5073 log_err("Unexpected results from LMBCS-16 single byte char\n");
5074 }
5075 /* convert to group 1: should be 3 bytes */
5076 pLMBCSIn = (char *)lmbcsString;
5077 pUniOut = uniString;
5078 ucnv_fromUnicode (cnv01us,
5079 &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5080 &pUniOut, pUniOut + 1,
5081 NULL, 1, &errorCode);
5082 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5083 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5084 {
5085 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5086 }
5087 pLMBCSOut = (const char *)lmbcsString;
5088 pUniIn = uniString;
5089 ucnv_toUnicode (cnv01us,
5090 &pUniIn, pUniIn + 1,
5091 &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5092 NULL, 1, &errorCode);
5093 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5094 {
5095 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5096 }
5097 pLMBCSIn = (char *)lmbcsString;
5098 pUniOut = uniString;
5099 ucnv_fromUnicode (cnv16jp,
5100 &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5101 &pUniOut, pUniOut + 1,
5102 NULL, 1, &errorCode);
5103 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5104 {
5105 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5106 }
5107 ucnv_close(cnv16he);
5108 ucnv_close(cnv16jp);
5109 ucnv_close(cnv01us);
5110 }
5111 {
5112 /* Small source buffer testing, LMBCS -> Unicode */
5113
5114 UErrorCode errorCode=U_ZERO_ERROR;
5115
5116 const char * pSource = (const char *)pszLMBCS;
5117 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5118 int codepointCount = 0;
5119
5120 UChar Out [sizeof(pszUnicode) + 1];
5121 UChar * pOut = Out;
5122 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5123
5124
5125 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5126 if(U_FAILURE(errorCode)) {
5127 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5128 return;
5129 }
5130
5131
5132 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5133 {
5134 ucnv_toUnicode (cnv,
5135 &pOut,
5136 OutLimit,
5137 &pSource,
5138 (pSource+1), /* claim that this is a 1- byte buffer */
5139 NULL,
5140 FALSE, /* FALSE means there might be more chars in the next buffer */
5141 &errorCode);
5142
5143 if (U_SUCCESS (errorCode))
5144 {
5145 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5146 {
5147 /* we are on to the next code point: check value */
5148
5149 if (Out[0] != pszUnicode[codepointCount]){
5150 log_err("LMBCS->Uni result %lx should have been %lx \n",
5151 Out[0], pszUnicode[codepointCount]);
5152 }
5153
5154 pOut = Out; /* reset for accumulating next code point */
5155 codepointCount++;
5156 }
5157 }
5158 else
5159 {
5160 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5161 }
5162 }
5163 {
5164 /* limits & surrogate error testing */
5165 char LIn [sizeof(pszLMBCS)];
5166 const char * pLIn = LIn;
5167
5168 char LOut [sizeof(pszLMBCS)];
5169 char * pLOut = LOut;
5170
5171 UChar UOut [sizeof(pszUnicode)];
5172 UChar * pUOut = UOut;
5173
5174 UChar UIn [sizeof(pszUnicode)];
5175 const UChar * pUIn = UIn;
5176
5177 int32_t off [sizeof(offsets)];
5178 UChar32 uniChar;
5179
5180 errorCode=U_ZERO_ERROR;
5181
5182 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5183 pUIn++;
5184 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, FALSE, &errorCode);
5185 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5186 {
5187 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5188 }
5189 pUIn--;
5190
5191 errorCode=U_ZERO_ERROR;
5192 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,FALSE, &errorCode);
5193 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5194 {
5195 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5196 }
5197 errorCode=U_ZERO_ERROR;
5198
5199 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5200 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5201 {
5202 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5203 }
5204 errorCode=U_ZERO_ERROR;
5205
5206 /* 0 byte source request - no error, no pointer movement */
5207 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,FALSE, &errorCode);
5208 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,FALSE, &errorCode);
5209 if(U_FAILURE(errorCode)) {
5210 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5211 }
5212 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5213 {
5214 log_err("Unexpected pointer move in 0 byte source request \n");
5215 }
5216 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5217 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5218 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5219 {
5220 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5221 }
5222 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5223 {
5224 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5225 }
5226 errorCode = U_ZERO_ERROR;
5227
5228 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5229
5230 pUIn = pszUnicode;
5231 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,FALSE, &errorCode);
5232 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5233 {
5234 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5235 }
5236
5237 errorCode = U_ZERO_ERROR;
5238
5239 pLIn = (const char *)pszLMBCS;
5240 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,FALSE, &errorCode);
5241 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5242 {
5243 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5244 }
5245
5246 /* unpaired or chopped LMBCS surrogates */
5247
5248 /* OK high surrogate, Low surrogate is chopped */
5249 LIn [0] = (char)0x14;
5250 LIn [1] = (char)0xD8;
5251 LIn [2] = (char)0x01;
5252 LIn [3] = (char)0x14;
5253 LIn [4] = (char)0xDC;
5254 pLIn = LIn;
5255 errorCode = U_ZERO_ERROR;
5256 pUOut = UOut;
5257
5258 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5259 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5260 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5261 {
5262 log_err("Unexpected results on chopped low surrogate\n");
5263 }
5264
5265 /* chopped at surrogate boundary */
5266 LIn [0] = (char)0x14;
5267 LIn [1] = (char)0xD8;
5268 LIn [2] = (char)0x01;
5269 pLIn = LIn;
5270 errorCode = U_ZERO_ERROR;
5271 pUOut = UOut;
5272
5273 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,TRUE, &errorCode);
5274 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5275 {
5276 log_err("Unexpected results on chopped at surrogate boundary \n");
5277 }
5278
5279 /* unpaired surrogate plus valid Unichar */
5280 LIn [0] = (char)0x14;
5281 LIn [1] = (char)0xD8;
5282 LIn [2] = (char)0x01;
5283 LIn [3] = (char)0x14;
5284 LIn [4] = (char)0xC9;
5285 LIn [5] = (char)0xD0;
5286 pLIn = LIn;
5287 errorCode = U_ZERO_ERROR;
5288 pUOut = UOut;
5289
5290 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,TRUE, &errorCode);
5291 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5292 {
5293 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5294 }
5295
5296 /* unpaired surrogate plus chopped Unichar */
5297 LIn [0] = (char)0x14;
5298 LIn [1] = (char)0xD8;
5299 LIn [2] = (char)0x01;
5300 LIn [3] = (char)0x14;
5301 LIn [4] = (char)0xC9;
5302
5303 pLIn = LIn;
5304 errorCode = U_ZERO_ERROR;
5305 pUOut = UOut;
5306
5307 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5308 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5309 {
5310 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5311 }
5312
5313 /* unpaired surrogate plus valid non-Unichar */
5314 LIn [0] = (char)0x14;
5315 LIn [1] = (char)0xD8;
5316 LIn [2] = (char)0x01;
5317 LIn [3] = (char)0x0F;
5318 LIn [4] = (char)0x3B;
5319
5320 pLIn = LIn;
5321 errorCode = U_ZERO_ERROR;
5322 pUOut = UOut;
5323
5324 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,TRUE, &errorCode);
5325 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5326 {
5327 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5328 }
5329
5330 /* unpaired surrogate plus chopped non-Unichar */
5331 LIn [0] = (char)0x14;
5332 LIn [1] = (char)0xD8;
5333 LIn [2] = (char)0x01;
5334 LIn [3] = (char)0x0F;
5335
5336 pLIn = LIn;
5337 errorCode = U_ZERO_ERROR;
5338 pUOut = UOut;
5339
5340 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,TRUE, &errorCode);
5341
5342 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5343 {
5344 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5345 }
5346 }
5347 }
5348 ucnv_close(cnv); /* final cleanup */
5349 }
5350
5351
TestJitterbug255()5352 static void TestJitterbug255()
5353 {
5354 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5355 const char *testBuffer = (const char *)testBytes;
5356 const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5357 UErrorCode status = U_ZERO_ERROR;
5358 /*UChar32 result;*/
5359 UConverter *cnv = 0;
5360
5361 cnv = ucnv_open("shift-jis", &status);
5362 if (U_FAILURE(status) || cnv == 0) {
5363 log_data_err("Failed to open the converter for SJIS.\n");
5364 return;
5365 }
5366 while (testBuffer != testEnd)
5367 {
5368 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5369 if (U_FAILURE(status))
5370 {
5371 log_err("Failed to convert the next UChar for SJIS.\n");
5372 break;
5373 }
5374 }
5375 ucnv_close(cnv);
5376 }
5377
TestEBCDICUS4XML()5378 static void TestEBCDICUS4XML()
5379 {
5380 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5381 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5382 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5383 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5384 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5385 UChar *unicodes = unicodes_x;
5386 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5387 char *target = target_x;
5388 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5389 UErrorCode status = U_ZERO_ERROR;
5390 UConverter *cnv = 0;
5391
5392 cnv = ucnv_open("ebcdic-xml-us", &status);
5393 if (U_FAILURE(status) || cnv == 0) {
5394 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5395 return;
5396 }
5397 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, TRUE, &status);
5398 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5399 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5400 u_errorName(status));
5401 printUSeqErr(unicodes_x, 3);
5402 printUSeqErr(toUnicodeMaps, 3);
5403 }
5404 status = U_ZERO_ERROR;
5405 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, TRUE, &status);
5406 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5407 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5408 u_errorName(status));
5409 printSeqErr((const unsigned char*)target_x, 3);
5410 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5411 }
5412 ucnv_close(cnv);
5413 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5415
5416 #if !UCONFIG_NO_COLLATION
5417
TestJitterbug981()5418 static void TestJitterbug981(){
5419 const UChar* rules;
5420 int32_t rules_length, target_cap, bytes_needed, buff_size;
5421 UErrorCode status = U_ZERO_ERROR;
5422 UConverter *utf8cnv;
5423 UCollator* myCollator;
5424 char *buff;
5425 int numNeeded=0;
5426 utf8cnv = ucnv_open ("utf8", &status);
5427 if(U_FAILURE(status)){
5428 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5429 return;
5430 }
5431 myCollator = ucol_open("zh", &status);
5432 if(U_FAILURE(status)){
5433 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5434 ucnv_close(utf8cnv);
5435 return;
5436 }
5437
5438 rules = ucol_getRules(myCollator, &rules_length);
5439 if(rules_length == 0) {
5440 log_data_err("missing zh tailoring rule string\n");
5441 ucol_close(myCollator);
5442 ucnv_close(utf8cnv);
5443 return;
5444 }
5445 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5446 buff = malloc(buff_size);
5447
5448 target_cap = 0;
5449 do {
5450 ucnv_reset(utf8cnv);
5451 status = U_ZERO_ERROR;
5452 if(target_cap >= buff_size) {
5453 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5454 break;
5455 }
5456 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5457 rules, rules_length, &status);
5458 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5459 if(numNeeded!=0 && numNeeded!= bytes_needed){
5460 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5461 break;
5462 }
5463 numNeeded = bytes_needed;
5464 } while (status == U_BUFFER_OVERFLOW_ERROR);
5465 ucol_close(myCollator);
5466 ucnv_close(utf8cnv);
5467 free(buff);
5468 }
5469
5470 #endif
5471
5472 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug1293()5473 static void TestJitterbug1293(){
5474 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5475 char target[256];
5476 UErrorCode status = U_ZERO_ERROR;
5477 UConverter* conv=NULL;
5478 int32_t target_cap, bytes_needed, numNeeded = 0;
5479 conv = ucnv_open("shift-jis",&status);
5480 if(U_FAILURE(status)){
5481 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5482 return;
5483 }
5484
5485 do{
5486 target_cap =0;
5487 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5488 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5489 if(numNeeded!=0 && numNeeded!= bytes_needed){
5490 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5491 }
5492 numNeeded = bytes_needed;
5493 } while (status == U_BUFFER_OVERFLOW_ERROR);
5494 if(U_FAILURE(status)){
5495 log_err("An error occured in ucnv_fromUChars. Error: %s", u_errorName(status));
5496 return;
5497 }
5498 ucnv_close(conv);
5499 }
5500 #endif
5501
TestJB5275_1()5502 static void TestJB5275_1(){
5503
5504 static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5505 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5506 /* Switch script: */
5507 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5508 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5509 "\xEF\x40\x3B\xB3\x0A";
5510 static const UChar expected[] ={
5511 0x003b, 0x0a15, 0x000a, /* Easy characters */
5512 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5513 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5514 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5515 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5516 };
5517
5518 UErrorCode status = U_ZERO_ERROR;
5519 UConverter* conv = ucnv_open("iscii-gur", &status);
5520 UChar dest[100] = {'\0'};
5521 UChar* target = dest;
5522 UChar* targetLimit = dest+100;
5523 const char* source = data;
5524 const char* sourceLimit = data+strlen(data);
5525 const UChar* exp = expected;
5526
5527 if (U_FAILURE(status)) {
5528 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5529 return;
5530 }
5531
5532 log_verbose("Testing switching back to default script when new line is encountered.\n");
5533 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5534 if(U_FAILURE(status)){
5535 log_err("conversion failed: %s \n", u_errorName(status));
5536 }
5537 targetLimit = target;
5538 target = dest;
5539 printUSeq(target, (int)(targetLimit-target));
5540 while(target<targetLimit){
5541 if(*exp!=*target){
5542 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5543 }
5544 target++;
5545 exp++;
5546 }
5547 ucnv_close(conv);
5548 }
5549
TestJB5275()5550 static void TestJB5275(){
5551 static const char* data =
5552 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5553 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5554 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5555 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5556 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5557 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5558 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5559 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5560 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5561 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5562 static const UChar expected[] ={
5563 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5564 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5565 0x0038, 0x0C95, 0x000A, /* Kannada test */
5566 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5567 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5568 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5569 };
5570
5571 UErrorCode status = U_ZERO_ERROR;
5572 UConverter* conv = ucnv_open("iscii", &status);
5573 UChar dest[100] = {'\0'};
5574 UChar* target = dest;
5575 UChar* targetLimit = dest+100;
5576 const char* source = data;
5577 const char* sourceLimit = data+strlen(data);
5578 const UChar* exp = expected;
5579 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, TRUE, &status);
5580 if(U_FAILURE(status)){
5581 log_data_err("conversion failed: %s \n", u_errorName(status));
5582 }
5583 targetLimit = target;
5584 target = dest;
5585
5586 printUSeq(target, (int)(targetLimit-target));
5587
5588 while(target<targetLimit){
5589 if(*exp!=*target){
5590 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5591 }
5592 target++;
5593 exp++;
5594 }
5595 ucnv_close(conv);
5596 }
5597
5598 static void
TestIsFixedWidth()5599 TestIsFixedWidth() {
5600 UErrorCode status = U_ZERO_ERROR;
5601 UConverter *cnv = NULL;
5602 int32_t i;
5603
5604 const char *fixedWidth[] = {
5605 "US-ASCII",
5606 "UTF32",
5607 "ibm-5478_P100-1995"
5608 };
5609
5610 const char *notFixedWidth[] = {
5611 "GB18030",
5612 "UTF8",
5613 "windows-949-2000",
5614 "UTF16"
5615 };
5616
5617 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5618 cnv = ucnv_open(fixedWidth[i], &status);
5619 if (cnv == NULL || U_FAILURE(status)) {
5620 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5621 continue;
5622 }
5623
5624 if (!ucnv_isFixedWidth(cnv, &status)) {
5625 log_err("%s is a fixedWidth converter but returned FALSE.\n", fixedWidth[i]);
5626 }
5627 ucnv_close(cnv);
5628 }
5629
5630 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5631 cnv = ucnv_open(notFixedWidth[i], &status);
5632 if (cnv == NULL || U_FAILURE(status)) {
5633 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5634 continue;
5635 }
5636
5637 if (ucnv_isFixedWidth(cnv, &status)) {
5638 log_err("%s is NOT a fixedWidth converter but returned TRUE.\n", notFixedWidth[i]);
5639 }
5640 ucnv_close(cnv);
5641 }
5642 }
5643