1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /********************************************************************
4 * COPYRIGHT:
5 * Copyright (c) 1997-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ********************************************************************/
8 /*******************************************************************************
9 *
10 * File nucnvtst.c
11 *
12 * Modification History:
13 * Name Description
14 * Steven R. Loomis 7/8/1999 Adding input buffer test
15 ********************************************************************************
16 */
17 #include <stdbool.h>
18 #include <stdio.h>
19 #include "cstring.h"
20 #include "unicode/uloc.h"
21 #include "unicode/ucnv.h"
22 #include "unicode/ucnv_err.h"
23 #include "unicode/ucnv_cb.h"
24 #include "cintltst.h"
25 #include "unicode/utypes.h"
26 #include "unicode/ustring.h"
27 #include "unicode/ucol.h"
28 #include "unicode/utf16.h"
29 #include "cmemory.h"
30 #include "nucnvtst.h"
31
32 static void TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message);
33 static void TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message);
34 #if !UCONFIG_NO_COLLATION
35 static void TestJitterbug981(void);
36 #endif
37 #if !UCONFIG_NO_LEGACY_CONVERSION
38 static void TestJitterbug1293(void);
39 #endif
40 static void TestNewConvertWithBufferSizes(int32_t osize, int32_t isize) ;
41 static void TestConverterTypesAndStarters(void);
42 static void TestAmbiguous(void);
43 static void TestSignatureDetection(void);
44 static void TestUTF7(void);
45 static void TestIMAP(void);
46 static void TestUTF8(void);
47 static void TestCESU8(void);
48 static void TestUTF16(void);
49 static void TestUTF16BE(void);
50 static void TestUTF16LE(void);
51 static void TestUTF32(void);
52 static void TestUTF32BE(void);
53 static void TestUTF32LE(void);
54 static void TestLATIN1(void);
55
56 #if !UCONFIG_NO_LEGACY_CONVERSION
57 static void TestSBCS(void);
58 static void TestDBCS(void);
59 static void TestMBCS(void);
60 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
61 static void TestICCRunout(void);
62 #endif
63
64 #ifdef U_ENABLE_GENERIC_ISO_2022
65 static void TestISO_2022(void);
66 #endif
67
68 static void TestISO_2022_JP(void);
69 static void TestISO_2022_JP_1(void);
70 static void TestISO_2022_JP_2(void);
71 static void TestISO_2022_KR(void);
72 static void TestISO_2022_KR_1(void);
73 static void TestISO_2022_CN(void);
74 #if 0
75 /*
76 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
77 */
78 static void TestISO_2022_CN_EXT(void);
79 #endif
80 static void TestJIS(void);
81 static void TestHZ(void);
82 #endif
83
84 static void TestSCSU(void);
85
86 #if !UCONFIG_NO_LEGACY_CONVERSION
87 static void TestEBCDIC_STATEFUL(void);
88 static void TestGB18030(void);
89 static void TestLMBCS(void);
90 static void TestJitterbug255(void);
91 static void TestEBCDICUS4XML(void);
92 #if 0
93 /*
94 * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
95 */
96 static void TestJitterbug915(void);
97 #endif
98 static void TestISCII(void);
99
100 static void TestCoverageMBCS(void);
101 static void TestJitterbug2346(void);
102 static void TestJitterbug2411(void);
103 static void TestJB5275(void);
104 static void TestJB5275_1(void);
105 static void TestJitterbug6175(void);
106
107 static void TestIsFixedWidth(void);
108 #endif
109
110 static void TestInBufSizes(void);
111
112 static void TestRoundTrippingAllUTF(void);
113 static void TestConv(const uint16_t in[],
114 int len,
115 const char* conv,
116 const char* lang,
117 char byteArr[],
118 int byteArrLen);
119
120 /* open a converter, using test data if it begins with '@' */
121 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err);
122
123
124 #define NEW_MAX_BUFFER 999
125
126 static int32_t gInBufferSize = NEW_MAX_BUFFER;
127 static int32_t gOutBufferSize = NEW_MAX_BUFFER;
128 static char gNuConvTestName[1024];
129
/*
 * Yields the smaller of x and y.
 * Every use of an argument and the whole expansion are parenthesized so that
 * operands containing low-precedence operators (e.g. `a | b`) compare correctly.
 * Each argument may be evaluated twice, so do not pass expressions with side effects.
 */
#define nct_min(x,y) (((x) < (y)) ? (x) : (y))
131
my_ucnv_open(const char * cnv,UErrorCode * err)132 static UConverter *my_ucnv_open(const char *cnv, UErrorCode *err)
133 {
134 if(cnv && cnv[0] == '@') {
135 return ucnv_openPackage(loadTestData(err), cnv+1, err);
136 } else {
137 return ucnv_open(cnv, err);
138 }
139 }
140
/* Write the first len bytes of a to the verbose log as "{0x.. 0x.. }". */
static void printSeq(const unsigned char* a, int len)
{
    int idx;

    log_verbose("{");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%02x ", a[idx]);
    }
    log_verbose("}\n");
}
149
/* Write the first len code units of a to the verbose log as "{U+0x.... 0x.... }". */
static void printUSeq(const UChar* a, int len)
{
    int idx;

    log_verbose("{U+");
    for (idx = 0; idx < len; ++idx) {
        log_verbose("0x%04x ", a[idx]);
    }
    log_verbose("}\n");
}
157
/* Write the first len bytes of a to stderr as "{0x.. 0x.. }". */
static void printSeqErr(const unsigned char* a, int len)
{
    int idx;

    fprintf(stderr, "{");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%02x ", a[idx]);
    }
    fprintf(stderr, "}\n");
}
166
/* Write the first len code units of a to stderr as "{U+0x.... 0x.... }". */
static void printUSeqErr(const UChar* a, int len)
{
    int idx;

    fprintf(stderr, "{U+");
    for (idx = 0; idx < len; ++idx) {
        fprintf(stderr, "0x%04x ", a[idx]);
    }
    fprintf(stderr,"}\n");
}
175
176 static void
TestNextUChar(UConverter * cnv,const char * source,const char * limit,const int32_t results[],const char * message)177 TestNextUChar(UConverter* cnv, const char* source, const char* limit, const int32_t results[], const char* message)
178 {
179 const char* s0;
180 const char* s=(char*)source;
181 const int32_t *r=results;
182 UErrorCode errorCode=U_ZERO_ERROR;
183 UChar32 c;
184
185 while(s<limit) {
186 s0=s;
187 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
188 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
189 break; /* no more significant input */
190 } else if(U_FAILURE(errorCode)) {
191 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
192 break;
193 } else if(
194 /* test the expected number of input bytes only if >=0 */
195 (*r>=0 && (int32_t)(s-s0)!=*r) ||
196 c!=*(r+1)
197 ) {
198 log_err("%s ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
199 message, c, (s-s0), *(r+1), *r);
200 break;
201 }
202 r+=2;
203 }
204 }
205
206 static void
TestNextUCharError(UConverter * cnv,const char * source,const char * limit,UErrorCode expected,const char * message)207 TestNextUCharError(UConverter* cnv, const char* source, const char* limit, UErrorCode expected, const char* message)
208 {
209 const char* s=(char*)source;
210 UErrorCode errorCode=U_ZERO_ERROR;
211 uint32_t c;
212 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
213 if(errorCode != expected){
214 log_err("FAIL: Expected:%s when %s-----Got:%s\n", myErrorName(expected), message, myErrorName(errorCode));
215 }
216 if(c != 0xFFFD && c != 0xffff){
217 log_err("FAIL: Expected return value of 0xfffd or 0xffff when %s-----Got 0x%lx\n", message, c);
218 }
219
220 }
221
TestInBufSizes(void)222 static void TestInBufSizes(void)
223 {
224 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,1);
225 #if 1
226 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,2);
227 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,3);
228 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,4);
229 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,5);
230 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,6);
231 TestNewConvertWithBufferSizes(1,1);
232 TestNewConvertWithBufferSizes(2,3);
233 TestNewConvertWithBufferSizes(3,2);
234 #endif
235 }
236
TestOutBufSizes(void)237 static void TestOutBufSizes(void)
238 {
239 #if 1
240 TestNewConvertWithBufferSizes(NEW_MAX_BUFFER,NEW_MAX_BUFFER);
241 TestNewConvertWithBufferSizes(1,NEW_MAX_BUFFER);
242 TestNewConvertWithBufferSizes(2,NEW_MAX_BUFFER);
243 TestNewConvertWithBufferSizes(3,NEW_MAX_BUFFER);
244 TestNewConvertWithBufferSizes(4,NEW_MAX_BUFFER);
245 TestNewConvertWithBufferSizes(5,NEW_MAX_BUFFER);
246
247 #endif
248 }
249
250
/*
 * Registers one test function under the path "tsconv/nucnvtst/<function name>".
 * The path is built by string-literal concatenation, so it is identical to
 * spelling the full literal out by hand. Expects a variable named `root` in scope.
 */
#define NUCNVTST_REGISTER(testFn) addTest(root, &testFn, "tsconv/nucnvtst/" #testFn)

/*
 * Adds every converter test of this file to the test tree rooted at *root.
 * Which tests are registered depends on the UCONFIG_NO_FILE_IO,
 * UCONFIG_NO_LEGACY_CONVERSION, UCONFIG_NO_COLLATION and
 * U_ENABLE_GENERIC_ISO_2022 build settings.
 */
void addTestNewConvert(TestNode** root)
{
#if !UCONFIG_NO_FILE_IO
    NUCNVTST_REGISTER(TestInBufSizes);
    NUCNVTST_REGISTER(TestOutBufSizes);
#endif
    NUCNVTST_REGISTER(TestConverterTypesAndStarters);
    NUCNVTST_REGISTER(TestAmbiguous);
    NUCNVTST_REGISTER(TestSignatureDetection);
    NUCNVTST_REGISTER(TestUTF7);
    NUCNVTST_REGISTER(TestIMAP);
    NUCNVTST_REGISTER(TestUTF8);

    /* test ucnv_getNextUChar() for charsets that encode single surrogates with complete byte sequences */
    NUCNVTST_REGISTER(TestCESU8);
    NUCNVTST_REGISTER(TestUTF16);
    NUCNVTST_REGISTER(TestUTF16BE);
    NUCNVTST_REGISTER(TestUTF16LE);
    NUCNVTST_REGISTER(TestUTF32);
    NUCNVTST_REGISTER(TestUTF32BE);
    NUCNVTST_REGISTER(TestUTF32LE);

#if !UCONFIG_NO_LEGACY_CONVERSION
    NUCNVTST_REGISTER(TestLMBCS);
#endif

    NUCNVTST_REGISTER(TestLATIN1);

#if !UCONFIG_NO_LEGACY_CONVERSION
    NUCNVTST_REGISTER(TestSBCS);
#if !UCONFIG_NO_FILE_IO
    NUCNVTST_REGISTER(TestDBCS);
    NUCNVTST_REGISTER(TestICCRunout);
#endif
    NUCNVTST_REGISTER(TestMBCS);

#ifdef U_ENABLE_GENERIC_ISO_2022
    NUCNVTST_REGISTER(TestISO_2022);
#endif

    NUCNVTST_REGISTER(TestISO_2022_JP);
    NUCNVTST_REGISTER(TestJIS);
    NUCNVTST_REGISTER(TestISO_2022_JP_1);
    // android-changed (ISO_2022_JP_2 is not available) -- addTest(root, &TestISO_2022_JP_2, "tsconv/nucnvtst/TestISO_2022_JP_2");
    NUCNVTST_REGISTER(TestISO_2022_KR);
    NUCNVTST_REGISTER(TestISO_2022_KR_1);
    // android-changed (no ISO-2022-CN) -- addTest(root, &TestISO_2022_CN, "tsconv/nucnvtst/TestISO_2022_CN");
    /*
     * ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
    addTest(root, &TestISO_2022_CN_EXT, "tsconv/nucnvtst/TestISO_2022_CN_EXT");
    addTest(root, &TestJitterbug915, "tsconv/nucnvtst/TestJitterbug915");
     */
    NUCNVTST_REGISTER(TestHZ);
#endif

    NUCNVTST_REGISTER(TestSCSU);

#if !UCONFIG_NO_LEGACY_CONVERSION
    NUCNVTST_REGISTER(TestEBCDIC_STATEFUL);
    NUCNVTST_REGISTER(TestGB18030);
    NUCNVTST_REGISTER(TestJitterbug255);
    NUCNVTST_REGISTER(TestEBCDICUS4XML);
    NUCNVTST_REGISTER(TestISCII);
    NUCNVTST_REGISTER(TestJB5275);
    NUCNVTST_REGISTER(TestJB5275_1);
#if !UCONFIG_NO_COLLATION
    NUCNVTST_REGISTER(TestJitterbug981);
#endif

    NUCNVTST_REGISTER(TestJitterbug1293);
#endif


#if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
    NUCNVTST_REGISTER(TestCoverageMBCS);
#endif

    NUCNVTST_REGISTER(TestRoundTrippingAllUTF);

#if !UCONFIG_NO_LEGACY_CONVERSION
    NUCNVTST_REGISTER(TestJitterbug2346);
    NUCNVTST_REGISTER(TestJitterbug2411);
    // android-removed (no full ISO2022 CJK tables) -- addTest(root, &TestJitterbug6175, "tsconv/nucnvtst/TestJitterbug6175");
    NUCNVTST_REGISTER(TestIsFixedWidth);
#endif
}

#undef NUCNVTST_REGISTER
337
338
339 /* Note that this test already makes use of statics, so it's not really
340 multithread safe.
341 This convenience function lets us make the error messages actually useful.
342 */
343
setNuConvTestName(const char * codepage,const char * direction)344 static void setNuConvTestName(const char *codepage, const char *direction)
345 {
346 snprintf(gNuConvTestName, sizeof(gNuConvTestName), "[Testing %s %s Unicode, InputBufSiz=%d, OutputBufSiz=%d]",
347 codepage,
348 direction,
349 (int)gInBufferSize,
350 (int)gOutBufferSize);
351 }
352
/* Outcome reported by the testConvertFromU() / testConvertToU() helpers. */
typedef enum
{
    /* The conversion produced the expected result. */
    TC_OK = 0,
    /* The result did not match; an error was printed. */
    TC_MISMATCH = 1,
    /* The test itself failed; the error was already printed, so do not print another. */
    TC_FAIL = 2
} ETestConvertResult;
359
360 /* Note: This function uses global variables and it will not do offset
361 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertFromU(const UChar * source,int sourceLen,const uint8_t * expect,int expectLen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)362 static ETestConvertResult testConvertFromU( const UChar *source, int sourceLen, const uint8_t *expect, int expectLen,
363 const char *codepage, const int32_t *expectOffsets , UBool useFallback)
364 {
365 UErrorCode status = U_ZERO_ERROR;
366 UConverter *conv = 0;
367 char junkout[NEW_MAX_BUFFER]; /* FIX */
368 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
369 char *p;
370 const UChar *src;
371 char *end;
372 char *targ;
373 int32_t *offs;
374 int i;
375 int32_t realBufferSize;
376 char *realBufferEnd;
377 const UChar *realSourceEnd;
378 const UChar *sourceLimit;
379 UBool checkOffsets = true;
380 UBool doFlush;
381
382 for(i=0;i<NEW_MAX_BUFFER;i++)
383 junkout[i] = (char)0xF0;
384 for(i=0;i<NEW_MAX_BUFFER;i++)
385 junokout[i] = 0xFF;
386
387 setNuConvTestName(codepage, "FROM");
388
389 log_verbose("\n========= %s\n", gNuConvTestName);
390
391 conv = my_ucnv_open(codepage, &status);
392
393 if(U_FAILURE(status))
394 {
395 log_data_err("Couldn't open converter %s\n",codepage);
396 return TC_FAIL;
397 }
398 if(useFallback){
399 ucnv_setFallback(conv,useFallback);
400 }
401
402 log_verbose("Converter opened..\n");
403
404 src = source;
405 targ = junkout;
406 offs = junokout;
407
408 realBufferSize = UPRV_LENGTHOF(junkout);
409 realBufferEnd = junkout + realBufferSize;
410 realSourceEnd = source + sourceLen;
411
412 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
413 checkOffsets = false;
414
415 do
416 {
417 end = nct_min(targ + gOutBufferSize, realBufferEnd);
418 sourceLimit = nct_min(src + gInBufferSize, realSourceEnd);
419
420 doFlush = (UBool)(sourceLimit == realSourceEnd);
421
422 if(targ == realBufferEnd) {
423 log_err("Error, overflowed the real buffer while about to call fromUnicode! targ=%08lx %s", targ, gNuConvTestName);
424 return TC_FAIL;
425 }
426 log_verbose("calling fromUnicode @ SOURCE:%08lx to %08lx TARGET: %08lx to %08lx, flush=%s\n", src,sourceLimit, targ,end, doFlush?"true":"false");
427
428
429 status = U_ZERO_ERROR;
430
431 ucnv_fromUnicode (conv,
432 &targ,
433 end,
434 &src,
435 sourceLimit,
436 checkOffsets ? offs : NULL,
437 doFlush, /* flush if we're at the end of the input data */
438 &status);
439 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && sourceLimit < realSourceEnd) );
440
441 if(U_FAILURE(status)) {
442 log_err("Problem doing fromUnicode to %s, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
443 return TC_FAIL;
444 }
445
446 log_verbose("\nConversion done [%d uchars in -> %d chars out]. \nResult :",
447 sourceLen, targ-junkout);
448
449 if(getTestOption(VERBOSITY_OPTION))
450 {
451 char junk[9999];
452 char offset_str[9999];
453 char *ptr;
454
455 junk[0] = 0;
456 offset_str[0] = 0;
457 for(ptr = junkout;ptr<targ;ptr++) {
458 snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%02x, ", (int)(0xFF & *ptr));
459 snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%02x, ", (int)(0xFF & junokout[ptr-junkout]));
460 }
461
462 log_verbose(junk);
463 printSeq((const uint8_t *)expect, expectLen);
464 if ( checkOffsets ) {
465 log_verbose("\nOffsets:");
466 log_verbose(offset_str);
467 }
468 log_verbose("\n");
469 }
470 ucnv_close(conv);
471
472 if(expectLen != targ-junkout) {
473 log_err("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
474 log_verbose("Expected %d chars out, got %d %s\n", expectLen, targ-junkout, gNuConvTestName);
475 fprintf(stderr, "Got:\n");
476 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
477 fprintf(stderr, "Expected:\n");
478 printSeqErr((const unsigned char*)expect, expectLen);
479 return TC_MISMATCH;
480 }
481
482 if (checkOffsets && (expectOffsets != 0) ) {
483 log_verbose("comparing %d offsets..\n", targ-junkout);
484 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t) )){
485 log_err("did not get the expected offsets. %s\n", gNuConvTestName);
486 printSeqErr((const unsigned char*)junkout, (int32_t)(targ-junkout));
487 log_err("\n");
488 log_err("Got : ");
489 for(p=junkout;p<targ;p++) {
490 log_err("%d,", junokout[p-junkout]);
491 }
492 log_err("\n");
493 log_err("Expected: ");
494 for(i=0; i<(targ-junkout); i++) {
495 log_err("%d,", expectOffsets[i]);
496 }
497 log_err("\n");
498 }
499 }
500
501 log_verbose("comparing..\n");
502 if(!memcmp(junkout, expect, expectLen)) {
503 log_verbose("Matches!\n");
504 return TC_OK;
505 } else {
506 log_err("String does not match u->%s\n", gNuConvTestName);
507 printUSeqErr(source, sourceLen);
508 fprintf(stderr, "Got:\n");
509 printSeqErr((const unsigned char *)junkout, expectLen);
510 fprintf(stderr, "Expected:\n");
511 printSeqErr((const unsigned char *)expect, expectLen);
512
513 return TC_MISMATCH;
514 }
515 }
516
517 /* Note: This function uses global variables and it will not do offset
518 checking without gOutBufferSize and gInBufferSize set to NEW_MAX_BUFFER */
testConvertToU(const uint8_t * source,int sourcelen,const UChar * expect,int expectlen,const char * codepage,const int32_t * expectOffsets,UBool useFallback)519 static ETestConvertResult testConvertToU( const uint8_t *source, int sourcelen, const UChar *expect, int expectlen,
520 const char *codepage, const int32_t *expectOffsets, UBool useFallback)
521 {
522 UErrorCode status = U_ZERO_ERROR;
523 UConverter *conv = 0;
524 UChar junkout[NEW_MAX_BUFFER]; /* FIX */
525 int32_t junokout[NEW_MAX_BUFFER]; /* FIX */
526 const char *src;
527 const char *realSourceEnd;
528 const char *srcLimit;
529 UChar *p;
530 UChar *targ;
531 UChar *end;
532 int32_t *offs;
533 int i;
534 UBool checkOffsets = true;
535
536 int32_t realBufferSize;
537 UChar *realBufferEnd;
538
539
540 for(i=0;i<NEW_MAX_BUFFER;i++)
541 junkout[i] = 0xFFFE;
542
543 for(i=0;i<NEW_MAX_BUFFER;i++)
544 junokout[i] = -1;
545
546 setNuConvTestName(codepage, "TO");
547
548 log_verbose("\n========= %s\n", gNuConvTestName);
549
550 conv = my_ucnv_open(codepage, &status);
551
552 if(U_FAILURE(status))
553 {
554 log_data_err("Couldn't open converter %s\n",gNuConvTestName);
555 return TC_FAIL;
556 }
557 if(useFallback){
558 ucnv_setFallback(conv,useFallback);
559 }
560 log_verbose("Converter opened..\n");
561
562 src = (const char *)source;
563 targ = junkout;
564 offs = junokout;
565
566 realBufferSize = UPRV_LENGTHOF(junkout);
567 realBufferEnd = junkout + realBufferSize;
568 realSourceEnd = src + sourcelen;
569
570 if ( gOutBufferSize != realBufferSize || gInBufferSize != NEW_MAX_BUFFER )
571 checkOffsets = false;
572
573 do
574 {
575 end = nct_min( targ + gOutBufferSize, realBufferEnd);
576 srcLimit = nct_min(realSourceEnd, src + gInBufferSize);
577
578 if(targ == realBufferEnd)
579 {
580 log_err("Error, the end would overflow the real output buffer while about to call toUnicode! tarjet=%08lx %s",targ,gNuConvTestName);
581 return TC_FAIL;
582 }
583 log_verbose("calling toUnicode @ %08lx to %08lx\n", targ,end);
584
585 /* oldTarg = targ; */
586
587 status = U_ZERO_ERROR;
588
589 ucnv_toUnicode (conv,
590 &targ,
591 end,
592 &src,
593 srcLimit,
594 checkOffsets ? offs : NULL,
595 (UBool)(srcLimit == realSourceEnd), /* flush if we're at the end of the source data */
596 &status);
597
598 /* offs += (targ-oldTarg); */
599
600 } while ( (status == U_BUFFER_OVERFLOW_ERROR) || (U_SUCCESS(status) && (srcLimit < realSourceEnd)) ); /* while we just need another buffer */
601
602 if(U_FAILURE(status))
603 {
604 log_err("Problem doing %s toUnicode, errcode %s %s\n", codepage, myErrorName(status), gNuConvTestName);
605 return TC_FAIL;
606 }
607
608 log_verbose("\nConversion done. %d bytes -> %d chars.\nResult :",
609 sourcelen, targ-junkout);
610 if(getTestOption(VERBOSITY_OPTION))
611 {
612 char junk[9999];
613 char offset_str[9999];
614 UChar *ptr;
615
616 junk[0] = 0;
617 offset_str[0] = 0;
618
619 for(ptr = junkout;ptr<targ;ptr++)
620 {
621 snprintf(junk + strlen(junk), sizeof(junk)-strlen(junk), "0x%04x, ", (0xFFFF) & (unsigned int)*ptr);
622 snprintf(offset_str + strlen(offset_str), sizeof(offset_str)-strlen(offset_str), "0x%04x, ", (0xFFFF) & (unsigned int)junokout[ptr-junkout]);
623 }
624
625 log_verbose(junk);
626 printUSeq(expect, expectlen);
627 if ( checkOffsets )
628 {
629 log_verbose("\nOffsets:");
630 log_verbose(offset_str);
631 }
632 log_verbose("\n");
633 }
634 ucnv_close(conv);
635
636 log_verbose("comparing %d uchars (%d bytes)..\n",expectlen,expectlen*2);
637
638 if (checkOffsets && (expectOffsets != 0))
639 {
640 if(memcmp(junokout,expectOffsets,(targ-junkout) * sizeof(int32_t))){
641 log_err("did not get the expected offsets. %s\n",gNuConvTestName);
642 log_err("Got: ");
643 for(p=junkout;p<targ;p++) {
644 log_err("%d,", junokout[p-junkout]);
645 }
646 log_err("\n");
647 log_err("Expected: ");
648 for(i=0; i<(targ-junkout); i++) {
649 log_err("%d,", expectOffsets[i]);
650 }
651 log_err("\n");
652 log_err("output: ");
653 for(i=0; i<(targ-junkout); i++) {
654 log_err("%X,", junkout[i]);
655 }
656 log_err("\n");
657 log_err("input: ");
658 for(i=0; i<(src-(const char *)source); i++) {
659 log_err("%X,", (unsigned char)source[i]);
660 }
661 log_err("\n");
662 }
663 }
664
665 if(!memcmp(junkout, expect, expectlen*2))
666 {
667 log_verbose("Matches!\n");
668 return TC_OK;
669 }
670 else
671 {
672 log_err("String does not match. %s\n", gNuConvTestName);
673 log_verbose("String does not match. %s\n", gNuConvTestName);
674 printf("\nGot:");
675 printUSeqErr(junkout, expectlen);
676 printf("\nExpected:");
677 printUSeqErr(expect, expectlen);
678 return TC_MISMATCH;
679 }
680 }
681
682
TestNewConvertWithBufferSizes(int32_t outsize,int32_t insize)683 static void TestNewConvertWithBufferSizes(int32_t outsize, int32_t insize )
684 {
685 /** test chars #1 */
686 /* 1 2 3 1Han 2Han 3Han . */
687 static const UChar sampleText[] =
688 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xD840, 0xDC21 };
689 static const UChar sampleTextRoundTripUnmappable[] =
690 { 0x0031, 0x0032, 0x0033, 0x0000, 0x4e00, 0x4e8c, 0x4e09, 0x002E, 0xfffd };
691
692
693 static const uint8_t expectedUTF8[] =
694 { 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E, 0xf0, 0xa0, 0x80, 0xa1 };
695 static const int32_t toUTF8Offs[] =
696 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07, 0x08, 0x08, 0x08, 0x08 };
697 static const int32_t fmUTF8Offs[] =
698 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0007, 0x000a, 0x000d, 0x000e, 0x000e };
699
700 #ifdef U_ENABLE_GENERIC_ISO_2022
701 /* Same as UTF8, but with ^[%B preceding */
702 static const const uint8_t expectedISO2022[] =
703 { 0x1b, 0x25, 0x42, 0x31, 0x32, 0x33, 0x00, 0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0x2E };
704 static const int32_t toISO2022Offs[] =
705 { -1, -1, -1, 0x00, 0x01, 0x02, 0x03, 0x04, 0x04,
706 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x07 }; /* right? */
707 static const int32_t fmISO2022Offs[] =
708 { 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x000a, 0x000d, 0x0010 }; /* is this right? */
709 #endif
710
711 /* 1 2 3 0, <SO> h1 h2 h3 <SI> . EBCDIC_STATEFUL */
712 static const uint8_t expectedIBM930[] =
713 { 0xF1, 0xF2, 0xF3, 0x00, 0x0E, 0x45, 0x41, 0x45, 0x42, 0x45, 0x43, 0x0F, 0x4B, 0x0e, 0xfe, 0xfe, 0x0f };
714 static const int32_t toIBM930Offs[] =
715 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, -1 };
716 static const int32_t fmIBM930Offs[] =
717 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0005, 0x0007, 0x0009, 0x000c, 0x000e };
718
719 /* 1 2 3 0 h1 h2 h3 . MBCS*/
720 static const uint8_t expectedIBM943[] =
721 { 0x31, 0x32, 0x33, 0x00, 0x88, 0xea, 0x93, 0xf1, 0x8e, 0x4f, 0x2e, 0xfc, 0xfc };
722 static const int32_t toIBM943Offs [] =
723 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x08, 0x08 };
724 static const int32_t fmIBM943Offs[] =
725 { 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0006, 0x0008, 0x000a, 0x000b };
726
727 /* 1 2 3 0 h1 h2 h3 . DBCS*/
728 static const uint8_t expectedIBM9027[] =
729 { 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0x4c, 0x41, 0x4c, 0x48, 0x4c, 0x55, 0xfe, 0xfe, 0xfe, 0xfe };
730 static const int32_t toIBM9027Offs [] =
731 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08 };
732
733 /* 1 2 3 0 <?> <?> <?> . SBCS*/
734 static const uint8_t expectedIBM920[] =
735 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2e, 0x1a };
736 static const int32_t toIBM920Offs [] =
737 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
738
739 /* 1 2 3 0 <?> <?> <?> . SBCS*/
740 static const uint8_t expectedISO88593[] =
741 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
742 static const int32_t toISO88593Offs[] =
743 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
744
745 /* 1 2 3 0 <?> <?> <?> . <?> LATIN_1*/
746 static const uint8_t expectedLATIN1[] =
747 { 0x31, 0x32, 0x33, 0x00, 0x1a, 0x1a, 0x1a, 0x2E, 0x1a };
748 static const int32_t toLATIN1Offs[] =
749 { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 };
750
751
752 /* etc */
753 static const uint8_t expectedUTF16BE[] =
754 { 0x00, 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x4e, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x00, 0x2e, 0xd8, 0x40, 0xdc, 0x21 };
755 static const int32_t toUTF16BEOffs[]=
756 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
757 static const int32_t fmUTF16BEOffs[] =
758 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
759
760 static const uint8_t expectedUTF16LE[] =
761 { 0x31, 0x00, 0x32, 0x00, 0x33, 0x00, 0x00, 0x00, 0x00, 0x4e, 0x8c, 0x4e, 0x09, 0x4e, 0x2e, 0x00, 0x40, 0xd8, 0x21, 0xdc };
762 static const int32_t toUTF16LEOffs[]=
763 { 0x00, 0x00, 0x01, 0x01, 0x02, 0x02, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06, 0x07, 0x07, 0x08, 0x08, 0x08, 0x08 };
764 static const int32_t fmUTF16LEOffs[] =
765 { 0x0000, 0x0002, 0x0004, 0x0006, 0x0008, 0x000a, 0x000c, 0x000e, 0x0010, 0x0010 };
766
767 static const uint8_t expectedUTF32BE[] =
768 { 0x00, 0x00, 0x00, 0x31,
769 0x00, 0x00, 0x00, 0x32,
770 0x00, 0x00, 0x00, 0x33,
771 0x00, 0x00, 0x00, 0x00,
772 0x00, 0x00, 0x4e, 0x00,
773 0x00, 0x00, 0x4e, 0x8c,
774 0x00, 0x00, 0x4e, 0x09,
775 0x00, 0x00, 0x00, 0x2e,
776 0x00, 0x02, 0x00, 0x21 };
777 static const int32_t toUTF32BEOffs[]=
778 { 0x00, 0x00, 0x00, 0x00,
779 0x01, 0x01, 0x01, 0x01,
780 0x02, 0x02, 0x02, 0x02,
781 0x03, 0x03, 0x03, 0x03,
782 0x04, 0x04, 0x04, 0x04,
783 0x05, 0x05, 0x05, 0x05,
784 0x06, 0x06, 0x06, 0x06,
785 0x07, 0x07, 0x07, 0x07,
786 0x08, 0x08, 0x08, 0x08,
787 0x08, 0x08, 0x08, 0x08 };
788 static const int32_t fmUTF32BEOffs[] =
789 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
790
791 static const uint8_t expectedUTF32LE[] =
792 { 0x31, 0x00, 0x00, 0x00,
793 0x32, 0x00, 0x00, 0x00,
794 0x33, 0x00, 0x00, 0x00,
795 0x00, 0x00, 0x00, 0x00,
796 0x00, 0x4e, 0x00, 0x00,
797 0x8c, 0x4e, 0x00, 0x00,
798 0x09, 0x4e, 0x00, 0x00,
799 0x2e, 0x00, 0x00, 0x00,
800 0x21, 0x00, 0x02, 0x00 };
801 static const int32_t toUTF32LEOffs[]=
802 { 0x00, 0x00, 0x00, 0x00,
803 0x01, 0x01, 0x01, 0x01,
804 0x02, 0x02, 0x02, 0x02,
805 0x03, 0x03, 0x03, 0x03,
806 0x04, 0x04, 0x04, 0x04,
807 0x05, 0x05, 0x05, 0x05,
808 0x06, 0x06, 0x06, 0x06,
809 0x07, 0x07, 0x07, 0x07,
810 0x08, 0x08, 0x08, 0x08,
811 0x08, 0x08, 0x08, 0x08 };
812 static const int32_t fmUTF32LEOffs[] =
813 { 0x0000, 0x0004, 0x0008, 0x000c, 0x0010, 0x0014, 0x0018, 0x001c, 0x0020, 0x0020 };
814
815
816
817
818 /** Test chars #2 **/
819
820 /* Sahha [health], slashed h's */
821 static const UChar malteseUChars[] = { 0x0053, 0x0061, 0x0127, 0x0127, 0x0061 };
822 static const uint8_t expectedMaltese913[] = { 0x53, 0x61, 0xB1, 0xB1, 0x61 };
823
824 /* LMBCS */
825 static const UChar LMBCSUChars[] = { 0x0027, 0x010A, 0x0000, 0x0127, 0x2666, 0x0220 };
826 static const uint8_t expectedLMBCS[] = { 0x27, 0x06, 0x04, 0x00, 0x01, 0x73, 0x01, 0x04, 0x14, 0x02, 0x20 };
827 static const int32_t toLMBCSOffs[] = { 0x00, 0x01, 0x01, 0x02, 0x03, 0x03, 0x04, 0x04 , 0x05, 0x05, 0x05 };
828 static const int32_t fmLMBCSOffs[] = { 0x0000, 0x0001, 0x0003, 0x0004, 0x0006, 0x0008};
829 /*********************************** START OF CODE finally *************/
830
831 gInBufferSize = insize;
832 gOutBufferSize = outsize;
833
834 log_verbose("\n\n\nTesting conversions with InputBufferSize = %d, OutputBufferSize = %d\n", gInBufferSize, gOutBufferSize);
835
836
837 /*UTF-8*/
838 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
839 expectedUTF8, sizeof(expectedUTF8), "UTF8", toUTF8Offs,false );
840
841 log_verbose("Test surrogate behaviour for UTF8\n");
842 {
843 static const UChar testinput[]={ 0x20ac, 0xd801, 0xdc01, 0xdc01 };
844 static const uint8_t expectedUTF8test2[]= { 0xe2, 0x82, 0xac,
845 0xf0, 0x90, 0x90, 0x81,
846 0xef, 0xbf, 0xbd
847 };
848 static const int32_t offsets[]={ 0, 0, 0, 1, 1, 1, 1, 3, 3, 3 };
849 testConvertFromU(testinput, UPRV_LENGTHOF(testinput),
850 expectedUTF8test2, sizeof(expectedUTF8test2), "UTF8", offsets,false );
851
852
853 }
854
855 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
856 /*ISO-2022*/
857 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
858 expectedISO2022, sizeof(expectedISO2022), "ISO_2022", toISO2022Offs,false );
859 #endif
860
861 /*UTF16 LE*/
862 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
863 expectedUTF16LE, sizeof(expectedUTF16LE), "utf-16le", toUTF16LEOffs,false );
864 /*UTF16 BE*/
865 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
866 expectedUTF16BE, sizeof(expectedUTF16BE), "utf-16be", toUTF16BEOffs,false );
867 /*UTF32 LE*/
868 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
869 expectedUTF32LE, sizeof(expectedUTF32LE), "utf-32le", toUTF32LEOffs,false );
870 /*UTF32 BE*/
871 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
872 expectedUTF32BE, sizeof(expectedUTF32BE), "utf-32be", toUTF32BEOffs,false );
873
874 /*LATIN_1*/
875 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
876 expectedLATIN1, sizeof(expectedLATIN1), "LATIN_1", toLATIN1Offs,false );
877
878 #if !UCONFIG_NO_LEGACY_CONVERSION
879 /*EBCDIC_STATEFUL*/
880 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
881 expectedIBM930, sizeof(expectedIBM930), "ibm-930", toIBM930Offs,false );
882
883 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
884 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,false );
885
886 /*MBCS*/
887
888 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
889 expectedIBM943, sizeof(expectedIBM943), "ibm-943", toIBM943Offs,false );
890 /*DBCS*/
891 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
892 expectedIBM9027, sizeof(expectedIBM9027), "@ibm9027", toIBM9027Offs,false );
893 /*SBCS*/
894 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
895 expectedIBM920, sizeof(expectedIBM920), "ibm-920", toIBM920Offs,false );
896 /*SBCS*/
897 testConvertFromU(sampleText, UPRV_LENGTHOF(sampleText),
898 expectedISO88593, sizeof(expectedISO88593), "iso-8859-3", toISO88593Offs,false );
899 #endif
900
901
902 /****/
903
904 /*UTF-8*/
905 testConvertToU(expectedUTF8, sizeof(expectedUTF8),
906 sampleText, UPRV_LENGTHOF(sampleText), "utf8", fmUTF8Offs,false);
907 #if !UCONFIG_NO_LEGACY_CONVERSION && defined(U_ENABLE_GENERIC_ISO_2022)
908 /*ISO-2022*/
909 testConvertToU(expectedISO2022, sizeof(expectedISO2022),
910 sampleText, UPRV_LENGTHOF(sampleText), "ISO_2022", fmISO2022Offs,false);
911 #endif
912
913 /*UTF16 LE*/
914 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
915 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,false);
916 /*UTF16 BE*/
917 testConvertToU(expectedUTF16BE, sizeof(expectedUTF16BE),
918 sampleText, UPRV_LENGTHOF(sampleText), "utf-16be", fmUTF16BEOffs,false);
919 /*UTF32 LE*/
920 testConvertToU(expectedUTF32LE, sizeof(expectedUTF32LE),
921 sampleText, UPRV_LENGTHOF(sampleText), "utf-32le", fmUTF32LEOffs,false);
922 /*UTF32 BE*/
923 testConvertToU(expectedUTF32BE, sizeof(expectedUTF32BE),
924 sampleText, UPRV_LENGTHOF(sampleText), "utf-32be", fmUTF32BEOffs,false);
925
926 #if !UCONFIG_NO_LEGACY_CONVERSION
927 /*EBCDIC_STATEFUL*/
928 testConvertToU(expectedIBM930, sizeof(expectedIBM930), sampleTextRoundTripUnmappable,
929 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-930", fmIBM930Offs,false);
930 /*MBCS*/
931 testConvertToU(expectedIBM943, sizeof(expectedIBM943),sampleTextRoundTripUnmappable,
932 UPRV_LENGTHOF(sampleTextRoundTripUnmappable), "ibm-943", fmIBM943Offs,false);
933 #endif
934
935 /* Try it again to make sure it still works */
936 testConvertToU(expectedUTF16LE, sizeof(expectedUTF16LE),
937 sampleText, UPRV_LENGTHOF(sampleText), "utf-16le", fmUTF16LEOffs,false);
938
939 #if !UCONFIG_NO_LEGACY_CONVERSION
940 testConvertToU(expectedMaltese913, sizeof(expectedMaltese913),
941 malteseUChars, UPRV_LENGTHOF(malteseUChars), "latin3", NULL,false);
942
943 testConvertFromU(malteseUChars, UPRV_LENGTHOF(malteseUChars),
944 expectedMaltese913, sizeof(expectedMaltese913), "iso-8859-3", NULL,false );
945
946 /*LMBCS*/
947 testConvertFromU(LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars),
948 expectedLMBCS, sizeof(expectedLMBCS), "LMBCS-1", toLMBCSOffs,false );
949 testConvertToU(expectedLMBCS, sizeof(expectedLMBCS),
950 LMBCSUChars, UPRV_LENGTHOF(LMBCSUChars), "LMBCS-1", fmLMBCSOffs,false);
951 #endif
952
953 /* UTF-7 examples are mostly from http://www.imc.org/rfc2152 */
954 {
955 /* encode directly set D and set O */
956 static const uint8_t utf7[] = {
957 /*
958 Hi Mom -+Jjo--!
959 A+ImIDkQ.
960 +-
961 +ZeVnLIqe-
962 */
963 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
964 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
965 0x2b, 0x2d,
966 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
967 };
968 static const UChar unicode[] = {
969 /*
970 Hi Mom -<WHITE SMILING FACE>-!
971 A<NOT IDENTICAL TO><ALPHA>.
972 +
973 [Japanese word "nihongo"]
974 */
975 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
976 0x41, 0x2262, 0x0391, 0x2e,
977 0x2b,
978 0x65e5, 0x672c, 0x8a9e
979 };
980 static const int32_t toUnicodeOffsets[] = {
981 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
982 15, 17, 19, 23,
983 24,
984 27, 29, 32
985 };
986 static const int32_t fromUnicodeOffsets[] = {
987 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
988 11, 12, 12, 12, 13, 13, 13, 13, 14,
989 15, 15,
990 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
991 };
992
993 /* same but escaping set O (the exclamation mark) */
994 static const uint8_t utf7Restricted[] = {
995 /*
996 Hi Mom -+Jjo--+ACE-
997 A+ImIDkQ.
998 +-
999 +ZeVnLIqe-
1000 */
1001 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x2b, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x2b, 0x41, 0x43, 0x45, 0x2d,
1002 0x41, 0x2b, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2e,
1003 0x2b, 0x2d,
1004 0x2b, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d
1005 };
1006 static const int32_t toUnicodeOffsetsR[] = {
1007 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 15,
1008 19, 21, 23, 27,
1009 28,
1010 31, 33, 36
1011 };
1012 static const int32_t fromUnicodeOffsetsR[] = {
1013 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10, 10, 10, 10, 10,
1014 11, 12, 12, 12, 13, 13, 13, 13, 14,
1015 15, 15,
1016 16, 16, 16, 17, 17, 17, 18, 18, 18, 18
1017 };
1018
1019 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7, sizeof(utf7), "UTF-7", fromUnicodeOffsets,false);
1020
1021 testConvertToU(utf7, sizeof(utf7), unicode, UPRV_LENGTHOF(unicode), "UTF-7", toUnicodeOffsets,false);
1022
1023 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), utf7Restricted, sizeof(utf7Restricted), "UTF-7,version=1", fromUnicodeOffsetsR,false);
1024
1025 testConvertToU(utf7Restricted, sizeof(utf7Restricted), unicode, UPRV_LENGTHOF(unicode), "UTF-7,version=1", toUnicodeOffsetsR,false);
1026 }
1027
1028 /*
1029 * IMAP-mailbox-name examples are mostly from http://www.imc.org/rfc2152,
1030 * modified according to RFC 2060,
1031 * and supplemented with the one example in RFC 2060 itself.
1032 */
1033 {
1034 static const uint8_t imap[] = {
1035 /* Hi Mom -&Jjo--!
1036 A&ImIDkQ-.
1037 &-
1038 &ZeVnLIqe-
1039 \
1040 ~peter
1041 /mail
1042 /&ZeVnLIqe-
1043 /&U,BTFw-
1044 */
1045 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x26, 0x4a, 0x6a, 0x6f, 0x2d, 0x2d, 0x21,
1046 0x41, 0x26, 0x49, 0x6d, 0x49, 0x44, 0x6b, 0x51, 0x2d, 0x2e,
1047 0x26, 0x2d,
1048 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1049 0x5c,
1050 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1051 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1052 0x2f, 0x26, 0x5a, 0x65, 0x56, 0x6e, 0x4c, 0x49, 0x71, 0x65, 0x2d,
1053 0x2f, 0x26, 0x55, 0x2c, 0x42, 0x54, 0x46, 0x77, 0x2d
1054 };
1055 static const UChar unicode[] = {
1056 /* Hi Mom -<WHITE SMILING FACE>-!
1057 A<NOT IDENTICAL TO><ALPHA>.
1058 &
1059 [Japanese word "nihongo"]
1060 \
1061 ~peter
1062 /mail
1063 /<65e5, 672c, 8a9e>
1064 /<53f0, 5317>
1065 */
1066 0x48, 0x69, 0x20, 0x4d, 0x6f, 0x6d, 0x20, 0x2d, 0x263a, 0x2d, 0x21,
1067 0x41, 0x2262, 0x0391, 0x2e,
1068 0x26,
1069 0x65e5, 0x672c, 0x8a9e,
1070 0x5c,
1071 0x7e, 0x70, 0x65, 0x74, 0x65, 0x72,
1072 0x2f, 0x6d, 0x61, 0x69, 0x6c,
1073 0x2f, 0x65e5, 0x672c, 0x8a9e,
1074 0x2f, 0x53f0, 0x5317
1075 };
1076 static const int32_t toUnicodeOffsets[] = {
1077 0, 1, 2, 3, 4, 5, 6, 7, 9, 13, 14,
1078 15, 17, 19, 24,
1079 25,
1080 28, 30, 33,
1081 37,
1082 38, 39, 40, 41, 42, 43,
1083 44, 45, 46, 47, 48,
1084 49, 51, 53, 56,
1085 60, 62, 64
1086 };
1087 static const int32_t fromUnicodeOffsets[] = {
1088 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 9, 10,
1089 11, 12, 12, 12, 13, 13, 13, 13, 13, 14,
1090 15, 15,
1091 16, 16, 16, 17, 17, 17, 18, 18, 18, 18,
1092 19,
1093 20, 21, 22, 23, 24, 25,
1094 26, 27, 28, 29, 30,
1095 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 34,
1096 35, 36, 36, 36, 37, 37, 37, 37, 37
1097 };
1098
1099 testConvertFromU(unicode, UPRV_LENGTHOF(unicode), imap, sizeof(imap), "IMAP-mailbox-name", fromUnicodeOffsets,false);
1100
1101 testConvertToU(imap, sizeof(imap), unicode, UPRV_LENGTHOF(unicode), "IMAP-mailbox-name", toUnicodeOffsets,false);
1102 }
1103
1104 /* Test UTF-8 bad data handling*/
1105 {
1106 static const uint8_t utf8[]={
1107 0x61,
1108 0xf7, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1109 0x00,
1110 0x62,
1111 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1112 0xfb, 0xbf, 0xbf, 0xbf, 0xbf, /* > 10FFFF */
1113 0xf4, 0x8f, 0xbf, 0xbf, /* 10FFFF */
1114 0xdf, 0xbf, /* 7ff */
1115 0xbf, /* truncated tail */
1116 0xf4, 0x90, 0x80, 0x80, /* 110000 */
1117 0x02
1118 };
1119
1120 static const uint16_t utf8Expected[]={
1121 0x0061,
1122 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1123 0x0000,
1124 0x0062,
1125 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1126 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1127 0xdbff, 0xdfff,
1128 0x07ff,
1129 0xfffd,
1130 0xfffd, 0xfffd, 0xfffd, 0xfffd,
1131 0x0002
1132 };
1133
1134 static const int32_t utf8Offsets[]={
1135 0,
1136 1, 2, 3, 4,
1137 5,
1138 6,
1139 7, 8, 9, 10, 11,
1140 12, 13, 14, 15, 16,
1141 17, 17,
1142 21,
1143 23,
1144 24, 25, 26, 27,
1145 28
1146 };
1147 testConvertToU(utf8, sizeof(utf8),
1148 utf8Expected, UPRV_LENGTHOF(utf8Expected), "utf-8", utf8Offsets ,false);
1149
1150 }
1151
1152 /* Test UTF-32BE bad data handling*/
1153 {
1154 static const uint8_t utf32[]={
1155 0x00, 0x00, 0x00, 0x61,
1156 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
1157 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1158 0x00, 0x00, 0x00, 0x62,
1159 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1160 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
1161 0x00, 0x00, 0x01, 0x62,
1162 0x00, 0x00, 0x02, 0x62
1163 };
1164 static const uint16_t utf32Expected[]={
1165 0x0061,
1166 0xfffd, /* 0x110000 out of range */
1167 0xDBFF, /* 0x10FFFF in range */
1168 0xDFFF,
1169 0x0062,
1170 0xfffd, /* 0xffffffff out of range */
1171 0xfffd, /* 0x7fffffff out of range */
1172 0x0162,
1173 0x0262
1174 };
1175 static const int32_t utf32Offsets[]={
1176 0, 4, 8, 8, 12, 16, 20, 24, 28
1177 };
1178 static const uint8_t utf32ExpectedBack[]={
1179 0x00, 0x00, 0x00, 0x61,
1180 0x00, 0x00, 0xff, 0xfd, /* 0x110000 out of range */
1181 0x00, 0x10, 0xff, 0xff, /* 0x10FFFF in range */
1182 0x00, 0x00, 0x00, 0x62,
1183 0x00, 0x00, 0xff, 0xfd, /* 0xffffffff out of range */
1184 0x00, 0x00, 0xff, 0xfd, /* 0x7fffffff out of range */
1185 0x00, 0x00, 0x01, 0x62,
1186 0x00, 0x00, 0x02, 0x62
1187 };
1188 static const int32_t utf32OffsetsBack[]={
1189 0,0,0,0,
1190 1,1,1,1,
1191 2,2,2,2,
1192 4,4,4,4,
1193 5,5,5,5,
1194 6,6,6,6,
1195 7,7,7,7,
1196 8,8,8,8
1197 };
1198
1199 testConvertToU(utf32, sizeof(utf32),
1200 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32be", utf32Offsets ,false);
1201 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1202 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32be", utf32OffsetsBack, false);
1203 }
1204
1205 /* Test UTF-32LE bad data handling*/
1206 {
1207 static const uint8_t utf32[]={
1208 0x61, 0x00, 0x00, 0x00,
1209 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
1210 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1211 0x62, 0x00, 0x00, 0x00,
1212 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
1213 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
1214 0x62, 0x01, 0x00, 0x00,
1215 0x62, 0x02, 0x00, 0x00,
1216 };
1217
1218 static const uint16_t utf32Expected[]={
1219 0x0061,
1220 0xfffd, /* 0x110000 out of range */
1221 0xDBFF, /* 0x10FFFF in range */
1222 0xDFFF,
1223 0x0062,
1224 0xfffd, /* 0xffffffff out of range */
1225 0xfffd, /* 0x7fffffff out of range */
1226 0x0162,
1227 0x0262
1228 };
1229 static const int32_t utf32Offsets[]={
1230 0, 4, 8, 8, 12, 16, 20, 24, 28
1231 };
1232 static const uint8_t utf32ExpectedBack[]={
1233 0x61, 0x00, 0x00, 0x00,
1234 0xfd, 0xff, 0x00, 0x00, /* 0x110000 out of range */
1235 0xff, 0xff, 0x10, 0x00, /* 0x10FFFF in range */
1236 0x62, 0x00, 0x00, 0x00,
1237 0xfd, 0xff, 0x00, 0x00, /* 0xffffffff out of range */
1238 0xfd, 0xff, 0x00, 0x00, /* 0x7fffffff out of range */
1239 0x62, 0x01, 0x00, 0x00,
1240 0x62, 0x02, 0x00, 0x00
1241 };
1242 static const int32_t utf32OffsetsBack[]={
1243 0,0,0,0,
1244 1,1,1,1,
1245 2,2,2,2,
1246 4,4,4,4,
1247 5,5,5,5,
1248 6,6,6,6,
1249 7,7,7,7,
1250 8,8,8,8
1251 };
1252 testConvertToU(utf32, sizeof(utf32),
1253 utf32Expected, UPRV_LENGTHOF(utf32Expected), "utf-32le", utf32Offsets,false );
1254 testConvertFromU(utf32Expected, UPRV_LENGTHOF(utf32Expected),
1255 utf32ExpectedBack, sizeof(utf32ExpectedBack), "utf-32le", utf32OffsetsBack, false);
1256 }
1257 }
1258
TestCoverageMBCS(void)1259 static void TestCoverageMBCS(void){
1260 #if 0
1261 UErrorCode status = U_ZERO_ERROR;
1262 const char *directory = loadTestData(&status);
1263 char* tdpath = NULL;
1264 char* saveDirectory = (char*)malloc(sizeof(char) *(strlen(u_getDataDirectory())+1));
1265 int len = strlen(directory);
1266 char* index=NULL;
1267
1268 tdpath = (char*) malloc(sizeof(char) * (len * 2));
1269 uprv_strcpy(saveDirectory,u_getDataDirectory());
1270 log_verbose("Retrieved data directory %s \n",saveDirectory);
1271 uprv_strcpy(tdpath,directory);
1272 index=strrchr(tdpath,(char)U_FILE_SEP_CHAR);
1273
1274 if((unsigned int)(index-tdpath) != (strlen(tdpath)-1)){
1275 *(index+1)=0;
1276 }
1277 u_setDataDirectory(tdpath);
1278 log_verbose("ICU data directory is set to: %s \n" ,tdpath);
1279 #endif
1280
1281 /*some more test to increase the code coverage in MBCS. Create an test converter from test1.ucm
1282 which is test file for MBCS conversion with single-byte codepage data.*/
1283 {
1284
1285 /* MBCS with single byte codepage data test1.ucm*/
1286 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0xdbc4, 0xde34, 0x0003};
1287 const uint8_t expectedtest1[] = { 0x00, 0x05, 0xff, 0x07, 0xff,};
1288 int32_t totest1Offs[] = { 0, 1, 2, 3, 5, };
1289
1290 /*from Unicode*/
1291 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1292 expectedtest1, sizeof(expectedtest1), "@test1", totest1Offs,false );
1293 }
1294
1295 /*some more test to increase the code coverage in MBCS. Create an test converter from test3.ucm
1296 which is test file for MBCS conversion with three-byte codepage data.*/
1297 {
1298
1299 /* MBCS with three byte codepage data test3.ucm*/
1300 const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1301 const uint8_t expectedtest3[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0xff,};
1302 int32_t totest3Offs[] = { 0, 1, 2, 3, 3, 3, 4, 6, 6, 6, 8};
1303
1304 const uint8_t test3input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x0b, 0x07, 0x01, 0x02, 0x0a, 0x01, 0x02, 0x0c,};
1305 const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1306 int32_t fromtest3Offs[] = { 0, 1, 2, 3, 6, 6, 7, 7, 10 };
1307
1308 /*from Unicode*/
1309 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1310 expectedtest3, sizeof(expectedtest3), "@test3", totest3Offs,false );
1311
1312 /*to Unicode*/
1313 testConvertToU(test3input, sizeof(test3input),
1314 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test3", fromtest3Offs ,false);
1315
1316 }
1317
1318 /*some more test to increase the code coverage in MBCS. Create an test converter from test4.ucm
1319 which is test file for MBCS conversion with four-byte codepage data.*/
1320 {
1321
1322 /* MBCS with three byte codepage data test4.ucm*/
1323 static const UChar unicodeInput[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0x000e};
1324 static const uint8_t expectedtest4[] = { 0x00, 0x05, 0xff, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0xff,};
1325 static const int32_t totest4Offs[] = { 0, 1, 2, 3, 3, 3, 3, 4, 6, 6, 6, 6, 8,};
1326
1327 static const uint8_t test4input[] = { 0x00, 0x05, 0x06, 0x01, 0x02, 0x03, 0x0b, 0x07, 0x01, 0x02, 0x03, 0x0a, 0x01, 0x02, 0x03, 0x0c,};
1328 static const UChar expectedUnicode[] = { 0x20ac, 0x0005, 0x0006, 0x000b, 0xdbc4, 0xde34, 0xd84d, 0xdc56, 0xfffd};
1329 static const int32_t fromtest4Offs[] = { 0, 1, 2, 3, 7, 7, 8, 8, 12,};
1330
1331 /*from Unicode*/
1332 testConvertFromU(unicodeInput, UPRV_LENGTHOF(unicodeInput),
1333 expectedtest4, sizeof(expectedtest4), "@test4", totest4Offs,false );
1334
1335 /*to Unicode*/
1336 testConvertToU(test4input, sizeof(test4input),
1337 expectedUnicode, UPRV_LENGTHOF(expectedUnicode), "@test4", fromtest4Offs,false );
1338
1339 }
1340 #if 0
1341 free(tdpath);
1342 /* restore the original data directory */
1343 log_verbose("Setting the data directory to %s \n", saveDirectory);
1344 u_setDataDirectory(saveDirectory);
1345 free(saveDirectory);
1346 #endif
1347
1348 }
1349
/**
 * Opens the converter named convName and checks that ucnv_getType()
 * reports convType for it.
 *
 * If the converter cannot be opened, the problem is logged with
 * log_data_err() and the check is abandoned. A type mismatch is logged
 * with log_err(), showing both the value returned and the value expected.
 *
 * @param convName converter name handed to my_ucnv_open()
 * @param convType converter type that ucnv_getType() is expected to return
 */
static void TestConverterType(const char *convName, UConverterType convType) {
    UErrorCode err = U_ZERO_ERROR;
    UConverter *myConverter = my_ucnv_open(convName, &err);
    UConverterType actualType;

    if (U_FAILURE(err)) {
        log_data_err("Failed to create an %s converter\n", convName);
        return;
    }

    actualType = ucnv_getType(myConverter);
    if (actualType != convType) {
        /* Print the value that was actually returned; the previous message
           labelled the expected value as "Got". */
        log_err("ucnv_getType Failed for %s. Got enum value 0x%X, expected 0x%X\n",
                convName, (unsigned int)actualType, (unsigned int)convType);
    } else {
        log_verbose("ucnv_getType %s ok\n", convName);
    }

    ucnv_close(myConverter);
}
1372
TestConverterTypesAndStarters(void)1373 static void TestConverterTypesAndStarters(void)
1374 {
1375 #if !UCONFIG_NO_LEGACY_CONVERSION
1376 UConverter* myConverter;
1377 UErrorCode err = U_ZERO_ERROR;
1378 UBool mystarters[256];
1379
1380 /* const UBool expectedKSCstarters[256] = {
1381 false, false, false, false, false, false, false, false, false, false,
1382 false, false, false, false, false, false, false, false, false, false,
1383 false, false, false, false, false, false, false, false, false, false,
1384 false, false, false, false, false, false, false, false, false, false,
1385 false, false, false, false, false, false, false, false, false, false,
1386 false, false, false, false, false, false, false, false, false, false,
1387 false, false, false, false, false, false, false, false, false, false,
1388 false, false, false, false, false, false, false, false, false, false,
1389 false, false, false, false, false, false, false, false, false, false,
1390 false, false, false, false, false, false, false, false, false, false,
1391 false, false, false, false, false, false, false, false, false, false,
1392 false, false, false, false, false, false, false, false, false, false,
1393 false, false, false, false, false, false, false, false, false, false,
1394 false, false, false, false, false, false, false, false, false, false,
1395 false, false, false, true, true, true, true, true, true, true,
1396 true, true, true, true, true, true, true, true, true, true,
1397 true, true, true, true, true, true, true, true, true, true,
1398 true, true, true, false, false, true, true, true, true, true,
1399 true, true, true, true, true, true, true, true, true, true,
1400 true, true, true, true, true, true, true, true, true, true,
1401 true, true, true, true, true, true, true, true, true, true,
1402 true, true, true, true, true, true, true, true, true, true,
1403 true, true, true, true, true, true, true, true, true, true,
1404 true, true, true, true, true, true, true, true, true, true,
1405 true, true, true, true, true, true, true, true, true, true,
1406 true, true, true, true, true, true};*/
1407
1408
1409 log_verbose("Testing KSC, ibm-930, ibm-878 for starters and their conversion types.");
1410
1411 myConverter = ucnv_open("ksc", &err);
1412 if (U_FAILURE(err)) {
1413 log_data_err("Failed to create an ibm-ksc converter\n");
1414 return;
1415 }
1416 else
1417 {
1418 if (ucnv_getType(myConverter)!=UCNV_MBCS)
1419 log_err("ucnv_getType Failed for ibm-949\n");
1420 else
1421 log_verbose("ucnv_getType ibm-949 ok\n");
1422
1423 if(myConverter!=NULL)
1424 ucnv_getStarters(myConverter, mystarters, &err);
1425
1426 /*if (memcmp(expectedKSCstarters, mystarters, sizeof(expectedKSCstarters)))
1427 log_err("Failed ucnv_getStarters for ksc\n");
1428 else
1429 log_verbose("ucnv_getStarters ok\n");*/
1430
1431 }
1432 ucnv_close(myConverter);
1433
1434 TestConverterType("ibm-930", UCNV_EBCDIC_STATEFUL);
1435 TestConverterType("ibm-878", UCNV_SBCS);
1436 #endif
1437
1438 TestConverterType("iso-8859-1", UCNV_LATIN_1);
1439
1440 TestConverterType("ibm-1208", UCNV_UTF8);
1441
1442 TestConverterType("utf-8", UCNV_UTF8);
1443 TestConverterType("UTF-16BE", UCNV_UTF16_BigEndian);
1444 TestConverterType("UTF-16LE", UCNV_UTF16_LittleEndian);
1445 TestConverterType("UTF-32BE", UCNV_UTF32_BigEndian);
1446 TestConverterType("UTF-32LE", UCNV_UTF32_LittleEndian);
1447
1448 #if !UCONFIG_NO_LEGACY_CONVERSION
1449
1450 #if defined(U_ENABLE_GENERIC_ISO_2022)
1451 TestConverterType("iso-2022", UCNV_ISO_2022);
1452 #endif
1453
1454 TestConverterType("hz", UCNV_HZ);
1455 #endif
1456
1457 TestConverterType("scsu", UCNV_SCSU);
1458
1459 #if !UCONFIG_NO_LEGACY_CONVERSION
1460 TestConverterType("x-iscii-de", UCNV_ISCII);
1461 #endif
1462
1463 TestConverterType("ascii", UCNV_US_ASCII);
1464 TestConverterType("utf-7", UCNV_UTF7);
1465 TestConverterType("IMAP-mailbox-name", UCNV_IMAP_MAILBOX);
1466 TestConverterType("bocu-1", UCNV_BOCU1);
1467 }
1468
1469 static void
TestAmbiguousConverter(UConverter * cnv)1470 TestAmbiguousConverter(UConverter *cnv) {
1471 static const char inBytes[3]={ 0x61, 0x5B, 0x5c };
1472 UChar outUnicode[20]={ 0, 0, 0, 0 };
1473
1474 const char *s;
1475 UChar *u;
1476 UErrorCode errorCode;
1477 UBool isAmbiguous;
1478
1479 /* try to convert an 'a', a square bracket and a US-ASCII backslash */
1480 errorCode=U_ZERO_ERROR;
1481 s=inBytes;
1482 u=outUnicode;
1483 ucnv_toUnicode(cnv, &u, u+20, &s, s+3, NULL, true, &errorCode);
1484 if(U_FAILURE(errorCode)) {
1485 /* we do not care about general failures in this test; the input may just not be mappable */
1486 return;
1487 }
1488
1489 if(outUnicode[0]!=0x61 || outUnicode[1]!=0x5B || outUnicode[2]==0xfffd) {
1490 /* not a close ASCII-family encoding, or 0x5c is unassigned/illegal: this test is not applicable */
1491 /* There are some encodings that are partially ASCII based,
1492 like the ISO-7 and GSM series of codepages, which we ignore. */
1493 return;
1494 }
1495
1496 isAmbiguous=ucnv_isAmbiguous(cnv);
1497
1498 /* check that outUnicode[1]!=0x5c is exactly the same as ucnv_isAmbiguous() */
1499 if((outUnicode[2]!=0x5c)!=isAmbiguous) {
1500 log_err("error: converter \"%s\" needs a backslash fix: %d but ucnv_isAmbiguous()==%d\n",
1501 ucnv_getName(cnv, &errorCode), outUnicode[2]!=0x5c, isAmbiguous);
1502 return;
1503 }
1504
1505 if(outUnicode[2]!=0x5c) {
1506 /* needs fixup, fix it */
1507 ucnv_fixFileSeparator(cnv, outUnicode, (int32_t)(u-outUnicode));
1508 if(outUnicode[2]!=0x5c) {
1509 /* the fix failed */
1510 log_err("error: ucnv_fixFileSeparator(%s) failed\n", ucnv_getName(cnv, &errorCode));
1511 return;
1512 }
1513 }
1514 }
1515
TestAmbiguous(void)1516 static void TestAmbiguous(void)
1517 {
1518 UErrorCode status = U_ZERO_ERROR;
1519 UConverter *ascii_cnv = 0, *sjis_cnv = 0, *cnv;
1520 static const char target[] = {
1521 /* "\\usr\\local\\share\\data\\icutest.txt" */
1522 0x5c, 0x75, 0x73, 0x72,
1523 0x5c, 0x6c, 0x6f, 0x63, 0x61, 0x6c,
1524 0x5c, 0x73, 0x68, 0x61, 0x72, 0x65,
1525 0x5c, 0x64, 0x61, 0x74, 0x61,
1526 0x5c, 0x69, 0x63, 0x75, 0x74, 0x65, 0x73, 0x74, 0x2e, 0x74, 0x78, 0x74,
1527 0
1528 };
1529 UChar asciiResult[200], sjisResult[200];
1530 int32_t /*asciiLength = 0,*/ sjisLength = 0, i;
1531 const char *name;
1532
1533 /* enumerate all converters */
1534 status=U_ZERO_ERROR;
1535 for(i=0; (name=ucnv_getAvailableName(i))!=NULL; ++i) {
1536 cnv=ucnv_open(name, &status);
1537 if(U_SUCCESS(status)) {
1538 /* BEGIN android-changed. To save space Android does not build full ISO-2022-CN CJK tables. */
1539 const char* cnvName = ucnv_getName(cnv, &status);
1540 if (strlen(cnvName) < 8 ||
1541 strncmp(cnvName, "ISO_2022_CN", 8) != 0) {
1542 TestAmbiguousConverter(cnv);
1543 }
1544 /* END android-changed */
1545 ucnv_close(cnv);
1546 } else {
1547 log_err("error: unable to open available converter \"%s\"\n", name);
1548 status=U_ZERO_ERROR;
1549 }
1550 }
1551
1552 #if !UCONFIG_NO_LEGACY_CONVERSION
1553 sjis_cnv = ucnv_open("ibm-943", &status);
1554 if (U_FAILURE(status))
1555 {
1556 log_data_err("Failed to create a SJIS converter\n");
1557 return;
1558 }
1559 ascii_cnv = ucnv_open("LATIN-1", &status);
1560 if (U_FAILURE(status))
1561 {
1562 log_data_err("Failed to create a LATIN-1 converter\n");
1563 ucnv_close(sjis_cnv);
1564 return;
1565 }
1566 /* convert target from SJIS to Unicode */
1567 sjisLength = ucnv_toUChars(sjis_cnv, sjisResult, UPRV_LENGTHOF(sjisResult), target, (int32_t)strlen(target), &status);
1568 if (U_FAILURE(status))
1569 {
1570 log_err("Failed to convert the SJIS string.\n");
1571 ucnv_close(sjis_cnv);
1572 ucnv_close(ascii_cnv);
1573 return;
1574 }
1575 /* convert target from Latin-1 to Unicode */
1576 /*asciiLength =*/ ucnv_toUChars(ascii_cnv, asciiResult, UPRV_LENGTHOF(asciiResult), target, (int32_t)strlen(target), &status);
1577 if (U_FAILURE(status))
1578 {
1579 log_err("Failed to convert the Latin-1 string.\n");
1580 ucnv_close(sjis_cnv);
1581 ucnv_close(ascii_cnv);
1582 return;
1583 }
1584 if (!ucnv_isAmbiguous(sjis_cnv))
1585 {
1586 log_err("SJIS converter should contain ambiguous character mappings.\n");
1587 ucnv_close(sjis_cnv);
1588 ucnv_close(ascii_cnv);
1589 return;
1590 }
1591 if (u_strcmp(sjisResult, asciiResult) == 0)
1592 {
1593 log_err("File separators for SJIS don't need to be fixed.\n");
1594 }
1595 ucnv_fixFileSeparator(sjis_cnv, sjisResult, sjisLength);
1596 if (u_strcmp(sjisResult, asciiResult) != 0)
1597 {
1598 log_err("Fixing file separator for SJIS failed.\n");
1599 }
1600 ucnv_close(sjis_cnv);
1601 ucnv_close(ascii_cnv);
1602 #endif
1603 }
1604
1605 static void
TestSignatureDetection(void)1606 TestSignatureDetection(void){
1607 /* with null terminated strings */
1608 {
1609 static const char* data[] = {
1610 "\xFE\xFF\x00\x00", /* UTF-16BE */
1611 "\xFF\xFE\x00\x00", /* UTF-16LE */
1612 "\xEF\xBB\xBF\x00", /* UTF-8 */
1613 "\x0E\xFE\xFF\x00", /* SCSU */
1614
1615 "\xFE\xFF", /* UTF-16BE */
1616 "\xFF\xFE", /* UTF-16LE */
1617 "\xEF\xBB\xBF", /* UTF-8 */
1618 "\x0E\xFE\xFF", /* SCSU */
1619
1620 "\xFE\xFF\x41\x42", /* UTF-16BE */
1621 "\xFF\xFE\x41\x41", /* UTF-16LE */
1622 "\xEF\xBB\xBF\x41", /* UTF-8 */
1623 "\x0E\xFE\xFF\x41", /* SCSU */
1624
1625 "\x2B\x2F\x76\x38\x2D", /* UTF-7 */
1626 "\x2B\x2F\x76\x38\x41", /* UTF-7 */
1627 "\x2B\x2F\x76\x39\x41", /* UTF-7 */
1628 "\x2B\x2F\x76\x2B\x41", /* UTF-7 */
1629 "\x2B\x2F\x76\x2F\x41", /* UTF-7 */
1630
1631 "\xDD\x73\x66\x73" /* UTF-EBCDIC */
1632 };
1633 static const char* expected[] = {
1634 "UTF-16BE",
1635 "UTF-16LE",
1636 "UTF-8",
1637 "SCSU",
1638
1639 "UTF-16BE",
1640 "UTF-16LE",
1641 "UTF-8",
1642 "SCSU",
1643
1644 "UTF-16BE",
1645 "UTF-16LE",
1646 "UTF-8",
1647 "SCSU",
1648
1649 "UTF-7",
1650 "UTF-7",
1651 "UTF-7",
1652 "UTF-7",
1653 "UTF-7",
1654 "UTF-EBCDIC"
1655 };
1656 static const int32_t expectedLength[] ={
1657 2,
1658 2,
1659 3,
1660 3,
1661
1662 2,
1663 2,
1664 3,
1665 3,
1666
1667 2,
1668 2,
1669 3,
1670 3,
1671
1672 5,
1673 4,
1674 4,
1675 4,
1676 4,
1677 4
1678 };
1679 int i=0;
1680 UErrorCode err;
1681 int32_t signatureLength = -1;
1682 const char* source = NULL;
1683 const char* enc = NULL;
1684 for( ; i<UPRV_LENGTHOF(data); i++){
1685 err = U_ZERO_ERROR;
1686 source = data[i];
1687 enc = ucnv_detectUnicodeSignature(source, -1 , &signatureLength, &err);
1688 if(U_FAILURE(err)){
1689 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1690 continue;
1691 }
1692 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1693 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1694 continue;
1695 }
1696 if(signatureLength != expectedLength[i]){
1697 log_err("ucnv_detectUnicodeSignature failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1698 }
1699 }
1700 }
1701 {
1702 static const char* data[] = {
1703 "\xFE\xFF\x00", /* UTF-16BE */
1704 "\xFF\xFE\x00", /* UTF-16LE */
1705 "\xEF\xBB\xBF\x00", /* UTF-8 */
1706 "\x0E\xFE\xFF\x00", /* SCSU */
1707 "\x00\x00\xFE\xFF", /* UTF-32BE */
1708 "\xFF\xFE\x00\x00", /* UTF-32LE */
1709 "\xFE\xFF", /* UTF-16BE */
1710 "\xFF\xFE", /* UTF-16LE */
1711 "\xEF\xBB\xBF", /* UTF-8 */
1712 "\x0E\xFE\xFF", /* SCSU */
1713 "\x00\x00\xFE\xFF", /* UTF-32BE */
1714 "\xFF\xFE\x00\x00", /* UTF-32LE */
1715 "\xFE\xFF\x41\x42", /* UTF-16BE */
1716 "\xFF\xFE\x41\x41", /* UTF-16LE */
1717 "\xEF\xBB\xBF\x41", /* UTF-8 */
1718 "\x0E\xFE\xFF\x41", /* SCSU */
1719 "\x00\x00\xFE\xFF\x41", /* UTF-32BE */
1720 "\xFF\xFE\x00\x00\x42", /* UTF-32LE */
1721 "\xFB\xEE\x28", /* BOCU-1 */
1722 "\xFF\x41\x42" /* NULL */
1723 };
1724 static const int len[] = {
1725 3,
1726 3,
1727 4,
1728 4,
1729 4,
1730 4,
1731 2,
1732 2,
1733 3,
1734 3,
1735 4,
1736 4,
1737 4,
1738 4,
1739 4,
1740 4,
1741 5,
1742 5,
1743 3,
1744 3
1745 };
1746
1747 static const char* expected[] = {
1748 "UTF-16BE",
1749 "UTF-16LE",
1750 "UTF-8",
1751 "SCSU",
1752 "UTF-32BE",
1753 "UTF-32LE",
1754 "UTF-16BE",
1755 "UTF-16LE",
1756 "UTF-8",
1757 "SCSU",
1758 "UTF-32BE",
1759 "UTF-32LE",
1760 "UTF-16BE",
1761 "UTF-16LE",
1762 "UTF-8",
1763 "SCSU",
1764 "UTF-32BE",
1765 "UTF-32LE",
1766 "BOCU-1",
1767 NULL
1768 };
1769 static const int32_t expectedLength[] ={
1770 2,
1771 2,
1772 3,
1773 3,
1774 4,
1775 4,
1776 2,
1777 2,
1778 3,
1779 3,
1780 4,
1781 4,
1782 2,
1783 2,
1784 3,
1785 3,
1786 4,
1787 4,
1788 3,
1789 0
1790 };
1791 int i=0;
1792 UErrorCode err;
1793 int32_t signatureLength = -1;
1794 int32_t sourceLength=-1;
1795 const char* source = NULL;
1796 const char* enc = NULL;
1797 for( ; i<UPRV_LENGTHOF(data); i++){
1798 err = U_ZERO_ERROR;
1799 source = data[i];
1800 sourceLength = len[i];
1801 enc = ucnv_detectUnicodeSignature(source, sourceLength , &signatureLength, &err);
1802 if(U_FAILURE(err)){
1803 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Error: %s\n", source,i,u_errorName(err));
1804 continue;
1805 }
1806 if(enc == NULL || strcmp(enc,expected[i]) !=0){
1807 if(expected[i] !=NULL){
1808 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i. Expected: %s. Got: %s\n",source,i,expected[i],enc);
1809 continue;
1810 }
1811 }
1812 if(signatureLength != expectedLength[i]){
1813 log_err("ucnv_detectUnicodeSignature test2 failed for source : %s at index :%i.Expected Length: %i. Got length: %i\n",source,i,signatureLength,expectedLength[i]);
1814 }
1815 }
1816 }
1817 }
1818
TestUTF7(void)1819 static void TestUTF7(void) {
1820 /* test input */
1821 static const uint8_t in[]={
1822 /* H - +Jjo- - ! +- +2AHcAQ */
1823 0x48,
1824 0x2d,
1825 0x2b, 0x4a, 0x6a, 0x6f,
1826 0x2d, 0x2d,
1827 0x21,
1828 0x2b, 0x2d,
1829 0x2b, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51
1830 };
1831
1832 /* expected test results */
1833 static const int32_t results[]={
1834 /* number of bytes read, code point */
1835 1, 0x48,
1836 1, 0x2d,
1837 4, 0x263a, /* <WHITE SMILING FACE> */
1838 2, 0x2d,
1839 1, 0x21,
1840 2, 0x2b,
1841 7, 0x10401
1842 };
1843
1844 const char *cnvName;
1845 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1846 UErrorCode errorCode=U_ZERO_ERROR;
1847 UConverter *cnv=ucnv_open("UTF-7", &errorCode);
1848 if(U_FAILURE(errorCode)) {
1849 log_data_err("Unable to open a UTF-7 converter: %s\n", u_errorName(errorCode));
1850 return;
1851 }
1852 TestNextUChar(cnv, source, limit, results, "UTF-7");
1853 /* Test the condition when source >= sourceLimit */
1854 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1855 cnvName = ucnv_getName(cnv, &errorCode);
1856 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "UTF-7") != 0) {
1857 log_err("UTF-7 converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1858 }
1859 ucnv_close(cnv);
1860 }
1861
TestIMAP(void)1862 static void TestIMAP(void) {
1863 /* test input */
1864 static const uint8_t in[]={
1865 /* H - &Jjo- - ! &- &2AHcAQ- \ */
1866 0x48,
1867 0x2d,
1868 0x26, 0x4a, 0x6a, 0x6f,
1869 0x2d, 0x2d,
1870 0x21,
1871 0x26, 0x2d,
1872 0x26, 0x32, 0x41, 0x48, 0x63, 0x41, 0x51, 0x2d
1873 };
1874
1875 /* expected test results */
1876 static const int32_t results[]={
1877 /* number of bytes read, code point */
1878 1, 0x48,
1879 1, 0x2d,
1880 4, 0x263a, /* <WHITE SMILING FACE> */
1881 2, 0x2d,
1882 1, 0x21,
1883 2, 0x26,
1884 7, 0x10401
1885 };
1886
1887 const char *cnvName;
1888 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
1889 UErrorCode errorCode=U_ZERO_ERROR;
1890 UConverter *cnv=ucnv_open("IMAP-mailbox-name", &errorCode);
1891 if(U_FAILURE(errorCode)) {
1892 log_data_err("Unable to open a IMAP-mailbox-name converter: %s\n", u_errorName(errorCode));
1893 return;
1894 }
1895 TestNextUChar(cnv, source, limit, results, "IMAP-mailbox-name");
1896 /* Test the condition when source >= sourceLimit */
1897 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1898 cnvName = ucnv_getName(cnv, &errorCode);
1899 if (U_FAILURE(errorCode) || uprv_strcmp(cnvName, "IMAP-mailbox-name") != 0) {
1900 log_err("IMAP-mailbox-name converter is called %s: %s\n", cnvName, u_errorName(errorCode));
1901 }
1902 ucnv_close(cnv);
1903 }
1904
TestUTF8(void)1905 static void TestUTF8(void) {
1906 /* test input */
1907 static const uint8_t in[]={
1908 0x61,
1909 0xc2, 0x80,
1910 0xe0, 0xa0, 0x80,
1911 0xf0, 0x90, 0x80, 0x80,
1912 0xf4, 0x84, 0x8c, 0xa1,
1913 0xf0, 0x90, 0x90, 0x81
1914 };
1915
1916 /* expected test results */
1917 static const int32_t results[]={
1918 /* number of bytes read, code point */
1919 1, 0x61,
1920 2, 0x80,
1921 3, 0x800,
1922 4, 0x10000,
1923 4, 0x104321,
1924 4, 0x10401
1925 };
1926
1927 /* error test input */
1928 static const uint8_t in2[]={
1929 0x61,
1930 0xc0, 0x80, /* illegal non-shortest form */
1931 0xe0, 0x80, 0x80, /* illegal non-shortest form */
1932 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
1933 0xc0, 0xc0, /* illegal trail byte */
1934 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
1935 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
1936 0xfe, /* illegal byte altogether */
1937 0x62
1938 };
1939
1940 /* expected error test results */
1941 static const int32_t results2[]={
1942 /* number of bytes read, code point */
1943 1, 0x61,
1944 22, 0x62
1945 };
1946
1947 UConverterToUCallback cb;
1948 const void *p;
1949
1950 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
1951 UErrorCode errorCode=U_ZERO_ERROR;
1952 UConverter *cnv=ucnv_open("UTF-8", &errorCode);
1953 if(U_FAILURE(errorCode)) {
1954 log_err("Unable to open a UTF-8 converter: %s\n", u_errorName(errorCode));
1955 return;
1956 }
1957 TestNextUChar(cnv, source, limit, results, "UTF-8");
1958 /* Test the condition when source >= sourceLimit */
1959 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
1960
1961 /* test error behavior with a skip callback */
1962 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
1963 source=(const char *)in2;
1964 limit=(const char *)(in2+sizeof(in2));
1965 TestNextUChar(cnv, source, limit, results2, "UTF-8");
1966
1967 ucnv_close(cnv);
1968 }
1969
TestCESU8(void)1970 static void TestCESU8(void) {
1971 /* test input */
1972 static const uint8_t in[]={
1973 0x61,
1974 0xc2, 0x80,
1975 0xe0, 0xa0, 0x80,
1976 0xed, 0xa0, 0x80, 0xed, 0xb0, 0x80,
1977 0xed, 0xb0, 0x81, 0xed, 0xa0, 0x82,
1978 0xed, 0xaf, 0xbf, 0xed, 0xbf, 0xbf,
1979 0xef, 0xbf, 0xbc
1980 };
1981
1982 /* expected test results */
1983 static const int32_t results[]={
1984 /* number of bytes read, code point */
1985 1, 0x61,
1986 2, 0x80,
1987 3, 0x800,
1988 6, 0x10000,
1989 3, 0xdc01,
1990 -1,0xd802, /* may read 3 or 6 bytes */
1991 -1,0x10ffff,/* may read 0 or 3 bytes */
1992 3, 0xfffc
1993 };
1994
1995 /* error test input */
1996 static const uint8_t in2[]={
1997 0x61,
1998 0xc0, 0x80, /* illegal non-shortest form */
1999 0xe0, 0x80, 0x80, /* illegal non-shortest form */
2000 0xf0, 0x80, 0x80, 0x80, /* illegal non-shortest form */
2001 0xc0, 0xc0, /* illegal trail byte */
2002 0xf0, 0x90, 0x80, 0x80, /* illegal 4-byte supplementary code point */
2003 0xf4, 0x84, 0x8c, 0xa1, /* illegal 4-byte supplementary code point */
2004 0xf0, 0x90, 0x90, 0x81, /* illegal 4-byte supplementary code point */
2005 0xf4, 0x90, 0x80, 0x80, /* 0x110000 out of range */
2006 0xf8, 0x80, 0x80, 0x80, 0x80, /* too long */
2007 0xfe, /* illegal byte altogether */
2008 0x62
2009 };
2010
2011 /* expected error test results */
2012 static const int32_t results2[]={
2013 /* number of bytes read, code point */
2014 1, 0x61,
2015 34, 0x62
2016 };
2017
2018 UConverterToUCallback cb;
2019 const void *p;
2020
2021 const char *source=(const char *)in,*limit=(const char *)in+sizeof(in);
2022 UErrorCode errorCode=U_ZERO_ERROR;
2023 UConverter *cnv=ucnv_open("CESU-8", &errorCode);
2024 if(U_FAILURE(errorCode)) {
2025 log_data_err("Unable to open a CESU-8 converter: %s\n", u_errorName(errorCode));
2026 return;
2027 }
2028 TestNextUChar(cnv, source, limit, results, "CESU-8");
2029 /* Test the condition when source >= sourceLimit */
2030 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2031
2032 /* test error behavior with a skip callback */
2033 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2034 source=(const char *)in2;
2035 limit=(const char *)(in2+sizeof(in2));
2036 TestNextUChar(cnv, source, limit, results2, "CESU-8");
2037
2038 ucnv_close(cnv);
2039 }
2040
TestUTF16(void)2041 static void TestUTF16(void) {
2042 /* test input */
2043 static const uint8_t in1[]={
2044 0xfe, 0xff, 0x4e, 0x00, 0xfe, 0xff
2045 };
2046 static const uint8_t in2[]={
2047 0xff, 0xfe, 0x4e, 0x00, 0xfe, 0xff
2048 };
2049 static const uint8_t in3[]={
2050 0xfe, 0xfe, 0x4e, 0x00, 0xfe, 0xff, 0xd8, 0x40, 0xdc, 0x01
2051 };
2052
2053 /* expected test results */
2054 static const int32_t results1[]={
2055 /* number of bytes read, code point */
2056 4, 0x4e00,
2057 2, 0xfeff
2058 };
2059 static const int32_t results2[]={
2060 /* number of bytes read, code point */
2061 4, 0x004e,
2062 2, 0xfffe
2063 };
2064 static const int32_t results3[]={
2065 /* number of bytes read, code point */
2066 2, 0xfefe,
2067 2, 0x4e00,
2068 2, 0xfeff,
2069 4, 0x20001
2070 };
2071
2072 const char *source, *limit;
2073
2074 UErrorCode errorCode=U_ZERO_ERROR;
2075 UConverter *cnv=ucnv_open("UTF-16", &errorCode);
2076 if(U_FAILURE(errorCode)) {
2077 log_err("Unable to open a UTF-16 converter: %s\n", u_errorName(errorCode));
2078 return;
2079 }
2080
2081 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2082 TestNextUChar(cnv, source, limit, results1, "UTF-16");
2083
2084 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2085 ucnv_resetToUnicode(cnv);
2086 TestNextUChar(cnv, source, limit, results2, "UTF-16");
2087
2088 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2089 ucnv_resetToUnicode(cnv);
2090 TestNextUChar(cnv, source, limit, results3, "UTF-16");
2091
2092 /* Test the condition when source >= sourceLimit */
2093 ucnv_resetToUnicode(cnv);
2094 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2095
2096 ucnv_close(cnv);
2097 }
2098
TestUTF16BE(void)2099 static void TestUTF16BE(void) {
2100 /* test input */
2101 static const uint8_t in[]={
2102 0x00, 0x61,
2103 0x00, 0xc0,
2104 0x00, 0x31,
2105 0x00, 0xf4,
2106 0xce, 0xfe,
2107 0xd8, 0x01, 0xdc, 0x01
2108 };
2109
2110 /* expected test results */
2111 static const int32_t results[]={
2112 /* number of bytes read, code point */
2113 2, 0x61,
2114 2, 0xc0,
2115 2, 0x31,
2116 2, 0xf4,
2117 2, 0xcefe,
2118 4, 0x10401
2119 };
2120
2121 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2122 UErrorCode errorCode=U_ZERO_ERROR;
2123 UConverter *cnv=ucnv_open("utf-16be", &errorCode);
2124 if(U_FAILURE(errorCode)) {
2125 log_err("Unable to open a UTF16-BE converter: %s\n", u_errorName(errorCode));
2126 return;
2127 }
2128 TestNextUChar(cnv, source, limit, results, "UTF-16BE");
2129 /* Test the condition when source >= sourceLimit */
2130 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2131 /*Test for the condition where there is an invalid character*/
2132 {
2133 static const uint8_t source2[]={0x61};
2134 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2135 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2136 }
2137 #if 0
2138 /*
2139 * Test disabled because currently the UTF-16BE/LE converters are supposed
2140 * to not set errors for unpaired surrogates.
2141 * This may change with
2142 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2143 */
2144
2145 /*Test for the condition where there is a surrogate pair*/
2146 {
2147 const uint8_t source2[]={0xd8, 0x01};
2148 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2149 }
2150 #endif
2151 ucnv_close(cnv);
2152 }
2153
2154 static void
TestUTF16LE(void)2155 TestUTF16LE(void) {
2156 /* test input */
2157 static const uint8_t in[]={
2158 0x61, 0x00,
2159 0x31, 0x00,
2160 0x4e, 0x2e,
2161 0x4e, 0x00,
2162 0x01, 0xd8, 0x01, 0xdc
2163 };
2164
2165 /* expected test results */
2166 static const int32_t results[]={
2167 /* number of bytes read, code point */
2168 2, 0x61,
2169 2, 0x31,
2170 2, 0x2e4e,
2171 2, 0x4e,
2172 4, 0x10401
2173 };
2174
2175 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2176 UErrorCode errorCode=U_ZERO_ERROR;
2177 UConverter *cnv=ucnv_open("utf-16le", &errorCode);
2178 if(U_FAILURE(errorCode)) {
2179 log_err("Unable to open a UTF16-LE converter: %s\n", u_errorName(errorCode));
2180 return;
2181 }
2182 TestNextUChar(cnv, source, limit, results, "UTF-16LE");
2183 /* Test the condition when source >= sourceLimit */
2184 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2185 /*Test for the condition where there is an invalid character*/
2186 {
2187 static const uint8_t source2[]={0x61};
2188 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2189 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an invalid character");
2190 }
2191 #if 0
2192 /*
2193 * Test disabled because currently the UTF-16BE/LE converters are supposed
2194 * to not set errors for unpaired surrogates.
2195 * This may change with
2196 * Jitterbug 1838 - forbid converting surrogate code points in UTF-16/32
2197 */
2198
2199 /*Test for the condition where there is a surrogate character*/
2200 {
2201 static const uint8_t source2[]={0x01, 0xd8};
2202 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_TRUNCATED_CHAR_FOUND, "an truncated surrogate character");
2203 }
2204 #endif
2205
2206 ucnv_close(cnv);
2207 }
2208
TestUTF32(void)2209 static void TestUTF32(void) {
2210 /* test input */
2211 static const uint8_t in1[]={
2212 0x00, 0x00, 0xfe, 0xff, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xfe, 0xff
2213 };
2214 static const uint8_t in2[]={
2215 0xff, 0xfe, 0x00, 0x00, 0x00, 0x10, 0x0f, 0x00, 0xfe, 0xff, 0x00, 0x00
2216 };
2217 static const uint8_t in3[]={
2218 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x10, 0x0f, 0x00, 0x00, 0x00, 0xd8, 0x40, 0x00, 0x00, 0xdc, 0x01
2219 };
2220
2221 /* expected test results */
2222 static const int32_t results1[]={
2223 /* number of bytes read, code point */
2224 8, 0x100f00,
2225 4, 0xfeff
2226 };
2227 static const int32_t results2[]={
2228 /* number of bytes read, code point */
2229 8, 0x0f1000,
2230 4, 0xfffe
2231 };
2232 static const int32_t results3[]={
2233 /* number of bytes read, code point */
2234 4, 0xfefe,
2235 4, 0x100f00,
2236 4, 0xfffd, /* unmatched surrogate */
2237 4, 0xfffd /* unmatched surrogate */
2238 };
2239
2240 const char *source, *limit;
2241
2242 UErrorCode errorCode=U_ZERO_ERROR;
2243 UConverter *cnv=ucnv_open("UTF-32", &errorCode);
2244 if(U_FAILURE(errorCode)) {
2245 log_data_err("Unable to open a UTF-32 converter: %s\n", u_errorName(errorCode));
2246 return;
2247 }
2248
2249 source=(const char *)in1, limit=(const char *)in1+sizeof(in1);
2250 TestNextUChar(cnv, source, limit, results1, "UTF-32");
2251
2252 source=(const char *)in2, limit=(const char *)in2+sizeof(in2);
2253 ucnv_resetToUnicode(cnv);
2254 TestNextUChar(cnv, source, limit, results2, "UTF-32");
2255
2256 source=(const char *)in3, limit=(const char *)in3+sizeof(in3);
2257 ucnv_resetToUnicode(cnv);
2258 TestNextUChar(cnv, source, limit, results3, "UTF-32");
2259
2260 /* Test the condition when source >= sourceLimit */
2261 ucnv_resetToUnicode(cnv);
2262 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2263
2264 ucnv_close(cnv);
2265 }
2266
2267 static void
TestUTF32BE(void)2268 TestUTF32BE(void) {
2269 /* test input */
2270 static const uint8_t in[]={
2271 0x00, 0x00, 0x00, 0x61,
2272 0x00, 0x00, 0x30, 0x61,
2273 0x00, 0x00, 0xdc, 0x00,
2274 0x00, 0x00, 0xd8, 0x00,
2275 0x00, 0x00, 0xdf, 0xff,
2276 0x00, 0x00, 0xff, 0xfe,
2277 0x00, 0x10, 0xab, 0xcd,
2278 0x00, 0x10, 0xff, 0xff
2279 };
2280
2281 /* expected test results */
2282 static const int32_t results[]={
2283 /* number of bytes read, code point */
2284 4, 0x61,
2285 4, 0x3061,
2286 4, 0xfffd,
2287 4, 0xfffd,
2288 4, 0xfffd,
2289 4, 0xfffe,
2290 4, 0x10abcd,
2291 4, 0x10ffff
2292 };
2293
2294 /* error test input */
2295 static const uint8_t in2[]={
2296 0x00, 0x00, 0x00, 0x61,
2297 0x00, 0x11, 0x00, 0x00, /* 0x110000 out of range */
2298 0x00, 0x00, 0x00, 0x62,
2299 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2300 0x7f, 0xff, 0xff, 0xff, /* 0x7fffffff out of range */
2301 0x00, 0x00, 0x01, 0x62,
2302 0x00, 0x00, 0x02, 0x62
2303 };
2304
2305 /* expected error test results */
2306 static const int32_t results2[]={
2307 /* number of bytes read, code point */
2308 4, 0x61,
2309 8, 0x62,
2310 12, 0x162,
2311 4, 0x262
2312 };
2313
2314 UConverterToUCallback cb;
2315 const void *p;
2316
2317 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2318 UErrorCode errorCode=U_ZERO_ERROR;
2319 UConverter *cnv=ucnv_open("UTF-32BE", &errorCode);
2320 if(U_FAILURE(errorCode)) {
2321 log_data_err("Unable to open a UTF-32BE converter: %s\n", u_errorName(errorCode));
2322 return;
2323 }
2324 TestNextUChar(cnv, source, limit, results, "UTF-32BE");
2325
2326 /* Test the condition when source >= sourceLimit */
2327 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2328
2329 /* test error behavior with a skip callback */
2330 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2331 source=(const char *)in2;
2332 limit=(const char *)(in2+sizeof(in2));
2333 TestNextUChar(cnv, source, limit, results2, "UTF-32BE");
2334
2335 ucnv_close(cnv);
2336 }
2337
2338 static void
TestUTF32LE(void)2339 TestUTF32LE(void) {
2340 /* test input */
2341 static const uint8_t in[]={
2342 0x61, 0x00, 0x00, 0x00,
2343 0x61, 0x30, 0x00, 0x00,
2344 0x00, 0xdc, 0x00, 0x00,
2345 0x00, 0xd8, 0x00, 0x00,
2346 0xff, 0xdf, 0x00, 0x00,
2347 0xfe, 0xff, 0x00, 0x00,
2348 0xcd, 0xab, 0x10, 0x00,
2349 0xff, 0xff, 0x10, 0x00
2350 };
2351
2352 /* expected test results */
2353 static const int32_t results[]={
2354 /* number of bytes read, code point */
2355 4, 0x61,
2356 4, 0x3061,
2357 4, 0xfffd,
2358 4, 0xfffd,
2359 4, 0xfffd,
2360 4, 0xfffe,
2361 4, 0x10abcd,
2362 4, 0x10ffff
2363 };
2364
2365 /* error test input */
2366 static const uint8_t in2[]={
2367 0x61, 0x00, 0x00, 0x00,
2368 0x00, 0x00, 0x11, 0x00, /* 0x110000 out of range */
2369 0x62, 0x00, 0x00, 0x00,
2370 0xff, 0xff, 0xff, 0xff, /* 0xffffffff out of range */
2371 0xff, 0xff, 0xff, 0x7f, /* 0x7fffffff out of range */
2372 0x62, 0x01, 0x00, 0x00,
2373 0x62, 0x02, 0x00, 0x00,
2374 };
2375
2376 /* expected error test results */
2377 static const int32_t results2[]={
2378 /* number of bytes read, code point */
2379 4, 0x61,
2380 8, 0x62,
2381 12, 0x162,
2382 4, 0x262,
2383 };
2384
2385 UConverterToUCallback cb;
2386 const void *p;
2387
2388 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2389 UErrorCode errorCode=U_ZERO_ERROR;
2390 UConverter *cnv=ucnv_open("UTF-32LE", &errorCode);
2391 if(U_FAILURE(errorCode)) {
2392 log_data_err("Unable to open a UTF-32LE converter: %s\n", u_errorName(errorCode));
2393 return;
2394 }
2395 TestNextUChar(cnv, source, limit, results, "UTF-32LE");
2396
2397 /* Test the condition when source >= sourceLimit */
2398 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2399
2400 /* test error behavior with a skip callback */
2401 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_SKIP, NULL, &cb, &p, &errorCode);
2402 source=(const char *)in2;
2403 limit=(const char *)(in2+sizeof(in2));
2404 TestNextUChar(cnv, source, limit, results2, "UTF-32LE");
2405
2406 ucnv_close(cnv);
2407 }
2408
2409 static void
TestLATIN1(void)2410 TestLATIN1(void) {
2411 /* test input */
2412 static const uint8_t in[]={
2413 0x61,
2414 0x31,
2415 0x32,
2416 0xc0,
2417 0xf0,
2418 0xf4,
2419 };
2420
2421 /* expected test results */
2422 static const int32_t results[]={
2423 /* number of bytes read, code point */
2424 1, 0x61,
2425 1, 0x31,
2426 1, 0x32,
2427 1, 0xc0,
2428 1, 0xf0,
2429 1, 0xf4,
2430 };
2431 static const uint16_t in1[] = {
2432 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2433 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2434 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2435 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2436 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2437 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2438 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2439 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2440 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2441 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2442 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2443 0xcb, 0x82
2444 };
2445 static const uint8_t out1[] = {
2446 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
2447 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
2448 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
2449 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
2450 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
2451 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
2452 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
2453 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
2454 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
2455 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
2456 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
2457 0xcb, 0x82
2458 };
2459 static const uint16_t in2[]={
2460 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2461 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2462 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2463 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2464 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2465 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2466 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2467 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2468 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2469 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2470 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2471 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2472 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2473 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2474 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2475 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2476 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2477 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2478 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2479 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2480 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2481 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2482 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2483 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2484 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2485 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2486 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2487 0x37, 0x20, 0x2A, 0x2F,
2488 };
2489 static const unsigned char out2[]={
2490 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
2491 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
2492 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
2493 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
2494 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
2495 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
2496 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x0F, 0x2F, 0x2A, 0x70,
2497 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
2498 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
2499 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
2500 0x1B, 0x4F, 0x22, 0x48, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
2501 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
2502 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
2503 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
2504 0x4F, 0x22, 0x6C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2505 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2506 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
2507 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
2508 0x22, 0x5C, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
2509 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
2510 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
2511 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
2512 0x23, 0x71, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
2513 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
2514 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
2515 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
2516 0x6F, 0x0F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
2517 0x37, 0x20, 0x2A, 0x2F,
2518 };
2519 const char *source=(const char *)in;
2520 const char *limit=(const char *)in+sizeof(in);
2521
2522 UErrorCode errorCode=U_ZERO_ERROR;
2523 UConverter *cnv=ucnv_open("LATIN_1", &errorCode);
2524 if(U_FAILURE(errorCode)) {
2525 log_data_err("Unable to open a LATIN_1 converter: %s\n", u_errorName(errorCode));
2526 return;
2527 }
2528 TestNextUChar(cnv, source, limit, results, "LATIN_1");
2529 /* Test the condition when source >= sourceLimit */
2530 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2531 TestConv((uint16_t*)in1,sizeof(in1)/2,"LATIN_1","LATIN-1",(char*)out1,sizeof(out1));
2532 TestConv((uint16_t*)in2,sizeof(in2)/2,"ASCII","ASCII",(char*)out2,sizeof(out2));
2533
2534 ucnv_close(cnv);
2535 }
2536
2537 static void
TestSBCS(void)2538 TestSBCS(void) {
2539 /* test input */
2540 static const uint8_t in[]={ 0x61, 0xc0, 0x80, 0xe0, 0xf0, 0xf4};
2541 /* expected test results */
2542 static const int32_t results[]={
2543 /* number of bytes read, code point */
2544 1, 0x61,
2545 1, 0xbf,
2546 1, 0xc4,
2547 1, 0x2021,
2548 1, 0xf8ff,
2549 1, 0x00d9
2550 };
2551
2552 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2553 UErrorCode errorCode=U_ZERO_ERROR;
2554 UConverter *cnv=ucnv_open("x-mac-turkish", &errorCode);
2555 if(U_FAILURE(errorCode)) {
2556 log_data_err("Unable to open a SBCS(x-mac-turkish) converter: %s\n", u_errorName(errorCode));
2557 return;
2558 }
2559 TestNextUChar(cnv, source, limit, results, "SBCS(x-mac-turkish)");
2560 /* Test the condition when source >= sourceLimit */
2561 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2562 /*Test for Illegal character */ /*
2563 {
2564 static const uint8_t input1[]={ 0xA1 };
2565 const char* illegalsource=(const char*)input1;
2566 TestNextUCharError(cnv, illegalsource, illegalsource+sizeof(illegalsource), U_INVALID_CHAR_FOUND, "source has a illegal character");
2567 }
2568 */
2569 ucnv_close(cnv);
2570 }
2571
2572 static void
TestDBCS(void)2573 TestDBCS(void) {
2574 /* test input */
2575 static const uint8_t in[]={
2576 0x44, 0x6a,
2577 0xc4, 0x9c,
2578 0x7a, 0x74,
2579 0x46, 0xab,
2580 0x42, 0x5b,
2581
2582 };
2583
2584 /* expected test results */
2585 static const int32_t results[]={
2586 /* number of bytes read, code point */
2587 2, 0x00a7,
2588 2, 0xe1d2,
2589 2, 0x6962,
2590 2, 0xf842,
2591 2, 0xffe5,
2592 };
2593
2594 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2595 UErrorCode errorCode=U_ZERO_ERROR;
2596
2597 UConverter *cnv=my_ucnv_open("@ibm9027", &errorCode);
2598 if(U_FAILURE(errorCode)) {
2599 log_data_err("Unable to open a DBCS(@ibm9027) converter: %s\n", u_errorName(errorCode));
2600 return;
2601 }
2602 TestNextUChar(cnv, source, limit, results, "DBCS(@ibm9027)");
2603 /* Test the condition when source >= sourceLimit */
2604 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2605 /*Test for the condition where there is an invalid character*/
2606 {
2607 static const uint8_t source2[]={0x1a, 0x1b};
2608 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2609 }
2610 /*Test for the condition where we have a truncated char*/
2611 {
2612 static const uint8_t source1[]={0xc4};
2613 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2614 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2615 }
2616 ucnv_close(cnv);
2617 }
2618
2619 static void
TestMBCS(void)2620 TestMBCS(void) {
2621 /* test input */
2622 static const uint8_t in[]={
2623 0x01,
2624 0xa6, 0xa3,
2625 0x00,
2626 0xa6, 0xa1,
2627 0x08,
2628 0xc2, 0x76,
2629 0xc2, 0x78,
2630
2631 };
2632
2633 /* expected test results */
2634 static const int32_t results[]={
2635 /* number of bytes read, code point */
2636 1, 0x0001,
2637 2, 0x250c,
2638 1, 0x0000,
2639 2, 0x2500,
2640 1, 0x0008,
2641 2, 0xd60c,
2642 2, 0xd60e,
2643 };
2644
2645 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2646 UErrorCode errorCode=U_ZERO_ERROR;
2647
2648 UConverter *cnv=ucnv_open("ibm-1363", &errorCode);
2649 if(U_FAILURE(errorCode)) {
2650 log_data_err("Unable to open a MBCS(ibm-1363) converter: %s\n", u_errorName(errorCode));
2651 return;
2652 }
2653 TestNextUChar(cnv, source, limit, results, "MBCS(ibm-1363)");
2654 /* Test the condition when source >= sourceLimit */
2655 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2656 /*Test for the condition where there is an invalid character*/
2657 {
2658 static const uint8_t source2[]={0xa1, 0x80};
2659 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character");
2660 }
2661 /*Test for the condition where we have a truncated char*/
2662 {
2663 static const uint8_t source1[]={0xc4};
2664 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2665 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2666 }
2667 ucnv_close(cnv);
2668
2669 }
2670
2671 #if !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_FILE_IO
2672 static void
TestICCRunout(void)2673 TestICCRunout(void) {
2674 /* { "ibm-1363", :bin{ a2aea2 }, "\u00a1\u001a", :intvector{ 0, 2 }, :int{1}, :int{0}, "\", "?", :bin{""} } */
2675
2676 const char *cnvName = "ibm-1363";
2677 UErrorCode status = U_ZERO_ERROR;
2678 const char sourceData[] = { (char)0xa2, (char)0xae, (char)0xa2 };
2679 /* UChar expectUData[] = { 0x00a1, 0x001a }; */
2680 const char *source = sourceData;
2681 const char *sourceLim = sourceData+sizeof(sourceData);
2682 UChar c1, c2, c3;
2683 UConverter *cnv=ucnv_open(cnvName, &status);
2684 if(U_FAILURE(status)) {
2685 log_data_err("Unable to open %s converter: %s\n", cnvName, u_errorName(status));
2686 return;
2687 }
2688
2689 #if 0
2690 {
2691 UChar targetBuf[256];
2692 UChar *target = targetBuf;
2693 UChar *targetLim = target+256;
2694 ucnv_toUnicode(cnv, &target, targetLim, &source, sourceLim, NULL, true, &status);
2695
2696 log_info("After convert: target@%d, source@%d, status%s\n",
2697 target-targetBuf, source-sourceData, u_errorName(status));
2698
2699 if(U_FAILURE(status)) {
2700 log_err("Failed to convert: %s\n", u_errorName(status));
2701 } else {
2702
2703 }
2704 }
2705 #endif
2706
2707 c1=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2708 log_verbose("c1: U+%04X, source@%d, status %s\n", c1, source-sourceData, u_errorName(status));
2709
2710 c2=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2711 log_verbose("c2: U+%04X, source@%d, status %s\n", c2, source-sourceData, u_errorName(status));
2712
2713 c3=ucnv_getNextUChar(cnv, &source, sourceLim, &status);
2714 log_verbose("c3: U+%04X, source@%d, status %s\n", c3, source-sourceData, u_errorName(status));
2715
2716 if(status==U_INDEX_OUTOFBOUNDS_ERROR && c3==0xFFFF) {
2717 log_verbose("OK\n");
2718 } else {
2719 log_err("FAIL: c3 was not FFFF or err was not U_INDEXOUTOFBOUNDS_ERROR\n");
2720 }
2721
2722 ucnv_close(cnv);
2723
2724 }
2725 #endif
2726
2727 #ifdef U_ENABLE_GENERIC_ISO_2022
2728
2729 static void
TestISO_2022()2730 TestISO_2022() {
2731 /* test input */
2732 static const uint8_t in[]={
2733 0x1b, 0x25, 0x42,
2734 0x31,
2735 0x32,
2736 0x61,
2737 0xc2, 0x80,
2738 0xe0, 0xa0, 0x80,
2739 0xf0, 0x90, 0x80, 0x80
2740 };
2741
2742
2743
2744 /* expected test results */
2745 static const int32_t results[]={
2746 /* number of bytes read, code point */
2747 4, 0x0031, /* 4 bytes including the escape sequence */
2748 1, 0x0032,
2749 1, 0x61,
2750 2, 0x80,
2751 3, 0x800,
2752 4, 0x10000
2753 };
2754
2755 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
2756 UErrorCode errorCode=U_ZERO_ERROR;
2757 UConverter *cnv;
2758
2759 cnv=ucnv_open("ISO_2022", &errorCode);
2760 if(U_FAILURE(errorCode)) {
2761 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
2762 return;
2763 }
2764 TestNextUChar(cnv, source, limit, results, "ISO_2022");
2765
2766 /* Test the condition when source >= sourceLimit */
2767 TestNextUCharError(cnv, source, source-1, U_ILLEGAL_ARGUMENT_ERROR, "sourceLimit < source");
2768 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
2769 /*Test for the condition where we have a truncated char*/
2770 {
2771 static const uint8_t source1[]={0xc4};
2772 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
2773 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_TRUNCATED_CHAR_FOUND, "a character is truncated");
2774 }
2775 /*Test for the condition where there is an invalid character*/
2776 {
2777 static const uint8_t source2[]={0xa1, 0x01};
2778 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_CHAR_FOUND, "an invalid character");
2779 }
2780 ucnv_close(cnv);
2781 }
2782
2783 #endif
2784
2785 static void
TestSmallTargetBuffer(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2786 TestSmallTargetBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2787 const UChar* uSource;
2788 const UChar* uSourceLimit;
2789 const char* cSource;
2790 const char* cSourceLimit;
2791 UChar *uTargetLimit =NULL;
2792 UChar *uTarget;
2793 char *cTarget;
2794 const char *cTargetLimit;
2795 char *cBuf;
2796 UChar *uBuf; /*,*test;*/
2797 int32_t uBufSize = 120;
2798 int len=0;
2799 int i=2;
2800 UErrorCode errorCode=U_ZERO_ERROR;
2801 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2802 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
2803 ucnv_reset(cnv);
2804 for(;--i>0; ){
2805 uSource = (UChar*) source;
2806 uSourceLimit=(const UChar*)sourceLimit;
2807 cTarget = cBuf;
2808 uTarget = uBuf;
2809 cSource = cBuf;
2810 cTargetLimit = cBuf;
2811 uTargetLimit = uBuf;
2812
2813 do{
2814
2815 cTargetLimit = cTargetLimit+ i;
2816 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,false, &errorCode);
2817 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2818 errorCode=U_ZERO_ERROR;
2819 continue;
2820 }
2821
2822 if(U_FAILURE(errorCode)){
2823 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2824 return;
2825 }
2826
2827 }while (uSource<uSourceLimit);
2828
2829 cSourceLimit =cTarget;
2830 do{
2831 uTargetLimit=uTargetLimit+i;
2832 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,false,&errorCode);
2833 if(errorCode==U_BUFFER_OVERFLOW_ERROR){
2834 errorCode=U_ZERO_ERROR;
2835 continue;
2836 }
2837 if(U_FAILURE(errorCode)){
2838 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
2839 return;
2840 }
2841 }while(cSource<cSourceLimit);
2842
2843 uSource = source;
2844 /*test =uBuf;*/
2845 for(len=0;len<(int)(source - sourceLimit);len++){
2846 if(uBuf[len]!=uSource[len]){
2847 log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
2848 }
2849 }
2850 }
2851 free(uBuf);
2852 free(cBuf);
2853 }
2854 /* Test for Jitterbug 778 */
TestToAndFromUChars(const uint16_t * source,const UChar * sourceLimit,UConverter * cnv)2855 static void TestToAndFromUChars(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
2856 const UChar* uSource;
2857 const UChar* uSourceLimit;
2858 const char* cSource;
2859 UChar *uTargetLimit =NULL;
2860 UChar *uTarget;
2861 char *cTarget;
2862 const char *cTargetLimit;
2863 char *cBuf;
2864 UChar *uBuf,*test;
2865 int32_t uBufSize = 120;
2866 int numCharsInTarget=0;
2867 UErrorCode errorCode=U_ZERO_ERROR;
2868 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
2869 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
2870 uSource = source;
2871 uSourceLimit=sourceLimit;
2872 cTarget = cBuf;
2873 cTargetLimit = cBuf +uBufSize*5;
2874 uTarget = uBuf;
2875 uTargetLimit = uBuf+ uBufSize*5;
2876 ucnv_reset(cnv);
2877 numCharsInTarget=ucnv_fromUChars(cnv, cTarget, (int32_t)(cTargetLimit-cTarget), uSource, (int32_t)(uSourceLimit-uSource), &errorCode);
2878 if(U_FAILURE(errorCode)){
2879 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
2880 return;
2881 }
2882 cSource = cBuf;
2883 test =uBuf;
2884 ucnv_toUChars(cnv,uTarget,(int32_t)(uTargetLimit-uTarget),cSource,numCharsInTarget,&errorCode);
2885 if(U_FAILURE(errorCode)){
2886 log_err("ucnv_toUChars conversion failed, reason %s\n", u_errorName(errorCode));
2887 return;
2888 }
2889 uSource = source;
2890 while(uSource<uSourceLimit){
2891 if(*test!=*uSource){
2892
2893 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
2894 }
2895 uSource++;
2896 test++;
2897 }
2898 free(uBuf);
2899 free(cBuf);
2900 }
2901
/*
 * Round-trips the UTF-16 text [source, sourceLimit) through cnv while
 * feeding the input in small pieces: before each ucnv_fromUnicode() /
 * ucnv_toUnicode() call the source limit is advanced by at most one unit,
 * while the target buffers are given their full size.
 * Any failure other than U_BUFFER_OVERFLOW_ERROR is logged and ends the test.
 *
 * source      - first UTF-16 code unit of the text
 * sourceLimit - one past the last code unit of the text
 * cnv         - open converter; it is reset before use
 *
 * The scratch buffers are released on every exit path.
 */
static void TestSmallSourceBuffer(const uint16_t* source, const UChar* sourceLimit,UConverter* cnv){
    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf;
    UChar *uBuf;
    int32_t uBufSize = 120;
    int len=0;
    int i=2;
    const UChar *temp = sourceLimit;
    UErrorCode errorCode=U_ZERO_ERROR;

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    cBuf = (char*)malloc(uBufSize * sizeof(char) * 10);

    ucnv_reset(cnv);

    /* i starts at 2 and is pre-decremented, so the body runs once */
    for(;--i>0;){
        uSource = (const UChar*)source;
        cTarget = cBuf;
        uTarget = uBuf;
        cSource = cBuf;
        uTargetLimit = uBuf+uBufSize*5;
        cTargetLimit = cBuf+uBufSize*10;
        uSourceLimit = uSource;

        /* Unicode -> bytes, exposing one more input unit per call */
        do{
            if (uSourceLimit < sourceLimit) {
                uSourceLimit = uSourceLimit+1;
            }
            ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,false, &errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
                continue;
            }

            if(U_FAILURE(errorCode)){
                log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }

        }while (uSource<temp);

        /* bytes -> Unicode, exposing one more input byte per call */
        cSourceLimit =cBuf;
        do{
            if (cSourceLimit < cTarget) {
                cSourceLimit = cSourceLimit+1;
            }
            ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,false,&errorCode);
            if(errorCode==U_BUFFER_OVERFLOW_ERROR){
                errorCode=U_ZERO_ERROR;
                continue;
            }
            if(U_FAILURE(errorCode)){
                log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
                goto cleanup;
            }
        }while(cSource<cTarget);

        uSource = (const UChar*)source;
        /*
         * NOTE(review): (source - sourceLimit) is never positive, so this
         * comparison never executes. Swapping the operands would enable it,
         * but both conversions above run with flush=false, so the round trip
         * may be incomplete; confirm the expected output before enabling it.
         */
        for(;len<(int)(source - sourceLimit);len++){
            if(uBuf[len]!=uSource[len]){
                log_err("Expected : \\u%04X \t Got: \\u%04X\n",uSource[len],(int)uBuf[len]) ;
            }
        }
    }

cleanup:
    free(uBuf);
    free(cBuf);
}
2976 static void
TestGetNextUChar2022(UConverter * cnv,const char * source,const char * limit,const uint16_t results[],const char * message)2977 TestGetNextUChar2022(UConverter* cnv, const char* source, const char* limit,
2978 const uint16_t results[], const char* message){
2979 /* const char* s0; */
2980 const char* s=(char*)source;
2981 const uint16_t *r=results;
2982 UErrorCode errorCode=U_ZERO_ERROR;
2983 uint32_t c,exC;
2984 ucnv_reset(cnv);
2985 while(s<limit) {
2986 /* s0=s; */
2987 c=ucnv_getNextUChar(cnv, &s, limit, &errorCode);
2988 if(errorCode==U_INDEX_OUTOFBOUNDS_ERROR) {
2989 break; /* no more significant input */
2990 } else if(U_FAILURE(errorCode)) {
2991 log_err("%s ucnv_getNextUChar() failed: %s\n", message, u_errorName(errorCode));
2992 break;
2993 } else {
2994 if(U16_IS_LEAD(*r)){
2995 int i =0, len = 2;
2996 U16_NEXT(r, i, len, exC);
2997 r++;
2998 }else{
2999 exC = *r;
3000 }
3001 if(c!=(uint32_t)(exC))
3002 log_err("%s ucnv_getNextUChar() Expected: \\u%04X Got: \\u%04X \n",message,(uint32_t) (*r),c);
3003 }
3004 r++;
3005 }
3006 }
3007
TestJitterbug930(const char * enc)3008 static int TestJitterbug930(const char* enc){
3009 UErrorCode err = U_ZERO_ERROR;
3010 UConverter*converter;
3011 char out[80];
3012 char*target = out;
3013 UChar in[4];
3014 const UChar*source = in;
3015 int32_t off[80];
3016 int32_t* offsets = off;
3017 int numOffWritten=0;
3018 UBool flush = 0;
3019 converter = my_ucnv_open(enc, &err);
3020
3021 in[0] = 0x41; /* 0x4E00;*/
3022 in[1] = 0x4E01;
3023 in[2] = 0x4E02;
3024 in[3] = 0x4E03;
3025
3026 memset(off, '*', sizeof(off));
3027
3028 ucnv_fromUnicode (converter,
3029 &target,
3030 target+2,
3031 &source,
3032 source+3,
3033 offsets,
3034 flush,
3035 &err);
3036
3037 /* writes three bytes into the output buffer: 41 1B 24
3038 * but offsets contains 0 1 1
3039 */
3040 while(*offsets< off[10]){
3041 numOffWritten++;
3042 offsets++;
3043 }
3044 log_verbose("Testing Jitterbug 930 for encoding %s",enc);
3045 if(numOffWritten!= (int)(target-out)){
3046 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc, (int)(target-out),numOffWritten);
3047 }
3048
3049 err = U_ZERO_ERROR;
3050
3051 memset(off,'*' , sizeof(off));
3052
3053 flush = 1;
3054 offsets=off;
3055 ucnv_fromUnicode (converter,
3056 &target,
3057 target+4,
3058 &source,
3059 source,
3060 offsets,
3061 flush,
3062 &err);
3063 numOffWritten=0;
3064 while(*offsets< off[10]){
3065 numOffWritten++;
3066 if(*offsets!= -1){
3067 log_err("Jitterbug 930 test for enc: %s failed. Expected: %i Got: %i",enc,-1,*offsets) ;
3068 }
3069 offsets++;
3070 }
3071
3072 /* writes 42 43 7A into output buffer,
3073 * offsets contains -1 -1 -1
3074 */
3075 ucnv_close(converter);
3076 return 0;
3077 }
3078
3079 static void
TestHZ(void)3080 TestHZ(void) {
3081 /* test input */
3082 static const uint16_t in[]={
3083 0x3000, 0x3001, 0x3002, 0x00B7, 0x02C9, 0x02C7, 0x00A8, 0x3003, 0x3005, 0x2014,
3084 0xFF5E, 0x2016, 0x2026, 0x007E, 0x997C, 0x70B3, 0x75C5, 0x5E76, 0x73BB, 0x83E0,
3085 0x64AD, 0x62E8, 0x94B5, 0x000A, 0x6CE2, 0x535A, 0x52C3, 0x640F, 0x94C2, 0x7B94,
3086 0x4F2F, 0x5E1B, 0x8236, 0x000A, 0x8116, 0x818A, 0x6E24, 0x6CCA, 0x9A73, 0x6355,
3087 0x535C, 0x54FA, 0x8865, 0x000A, 0x57E0, 0x4E0D, 0x5E03, 0x6B65, 0x7C3F, 0x90E8,
3088 0x6016, 0x248F, 0x2490, 0x000A, 0x2491, 0x2492, 0x2493, 0x2494, 0x2495, 0x2496,
3089 0x2497, 0x2498, 0x2499, 0x000A, 0x249A, 0x249B, 0x2474, 0x2475, 0x2476, 0x2477,
3090 0x2478, 0x2479, 0x247A, 0x000A, 0x247B, 0x247C, 0x247D, 0x247E, 0x247F, 0x2480,
3091 0x2481, 0x2482, 0x2483, 0x000A, 0x0041, 0x0043, 0x0044, 0x0045, 0x0046, 0x007E,
3092 0x0048, 0x0049, 0x004A, 0x000A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050,
3093 0x0051, 0x0052, 0x0053, 0x000A, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059,
3094 0x005A, 0x005B, 0x005C, 0x000A
3095 };
3096 const UChar* uSource;
3097 const UChar* uSourceLimit;
3098 const char* cSource;
3099 const char* cSourceLimit;
3100 UChar *uTargetLimit =NULL;
3101 UChar *uTarget;
3102 char *cTarget;
3103 const char *cTargetLimit;
3104 char *cBuf = NULL;
3105 UChar *uBuf = NULL;
3106 UChar *test;
3107 int32_t uBufSize = 120;
3108 UErrorCode errorCode=U_ZERO_ERROR;
3109 UConverter *cnv = NULL;
3110 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3111 int32_t* myOff= offsets;
3112 cnv=ucnv_open("HZ", &errorCode);
3113 if(U_FAILURE(errorCode)) {
3114 log_data_err("Unable to open HZ converter: %s\n", u_errorName(errorCode));
3115 goto cleanup;
3116 }
3117
3118 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3119 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3120 uSource = (const UChar*)in;
3121 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3122 cTarget = cBuf;
3123 cTargetLimit = cBuf +uBufSize*5;
3124 uTarget = uBuf;
3125 uTargetLimit = uBuf+ uBufSize*5;
3126 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3127 if(U_FAILURE(errorCode)){
3128 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3129 goto cleanup;
3130 }
3131 cSource = cBuf;
3132 cSourceLimit =cTarget;
3133 test =uBuf;
3134 myOff=offsets;
3135 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3136 if(U_FAILURE(errorCode)){
3137 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3138 goto cleanup;
3139 }
3140 uSource = (const UChar*)in;
3141 while(uSource<uSourceLimit){
3142 if(*test!=*uSource){
3143
3144 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3145 }
3146 uSource++;
3147 test++;
3148 }
3149 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "HZ encoding");
3150 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3151 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3152 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3153 TestJitterbug930("csISO2022JP");
3154
3155 cleanup:
3156 ucnv_close(cnv);
3157 free(offsets);
3158 free(uBuf);
3159 free(cBuf);
3160 }
3161
/*
 * ISCII conversion test. unicodeText and isciiBytes are matching tables:
 * the bytes are converted with "x-iscii-de" and compared against the text,
 * and the text is run through TestConv() with "ISCII,version=0" together
 * with the same bytes.
 */
static void
TestISCII(void){
    /* UTF-16 side of the test data */
    static const uint16_t unicodeText[]={
        /* full range of Devanagari */
        0x0901,0x0902,0x0903,0x0905,0x0906,0x0907,0x0908,0x0909,0x090A,
        0x090B,0x090E,0x090F,0x0910,0x090D,0x0912,0x0913,0x0914,0x0911,
        0x0915,0x0916,0x0917,0x0918,0x0919,0x091A,0x091B,0x091C,0x091D,
        0x091E,0x091F,0x0920,0x0921,0x0922,0x0923,0x0924,0x0925,0x0926,
        0x0927,0x0928,0x0929,0x092A,0x092B,0x092C,0x092D,0x092E,0x092F,
        0x095F,0x0930,0x0931,0x0932,0x0933,0x0934,0x0935,0x0936,0x0937,
        0x0938,0x0939,0x200D,0x093E,0x093F,0x0940,0x0941,0x0942,0x0943,
        0x0946,0x0947,0x0948,0x0945,0x094A,0x094B,0x094C,0x0949,0x094D,
        0x093d,0x0966,0x0967,0x0968,0x0969,0x096A,0x096B,0x096C,
        0x096D,0x096E,0x096F,
        /* soft halant */
        0x0915,0x094d, 0x200D,
        /* explicit halant */
        0x0915,0x094d, 0x200c,
        /* double danda */
        0x965,
        /* ASCII */
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        /* tests from Lotus */
        0x0061,0x0915,0x000D,0x000A,0x0996,0x0043,
        0x0930,0x094D,0x200D,
        0x0901,0x000D,0x000A,0x0905,0x0985,0x0043,
        0x0915,0x0921,0x002B,0x095F,
        /* tamil range */
        0x0B86, 0xB87, 0xB88,
        /* telugu range */
        0x0C05, 0x0C02, 0x0C03,0x0c31,
        /* kannada range */
        0x0C85, 0xC82, 0x0C83,
        /* abbreviation sign and anudatta */
        0x0970, 0x952,
        /* code points written as base + nukta (0xE9) on the byte side */
        0x0960,
        0x0944,
        0x090C,
        0x0962,
        0x0961,
        0x0963,
        0x0950,
        0x093D,
        0x0958,
        0x0959,
        0x095A,
        0x095B,
        0x095C,
        0x095D,
        0x095E,
        0x0020, 0x094D, 0x0930, 0x0000, 0x00A0
    };
    /* ISCII byte side of the test data */
    static const unsigned char isciiBytes[]={
        0xa1,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,
        0xaa,0xab,0xac,0xad,0xae,0xaf,0xb0,0xb1,0xb2,
        0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xbb,
        0xbc,0xbd,0xbe,0xbf,0xc0,0xc1,0xc2,0xc3,0xc4,
        0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xcb,0xcc,0xcd,
        0xce,0xcf,0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,
        0xd7,0xd8,0xd9,0xda,0xdb,0xdc,0xdd,0xde,0xdf,
        0xe0,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,
        0xea,0xe9,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,
        0xf8,0xf9,0xfa,
        /* soft halant */
        0xb3, 0xE8, 0xE9,
        /* explicit halant */
        0xb3, 0xE8, 0xE8,
        /* double danda */
        0xea, 0xea,
        /* ASCII */
        0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
        0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
        0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
        /* tests from Lotus */
        0x61,0xEF,0x42,0xEF,0x30,0xB3,0x0D,0x0A,0xEF,0x43,0xB4,0x43,
        0xEF,0x42,0xCF,0xE8,0xD9,
        0xEF,0x42,0xA1,0x0D,0x0A,0xEF,0x42,0xA4,0xEF,0x43,0xA4,0x43,
        0xEF,0x42,0xB3,0xBF,0x2B,0xEF,0x42,0xCE,
        /* tamil range */
        0xEF, 0x44, 0xa5, 0xa6, 0xa7,
        /* telugu range */
        0xEF, 0x45,0xa4, 0xa2, 0xa3,0xd0,
        /* kannada range */
        0xEF, 0x48,0xa4, 0xa2, 0xa3,
        /* anudatta and abbreviation sign */
        0xEF, 0x42, 0xF0, 0xBF, 0xF0, 0xB8,

        0xAA, 0xE9, /* RI + NUKTA 0x0960 */
        0xDF, 0xE9, /* Vowel sign RI + NUKTA 0x0944 */
        0xa6, 0xE9, /* Vowel I + NUKTA 0x090C */
        0xdb, 0xE9, /* Vowel sign I + Nukta 0x0962 */
        0xa7, 0xE9, /* Vowel II + NUKTA 0x0961 */
        0xdc, 0xE9, /* Vowel sign II + Nukta 0x0963 */
        0xa1, 0xE9, /* chandrabindu + Nukta 0x0950 */
        0xEA, 0xE9, /* Danda + Nukta 0x093D */
        0xB3, 0xE9, /* Ka + NUKTA */
        0xB4, 0xE9, /* Kha + NUKTA */
        0xB5, 0xE9, /* Ga + NUKTA */
        0xBA, 0xE9,
        0xBF, 0xE9,
        0xC0, 0xE9,
        0xC9, 0xE9,
        /* INV halant RA */
        0xD9, 0xE8, 0xCF,
        0x00, 0x00A0,
        /* just consume unhandled codepoints */
        0xEF, 0x30,
    };

    /* bytes -> Unicode, checked against the table */
    testConvertToU(isciiBytes, sizeof(isciiBytes),
                   unicodeText, UPRV_LENGTHOF(unicodeText),
                   "x-iscii-de", NULL, true);

    /* Unicode round trip plus the byte-table checks inside TestConv() */
    TestConv(unicodeText, (sizeof(unicodeText)/2),
             "ISCII,version=0", "hindi",
             (char *)isciiBytes, sizeof(isciiBytes));
}
3304
3305 static void
TestISO_2022_JP(void)3306 TestISO_2022_JP(void) {
3307 /* test input */
3308 static const uint16_t in[]={
3309 0x0041,/*0x00E9,*/0x3000, 0x3001, 0x3002, 0x0020, 0x000D, 0x000A,
3310 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3311 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3312 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3313 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3314 0x201D, 0x3014, 0x000D, 0x000A,
3315 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3316 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3317 };
3318 const UChar* uSource;
3319 const UChar* uSourceLimit;
3320 const char* cSource;
3321 const char* cSourceLimit;
3322 UChar *uTargetLimit =NULL;
3323 UChar *uTarget;
3324 char *cTarget;
3325 const char *cTargetLimit;
3326 char *cBuf = NULL;
3327 UChar *uBuf = NULL;
3328 UChar *test;
3329 int32_t uBufSize = 120;
3330 UErrorCode errorCode=U_ZERO_ERROR;
3331 UConverter *cnv = NULL;
3332 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3333 int32_t* myOff= offsets;
3334 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3335 if(U_FAILURE(errorCode)) {
3336 log_data_err("Unable to open an ISO_2022_JP_1 converter: %s\n", u_errorName(errorCode));
3337 goto cleanup;
3338 }
3339
3340 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3341 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3342 uSource = (const UChar*)in;
3343 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3344 cTarget = cBuf;
3345 cTargetLimit = cBuf +uBufSize*5;
3346 uTarget = uBuf;
3347 uTargetLimit = uBuf+ uBufSize*5;
3348 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3349 if(U_FAILURE(errorCode)){
3350 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3351 goto cleanup;
3352 }
3353 cSource = cBuf;
3354 cSourceLimit =cTarget;
3355 test =uBuf;
3356 myOff=offsets;
3357 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3358 if(U_FAILURE(errorCode)){
3359 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3360 goto cleanup;
3361 }
3362
3363 uSource = (const UChar*)in;
3364 while(uSource<uSourceLimit){
3365 if(*test!=*uSource){
3366
3367 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3368 }
3369 uSource++;
3370 test++;
3371 }
3372
3373 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3374 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3375 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-JP encoding");
3376 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3377 TestJitterbug930("csISO2022JP");
3378
3379 cleanup:
3380 ucnv_close(cnv);
3381 free(uBuf);
3382 free(cBuf);
3383 free(offsets);
3384 }
3385
TestConv(const uint16_t in[],int len,const char * conv,const char * lang,char byteArr[],int byteArrLen)3386 static void TestConv(const uint16_t in[],int len, const char* conv, const char* lang, char byteArr[],int byteArrLen){
3387 const UChar* uSource;
3388 const UChar* uSourceLimit;
3389 const char* cSource;
3390 const char* cSourceLimit;
3391 UChar *uTargetLimit =NULL;
3392 UChar *uTarget;
3393 char *cTarget;
3394 const char *cTargetLimit;
3395 char *cBuf;
3396 UChar *uBuf,*test;
3397 int32_t uBufSize = 120*10;
3398 UErrorCode errorCode=U_ZERO_ERROR;
3399 UConverter *cnv;
3400 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) );
3401 int32_t* myOff= offsets;
3402 cnv=my_ucnv_open(conv, &errorCode);
3403 if(U_FAILURE(errorCode)) {
3404 log_data_err("Unable to open a %s converter: %s\n", conv, u_errorName(errorCode));
3405 return;
3406 }
3407
3408 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar));
3409 cBuf =(char*)malloc(uBufSize * sizeof(char));
3410 uSource = (const UChar*)in;
3411 uSourceLimit=uSource+len;
3412 cTarget = cBuf;
3413 cTargetLimit = cBuf +uBufSize;
3414 uTarget = uBuf;
3415 uTargetLimit = uBuf+ uBufSize;
3416 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3417 if(U_FAILURE(errorCode)){
3418 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3419 return;
3420 }
3421 /*log_verbose("length of compressed string for language %s using %s:%i \n",conv,lang,(cTarget-cBuf));*/
3422 cSource = cBuf;
3423 cSourceLimit =cTarget;
3424 test =uBuf;
3425 myOff=offsets;
3426 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3427 if(U_FAILURE(errorCode)){
3428 log_err("ucnv_toUnicode conversion failed, reason: %s\n", u_errorName(errorCode));
3429 return;
3430 }
3431
3432 uSource = (const UChar*)in;
3433 while(uSource<uSourceLimit){
3434 if(*test!=*uSource){
3435 log_err("for codepage %s : Expected : \\u%04X \t Got: \\u%04X\n",conv,*uSource,(int)*test) ;
3436 }
3437 uSource++;
3438 test++;
3439 }
3440 TestSmallTargetBuffer(in,(const UChar*)&in[len],cnv);
3441 TestSmallSourceBuffer(in,(const UChar*)&in[len],cnv);
3442 TestGetNextUChar2022(cnv, cBuf, cTarget, in, conv);
3443 if(byteArr && byteArrLen!=0){
3444 TestGetNextUChar2022(cnv, byteArr, (byteArr+byteArrLen), in, lang);
3445 TestToAndFromUChars(in,(const UChar*)&in[len],cnv);
3446 {
3447 cSource = byteArr;
3448 cSourceLimit = cSource+byteArrLen;
3449 test=uBuf;
3450 myOff = offsets;
3451 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3452 if(U_FAILURE(errorCode)){
3453 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3454 return;
3455 }
3456
3457 uSource = (const UChar*)in;
3458 while(uSource<uSourceLimit){
3459 if(*test!=*uSource){
3460 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3461 }
3462 uSource++;
3463 test++;
3464 }
3465 }
3466 }
3467
3468 ucnv_close(cnv);
3469 free(uBuf);
3470 free(cBuf);
3471 free(offsets);
3472 }
3473 static UChar U_CALLCONV
_charAt(int32_t offset,void * context)3474 _charAt(int32_t offset, void *context) {
3475 return ((char*)context)[offset];
3476 }
3477
3478 static int32_t
unescape(UChar * dst,int32_t dstLen,const char * src,int32_t srcLen,UErrorCode * status)3479 unescape(UChar* dst, int32_t dstLen,const char* src,int32_t srcLen,UErrorCode *status){
3480 int32_t srcIndex=0;
3481 int32_t dstIndex=0;
3482 if(U_FAILURE(*status)){
3483 return 0;
3484 }
3485 if((dst==NULL && dstLen>0) || (src==NULL ) || dstLen < -1 || srcLen <-1 ){
3486 *status = U_ILLEGAL_ARGUMENT_ERROR;
3487 return 0;
3488 }
3489 if(srcLen==-1){
3490 srcLen = (int32_t)uprv_strlen(src);
3491 }
3492
3493 for (; srcIndex<srcLen; ) {
3494 UChar32 c = src[srcIndex++];
3495 if (c == 0x005C /*'\\'*/) {
3496 c = u_unescapeAt(_charAt,&srcIndex,srcLen,(void*)src); /* advances i*/
3497 if (c == (UChar32)0xFFFFFFFF) {
3498 *status=U_INVALID_CHAR_FOUND; /* return empty string */
3499 break; /* invalid escape sequence */
3500 }
3501 }
3502 if(dstIndex < dstLen){
3503 if(c>0xFFFF){
3504 dst[dstIndex++] = U16_LEAD(c);
3505 if(dstIndex<dstLen){
3506 dst[dstIndex]=U16_TRAIL(c);
3507 }else{
3508 *status=U_BUFFER_OVERFLOW_ERROR;
3509 }
3510 }else{
3511 dst[dstIndex]=(UChar)c;
3512 }
3513
3514 }else{
3515 *status = U_BUFFER_OVERFLOW_ERROR;
3516 }
3517 dstIndex++; /* for preflighting */
3518 }
3519 return dstIndex;
3520 }
3521
3522 static void
TestFullRoundtrip(const char * cp)3523 TestFullRoundtrip(const char* cp){
3524 UChar usource[10] ={0};
3525 UChar nsrc[10] = {0};
3526 uint32_t i=1;
3527 int len=0, ulen;
3528 nsrc[0]=0x0061;
3529 /* Test codepoint 0 */
3530 TestConv(usource,1,cp,"",NULL,0);
3531 TestConv(usource,2,cp,"",NULL,0);
3532 nsrc[2]=0x5555;
3533 TestConv(nsrc,3,cp,"",NULL,0);
3534
3535 for(;i<=0x10FFFF;i++){
3536 if(i==0xD800){
3537 i=0xDFFF;
3538 continue;
3539 }
3540 if(i<=0xFFFF){
3541 usource[0] =(UChar) i;
3542 len=1;
3543 }else{
3544 usource[0]=U16_LEAD(i);
3545 usource[1]=U16_TRAIL(i);
3546 len=2;
3547 }
3548 ulen=len;
3549 if(i==0x80) {
3550 usource[2]=0;
3551 }
3552 /* Test only single code points */
3553 TestConv(usource,ulen,cp,"",NULL,0);
3554 /* Test codepoint repeated twice */
3555 usource[ulen]=usource[0];
3556 usource[ulen+1]=usource[1];
3557 ulen+=len;
3558 TestConv(usource,ulen,cp,"",NULL,0);
3559 /* Test codepoint repeated 3 times */
3560 usource[ulen]=usource[0];
3561 usource[ulen+1]=usource[1];
3562 ulen+=len;
3563 TestConv(usource,ulen,cp,"",NULL,0);
3564 /* Test codepoint in between 2 codepoints */
3565 nsrc[1]=usource[0];
3566 nsrc[2]=usource[1];
3567 nsrc[len+1]=0x5555;
3568 TestConv(nsrc,len+2,cp,"",NULL,0);
3569 uprv_memset(usource,0,sizeof(UChar)*10);
3570 }
3571 }
3572
3573 static void
TestRoundTrippingAllUTF(void)3574 TestRoundTrippingAllUTF(void){
3575 if(!getTestOption(QUICK_OPTION)){
3576 log_verbose("Running exhaustive round trip test for BOCU-1\n");
3577 TestFullRoundtrip("BOCU-1");
3578 log_verbose("Running exhaustive round trip test for SCSU\n");
3579 TestFullRoundtrip("SCSU");
3580 log_verbose("Running exhaustive round trip test for UTF-8\n");
3581 TestFullRoundtrip("UTF-8");
3582 log_verbose("Running exhaustive round trip test for CESU-8\n");
3583 TestFullRoundtrip("CESU-8");
3584 log_verbose("Running exhaustive round trip test for UTF-16BE\n");
3585 TestFullRoundtrip("UTF-16BE");
3586 log_verbose("Running exhaustive round trip test for UTF-16LE\n");
3587 TestFullRoundtrip("UTF-16LE");
3588 log_verbose("Running exhaustive round trip test for UTF-16\n");
3589 TestFullRoundtrip("UTF-16");
3590 log_verbose("Running exhaustive round trip test for UTF-32BE\n");
3591 TestFullRoundtrip("UTF-32BE");
3592 log_verbose("Running exhaustive round trip test for UTF-32LE\n");
3593 TestFullRoundtrip("UTF-32LE");
3594 log_verbose("Running exhaustive round trip test for UTF-32\n");
3595 TestFullRoundtrip("UTF-32");
3596 log_verbose("Running exhaustive round trip test for UTF-7\n");
3597 TestFullRoundtrip("UTF-7");
3598 log_verbose("Running exhaustive round trip test for UTF-7\n");
3599 TestFullRoundtrip("UTF-7,version=1");
3600 log_verbose("Running exhaustive round trip test for IMAP-mailbox-name\n");
3601 TestFullRoundtrip("IMAP-mailbox-name");
3602 /*
3603 *
3604 * With the update to GB18030 2005 (Ticket #8274), this test will fail because the 2005 version of
3605 * GB18030 contains mappings to actual Unicode codepoints (which were previously mapped to PUA).
3606 * The old mappings remain as fallbacks.
3607 * This test may be reintroduced at a later time.
3608 *
3609 * 110118 - mow
3610 */
3611 /*
3612 log_verbose("Running exhaustive round trip test for GB18030\n");
3613 TestFullRoundtrip("GB18030");
3614 */
3615 }
3616 }
3617
3618 static void
TestSCSU(void)3619 TestSCSU(void) {
3620
3621 static const uint16_t germanUTF16[]={
3622 0x00d6, 0x006c, 0x0020, 0x0066, 0x006c, 0x0069, 0x0065, 0x00df, 0x0074
3623 };
3624
3625 static const uint8_t germanSCSU[]={
3626 0xd6, 0x6c, 0x20, 0x66, 0x6c, 0x69, 0x65, 0xdf, 0x74
3627 };
3628
3629 static const uint16_t russianUTF16[]={
3630 0x041c, 0x043e, 0x0441, 0x043a, 0x0432, 0x0430
3631 };
3632
3633 static const uint8_t russianSCSU[]={
3634 0x12, 0x9c, 0xbe, 0xc1, 0xba, 0xb2, 0xb0
3635 };
3636
3637 static const uint16_t japaneseUTF16[]={
3638 0x3000, 0x266a, 0x30ea, 0x30f3, 0x30b4, 0x53ef, 0x611b,
3639 0x3044, 0x3084, 0x53ef, 0x611b, 0x3044, 0x3084, 0x30ea, 0x30f3,
3640 0x30b4, 0x3002, 0x534a, 0x4e16, 0x7d00, 0x3082, 0x524d, 0x306b,
3641 0x6d41, 0x884c, 0x3057, 0x305f, 0x300c, 0x30ea, 0x30f3, 0x30b4,
3642 0x306e, 0x6b4c, 0x300d, 0x304c, 0x3074, 0x3063, 0x305f, 0x308a,
3643 0x3059, 0x308b, 0x304b, 0x3082, 0x3057, 0x308c, 0x306a, 0x3044,
3644 0x3002, 0x7c73, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x30b3, 0x30f3,
3645 0x30d4, 0x30e5, 0x30fc, 0x30bf, 0x793e, 0x306e, 0x30d1, 0x30bd,
3646 0x30b3, 0x30f3, 0x300c, 0x30de, 0x30c3, 0x30af, 0xff08, 0x30de,
3647 0x30c3, 0x30ad, 0x30f3, 0x30c8, 0x30c3, 0x30b7, 0x30e5, 0xff09,
3648 0x300d, 0x3092, 0x3001, 0x3053, 0x3088, 0x306a, 0x304f, 0x611b,
3649 0x3059, 0x308b, 0x4eba, 0x305f, 0x3061, 0x306e, 0x3053, 0x3068,
3650 0x3060, 0x3002, 0x300c, 0x30a2, 0x30c3, 0x30d7, 0x30eb, 0x4fe1,
3651 0x8005, 0x300d, 0x306a, 0x3093, 0x3066, 0x8a00, 0x3044, 0x65b9,
3652 0x307e, 0x3067, 0x3042, 0x308b, 0x3002
3653 };
3654
3655 /* SCSUEncoder produces a slightly longer result (179B vs. 178B) because of one different choice:
3656 it uses an SQn once where a longer look-ahead could have shown that SCn is more efficient */
3657 static const uint8_t japaneseSCSU[]={
3658 0x08, 0x00, 0x1b, 0x4c, 0xea, 0x16, 0xca, 0xd3, 0x94, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84,
3659 0xc4, 0x0f, 0x53, 0xef, 0x61, 0x1b, 0xe5, 0x84, 0xc4, 0x16, 0xca, 0xd3, 0x94, 0x08, 0x02, 0x0f,
3660 0x53, 0x4a, 0x4e, 0x16, 0x7d, 0x00, 0x30, 0x82, 0x52, 0x4d, 0x30, 0x6b, 0x6d, 0x41, 0x88, 0x4c,
3661 0xe5, 0x97, 0x9f, 0x08, 0x0c, 0x16, 0xca, 0xd3, 0x94, 0x15, 0xae, 0x0e, 0x6b, 0x4c, 0x08, 0x0d,
3662 0x8c, 0xb4, 0xa3, 0x9f, 0xca, 0x99, 0xcb, 0x8b, 0xc2, 0x97, 0xcc, 0xaa, 0x84, 0x08, 0x02, 0x0e,
3663 0x7c, 0x73, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x93, 0xd3, 0xb4, 0xc5, 0xdc, 0x9f, 0x0e, 0x79, 0x3e,
3664 0x06, 0xae, 0xb1, 0x9d, 0x93, 0xd3, 0x08, 0x0c, 0xbe, 0xa3, 0x8f, 0x08, 0x88, 0xbe, 0xa3, 0x8d,
3665 0xd3, 0xa8, 0xa3, 0x97, 0xc5, 0x17, 0x89, 0x08, 0x0d, 0x15, 0xd2, 0x08, 0x01, 0x93, 0xc8, 0xaa,
3666 0x8f, 0x0e, 0x61, 0x1b, 0x99, 0xcb, 0x0e, 0x4e, 0xba, 0x9f, 0xa1, 0xae, 0x93, 0xa8, 0xa0, 0x08,
3667 0x02, 0x08, 0x0c, 0xe2, 0x16, 0xa3, 0xb7, 0xcb, 0x0f, 0x4f, 0xe1, 0x80, 0x05, 0xec, 0x60, 0x8d,
3668 0xea, 0x06, 0xd3, 0xe6, 0x0f, 0x8a, 0x00, 0x30, 0x44, 0x65, 0xb9, 0xe4, 0xfe, 0xe7, 0xc2, 0x06,
3669 0xcb, 0x82
3670 };
3671
3672 static const uint16_t allFeaturesUTF16[]={
3673 0x0041, 0x00df, 0x0401, 0x015f, 0x00df, 0x01df, 0xf000, 0xdbff,
3674 0xdfff, 0x000d, 0x000a, 0x0041, 0x00df, 0x0401, 0x015f, 0x00df,
3675 0x01df, 0xf000, 0xdbff, 0xdfff
3676 };
3677
3678 /* see comment at japaneseSCSU: the same kind of different choice yields a slightly shorter
3679 * result here (34B vs. 35B)
3680 */
3681 static const uint8_t allFeaturesSCSU[]={
3682 0x41, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x1b, 0x03,
3683 0xdf, 0x1c, 0x88, 0x80, 0x0b, 0xbf, 0xff, 0xff, 0x0d, 0x0a,
3684 0x41, 0x10, 0xdf, 0x12, 0x81, 0x03, 0x5f, 0x10, 0xdf, 0x13,
3685 0xdf, 0x14, 0x80, 0x15, 0xff
3686 };
3687 static const uint16_t monkeyIn[]={
3688 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3689 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3690 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3691 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3692 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3693 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3694 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3695 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3696 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3697 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3698 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3699 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3700 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3701 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3702 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3703 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3704 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3705 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3706 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A,
3707 /* test non-BMP code points */
3708 0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
3709 0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
3710 0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
3711 0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
3712 0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
3713 0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
3714 0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
3715 0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
3716 0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
3717 0xD869, 0xDED5, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xD800, 0xDC00, 0xDBFF, 0xDFFF,
3718 0xDBFF, 0xDFFF, 0xDBFF, 0xDFFF,
3719
3720
3721 0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
3722 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
3723 0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
3724 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
3725 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
3726 };
3727 static const char *fTestCases [] = {
3728 "\\ud800\\udc00", /* smallest surrogate*/
3729 "\\ud8ff\\udcff",
3730 "\\udBff\\udFff", /* largest surrogate pair*/
3731 "\\ud834\\udc00",
3732 "\\U0010FFFF",
3733 "Hello \\u9292 \\u9192 World!",
3734 "Hell\\u0429o \\u9292 \\u9192 W\\u00e4rld!",
3735 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3736
3737 "\\u0648\\u06c8", /* catch missing reset*/
3738 "\\u0648\\u06c8",
3739
3740 "\\u4444\\uE001", /* lowest quotable*/
3741 "\\u4444\\uf2FF", /* highest quotable*/
3742 "\\u4444\\uf188\\u4444",
3743 "\\u4444\\uf188\\uf288",
3744 "\\u4444\\uf188abc\\u0429\\uf288",
3745 "\\u9292\\u2222",
3746 "Hell\\u0429\\u04230o \\u9292 \\u9292W\\u00e4\\u0192rld!",
3747 "Hell\\u0429o \\u9292 \\u9292W\\u00e4rld!",
3748 "Hello World!123456",
3749 "Hello W\\u0081\\u011f\\u0082!", /* Latin 1 run*/
3750
3751 "abc\\u0301\\u0302", /* uses SQn for u301 u302*/
3752 "abc\\u4411d", /* uses SQU*/
3753 "abc\\u4411\\u4412d",/* uses SCU*/
3754 "abc\\u0401\\u0402\\u047f\\u00a5\\u0405", /* uses SQn for ua5*/
3755 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000", /* SJIS like data*/
3756 "\\u9292\\u2222",
3757 "\\u9191\\u9191\\u3041\\u9191\\u3041\\u3041\\u3000",
3758 "\\u9999\\u3051\\u300c\\u9999\\u9999\\u3060\\u9999\\u3065\\u3065\\u3065\\u300c",
3759 "\\u3000\\u266a\\u30ea\\u30f3\\u30b4\\u53ef\\u611b\\u3044\\u3084\\u53ef\\u611b\\u3044\\u3084\\u30ea\\u30f3\\u30b4\\u3002",
3760
3761 "", /* empty input*/
3762 "\\u0000", /* smallest BMP character*/
3763 "\\uFFFF", /* largest BMP character*/
3764
3765 /* regression tests*/
3766 "\\u6441\\ub413\\ua733\\uf8fe\\ueedb\\u587f\\u195f\\u4899\\uf23d\\u49fd\\u0aac\\u5792\\ufc22\\ufc3c\\ufc46\\u00aa",
3767 "\\u00df\\u01df\\uf000\\udbff\\udfff\\u000d\n\\u0041\\u00df\\u0401\\u015f\\u00df\\u01df\\uf000\\udbff\\udfff",
3768 "\\u30f9\\u8321\\u05e5\\u181c\\ud72b\\u2019\\u99c9\\u2f2f\\uc10c\\u82e1\\u2c4d\\u1ebc\\u6013\\u66dc\\ubbde\\u94a5\\u4726\\u74af\\u3083\\u55b9\\u000c",
3769 "\\u0041\\u00df\\u0401\\u015f",
3770 "\\u9066\\u2123abc",
3771 "\\ud266\\u43d7\\u\\ue386\\uc9c0\\u4a6b\\u9222\\u901f\\u7410\\ua63f\\u539b\\u9596\\u482e\\u9d47\\ucfe4\\u7b71\\uc280\\uf26a\\u982f\\u862a\\u4edd\\uf513\\ufda6\\u869d\\u2ee0\\ua216\\u3ff6\\u3c70\\u89c0\\u9576\\ud5ec\\ubfda\\u6cca\\u5bb3\\ubcea\\u554c\\u914e\\ufa4a\\uede3\\u2990\\ud2f5\\u2729\\u5141\\u0f26\\uccd8\\u5413\\ud196\\ubbe2\\u51b9\\u9b48\\u0dc8\\u2195\\u21a2\\u21e9\\u00e4\\u9d92\\u0bc0\\u06c5",
3772 "\\uf95b\\u2458\\u2468\\u0e20\\uf51b\\ue36e\\ubfc1\\u0080\\u02dd\\uf1b5\\u0cf3\\u6059\\u7489",
3773 };
3774 int i=0;
3775 for(;i<UPRV_LENGTHOF(fTestCases);i++){
3776 const char* cSrc = fTestCases[i];
3777 UErrorCode status = U_ZERO_ERROR;
3778 int32_t cSrcLen,srcLen;
3779 UChar* src;
3780 /* UConverter* cnv = ucnv_open("SCSU",&status); */
3781 cSrcLen = srcLen = (int32_t)uprv_strlen(fTestCases[i]);
3782 src = (UChar*) malloc((sizeof(UChar) * srcLen) + sizeof(UChar));
3783 srcLen=unescape(src,srcLen,cSrc,cSrcLen,&status);
3784 log_verbose("Testing roundtrip for src: %s at index :%d\n",cSrc,i);
3785 TestConv(src,srcLen,"SCSU","Coverage",NULL,0);
3786 free(src);
3787 }
3788 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features", (char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3789 TestConv(allFeaturesUTF16,(sizeof(allFeaturesUTF16)/2),"SCSU","all features",(char *)allFeaturesSCSU,sizeof(allFeaturesSCSU));
3790 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3791 TestConv(japaneseUTF16,(sizeof(japaneseUTF16)/2),"SCSU,locale=ja","japaneese",(char *)japaneseSCSU,sizeof(japaneseSCSU));
3792 TestConv(germanUTF16,(sizeof(germanUTF16)/2),"SCSU","german",(char *)germanSCSU,sizeof(germanSCSU));
3793 TestConv(russianUTF16,(sizeof(russianUTF16)/2), "SCSU","russian",(char *)russianSCSU,sizeof(russianSCSU));
3794 TestConv(monkeyIn,(sizeof(monkeyIn)/2),"SCSU","monkey",NULL,0);
3795 }
3796
3797 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug2346(void)3798 static void TestJitterbug2346(void){
3799 char source[] = { 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a,
3800 0x1b,0x24,0x42,0x3d,0x45,0x1b,0x28,0x4a,0x0d,0x0a};
3801 uint16_t expected[] = {0x91CD,0x000D,0x000A,0x91CD,0x000D,0x000A};
3802
3803 UChar uTarget[500]={'\0'};
3804 UChar* utarget=uTarget;
3805 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
3806
3807 char cTarget[500]={'\0'};
3808 char* ctarget=cTarget;
3809 char* ctargetLimit=cTarget+sizeof(cTarget);
3810 const char* csource=source;
3811 UChar* temp = expected;
3812 UErrorCode err=U_ZERO_ERROR;
3813
3814 UConverter* conv =ucnv_open("ISO_2022_JP",&err);
3815 if(U_FAILURE(err)) {
3816 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
3817 return;
3818 }
3819 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(source),NULL,true,&err);
3820 if(U_FAILURE(err)) {
3821 log_err("ISO_2022_JP to Unicode conversion failed: %s\n", u_errorName(err));
3822 return;
3823 }
3824 utargetLimit=utarget;
3825 utarget = uTarget;
3826 while(utarget<utargetLimit){
3827 if(*temp!=*utarget){
3828
3829 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*utarget,(int)*temp) ;
3830 }
3831 utarget++;
3832 temp++;
3833 }
3834 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,true,&err);
3835 if(U_FAILURE(err)) {
3836 log_err("ISO_2022_JP from Unicode conversion failed: %s\n", u_errorName(err));
3837 return;
3838 }
3839 ctargetLimit=ctarget;
3840 ctarget =cTarget;
3841 ucnv_close(conv);
3842
3843
3844 }
3845
3846 static void
TestISO_2022_JP_1(void)3847 TestISO_2022_JP_1(void) {
3848 /* test input */
3849 static const uint16_t in[]={
3850 0x3000, 0x3001, 0x3002, 0x0020, 0xFF0E, 0x30FB, 0xFF1A, 0xFF1B, 0x000D, 0x000A,
3851 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3852 0x52C8, 0x52CC, 0x52CF, 0x52D1, 0x52D4, 0x52D6, 0x52DB, 0x52DC, 0x000D, 0x000A,
3853 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3854 0x3005, 0x3006, 0x3007, 0x30FC, 0x2015, 0x2010, 0xFF0F, 0x005C, 0x000D, 0x000A,
3855 0x3013, 0x2018, 0x2026, 0x2025, 0x2018, 0x2019, 0x201C, 0x000D, 0x000A,
3856 0x201D, 0x000D, 0x000A,
3857 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3858 0x4F94, 0x4F97, 0x52BA, 0x52BB, 0x52BD, 0x52C0, 0x52C4, 0x52C6, 0x000D, 0x000A,
3859 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3860 0x4F78, 0x4F79, 0x4F7A, 0x4F7D, 0x4F7E, 0x4F81, 0x4F82, 0x4F84, 0x000D, 0x000A,
3861 0x4F85, 0x4F89, 0x4F8A, 0x4F8C, 0x4F8E, 0x4F90, 0x4F92, 0x4F93, 0x000D, 0x000A,
3862 0x52E1, 0x52E5, 0x52E8, 0x52E9, 0x000D, 0x000A
3863 };
3864 const UChar* uSource;
3865 const UChar* uSourceLimit;
3866 const char* cSource;
3867 const char* cSourceLimit;
3868 UChar *uTargetLimit =NULL;
3869 UChar *uTarget;
3870 char *cTarget;
3871 const char *cTargetLimit;
3872 char *cBuf;
3873 UChar *uBuf,*test;
3874 int32_t uBufSize = 120;
3875 UErrorCode errorCode=U_ZERO_ERROR;
3876 UConverter *cnv;
3877
3878 cnv=ucnv_open("ISO_2022_JP_1", &errorCode);
3879 if(U_FAILURE(errorCode)) {
3880 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3881 return;
3882 }
3883
3884 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3885 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3886 uSource = (const UChar*)in;
3887 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3888 cTarget = cBuf;
3889 cTargetLimit = cBuf +uBufSize*5;
3890 uTarget = uBuf;
3891 uTargetLimit = uBuf+ uBufSize*5;
3892 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,NULL,true, &errorCode);
3893 if(U_FAILURE(errorCode)){
3894 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3895 return;
3896 }
3897 cSource = cBuf;
3898 cSourceLimit =cTarget;
3899 test =uBuf;
3900 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,NULL,true,&errorCode);
3901 if(U_FAILURE(errorCode)){
3902 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3903 return;
3904 }
3905 uSource = (const UChar*)in;
3906 while(uSource<uSourceLimit){
3907 if(*test!=*uSource){
3908
3909 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
3910 }
3911 uSource++;
3912 test++;
3913 }
3914 /*ucnv_close(cnv);
3915 cnv=ucnv_open("ISO_2022,locale=jp,version=1", &errorCode);*/
3916 /*Test for the condition where there is an invalid character*/
3917 ucnv_reset(cnv);
3918 {
3919 static const uint8_t source2[]={0x0e,0x24,0x053};
3920 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-1]");
3921 }
3922 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3923 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
3924 ucnv_close(cnv);
3925 free(uBuf);
3926 free(cBuf);
3927 }
3928
3929 static void
TestISO_2022_JP_2(void)3930 TestISO_2022_JP_2(void) {
3931 /* test input */
3932 static const uint16_t in[]={
3933 0x00A8, 0x3003, 0x3005, 0x2015, 0xFF5E, 0x2016, 0x2026, 0x2018, 0x000D, 0x000A,
3934 0x2019, 0x201C, 0x201D, 0x3014, 0x3015, 0x3008, 0x3009, 0x300A, 0x000D, 0x000A,
3935 0x300B, 0x300C, 0x300D, 0x300E, 0x300F, 0x3016, 0x3017, 0x3010, 0x000D, 0x000A,
3936 0x3011, 0x00B1, 0x00D7, 0x00F7, 0x2236, 0x2227, 0x7FC1, 0x8956, 0x000D, 0x000A,
3937 0x9D2C, 0x9D0E, 0x9EC4, 0x5CA1, 0x6C96, 0x837B, 0x5104, 0x5C4B, 0x000D, 0x000A,
3938 0x61B6, 0x81C6, 0x6876, 0x7261, 0x4E59, 0x4FFA, 0x5378, 0x57F7, 0x000D, 0x000A,
3939 0x57F4, 0x57F9, 0x57FA, 0x57FC, 0x5800, 0x5802, 0x5805, 0x5806, 0x000D, 0x000A,
3940 0x580A, 0x581E, 0x6BB5, 0x6BB7, 0x6BBA, 0x6BBC, 0x9CE2, 0x977C, 0x000D, 0x000A,
3941 0x6BBF, 0x6BC1, 0x6BC5, 0x6BC6, 0x6BCB, 0x6BCD, 0x6BCF, 0x6BD2, 0x000D, 0x000A,
3942 0x6BD3, 0x6BD4, 0x6BD6, 0x6BD7, 0x6BD8, 0x6BDB, 0x6BEB, 0x6BEC, 0x000D, 0x000A,
3943 0x6C05, 0x6C08, 0x6C0F, 0x6C11, 0x6C13, 0x6C23, 0x6C34, 0x0041, 0x000D, 0x000A,
3944 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x000D, 0x000A,
3945 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x000D, 0x000A,
3946 0x0053, 0x0054, 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A, 0x000D, 0x000A,
3947 0x005B, 0x9792, 0x9CCC, 0x9CCD, 0x9CCE, 0x9CCF, 0x9CD0, 0x9CD3, 0x000D, 0x000A,
3948 0x9CD4, 0x9CD5, 0x9CD7, 0x9CD8, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
3949 0x9785, 0x9791, 0x00BD, 0x0390, 0x0385, 0x0386, 0x0388, 0x0389, 0x000D, 0x000A,
3950 0x038E, 0x038F, 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x000D, 0x000A,
3951 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x038A, 0x038C, 0x039C, 0x000D, 0x000A
3952 };
3953 const UChar* uSource;
3954 const UChar* uSourceLimit;
3955 const char* cSource;
3956 const char* cSourceLimit;
3957 UChar *uTargetLimit =NULL;
3958 UChar *uTarget;
3959 char *cTarget;
3960 const char *cTargetLimit;
3961 char *cBuf = NULL;
3962 UChar *uBuf = NULL;
3963 UChar *test;
3964 int32_t uBufSize = 120;
3965 UErrorCode errorCode=U_ZERO_ERROR;
3966 UConverter *cnv = NULL;
3967 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
3968 int32_t* myOff= offsets;
3969 cnv=ucnv_open("ISO_2022_JP_2", &errorCode);
3970 if(U_FAILURE(errorCode)) {
3971 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
3972 goto cleanup;
3973 }
3974
3975 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
3976 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
3977 uSource = (const UChar*)in;
3978 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
3979 cTarget = cBuf;
3980 cTargetLimit = cBuf +uBufSize*5;
3981 uTarget = uBuf;
3982 uTargetLimit = uBuf+ uBufSize*5;
3983 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
3984 if(U_FAILURE(errorCode)){
3985 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
3986 goto cleanup;
3987 }
3988 cSource = cBuf;
3989 cSourceLimit =cTarget;
3990 test =uBuf;
3991 myOff=offsets;
3992 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
3993 if(U_FAILURE(errorCode)){
3994 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
3995 goto cleanup;
3996 }
3997 uSource = (const UChar*)in;
3998 while(uSource<uSourceLimit){
3999 if(*test!=*uSource){
4000
4001 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4002 }
4003 uSource++;
4004 test++;
4005 }
4006 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4007 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4008 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4009 /*Test for the condition where there is an invalid character*/
4010 ucnv_reset(cnv);
4011 {
4012 static const uint8_t source2[]={0x0e,0x24,0x053};
4013 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-JP-2]");
4014 }
4015
4016 cleanup:
4017 ucnv_close(cnv);
4018 free(uBuf);
4019 free(cBuf);
4020 free(offsets);
4021 }
4022
4023 static void
TestISO_2022_KR(void)4024 TestISO_2022_KR(void) {
4025 /* test input */
4026 static const uint16_t in[]={
4027 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4028 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4029 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4030 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4031 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4032 ,0x53E3,0x53E4,0x000A,0x000D};
4033 const UChar* uSource;
4034 const UChar* uSourceLimit;
4035 const char* cSource;
4036 const char* cSourceLimit;
4037 UChar *uTargetLimit =NULL;
4038 UChar *uTarget;
4039 char *cTarget;
4040 const char *cTargetLimit;
4041 char *cBuf = NULL;
4042 UChar *uBuf = NULL;
4043 UChar *test;
4044 int32_t uBufSize = 120;
4045 UErrorCode errorCode=U_ZERO_ERROR;
4046 UConverter *cnv = NULL;
4047 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4048 int32_t* myOff= offsets;
4049 cnv=ucnv_open("ISO_2022,locale=kr", &errorCode);
4050 if(U_FAILURE(errorCode)) {
4051 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4052 goto cleanup;
4053 }
4054
4055 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4056 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4057 uSource = (const UChar*)in;
4058 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4059 cTarget = cBuf;
4060 cTargetLimit = cBuf +uBufSize*5;
4061 uTarget = uBuf;
4062 uTargetLimit = uBuf+ uBufSize*5;
4063 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4064 if(U_FAILURE(errorCode)){
4065 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4066 goto cleanup;
4067 }
4068 cSource = cBuf;
4069 cSourceLimit =cTarget;
4070 test =uBuf;
4071 myOff=offsets;
4072 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4073 if(U_FAILURE(errorCode)){
4074 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4075 goto cleanup;
4076 }
4077 uSource = (const UChar*)in;
4078 while(uSource<uSourceLimit){
4079 if(*test!=*uSource){
4080 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4081 }
4082 uSource++;
4083 test++;
4084 }
4085 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4086 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4087 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4088 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4089 TestJitterbug930("csISO2022KR");
4090 /*Test for the condition where there is an invalid character*/
4091 ucnv_reset(cnv);
4092 {
4093 static const uint8_t source2[]={0x1b,0x24,0x053};
4094 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4095 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4096 }
4097
4098 cleanup:
4099 ucnv_close(cnv);
4100 free(uBuf);
4101 free(cBuf);
4102 free(offsets);
4103 }
4104
4105 static void
TestISO_2022_KR_1(void)4106 TestISO_2022_KR_1(void) {
4107 /* test input */
4108 static const uint16_t in[]={
4109 0x9F4B,0x9F4E,0x9F52,0x9F5F,0x9F61,0x9F67,0x9F6A,0x000A,0x000D
4110 ,0x9F6C,0x9F77,0x9F8D,0x9F90,0x9F95,0x9F9C,0xAC00,0xAC01,0xAC04
4111 ,0xAC07,0xAC08,0xAC09,0x0025,0x0026,0x0027,0x000A,0x000D,0x0028,0x0029
4112 ,0x002A,0x002B,0x002C,0x002D,0x002E,0x53C3,0x53C8,0x53C9,0x53CA,0x53CB
4113 ,0x53CD,0x53D4,0x53D6,0x53D7,0x53DB,0x000A,0x000D,0x53E1,0x53E2
4114 ,0x53E3,0x53E4,0x000A,0x000D};
4115 const UChar* uSource;
4116 const UChar* uSourceLimit;
4117 const char* cSource;
4118 const char* cSourceLimit;
4119 UChar *uTargetLimit =NULL;
4120 UChar *uTarget;
4121 char *cTarget;
4122 const char *cTargetLimit;
4123 char *cBuf = NULL;
4124 UChar *uBuf = NULL;
4125 UChar *test;
4126 int32_t uBufSize = 120;
4127 UErrorCode errorCode=U_ZERO_ERROR;
4128 UConverter *cnv = NULL;
4129 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4130 int32_t* myOff= offsets;
4131 cnv=ucnv_open("ibm-25546", &errorCode);
4132 if(U_FAILURE(errorCode)) {
4133 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4134 goto cleanup;
4135 }
4136
4137 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4138 cBuf =(char*)malloc(uBufSize * sizeof(char) * 5);
4139 uSource = (const UChar*)in;
4140 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4141 cTarget = cBuf;
4142 cTargetLimit = cBuf +uBufSize*5;
4143 uTarget = uBuf;
4144 uTargetLimit = uBuf+ uBufSize*5;
4145 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4146 if(U_FAILURE(errorCode)){
4147 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4148 goto cleanup;
4149 }
4150 cSource = cBuf;
4151 cSourceLimit =cTarget;
4152 test =uBuf;
4153 myOff=offsets;
4154 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4155 if(U_FAILURE(errorCode)){
4156 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4157 goto cleanup;
4158 }
4159 uSource = (const UChar*)in;
4160 while(uSource<uSourceLimit){
4161 if(*test!=*uSource){
4162 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,*test) ;
4163 }
4164 uSource++;
4165 test++;
4166 }
4167 ucnv_reset(cnv);
4168 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-KR encoding");
4169 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4170 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4171 ucnv_reset(cnv);
4172 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4173 /*Test for the condition where there is an invalid character*/
4174 ucnv_reset(cnv);
4175 {
4176 static const uint8_t source2[]={0x1b,0x24,0x053};
4177 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
4178 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ILLEGAL_ESCAPE_SEQUENCE, "an invalid character [ISO-2022-KR]");
4179 }
4180
4181 cleanup:
4182 ucnv_close(cnv);
4183 free(uBuf);
4184 free(cBuf);
4185 free(offsets);
4186 }
4187
TestJitterbug2411(void)4188 static void TestJitterbug2411(void){
4189 static const char* source = "\x1b\x24\x29\x43\x6b\x6b\x6e\x6e\x6a\x68\x70\x6f\x69\x75\x79\x71\x77\x65\x68\x67\x0A"
4190 "\x1b\x24\x29\x43\x6a\x61\x73\x64\x66\x6a\x61\x73\x64\x66\x68\x6f\x69\x75\x79\x1b\x24\x29\x43";
4191 UConverter* kr=NULL, *kr1=NULL;
4192 UErrorCode errorCode = U_ZERO_ERROR;
4193 UChar tgt[100]={'\0'};
4194 UChar* target = tgt;
4195 UChar* targetLimit = target+100;
4196 kr=ucnv_open("iso-2022-kr", &errorCode);
4197 if(U_FAILURE(errorCode)) {
4198 log_data_err("Unable to open a iso-2022-kr converter: %s\n", u_errorName(errorCode));
4199 return;
4200 }
4201 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,true,&errorCode);
4202 if(U_FAILURE(errorCode)) {
4203 log_err("iso-2022-kr cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4204 return;
4205 }
4206 kr1 = ucnv_open("ibm-25546", &errorCode);
4207 if(U_FAILURE(errorCode)) {
4208 log_data_err("Unable to open a iso-2022-kr_1 converter: %s\n", u_errorName(errorCode));
4209 return;
4210 }
4211 target = tgt;
4212 targetLimit = target+100;
4213 ucnv_toUnicode(kr,&target,targetLimit,&source,source+uprv_strlen(source),NULL,true,&errorCode);
4214
4215 if(U_FAILURE(errorCode)) {
4216 log_err("iso-2022-kr_1 cannot handle multiple escape sequences : %s\n", u_errorName(errorCode));
4217 return;
4218 }
4219
4220 ucnv_close(kr);
4221 ucnv_close(kr1);
4222
4223 }
4224
/*
 * To-Unicode tests for the converters named "JIS", "JIS7" and "JIS8".
 *
 * For each converter a byte sequence, the expected UTF-16 output and the
 * expected offsets are passed to testConvertToU() (defined elsewhere in this
 * file).  The from-Unicode tests have moved to testdata/conversion.txt.
 */
static void
TestJIS(void){
    /* ---- "JIS" ---- */
    static const uint8_t jisBytes[] = {
        0x1b,0x28,0x48,0x41,0x42,                /*jis-Roman*/
        0x1b,0x28,0x49,0x41,0x42,                /*Katakana Set*/
        0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21  /*recognize and ignore <esc>&@*/
    };
    static const uint16_t jisExpected[] = {
        0x0041, 0x0042,
        0xFF81, 0xFF82,
        0x3000
    };
    static const int32_t jisOffsets[]={
        3,4,
        8,9,
        16
    };

    /* ---- "JIS7" ---- */
    static const uint8_t jis7Bytes[] = {
        0x1b,0x28,0x48,0x41,0x42,                /*JIS7-Roman*/
        0x1b,0x28,0x49,0x41,0x42,                /*Katakana Set*/
        0x1b,0x24,0x42,0x21,0x21,
        0x0e,0x41,0x42,0x0f,                     /*Test Katakana set with SI and SO */
        0x21,0x22,
        0x1b,0x26,0x40,0x1b,0x24,0x42,0x21,0x21  /*recognize and ignore <esc>&@*/
    };
    static const uint16_t jis7Expected[] = {
        0x0041, 0x0042,
        0xFF81, 0xFF82,
        0x3000,
        0xFF81, 0xFF82,
        0x3001,
        0x3000
    };
    static const int32_t jis7Offsets[]={
        3,4,
        8,9,
        13,16,
        17,
        19,27
    };

    /* ---- "JIS8" ---- */
    static const uint8_t jis8Bytes[] = {
        0x1b,0x28,0x48,0x41,0x42,                /*JIS8-Roman*/
        0xa1,0xc8,0xd9,                          /*Katakana Set*/
        0x1b,0x28,0x42,
        0x41,0x42,
        0xb1,0xc3,                               /*Katakana Set*/
        0x1b,0x24,0x42,0x21,0x21
    };
    static const uint16_t jis8Expected[] = {
        0x0041, 0x0042,
        0xff61, 0xff88, 0xff99,
        0x0041, 0x0042,
        0xff71, 0xff83,
        0x3000
    };
    static const int32_t jis8Offsets[]={
        3, 4, 5, 6,
        7, 11, 12, 13,
        14, 18,
    };

    testConvertToU(jisBytes, sizeof(jisBytes),
                   jisExpected, UPRV_LENGTHOF(jisExpected),
                   "JIS", jisOffsets, true);
    testConvertToU(jis7Bytes, sizeof(jis7Bytes),
                   jis7Expected, UPRV_LENGTHOF(jis7Expected),
                   "JIS7", jis7Offsets, true);
    testConvertToU(jis8Bytes, sizeof(jis8Bytes),
                   jis8Expected, UPRV_LENGTHOF(jis8Expected),
                   "JIS8", jis8Offsets, true);
}
4299
4300
4301 #if 0
4302 ICU 4.4 (ticket #7314) removes mappings for CNS 11643 planes 3..7
4303
4304 static void TestJitterbug915(){
4305 /* tests for roundtripping of the below sequence
4306 \x1b$)G\x0E#!#"###$#%#&#'#(#)#*#+ / *plane 1 * /
4307 \x1b$*H\x1bN"!\x1bN""\x1bN"#\x1bN"$\x1bN"% / *plane 2 * /
4308 \x1b$+I\x1bO"D\x1bO"E\x1bO"F\x1bO"G\x1bO"H / *plane 3 * /
4309 \x1b$+J\x1bO!D\x1bO!E\x1bO"j\x1bO"k\x1bO"l / *plane 4 * /
4310 \x1b$+K\x1bO!t\x1bO"P\x1bO"Q\x1bO#7\x1bO"\ / *plane 5 * /
4311 \x1b$+L\x1bO!#\x1bO",\x1bO#N\x1bO!n\x1bO#q / *plane 6 * /
4312 \x1b$+M\x1bO"q\x1bO!N\x1bO!j\x1bO#:\x1bO#o / *plane 7 * /
4313 */
4314 static const char cSource[]={
4315 0x1B, 0x24, 0x29, 0x47, 0x0E, 0x23, 0x21, 0x23, 0x22, 0x23,
4316 0x23, 0x23, 0x24, 0x23, 0x25, 0x23, 0x26, 0x23, 0x27, 0x23,
4317 0x28, 0x23, 0x29, 0x23, 0x2A, 0x23, 0x2B, 0x0F, 0x2F, 0x2A,
4318 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20, 0x31, 0x20, 0x2A, 0x2F,
4319 0x0D, 0x0A, 0x1B, 0x24, 0x2A, 0x48, 0x1B, 0x4E, 0x22, 0x21,
4320 0x1B, 0x4E, 0x22, 0x22, 0x1B, 0x4E, 0x22, 0x23, 0x1B, 0x4E,
4321 0x22, 0x24, 0x1B, 0x4E, 0x22, 0x25, 0x2F, 0x2A, 0x70,
4322 0x6C, 0x61, 0x6E, 0x65, 0x32, 0x2A, 0x2F, 0x20, 0x0D, 0x0A,
4323 0x1B, 0x24, 0x2B, 0x49, 0x1B, 0x4F, 0x22, 0x44, 0x1B, 0x4F,
4324 0x22, 0x45, 0x1B, 0x4F, 0x22, 0x46, 0x1B, 0x4F, 0x22, 0x47,
4325 0x1B, 0x4F, 0x22, 0x48, 0x2F, 0x2A, 0x70, 0x6C, 0x61,
4326 0x6E, 0x65, 0x20, 0x33, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B,
4327 0x24, 0x2B, 0x4A, 0x1B, 0x4F, 0x21, 0x44, 0x1B, 0x4F, 0x21,
4328 0x45, 0x1B, 0x4F, 0x22, 0x6A, 0x1B, 0x4F, 0x22, 0x6B, 0x1B,
4329 0x4F, 0x22, 0x6C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4330 0x65, 0x20, 0x34, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4331 0x2B, 0x4B, 0x1B, 0x4F, 0x21, 0x74, 0x1B, 0x4F, 0x22, 0x50,
4332 0x1B, 0x4F, 0x22, 0x51, 0x1B, 0x4F, 0x23, 0x37, 0x1B, 0x4F,
4333 0x22, 0x5C, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E,
4334 0x65, 0x20, 0x35, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24,
4335 0x2B, 0x4C, 0x1B, 0x4F, 0x21, 0x23, 0x1B, 0x4F, 0x22, 0x2C,
4336 0x1B, 0x4F, 0x23, 0x4E, 0x1B, 0x4F, 0x21, 0x6E, 0x1B, 0x4F,
4337 0x23, 0x71, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65,
4338 0x20, 0x36, 0x20, 0x2A, 0x2F, 0x0D, 0x0A, 0x1B, 0x24, 0x2B,
4339 0x4D, 0x1B, 0x4F, 0x22, 0x71, 0x1B, 0x4F, 0x21, 0x4E, 0x1B,
4340 0x4F, 0x21, 0x6A, 0x1B, 0x4F, 0x23, 0x3A, 0x1B, 0x4F, 0x23,
4341 0x6F, 0x2F, 0x2A, 0x70, 0x6C, 0x61, 0x6E, 0x65, 0x20,
4342 0x37, 0x20, 0x2A, 0x2F
4343 };
4344 UChar uTarget[500]={'\0'};
4345 UChar* utarget=uTarget;
4346 UChar* utargetLimit=uTarget+sizeof(uTarget)/2;
4347
4348 char cTarget[500]={'\0'};
4349 char* ctarget=cTarget;
4350 char* ctargetLimit=cTarget+sizeof(cTarget);
4351 const char* csource=cSource;
4352 const char* tempSrc = cSource;
4353 UErrorCode err=U_ZERO_ERROR;
4354
4355 UConverter* conv =ucnv_open("ISO_2022_CN_EXT",&err);
4356 if(U_FAILURE(err)) {
4357 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(err));
4358 return;
4359 }
4360 ucnv_toUnicode(conv,&utarget,utargetLimit,&csource,csource+sizeof(cSource),NULL,true,&err);
4361 if(U_FAILURE(err)) {
4362 log_err("iso-2022-CN to Unicode conversion failed: %s\n", u_errorName(err));
4363 return;
4364 }
4365 utargetLimit=utarget;
4366 utarget = uTarget;
4367 ucnv_fromUnicode(conv,&ctarget,ctargetLimit,(const UChar**)&utarget,utargetLimit,NULL,true,&err);
4368 if(U_FAILURE(err)) {
4369 log_err("iso-2022-CN from Unicode conversion failed: %s\n", u_errorName(err));
4370 return;
4371 }
4372 ctargetLimit=ctarget;
4373 ctarget =cTarget;
4374 while(ctarget<ctargetLimit){
4375 if(*ctarget != *tempSrc){
4376 log_err("j915[%d] Expected : \\x%02X \t Got: \\x%02X\n", (int)(ctarget-cTarget), *ctarget,(int)*tempSrc) ;
4377 }
4378 ++ctarget;
4379 ++tempSrc;
4380 }
4381
4382 ucnv_close(conv);
4383 }
4384
/*
 * Round-trip test for the converter opened as "ISO_2022,locale=cn,version=1".
 * This function sits inside an #if 0 region and is therefore not compiled.
 *
 * Encodes the UTF-16 sample text with ucnv_fromUnicode(), decodes it again
 * with ucnv_toUnicode(), and logs every code unit: mismatches through
 * log_err(), matches through log_verbose().  The converter is then passed
 * to TestSmallTargetBuffer() and TestSmallSourceBuffer() and, after a reset,
 * to TestNextUCharError().
 */
static void
TestISO_2022_CN_EXT() {
    /* test input */
    static const uint16_t in[]={
        /* test Non-BMP code points */
        0xD869, 0xDE99, 0xD869, 0xDE9C, 0xD869, 0xDE9D, 0xD869, 0xDE9E, 0xD869, 0xDE9F,
        0xD869, 0xDEA0, 0xD869, 0xDEA5, 0xD869, 0xDEA6, 0xD869, 0xDEA7, 0xD869, 0xDEA8,
        0xD869, 0xDEAB, 0xD869, 0xDEAC, 0xD869, 0xDEAD, 0xD869, 0xDEAE, 0xD869, 0xDEAF,
        0xD869, 0xDEB0, 0xD869, 0xDEB1, 0xD869, 0xDEB3, 0xD869, 0xDEB5, 0xD869, 0xDEB6,
        0xD869, 0xDEB7, 0xD869, 0xDEB8, 0xD869, 0xDEB9, 0xD869, 0xDEBA, 0xD869, 0xDEBB,
        0xD869, 0xDEBC, 0xD869, 0xDEBD, 0xD869, 0xDEBE, 0xD869, 0xDEBF, 0xD869, 0xDEC0,
        0xD869, 0xDEC1, 0xD869, 0xDEC2, 0xD869, 0xDEC3, 0xD869, 0xDEC4, 0xD869, 0xDEC8,
        0xD869, 0xDECA, 0xD869, 0xDECB, 0xD869, 0xDECD, 0xD869, 0xDECE, 0xD869, 0xDECF,
        0xD869, 0xDED0, 0xD869, 0xDED1, 0xD869, 0xDED2, 0xD869, 0xDED3, 0xD869, 0xDED4,
        0xD869, 0xDED5,

        0x4DB3, 0x4DB4, 0x4DB5, 0x4E00, 0x4E00, 0x4E01, 0x4E02, 0x4E03, 0x000D, 0x000A,
        0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x33E0, 0x33E6, 0x000D, 0x000A,
        0x4E05, 0x4E07, 0x4E04, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x000D, 0x000A,
        0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
        0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
        0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
        0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
        0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
        0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
        0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
        0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
        0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
        0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
        0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x3443, 0x3444, 0x000D, 0x000A,
        0x3445, 0x3449, 0x344A, 0x344B, 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A,
        0x60F6, 0x60F7, 0x60F8, 0x60F9, 0x60FA, 0x60FB, 0x60FC, 0x60FD, 0x000D, 0x000A,
        0x60FE, 0x60FF, 0x6100, 0x6101, 0x6102, 0x0041, 0x0042, 0x0043, 0x000D, 0x000A,
        0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A, 0x004B, 0x000D, 0x000A,

        0x33E7, 0x33E8, 0x33E9, 0x33EA, 0x000D, 0x000A

    };

    const UChar* uSource;
    const UChar* uSourceLimit;
    const char* cSource;
    const char* cSourceLimit;
    UChar *uTargetLimit =NULL;
    UChar *uTarget;
    char *cTarget;
    const char *cTargetLimit;
    char *cBuf = NULL;
    UChar *uBuf = NULL;
    UChar *test;
    int32_t uBufSize = 180;
    UErrorCode errorCode=U_ZERO_ERROR;
    UConverter *cnv = NULL;
    /* one offset slot per unit of the uBufSize*5 target limits used below */
    int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
    int32_t* myOff= offsets;
    cnv=ucnv_open("ISO_2022,locale=cn,version=1", &errorCode);
    if(U_FAILURE(errorCode)) {
        log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
        goto cleanup;
    }

    uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
    /* NOTE(review): cBuf is allocated with uBufSize*10 bytes, but only the
     * first uBufSize*5 are made available through cTargetLimit below. */
    cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
    uSource = (const UChar*)in;
    uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
    cTarget = cBuf;
    cTargetLimit = cBuf +uBufSize*5;
    uTarget = uBuf;
    uTargetLimit = uBuf+ uBufSize*5;
    /* Unicode -> bytes */
    ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }
    /* bytes -> Unicode; cTarget marks the end of the bytes just written */
    cSource = cBuf;
    cSourceLimit =cTarget;
    test =uBuf;
    myOff=offsets;
    ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
    if(U_FAILURE(errorCode)){
        log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
        goto cleanup;
    }
    /* compare the round-tripped text with the input, one code unit at a time */
    uSource = (const UChar*)in;
    while(uSource<uSourceLimit){
        if(*test!=*uSource){
            log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
        }
        else{
            log_verbose("      Got: \\u%04X\n",(int)*test) ;
        }
        uSource++;
        test++;
    }
    TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
    TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
    /*Test for the condition where there is an invalid character*/
    ucnv_reset(cnv);
    {
        static const uint8_t source2[]={0x0e,0x24,0x053};
        TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN-EXT]");
    }

cleanup:
    /* single exit point: release the converter and all three heap buffers */
    ucnv_close(cnv);
    free(uBuf);
    free(cBuf);
    free(offsets);
}
4494 #endif
4495
4496 static void
TestISO_2022_CN(void)4497 TestISO_2022_CN(void) {
4498 /* test input */
4499 static const uint16_t in[]={
4500 /* jitterbug 951 */
4501 0xFF2D, 0xFF49, 0xFF58, 0xFF45, 0xFF44, 0x0020, 0xFF43, 0xFF48, 0xFF41, 0xFF52,
4502 0x0020, 0xFF06, 0x0020, 0xFF11, 0xFF12, 0xFF13, 0xFF14, 0xFF15, 0xFF16, 0xFF17,
4503 0xFF18, 0xFF19, 0xFF10, 0x0020, 0xFF4E, 0xFF55, 0xFF4D, 0xFF42, 0xFF45, 0xFF52,
4504 0x0020, 0xFF54, 0xFF45, 0xFF53, 0xFF54, 0x0020, 0xFF4C, 0xFF49, 0xFF4E, 0xFF45,
4505 0x0020, 0x0045, 0x004e, 0x0044,
4506 /**/
4507 0x4E00, 0x4E00, 0x4E01, 0x4E03, 0x60F6, 0x60F7, 0x60F8, 0x60FB, 0x000D, 0x000A,
4508 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x60FB, 0x60FC, 0x000D, 0x000A,
4509 0x4E07, 0x4E08, 0x4E08, 0x4E09, 0x4E0A, 0x4E0B, 0x0042, 0x0043, 0x000D, 0x000A,
4510 0x4E0C, 0x0021, 0x0022, 0x0023, 0x0024, 0xFF40, 0xFF41, 0xFF42, 0x000D, 0x000A,
4511 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49, 0xFF4A, 0x000D, 0x000A,
4512 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0x6332, 0x63B0, 0x643F, 0x000D, 0x000A,
4513 0x64D8, 0x8004, 0x6BEA, 0x6BF3, 0x6BFD, 0x6BF5, 0x6BF9, 0x6C05, 0x000D, 0x000A,
4514 0x6C07, 0x6C06, 0x6C0D, 0x6C15, 0x9CD9, 0x9CDC, 0x9CDD, 0x9CDF, 0x000D, 0x000A,
4515 0x9CE2, 0x977C, 0x9785, 0x9791, 0x9792, 0x9794, 0x97AF, 0x97AB, 0x000D, 0x000A,
4516 0x97A3, 0x97B2, 0x97B4, 0x9AB1, 0x9AB0, 0x9AB7, 0x9E58, 0x9AB6, 0x000D, 0x000A,
4517 0x9ABA, 0x9ABC, 0x9AC1, 0x9AC0, 0x9AC5, 0x9AC2, 0x9ACB, 0x9ACC, 0x000D, 0x000A,
4518 0x9AD1, 0x9B45, 0x9B43, 0x9B47, 0x9B49, 0x9B48, 0x9B4D, 0x9B51, 0x000D, 0x000A,
4519 0x98E8, 0x990D, 0x992E, 0x9955, 0x9954, 0x9ADF, 0x60FE, 0x60FF, 0x000D, 0x000A,
4520 0x60F2, 0x60F3, 0x60F4, 0x60F5, 0x000D, 0x000A, 0x60F9, 0x60FA, 0x000D, 0x000A,
4521 0x6100, 0x6101, 0x0041, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x000D, 0x000A,
4522 0x247D, 0x247E, 0x247F, 0x2480, 0x2481, 0x2482, 0x2483, 0x2484, 0x2485, 0x2486,
4523 0x2487, 0x2460, 0x2461, 0xFF20, 0xFF21, 0xFF22, 0x0049, 0x004A, 0x000D, 0x000A,
4524
4525 };
4526 const UChar* uSource;
4527 const UChar* uSourceLimit;
4528 const char* cSource;
4529 const char* cSourceLimit;
4530 UChar *uTargetLimit =NULL;
4531 UChar *uTarget;
4532 char *cTarget;
4533 const char *cTargetLimit;
4534 char *cBuf = NULL;
4535 UChar *uBuf = NULL;
4536 UChar *test;
4537 int32_t uBufSize = 180;
4538 UErrorCode errorCode=U_ZERO_ERROR;
4539 UConverter *cnv = NULL;
4540 int32_t* offsets = (int32_t*) malloc(uBufSize * sizeof(int32_t) * 5);
4541 int32_t* myOff= offsets;
4542 cnv=ucnv_open("ISO_2022,locale=cn,version=0", &errorCode);
4543 if(U_FAILURE(errorCode)) {
4544 log_data_err("Unable to open a iso-2022 converter: %s\n", u_errorName(errorCode));
4545 goto cleanup;
4546 }
4547
4548 uBuf = (UChar*)malloc(uBufSize * sizeof(UChar)*5);
4549 cBuf =(char*)malloc(uBufSize * sizeof(char) * 10);
4550 uSource = (const UChar*)in;
4551 uSourceLimit=(const UChar*)in + UPRV_LENGTHOF(in);
4552 cTarget = cBuf;
4553 cTargetLimit = cBuf +uBufSize*5;
4554 uTarget = uBuf;
4555 uTargetLimit = uBuf+ uBufSize*5;
4556 ucnv_fromUnicode( cnv , &cTarget, cTargetLimit,&uSource,uSourceLimit,myOff,true, &errorCode);
4557 if(U_FAILURE(errorCode)){
4558 log_err("ucnv_fromUnicode conversion failed reason %s\n", u_errorName(errorCode));
4559 goto cleanup;
4560 }
4561 cSource = cBuf;
4562 cSourceLimit =cTarget;
4563 test =uBuf;
4564 myOff=offsets;
4565 ucnv_toUnicode(cnv,&uTarget,uTargetLimit,&cSource,cSourceLimit,myOff,true,&errorCode);
4566 if(U_FAILURE(errorCode)){
4567 log_err("ucnv_toUnicode conversion failed reason %s\n", u_errorName(errorCode));
4568 goto cleanup;
4569 }
4570 uSource = (const UChar*)in;
4571 while(uSource<uSourceLimit){
4572 if(*test!=*uSource){
4573 log_err("Expected : \\u%04X \t Got: \\u%04X\n",*uSource,(int)*test) ;
4574 }
4575 else{
4576 log_verbose(" Got: \\u%04X\n",(int)*test) ;
4577 }
4578 uSource++;
4579 test++;
4580 }
4581 TestGetNextUChar2022(cnv, cBuf, cTarget, in, "ISO-2022-CN encoding");
4582 TestSmallTargetBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4583 TestSmallSourceBuffer(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4584 TestToAndFromUChars(in,(const UChar*)in + UPRV_LENGTHOF(in),cnv);
4585 TestJitterbug930("csISO2022CN");
4586 /*Test for the condition where there is an invalid character*/
4587 ucnv_reset(cnv);
4588 {
4589 static const uint8_t source2[]={0x0e,0x24,0x053};
4590 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [ISO-2022-CN]");
4591 }
4592
4593 cleanup:
4594 ucnv_close(cnv);
4595 free(uBuf);
4596 free(cBuf);
4597 free(offsets);
4598 }
4599
/*
 * Tests for empty segments in ISO-2022-JP/KR/CN and HZ: check that the
 * UConverterCallbackReason reported to the callback is UCNV_IRREGULAR.
 *
 * One row of the test table: a converter name plus the byte sequence
 * (and its length) that is fed to that converter.
 */
typedef struct {
    const char *converterName;    /* converter to open; NULL marks the end of the table */
    const char *inputText;        /* bytes to convert to Unicode */
    int         inputTextLength;  /* number of bytes in inputText */
} EmptySegmentTest;
4606
4607 /* Callback for TestJitterbug6175, should only get called for empty segment errors */
UCNV_TO_U_CALLBACK_EMPTYSEGMENT(const void * context,UConverterToUnicodeArgs * toArgs,const char * codeUnits,int32_t length,UConverterCallbackReason reason,UErrorCode * err)4608 static void UCNV_TO_U_CALLBACK_EMPTYSEGMENT( const void *context, UConverterToUnicodeArgs *toArgs, const char* codeUnits,
4609 int32_t length, UConverterCallbackReason reason, UErrorCode * err ) {
4610 // suppress compiler warnings about unused variables
4611 (void)context;
4612 (void)codeUnits;
4613 (void)length;
4614 if (reason > UCNV_IRREGULAR) {
4615 return;
4616 }
4617 if (reason != UCNV_IRREGULAR) {
4618 log_err("toUnicode callback invoked for empty segment but reason is not UCNV_IRREGULAR\n");
4619 }
4620 /* Standard stuff below from UCNV_TO_U_CALLBACK_SUBSTITUTE */
4621 *err = U_ZERO_ERROR;
4622 ucnv_cbToUWriteSub(toArgs,0,err);
4623 }
4624
4625 enum { kEmptySegmentToUCharsMax = 64 };
TestJitterbug6175(void)4626 static void TestJitterbug6175(void) {
4627 static const char iso2022jp_a[] = { 0x61, 0x62, 0x1B,0x24,0x42, 0x1B,0x28,0x42, 0x63, 0x64, 0x0D, 0x0A };
4628 static const char iso2022kr_a[] = { 0x1B,0x24,0x29,0x43, 0x61, 0x0E, 0x0F, 0x62, 0x0D, 0x0A };
4629 static const char iso2022cn_a[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x0F, 0x1B,0x24,0x2A,0x48, 0x1B,0x4E, 0x6A,0x65, 0x63, 0x0D, 0x0A };
4630 static const char iso2022cn_b[] = { 0x61, 0x1B,0x24,0x29,0x41, 0x62, 0x0E, 0x1B,0x24,0x29,0x47, 0x68,0x64, 0x0F, 0x63, 0x0D, 0x0A };
4631 static const char hzGB2312_a[] = { 0x61, 0x62, 0x7E,0x7B, 0x7E,0x7D, 0x63, 0x64 };
4632 static const EmptySegmentTest emptySegmentTests[] = {
4633 /* converterName inputText inputTextLength */
4634 { "ISO-2022-JP", iso2022jp_a, sizeof(iso2022jp_a) },
4635 { "ISO-2022-KR", iso2022kr_a, sizeof(iso2022kr_a) },
4636 { "ISO-2022-CN", iso2022cn_a, sizeof(iso2022cn_a) },
4637 { "ISO-2022-CN", iso2022cn_b, sizeof(iso2022cn_b) },
4638 { "HZ-GB-2312", hzGB2312_a, sizeof(hzGB2312_a) },
4639 /* terminator: */
4640 { NULL, NULL, 0, }
4641 };
4642 const EmptySegmentTest * testPtr;
4643 for (testPtr = emptySegmentTests; testPtr->converterName != NULL; ++testPtr) {
4644 UErrorCode err = U_ZERO_ERROR;
4645 UConverter * cnv = ucnv_open(testPtr->converterName, &err);
4646 if (U_FAILURE(err)) {
4647 log_data_err("Unable to open %s converter: %s\n", testPtr->converterName, u_errorName(err));
4648 return;
4649 }
4650 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_EMPTYSEGMENT, NULL, NULL, NULL, &err);
4651 if (U_FAILURE(err)) {
4652 log_data_err("Unable to setToUCallBack for %s converter: %s\n", testPtr->converterName, u_errorName(err));
4653 ucnv_close(cnv);
4654 return;
4655 }
4656 {
4657 UChar toUChars[kEmptySegmentToUCharsMax];
4658 UChar * toUCharsPtr = toUChars;
4659 const UChar * toUCharsLimit = toUCharsPtr + kEmptySegmentToUCharsMax;
4660 const char * inCharsPtr = testPtr->inputText;
4661 const char * inCharsLimit = inCharsPtr + testPtr->inputTextLength;
4662 ucnv_toUnicode(cnv, &toUCharsPtr, toUCharsLimit, &inCharsPtr, inCharsLimit, NULL, true, &err);
4663 }
4664 ucnv_close(cnv);
4665 }
4666 }
4667
4668 static void
TestEBCDIC_STATEFUL(void)4669 TestEBCDIC_STATEFUL(void) {
4670 /* test input */
4671 static const uint8_t in[]={
4672 0x61,
4673 0x1a,
4674 0x0f, 0x4b,
4675 0x42,
4676 0x40,
4677 0x36,
4678 };
4679
4680 /* expected test results */
4681 static const int32_t results[]={
4682 /* number of bytes read, code point */
4683 1, 0x002f,
4684 1, 0x0092,
4685 2, 0x002e,
4686 1, 0xff62,
4687 1, 0x0020,
4688 1, 0x0096,
4689
4690 };
4691 static const uint8_t in2[]={
4692 0x0f,
4693 0xa1,
4694 0x01
4695 };
4696
4697 /* expected test results */
4698 static const int32_t results2[]={
4699 /* number of bytes read, code point */
4700 2, 0x203E,
4701 1, 0x0001,
4702 };
4703
4704 const char *source=(const char *)in, *limit=(const char *)in+sizeof(in);
4705 UErrorCode errorCode=U_ZERO_ERROR;
4706 UConverter *cnv=ucnv_open("ibm-930", &errorCode);
4707 if(U_FAILURE(errorCode)) {
4708 log_data_err("Unable to open a EBCDIC_STATEFUL(ibm-930) converter: %s\n", u_errorName(errorCode));
4709 return;
4710 }
4711 TestNextUChar(cnv, source, limit, results, "EBCDIC_STATEFUL(ibm-930)");
4712 ucnv_reset(cnv);
4713 /* Test the condition when source >= sourceLimit */
4714 TestNextUCharError(cnv, source, source, U_INDEX_OUTOFBOUNDS_ERROR, "sourceLimit <= source");
4715 ucnv_reset(cnv);
4716 /*Test for the condition where source > sourcelimit after consuming the shift character */
4717 {
4718 static const uint8_t source1[]={0x0f};
4719 TestNextUCharError(cnv, (const char*)source1, (const char*)source1+sizeof(source1), U_INDEX_OUTOFBOUNDS_ERROR, "a character is truncated");
4720 }
4721 /*Test for the condition where there is an invalid character*/
4722 ucnv_reset(cnv);
4723 {
4724 static const uint8_t source2[]={0x0e, 0x7F, 0xFF};
4725 TestNextUCharError(cnv, (const char*)source2, (const char*)source2+sizeof(source2), U_ZERO_ERROR, "an invalid character [EBCDIC STATEFUL]");
4726 }
4727 ucnv_reset(cnv);
4728 source=(const char*)in2;
4729 limit=(const char*)in2+sizeof(in2);
4730 TestNextUChar(cnv,source,limit,results2,"EBCDIC_STATEFUL(ibm-930),seq#2");
4731 ucnv_close(cnv);
4732
4733 }
4734
4735 static void
TestGB18030(void)4736 TestGB18030(void) {
4737 /* test input */
4738 static const uint8_t in[]={
4739 0x24,
4740 0x7f,
4741 0x81, 0x30, 0x81, 0x30,
4742 0xa8, 0xbf,
4743 0xa2, 0xe3,
4744 0xd2, 0xbb,
4745 0x82, 0x35, 0x8f, 0x33,
4746 0x84, 0x31, 0xa4, 0x39,
4747 0x90, 0x30, 0x81, 0x30,
4748 0xe3, 0x32, 0x9a, 0x35
4749 #if 0
4750 /*
4751 * Feature removed markus 2000-oct-26
4752 * Only some codepages must match surrogate pairs into supplementary code points -
4753 * see javadoc for ucnv_getNextUChar() and implementation notes in ucnvmbcs.c .
4754 * GB 18030 provides direct encodings for supplementary code points, therefore
4755 * it must not combine two single-encoded surrogates into one code point.
4756 */
4757 0x83, 0x36, 0xc8, 0x30, 0x83, 0x37, 0xb0, 0x34 /* separately encoded surrogates */
4758 #endif
4759 };
4760
4761 /* expected test results */
4762 static const int32_t results[]={
4763 /* number of bytes read, code point */
4764 1, 0x24,
4765 1, 0x7f,
4766 4, 0x80,
4767 2, 0x1f9,
4768 2, 0x20ac,
4769 2, 0x4e00,
4770 4, 0x9fa6,
4771 4, 0xffff,
4772 4, 0x10000,
4773 4, 0x10ffff
4774 #if 0
4775 /* Feature removed. See comment above. */
4776 8, 0x10000
4777 #endif
4778 };
4779
4780 /* const char *source=(const char *)in,*limit=(const char *)in+sizeof(in); */
4781 UErrorCode errorCode=U_ZERO_ERROR;
4782 UConverter *cnv=ucnv_open("gb18030", &errorCode);
4783 if(U_FAILURE(errorCode)) {
4784 log_data_err("Unable to open a gb18030 converter: %s\n", u_errorName(errorCode));
4785 return;
4786 }
4787 TestNextUChar(cnv, (const char *)in, (const char *)in+sizeof(in), results, "gb18030");
4788 ucnv_close(cnv);
4789 }
4790
4791 static void
TestLMBCS(void)4792 TestLMBCS(void) {
4793 /* LMBCS-1 string */
4794 static const uint8_t pszLMBCS[]={
4795 0x61,
4796 0x01, 0x29,
4797 0x81,
4798 0xA0,
4799 0x0F, 0x27,
4800 0x0F, 0x91,
4801 0x14, 0x0a, 0x74,
4802 0x14, 0xF6, 0x02,
4803 0x14, 0xd8, 0x4d, 0x14, 0xdc, 0x56, /* UTF-16 surrogate pair */
4804 0x10, 0x88, 0xA0,
4805 };
4806
4807 /* Unicode UChar32 equivalents */
4808 static const UChar32 pszUnicode32[]={
4809 /* code point */
4810 0x00000061,
4811 0x00002013,
4812 0x000000FC,
4813 0x000000E1,
4814 0x00000007,
4815 0x00000091,
4816 0x00000a74,
4817 0x00000200,
4818 0x00023456, /* code point for surrogate pair */
4819 0x00005516
4820 };
4821
4822 /* Unicode UChar equivalents */
4823 static const UChar pszUnicode[]={
4824 /* code point */
4825 0x0061,
4826 0x2013,
4827 0x00FC,
4828 0x00E1,
4829 0x0007,
4830 0x0091,
4831 0x0a74,
4832 0x0200,
4833 0xD84D, /* low surrogate */
4834 0xDC56, /* high surrogate */
4835 0x5516
4836 };
4837
4838 /* expected test results */
4839 static const int offsets32[]={
4840 /* number of bytes read, code point */
4841 0,
4842 1,
4843 3,
4844 4,
4845 5,
4846 7,
4847 9,
4848 12,
4849 15,
4850 21,
4851 24
4852 };
4853
4854 /* expected test results */
4855 static const int offsets[]={
4856 /* number of bytes read, code point */
4857 0,
4858 1,
4859 3,
4860 4,
4861 5,
4862 7,
4863 9,
4864 12,
4865 15,
4866 18,
4867 21,
4868 24
4869 };
4870
4871
4872 UConverter *cnv;
4873
4874 #define NAME_LMBCS_1 "LMBCS-1"
4875 #define NAME_LMBCS_2 "LMBCS-2"
4876
4877
4878 /* Some basic open/close/property tests on some LMBCS converters */
4879 {
4880
4881 char expected_subchars[] = {0x3F}; /* ANSI Question Mark */
4882 char new_subchars [] = {0x7F}; /* subst char used by SmartSuite..*/
4883 char get_subchars [1];
4884 const char * get_name;
4885 UConverter *cnv1;
4886 UConverter *cnv2;
4887
4888 int8_t len = sizeof(get_subchars);
4889
4890 UErrorCode errorCode=U_ZERO_ERROR;
4891
4892 /* Open */
4893 cnv1=ucnv_open(NAME_LMBCS_1, &errorCode);
4894 if(U_FAILURE(errorCode)) {
4895 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
4896 return;
4897 }
4898 cnv2=ucnv_open(NAME_LMBCS_2, &errorCode);
4899 if(U_FAILURE(errorCode)) {
4900 log_data_err("Unable to open a LMBCS-2 converter: %s\n", u_errorName(errorCode));
4901 return;
4902 }
4903
4904 /* Name */
4905 get_name = ucnv_getName (cnv1, &errorCode);
4906 if (strcmp(NAME_LMBCS_1,get_name)){
4907 log_err("Unexpected converter name: %s\n", get_name);
4908 }
4909 get_name = ucnv_getName (cnv2, &errorCode);
4910 if (strcmp(NAME_LMBCS_2,get_name)){
4911 log_err("Unexpected converter name: %s\n", get_name);
4912 }
4913
4914 /* substitution chars */
4915 ucnv_getSubstChars (cnv1, get_subchars, &len, &errorCode);
4916 if(U_FAILURE(errorCode)) {
4917 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4918 }
4919 if (len!=1){
4920 log_err("Unexpected length of sub chars\n");
4921 }
4922 if (get_subchars[0] != expected_subchars[0]){
4923 log_err("Unexpected value of sub chars\n");
4924 }
4925 ucnv_setSubstChars (cnv2,new_subchars, len, &errorCode);
4926 if(U_FAILURE(errorCode)) {
4927 log_err("Failure on set subst chars: %s\n", u_errorName(errorCode));
4928 }
4929 ucnv_getSubstChars (cnv2, get_subchars, &len, &errorCode);
4930 if(U_FAILURE(errorCode)) {
4931 log_err("Failure on get subst chars: %s\n", u_errorName(errorCode));
4932 }
4933 if (len!=1){
4934 log_err("Unexpected length of sub chars\n");
4935 }
4936 if (get_subchars[0] != new_subchars[0]){
4937 log_err("Unexpected value of sub chars\n");
4938 }
4939 ucnv_close(cnv1);
4940 ucnv_close(cnv2);
4941
4942 }
4943
4944 /* LMBCS to Unicode - offsets */
4945 {
4946 UErrorCode errorCode=U_ZERO_ERROR;
4947
4948 const char * pSource = (const char *)pszLMBCS;
4949 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
4950
4951 UChar Out [sizeof(pszUnicode) + 1];
4952 UChar * pOut = Out;
4953 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
4954
4955 int32_t off [sizeof(offsets)];
4956
4957 /* last 'offset' in expected results is just the final size.
4958 (Makes other tests easier). Compensate here: */
4959
4960 off[UPRV_LENGTHOF(offsets)-1] = sizeof(pszLMBCS);
4961
4962
4963
4964 cnv=ucnv_open("lmbcs", &errorCode); /* use generic name for LMBCS-1 */
4965 if(U_FAILURE(errorCode)) {
4966 log_data_err("Unable to open a LMBCS converter: %s\n", u_errorName(errorCode));
4967 return;
4968 }
4969
4970
4971
4972 ucnv_toUnicode (cnv,
4973 &pOut,
4974 OutLimit,
4975 &pSource,
4976 sourceLimit,
4977 off,
4978 true,
4979 &errorCode);
4980
4981
4982 if (memcmp(off,offsets,sizeof(offsets)))
4983 {
4984 log_err("LMBCS->Uni: Calculated offsets do not match expected results\n");
4985 }
4986 if (memcmp(Out,pszUnicode,sizeof(pszUnicode)))
4987 {
4988 log_err("LMBCS->Uni: Calculated codepoints do not match expected results\n");
4989 }
4990 ucnv_close(cnv);
4991 }
4992 {
4993 /* LMBCS to Unicode - getNextUChar */
4994 const char * sourceStart;
4995 const char *source=(const char *)pszLMBCS;
4996 const char *limit=(const char *)pszLMBCS+sizeof(pszLMBCS);
4997 const UChar32 *results= pszUnicode32;
4998 const int *off = offsets32;
4999
5000 UErrorCode errorCode=U_ZERO_ERROR;
5001 UChar32 uniChar;
5002
5003 cnv=ucnv_open("LMBCS-1", &errorCode);
5004 if(U_FAILURE(errorCode)) {
5005 log_data_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5006 return;
5007 }
5008 else
5009 {
5010
5011 while(source<limit) {
5012 sourceStart=source;
5013 uniChar=ucnv_getNextUChar(cnv, &source, source + (off[1] - off[0]), &errorCode);
5014 if(U_FAILURE(errorCode)) {
5015 log_err("LMBCS-1 ucnv_getNextUChar() failed: %s\n", u_errorName(errorCode));
5016 break;
5017 } else if(source-sourceStart != off[1] - off[0] || uniChar != *results) {
5018 log_err("LMBCS-1 ucnv_getNextUChar() result %lx from %d bytes, should have been %lx from %d bytes.\n",
5019 uniChar, (source-sourceStart), *results, *off);
5020 break;
5021 }
5022 results++;
5023 off++;
5024 }
5025 }
5026 ucnv_close(cnv);
5027 }
5028 { /* test locale & optimization group operations: Unicode to LMBCS */
5029
5030 UErrorCode errorCode=U_ZERO_ERROR;
5031 UConverter *cnv16he = ucnv_open("LMBCS-16,locale=he", &errorCode);
5032 UConverter *cnv16jp = ucnv_open("LMBCS-16,locale=ja_JP", &errorCode);
5033 UConverter *cnv01us = ucnv_open("LMBCS-1,locale=us_EN", &errorCode);
5034 UChar uniString [] = {0x0192}; /* Latin Small letter f with hook */
5035 const UChar * pUniOut = uniString;
5036 UChar * pUniIn = uniString;
5037 uint8_t lmbcsString [4];
5038 const char * pLMBCSOut = (const char *)lmbcsString;
5039 char * pLMBCSIn = (char *)lmbcsString;
5040
5041 /* 0192 (hook) converts to both group 3 & group 1. input locale should differentiate */
5042 ucnv_fromUnicode (cnv16he,
5043 &pLMBCSIn, (pLMBCSIn + UPRV_LENGTHOF(lmbcsString)),
5044 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5045 NULL, 1, &errorCode);
5046
5047 if (lmbcsString[0] != 0x3 || lmbcsString[1] != 0x83)
5048 {
5049 log_err("LMBCS-16,locale=he gives unexpected translation\n");
5050 }
5051
5052 pLMBCSIn= (char *)lmbcsString;
5053 pUniOut = uniString;
5054 ucnv_fromUnicode (cnv01us,
5055 &pLMBCSIn, (const char *)(lmbcsString + UPRV_LENGTHOF(lmbcsString)),
5056 &pUniOut, pUniOut + UPRV_LENGTHOF(uniString),
5057 NULL, 1, &errorCode);
5058
5059 if (lmbcsString[0] != 0x9F)
5060 {
5061 log_err("LMBCS-1,locale=US gives unexpected translation\n");
5062 }
5063
5064 /* single byte char from mbcs char set */
5065 lmbcsString[0] = 0xAE; /* 1/2 width katakana letter small Yo */
5066 pLMBCSOut = (const char *)lmbcsString;
5067 pUniIn = uniString;
5068 ucnv_toUnicode (cnv16jp,
5069 &pUniIn, pUniIn + 1,
5070 &pLMBCSOut, (pLMBCSOut + 1),
5071 NULL, 1, &errorCode);
5072 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+1 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5073 {
5074 log_err("Unexpected results from LMBCS-16 single byte char\n");
5075 }
5076 /* convert to group 1: should be 3 bytes */
5077 pLMBCSIn = (char *)lmbcsString;
5078 pUniOut = uniString;
5079 ucnv_fromUnicode (cnv01us,
5080 &pLMBCSIn, (const char *)(pLMBCSIn + 3),
5081 &pUniOut, pUniOut + 1,
5082 NULL, 1, &errorCode);
5083 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+3 || pUniOut != uniString+1
5084 || lmbcsString[0] != 0x10 || lmbcsString[1] != 0x10 || lmbcsString[2] != 0xAE)
5085 {
5086 log_err("Unexpected results to LMBCS-1 single byte mbcs char\n");
5087 }
5088 pLMBCSOut = (const char *)lmbcsString;
5089 pUniIn = uniString;
5090 ucnv_toUnicode (cnv01us,
5091 &pUniIn, pUniIn + 1,
5092 &pLMBCSOut, (const char *)(pLMBCSOut + 3),
5093 NULL, 1, &errorCode);
5094 if (U_FAILURE(errorCode) || pLMBCSOut != (const char *)lmbcsString+3 || pUniIn != uniString+1 || uniString[0] != 0xFF6E)
5095 {
5096 log_err("Unexpected results from LMBCS-1 single byte mbcs char\n");
5097 }
5098 pLMBCSIn = (char *)lmbcsString;
5099 pUniOut = uniString;
5100 ucnv_fromUnicode (cnv16jp,
5101 &pLMBCSIn, (const char *)(pLMBCSIn + 1),
5102 &pUniOut, pUniOut + 1,
5103 NULL, 1, &errorCode);
5104 if (U_FAILURE(errorCode) || pLMBCSIn != (const char *)lmbcsString+1 || pUniOut != uniString+1 || lmbcsString[0] != 0xAE)
5105 {
5106 log_err("Unexpected results to LMBCS-16 single byte mbcs char\n");
5107 }
5108 ucnv_close(cnv16he);
5109 ucnv_close(cnv16jp);
5110 ucnv_close(cnv01us);
5111 }
5112 {
5113 /* Small source buffer testing, LMBCS -> Unicode */
5114
5115 UErrorCode errorCode=U_ZERO_ERROR;
5116
5117 const char * pSource = (const char *)pszLMBCS;
5118 const char * sourceLimit = (const char *)pszLMBCS + sizeof(pszLMBCS);
5119 int codepointCount = 0;
5120
5121 UChar Out [sizeof(pszUnicode) + 1];
5122 UChar * pOut = Out;
5123 UChar * OutLimit = Out + UPRV_LENGTHOF(pszUnicode);
5124
5125
5126 cnv = ucnv_open(NAME_LMBCS_1, &errorCode);
5127 if(U_FAILURE(errorCode)) {
5128 log_err("Unable to open a LMBCS-1 converter: %s\n", u_errorName(errorCode));
5129 return;
5130 }
5131
5132
5133 while ((pSource < sourceLimit) && U_SUCCESS (errorCode))
5134 {
5135 ucnv_toUnicode (cnv,
5136 &pOut,
5137 OutLimit,
5138 &pSource,
5139 (pSource+1), /* claim that this is a 1- byte buffer */
5140 NULL,
5141 false, /* false means there might be more chars in the next buffer */
5142 &errorCode);
5143
5144 if (U_SUCCESS (errorCode))
5145 {
5146 if ((pSource - (const char *)pszLMBCS) == offsets [codepointCount+1])
5147 {
5148 /* we are on to the next code point: check value */
5149
5150 if (Out[0] != pszUnicode[codepointCount]){
5151 log_err("LMBCS->Uni result %lx should have been %lx \n",
5152 Out[0], pszUnicode[codepointCount]);
5153 }
5154
5155 pOut = Out; /* reset for accumulating next code point */
5156 codepointCount++;
5157 }
5158 }
5159 else
5160 {
5161 log_err("Unexpected Error on toUnicode: %s\n", u_errorName(errorCode));
5162 }
5163 }
5164 {
5165 /* limits & surrogate error testing */
5166 char LIn [sizeof(pszLMBCS)];
5167 const char * pLIn = LIn;
5168
5169 char LOut [sizeof(pszLMBCS)];
5170 char * pLOut = LOut;
5171
5172 UChar UOut [sizeof(pszUnicode)];
5173 UChar * pUOut = UOut;
5174
5175 UChar UIn [sizeof(pszUnicode)];
5176 const UChar * pUIn = UIn;
5177
5178 int32_t off [sizeof(offsets)];
5179 UChar32 uniChar;
5180
5181 errorCode=U_ZERO_ERROR;
5182
5183 /* negative source request should always return U_ILLEGAL_ARGUMENT_ERROR */
5184 pUIn++;
5185 ucnv_fromUnicode(cnv, &pLOut, pLOut+1, &pUIn, pUIn-1, off, false, &errorCode);
5186 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5187 {
5188 log_err("Unexpected Error on negative source request to ucnv_fromUnicode: %s\n", u_errorName(errorCode));
5189 }
5190 pUIn--;
5191
5192 errorCode=U_ZERO_ERROR;
5193 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)(pLIn-1),off,false, &errorCode);
5194 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5195 {
5196 log_err("Unexpected Error on negative source request to ucnv_toUnicode: %s\n", u_errorName(errorCode));
5197 }
5198 errorCode=U_ZERO_ERROR;
5199
5200 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)(pLIn-1), &errorCode);
5201 if (errorCode != U_ILLEGAL_ARGUMENT_ERROR)
5202 {
5203 log_err("Unexpected Error on negative source request to ucnv_getNextUChar: %s\n", u_errorName(errorCode));
5204 }
5205 errorCode=U_ZERO_ERROR;
5206
5207 /* 0 byte source request - no error, no pointer movement */
5208 ucnv_toUnicode(cnv, &pUOut,pUOut+1,(const char **)&pLIn,(const char *)pLIn,off,false, &errorCode);
5209 ucnv_fromUnicode(cnv, &pLOut,pLOut+1,&pUIn,pUIn,off,false, &errorCode);
5210 if(U_FAILURE(errorCode)) {
5211 log_err("0 byte source request: unexpected error: %s\n", u_errorName(errorCode));
5212 }
5213 if ((pUOut != UOut) || (pUIn != UIn) || (pLOut != LOut) || (pLIn != LIn))
5214 {
5215 log_err("Unexpected pointer move in 0 byte source request \n");
5216 }
5217 /*0 byte source request - GetNextUChar : error & value == fffe or ffff */
5218 uniChar = ucnv_getNextUChar(cnv, (const char **)&pLIn, (const char *)pLIn, &errorCode);
5219 if (errorCode != U_INDEX_OUTOFBOUNDS_ERROR)
5220 {
5221 log_err("Unexpected Error on 0-byte source request to ucnv_getnextUChar: %s\n", u_errorName(errorCode));
5222 }
5223 if (((uint32_t)uniChar - 0xfffe) > 1) /* not 0xfffe<=uniChar<=0xffff */
5224 {
5225 log_err("Unexpected value on 0-byte source request to ucnv_getnextUChar \n");
5226 }
5227 errorCode = U_ZERO_ERROR;
5228
5229 /* running out of target room : U_BUFFER_OVERFLOW_ERROR */
5230
5231 pUIn = pszUnicode;
5232 ucnv_fromUnicode(cnv, &pLOut,pLOut+offsets[4],&pUIn,pUIn+UPRV_LENGTHOF(pszUnicode),off,false, &errorCode);
5233 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pLOut != LOut + offsets[4] || pUIn != pszUnicode+4 )
5234 {
5235 log_err("Unexpected results on out of target room to ucnv_fromUnicode\n");
5236 }
5237
5238 errorCode = U_ZERO_ERROR;
5239
5240 pLIn = (const char *)pszLMBCS;
5241 ucnv_toUnicode(cnv, &pUOut,pUOut+4,&pLIn,(pLIn+sizeof(pszLMBCS)),off,false, &errorCode);
5242 if (errorCode != U_BUFFER_OVERFLOW_ERROR || pUOut != UOut + 4 || pLIn != (const char *)pszLMBCS+offsets[4])
5243 {
5244 log_err("Unexpected results on out of target room to ucnv_toUnicode\n");
5245 }
5246
5247 /* unpaired or chopped LMBCS surrogates */
5248
5249 /* OK high surrogate, Low surrogate is chopped */
5250 LIn [0] = (char)0x14;
5251 LIn [1] = (char)0xD8;
5252 LIn [2] = (char)0x01;
5253 LIn [3] = (char)0x14;
5254 LIn [4] = (char)0xDC;
5255 pLIn = LIn;
5256 errorCode = U_ZERO_ERROR;
5257 pUOut = UOut;
5258
5259 ucnv_setToUCallBack(cnv, UCNV_TO_U_CALLBACK_STOP, NULL, NULL, NULL, &errorCode);
5260 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5261 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5262 {
5263 log_err("Unexpected results on chopped low surrogate\n");
5264 }
5265
5266 /* chopped at surrogate boundary */
5267 LIn [0] = (char)0x14;
5268 LIn [1] = (char)0xD8;
5269 LIn [2] = (char)0x01;
5270 pLIn = LIn;
5271 errorCode = U_ZERO_ERROR;
5272 pUOut = UOut;
5273
5274 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+3),off,true, &errorCode);
5275 if (UOut[0] != 0xD801 || U_FAILURE(errorCode) || pUOut != UOut + 1 || pLIn != LIn + 3)
5276 {
5277 log_err("Unexpected results on chopped at surrogate boundary \n");
5278 }
5279
5280 /* unpaired surrogate plus valid Unichar */
5281 LIn [0] = (char)0x14;
5282 LIn [1] = (char)0xD8;
5283 LIn [2] = (char)0x01;
5284 LIn [3] = (char)0x14;
5285 LIn [4] = (char)0xC9;
5286 LIn [5] = (char)0xD0;
5287 pLIn = LIn;
5288 errorCode = U_ZERO_ERROR;
5289 pUOut = UOut;
5290
5291 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+6),off,true, &errorCode);
5292 if (UOut[0] != 0xD801 || UOut[1] != 0xC9D0 || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 6)
5293 {
5294 log_err("Unexpected results after unpaired surrogate plus valid Unichar \n");
5295 }
5296
5297 /* unpaired surrogate plus chopped Unichar */
5298 LIn [0] = (char)0x14;
5299 LIn [1] = (char)0xD8;
5300 LIn [2] = (char)0x01;
5301 LIn [3] = (char)0x14;
5302 LIn [4] = (char)0xC9;
5303
5304 pLIn = LIn;
5305 errorCode = U_ZERO_ERROR;
5306 pUOut = UOut;
5307
5308 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5309 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 5)
5310 {
5311 log_err("Unexpected results after unpaired surrogate plus chopped Unichar \n");
5312 }
5313
5314 /* unpaired surrogate plus valid non-Unichar */
5315 LIn [0] = (char)0x14;
5316 LIn [1] = (char)0xD8;
5317 LIn [2] = (char)0x01;
5318 LIn [3] = (char)0x0F;
5319 LIn [4] = (char)0x3B;
5320
5321 pLIn = LIn;
5322 errorCode = U_ZERO_ERROR;
5323 pUOut = UOut;
5324
5325 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+5),off,true, &errorCode);
5326 if (UOut[0] != 0xD801 || UOut[1] != 0x1B || U_FAILURE(errorCode) || pUOut != UOut + 2 || pLIn != LIn + 5)
5327 {
5328 log_err("Unexpected results after unpaired surrogate plus valid non-Unichar\n");
5329 }
5330
5331 /* unpaired surrogate plus chopped non-Unichar */
5332 LIn [0] = (char)0x14;
5333 LIn [1] = (char)0xD8;
5334 LIn [2] = (char)0x01;
5335 LIn [3] = (char)0x0F;
5336
5337 pLIn = LIn;
5338 errorCode = U_ZERO_ERROR;
5339 pUOut = UOut;
5340
5341 ucnv_toUnicode(cnv, &pUOut,pUOut+UPRV_LENGTHOF(UOut),(const char **)&pLIn,(const char *)(pLIn+4),off,true, &errorCode);
5342
5343 if (UOut[0] != 0xD801 || errorCode != U_TRUNCATED_CHAR_FOUND || pUOut != UOut + 1 || pLIn != LIn + 4)
5344 {
5345 log_err("Unexpected results after unpaired surrogate plus chopped non-Unichar\n");
5346 }
5347 }
5348 }
5349 ucnv_close(cnv); /* final cleanup */
5350 }
5351
5352
TestJitterbug255(void)5353 static void TestJitterbug255(void)
5354 {
5355 static const uint8_t testBytes[] = { 0x95, 0xcf, 0x8a, 0xb7, 0x0d, 0x0a, 0x00 };
5356 const char *testBuffer = (const char *)testBytes;
5357 const char *testEnd = (const char *)testBytes + sizeof(testBytes);
5358 UErrorCode status = U_ZERO_ERROR;
5359 /*UChar32 result;*/
5360 UConverter *cnv = 0;
5361
5362 cnv = ucnv_open("shift-jis", &status);
5363 if (U_FAILURE(status) || cnv == 0) {
5364 log_data_err("Failed to open the converter for SJIS.\n");
5365 return;
5366 }
5367 while (testBuffer != testEnd)
5368 {
5369 /*result = */ucnv_getNextUChar (cnv, &testBuffer, testEnd , &status);
5370 if (U_FAILURE(status))
5371 {
5372 log_err("Failed to convert the next UChar for SJIS.\n");
5373 break;
5374 }
5375 }
5376 ucnv_close(cnv);
5377 }
5378
TestEBCDICUS4XML(void)5379 static void TestEBCDICUS4XML(void)
5380 {
5381 UChar unicodes_x[] = {0x0000, 0x0000, 0x0000, 0x0000};
5382 static const UChar toUnicodeMaps_x[] = {0x000A, 0x000A, 0x000D, 0x0000};
5383 static const char fromUnicodeMaps_x[] = {0x25, 0x25, 0x0D, 0x00};
5384 static const char newLines_x[] = {0x25, 0x15, 0x0D, 0x00};
5385 char target_x[] = {0x00, 0x00, 0x00, 0x00};
5386 UChar *unicodes = unicodes_x;
5387 const UChar *toUnicodeMaps = toUnicodeMaps_x;
5388 char *target = target_x;
5389 const char* fromUnicodeMaps = fromUnicodeMaps_x, *newLines = newLines_x;
5390 UErrorCode status = U_ZERO_ERROR;
5391 UConverter *cnv = 0;
5392
5393 cnv = ucnv_open("ebcdic-xml-us", &status);
5394 if (U_FAILURE(status) || cnv == 0) {
5395 log_data_err("Failed to open the converter for EBCDIC-XML-US.\n");
5396 return;
5397 }
5398 ucnv_toUnicode(cnv, &unicodes, unicodes+3, (const char**)&newLines, newLines+3, NULL, true, &status);
5399 if (U_FAILURE(status) || memcmp(unicodes_x, toUnicodeMaps, sizeof(UChar)*3) != 0) {
5400 log_err("To Unicode conversion failed in EBCDICUS4XML test. %s\n",
5401 u_errorName(status));
5402 printUSeqErr(unicodes_x, 3);
5403 printUSeqErr(toUnicodeMaps, 3);
5404 }
5405 status = U_ZERO_ERROR;
5406 ucnv_fromUnicode(cnv, &target, target+3, (const UChar**)&toUnicodeMaps, toUnicodeMaps+3, NULL, true, &status);
5407 if (U_FAILURE(status) || memcmp(target_x, fromUnicodeMaps, sizeof(char)*3) != 0) {
5408 log_err("From Unicode conversion failed in EBCDICUS4XML test. %s\n",
5409 u_errorName(status));
5410 printSeqErr((const unsigned char*)target_x, 3);
5411 printSeqErr((const unsigned char*)fromUnicodeMaps, 3);
5412 }
5413 ucnv_close(cnv);
5414 }
#endif /* #if !UCONFIG_NO_LEGACY_CONVERSION */
5416
5417 #if !UCONFIG_NO_COLLATION
5418
TestJitterbug981(void)5419 static void TestJitterbug981(void){
5420 const UChar* rules;
5421 int32_t rules_length, target_cap, bytes_needed, buff_size;
5422 UErrorCode status = U_ZERO_ERROR;
5423 UConverter *utf8cnv;
5424 UCollator* myCollator;
5425 char *buff;
5426 int numNeeded=0;
5427 utf8cnv = ucnv_open ("utf8", &status);
5428 if(U_FAILURE(status)){
5429 log_err("Could not open UTF-8 converter. Error: %s\n", u_errorName(status));
5430 return;
5431 }
5432 myCollator = ucol_open("zh", &status);
5433 if(U_FAILURE(status)){
5434 log_data_err("Could not open collator for zh locale. Error: %s\n", u_errorName(status));
5435 ucnv_close(utf8cnv);
5436 return;
5437 }
5438
5439 rules = ucol_getRules(myCollator, &rules_length);
5440 if(rules_length == 0) {
5441 log_data_err("missing zh tailoring rule string\n");
5442 ucol_close(myCollator);
5443 ucnv_close(utf8cnv);
5444 return;
5445 }
5446 buff_size = rules_length * ucnv_getMaxCharSize(utf8cnv);
5447 buff = malloc(buff_size);
5448
5449 target_cap = 0;
5450 do {
5451 ucnv_reset(utf8cnv);
5452 status = U_ZERO_ERROR;
5453 if(target_cap >= buff_size) {
5454 log_err("wanted %d bytes, only %d available\n", target_cap, buff_size);
5455 break;
5456 }
5457 bytes_needed = ucnv_fromUChars(utf8cnv, buff, target_cap,
5458 rules, rules_length, &status);
5459 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5460 if(numNeeded!=0 && numNeeded!= bytes_needed){
5461 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5462 break;
5463 }
5464 numNeeded = bytes_needed;
5465 } while (status == U_BUFFER_OVERFLOW_ERROR);
5466 ucol_close(myCollator);
5467 ucnv_close(utf8cnv);
5468 free(buff);
5469 }
5470
5471 #endif
5472
5473 #if !UCONFIG_NO_LEGACY_CONVERSION
TestJitterbug1293(void)5474 static void TestJitterbug1293(void){
5475 static const UChar src[] = {0x30DE, 0x30A4, 0x5E83, 0x544A, 0x30BF, 0x30A4, 0x30D7,0x000};
5476 char target[256];
5477 UErrorCode status = U_ZERO_ERROR;
5478 UConverter* conv=NULL;
5479 int32_t target_cap, bytes_needed, numNeeded = 0;
5480 conv = ucnv_open("shift-jis",&status);
5481 if(U_FAILURE(status)){
5482 log_data_err("Could not open Shift-Jis converter. Error: %s", u_errorName(status));
5483 return;
5484 }
5485
5486 do{
5487 target_cap =0;
5488 bytes_needed = ucnv_fromUChars(conv,target,256,src,u_strlen(src),&status);
5489 target_cap = (bytes_needed > target_cap) ? bytes_needed : target_cap +1;
5490 if(numNeeded!=0 && numNeeded!= bytes_needed){
5491 log_err("ucnv_fromUChars returns different values for required capacity in pre-flight and conversion modes");
5492 }
5493 numNeeded = bytes_needed;
5494 } while (status == U_BUFFER_OVERFLOW_ERROR);
5495 if(U_FAILURE(status)){
5496 log_err("An error occurred in ucnv_fromUChars. Error: %s", u_errorName(status));
5497 return;
5498 }
5499 ucnv_close(conv);
5500 }
5501 #endif
5502
TestJB5275_1(void)5503 static void TestJB5275_1(void){
5504
5505 static const char* data = "\x3B\xB3\x0A" /* Easy characters */
5506 "\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5507 /* Switch script: */
5508 "\xEF\x43\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Bengali test */
5509 "\x3B\xB3\x0A" /* Easy characters - new line, so should default!*/
5510 "\xEF\x40\x3B\xB3\x0A";
5511 static const UChar expected[] ={
5512 0x003b, 0x0a15, 0x000a, /* Easy characters */
5513 0x0a5c, 0x0a4d, 0x0a39, 0x0a5c, 0x0a4d, 0x0a39, 0x000a, /* Gurmukhi test */
5514 0x09dd, 0x09dc, 0x09cd, 0x09b9, 0x000a, /* Switch script: to Bengali*/
5515 0x003b, 0x0a15, 0x000a, /* Easy characters - new line, so should default!*/
5516 0x003b, 0x0a15, 0x000a /* Back to Gurmukhi*/
5517 };
5518
5519 UErrorCode status = U_ZERO_ERROR;
5520 UConverter* conv = ucnv_open("iscii-gur", &status);
5521 UChar dest[100] = {'\0'};
5522 UChar* target = dest;
5523 UChar* targetLimit = dest+100;
5524 const char* source = data;
5525 const char* sourceLimit = data+strlen(data);
5526 const UChar* exp = expected;
5527
5528 if (U_FAILURE(status)) {
5529 log_data_err("Unable to open converter: iscii-gur got errorCode: %s\n", u_errorName(status));
5530 return;
5531 }
5532
5533 log_verbose("Testing switching back to default script when new line is encountered.\n");
5534 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, true, &status);
5535 if(U_FAILURE(status)){
5536 log_err("conversion failed: %s \n", u_errorName(status));
5537 }
5538 targetLimit = target;
5539 target = dest;
5540 printUSeq(target, (int)(targetLimit-target));
5541 while(target<targetLimit){
5542 if(*exp!=*target){
5543 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5544 }
5545 target++;
5546 exp++;
5547 }
5548 ucnv_close(conv);
5549 }
5550
TestJB5275(void)5551 static void TestJB5275(void){
5552 static const char* data =
5553 /* "\xEF\x42\xEF\x41\xA4\xD5\xE5\xB3\xEA\x0A" unsupported sequence \xEF\x41 */
5554 /* "\xEF\x42\xEF\x41\xD4\xDA\xB3\xE8\xEA\x0A" unsupported sequence \xEF\x41 */
5555 /* "\xEF\x44\xEF\x41\xC8\xE1\x8B\xDB\xB3\xE8 \xB3\xE4\xC1\xE8\x0A" unsupported sequence \xEF\x41 */
5556 "\xEF\x4B\xC0\xE9\xBF\xE9\xE8\xD8\x0A" /* Gurmukhi test */
5557 "\xEF\x4A\xC0\xD4\xBF\xD4\xE8\xD8\x0A" /* Gujarati test */
5558 "\xEF\x48\x38\xB3\x0A" /* Kannada test */
5559 "\xEF\x49\x39\xB3\x0A" /* Malayalam test */
5560 "\xEF\x4A\x3A\xB3\x0A" /* Gujarati test */
5561 "\xEF\x4B\x3B\xB3\x0A" /* Punjabi test */
5562 /* "\xEF\x4C\x3C\xB3\x0A" unsupported sequence \xEF\x41 */;
5563 static const UChar expected[] ={
5564 0x0A5C, 0x0A4D, 0x0A39, 0x0A5C, 0x0A4D, 0x0A39, 0x000A, /* Gurmukhi test */
5565 0x0AA2, 0x0AB5, 0x0AA1, 0x0AB5, 0x0ACD, 0x0AB9, 0x000A, /* Gujarati test */
5566 0x0038, 0x0C95, 0x000A, /* Kannada test */
5567 0x0039, 0x0D15, 0x000A, /* Malayalam test */
5568 0x003A, 0x0A95, 0x000A, /* Gujarati test */
5569 0x003B, 0x0A15, 0x000A, /* Punjabi test */
5570 };
5571
5572 UErrorCode status = U_ZERO_ERROR;
5573 UConverter* conv = ucnv_open("iscii", &status);
5574 UChar dest[100] = {'\0'};
5575 UChar* target = dest;
5576 UChar* targetLimit = dest+100;
5577 const char* source = data;
5578 const char* sourceLimit = data+strlen(data);
5579 const UChar* exp = expected;
5580 ucnv_toUnicode(conv, &target, targetLimit, &source, sourceLimit, NULL, true, &status);
5581 if(U_FAILURE(status)){
5582 log_data_err("conversion failed: %s \n", u_errorName(status));
5583 }
5584 targetLimit = target;
5585 target = dest;
5586
5587 printUSeq(target, (int)(targetLimit-target));
5588
5589 while(target<targetLimit){
5590 if(*exp!=*target){
5591 log_err("did not get the expected output. \\u%04X != \\u%04X (got)\n", *exp, *target);
5592 }
5593 target++;
5594 exp++;
5595 }
5596 ucnv_close(conv);
5597 }
5598
5599 static void
TestIsFixedWidth(void)5600 TestIsFixedWidth(void) {
5601 UErrorCode status = U_ZERO_ERROR;
5602 UConverter *cnv = NULL;
5603 int32_t i;
5604
5605 const char *fixedWidth[] = {
5606 "US-ASCII",
5607 "UTF32",
5608 "ibm-5478_P100-1995"
5609 };
5610
5611 const char *notFixedWidth[] = {
5612 "GB18030",
5613 "UTF8",
5614 "windows-949-2000",
5615 "UTF16"
5616 };
5617
5618 for (i = 0; i < UPRV_LENGTHOF(fixedWidth); i++) {
5619 cnv = ucnv_open(fixedWidth[i], &status);
5620 if (cnv == NULL || U_FAILURE(status)) {
5621 log_data_err("Error open converter: %s - %s \n", fixedWidth[i], u_errorName(status));
5622 continue;
5623 }
5624
5625 if (!ucnv_isFixedWidth(cnv, &status)) {
5626 log_err("%s is a fixedWidth converter but returned false.\n", fixedWidth[i]);
5627 }
5628 ucnv_close(cnv);
5629 }
5630
5631 for (i = 0; i < UPRV_LENGTHOF(notFixedWidth); i++) {
5632 cnv = ucnv_open(notFixedWidth[i], &status);
5633 if (cnv == NULL || U_FAILURE(status)) {
5634 log_data_err("Error open converter: %s - %s \n", notFixedWidth[i], u_errorName(status));
5635 continue;
5636 }
5637
5638 if (ucnv_isFixedWidth(cnv, &status)) {
5639 log_err("%s is NOT a fixedWidth converter but returned true.\n", notFixedWidth[i]);
5640 }
5641 ucnv_close(cnv);
5642 }
5643 }
5644